| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920 |
- """
- PPT quality inspector and auto-fix engine.
- Inspects generated PPT for layout, visual, content, and data issues,
- then auto-fixes them iteratively until quality threshold is met.
- """
- import re
- from pptx import Presentation
- from pptx.util import Emu, Pt
- from pptx.dml.color import RGBColor
- from collections import Counter
- from quality_rules import (
- QUALITY_RULES, SEVERITY_WEIGHTS, CATEGORY_WEIGHTS,
- FILL_RATIO_THRESHOLDS, FONT_SIZE_MIN, FONT_SIZE_MAX,
- TEXT_MIN_LENGTH, INSIGHT_MIN_COUNT, PAGE_MIN_TEXT_LENGTH,
- SAFE_MARGIN, CONTENT_LEFT, CONTENT_TOP_BASE,
- FOOTER_TOP, SLIDE_WIDTH, SLIDE_HEIGHT, DEFAULT_FONT,
- get_quality_label, calculate_score,
- )
- from page_layouts import calculate_fill_ratio, ensure_safe_position
# Page types treated as forecast/plan pages. _check_content applies the same
# fill-ratio thresholds to these as to the other analysis page types.
FORECAST_PAGE_TYPES = {
    'forecast', 'prediction', 'plan',
    'monthly_forecast', 'monthly_plan', 'next_month_plan',
    'custom_forecast', 'custom_prediction',
}
class QualityIssue:
    """A single finding produced by the quality inspector.

    The constructor stores its arguments unchanged as attributes:
    ``severity``, ``category``, ``page_index`` (zero-based slide index;
    negative for findings not tied to one slide), ``description``,
    ``rule_id``, ``auto_fixable`` and ``fix_data`` (a dict the fixers read
    and annotate; a falsy value is replaced by a new empty dict).
    """

    def __init__(self, severity, category, page_index, description,
                 rule_id='', auto_fixable=True, fix_data=None):
        self.severity = severity
        self.category = category
        self.page_index = page_index
        self.description = description
        self.rule_id = rule_id
        self.auto_fixable = auto_fixable
        self.fix_data = fix_data or {}

    def __repr__(self):
        # Document-level findings carry a negative index; printing
        # "Page 0" for them would point at a slide that does not exist.
        if self.page_index < 0:
            where = 'Global'
        else:
            where = f'Page {self.page_index+1}'
        return f"[{self.severity}] {where}: {self.description}"
class QualityInspector:
    """Inspect a presentation for quality issues and apply automatic fixes."""

    def __init__(self, theme_colors: dict = None, layout_context=None):
        """Store the optional context objects and reset the fix bookkeeping.

        Args:
            theme_colors: Optional mapping kept on the instance; a falsy
                value is replaced by an empty dict.
            layout_context: Optional object kept on the instance as-is.
        """
        self.fix_log = []
        self.fix_count = 0
        self.layout_context = layout_context
        self.theme_colors = theme_colors if theme_colors else {}
def inspect(self, prs: Presentation, config=None) -> list[QualityIssue]:
    """Run every check over ``prs`` and return the combined findings.

    The document-level confirmation checks run first; then each slide, in
    order, goes through the layout, visual, content and data checks.

    Args:
        prs: Presentation whose ``slides`` are inspected.
        config: Optional configuration object forwarded to the checks.

    Returns:
        A flat list of findings in the order they were produced.
    """
    found = []
    found.extend(self._check_confirmation_alignment(prs, config))
    for idx, slide in enumerate(prs.slides):
        kind = self._get_page_type(idx, config, len(prs.slides))
        found.extend(self._check_layout(slide, idx))
        found.extend(self._check_visual(slide, idx))
        found.extend(self._check_content(slide, idx, config, prs, kind))
        found.extend(self._check_data(slide, idx, prs))
    return found
- def _get_page_type(self, page_idx: int, config, total_slides: int) -> str:
- if config and hasattr(config, 'pages') and page_idx < len(config.pages):
- return config.pages[page_idx].page_type
- if page_idx == 0:
- return 'cover'
- if page_idx == total_slides - 1:
- return 'end'
- if page_idx == 1:
- return 'toc'
- return 'content'
def _check_confirmation_alignment(self, prs, config) -> list[QualityIssue]:
    """Check the deck-level settings on ``config``; no slide is examined.

    All findings use page index -1, rule 'D006', and are not auto-fixable.
    Three conditions are reported: an incomplete required confirmation, a
    selected-page count outside ``config.page_count_range``, and a metrics
    list in which nothing is selected.
    """
    found = []
    if not config:
        return found

    def report(severity, text, kind):
        found.append(QualityIssue(severity, 'data', -1, text, 'D006', False,
                                  {'type': kind}))

    selected_pages = [
        page for page in getattr(config, 'pages', [])
        if getattr(page, 'selected', True)
    ]
    page_total = len(selected_pages)

    if getattr(config, 'require_six_confirmations', False):
        confirmation = getattr(config, 'user_confirmation', None)
        if (confirmation and hasattr(confirmation, 'is_complete')
                and not confirmation.is_complete()):
            report('critical', '六项确认未完成,PPT 不应进入输出阶段',
                   'confirmation_incomplete')

    if getattr(config, 'page_count_range', None) and selected_pages:
        low, high = config.page_count_range
        if page_total < low or page_total > high:
            report('major',
                   f'页面数量 {page_total} 不在确认范围 {low}-{high} 内',
                   'page_count_range')

    if getattr(config, 'metrics', None) and page_total > 0:
        if not any(getattr(m, 'selected', True) for m in config.metrics):
            report('critical', '未找到已确认的核心指标,无法验证输出一致性',
                   'missing_metrics')

    return found
def auto_fix(self, prs: Presentation, issues: list[QualityIssue]):
    """Apply the automatic fix for every auto-fixable, slide-level issue.

    Resets ``fix_count`` and ``fix_log`` first. Issues with a negative
    ``page_index`` are skipped. A failure while fixing one issue is recorded
    in ``fix_log`` and does not stop the remaining fixes.

    Args:
        prs: Presentation whose slides are modified in place.
        issues: Findings to process; entries that are not auto-fixable are
            ignored.

    Returns:
        The number of issues whose ``fix_data`` was marked ``fixed``.
    """
    self.fix_count = 0
    self.fix_log = []
    for issue in issues:
        if not issue.auto_fixable:
            continue
        try:
            if issue.page_index < 0:
                continue
            slide = prs.slides[issue.page_index]
            self._apply_fix(slide, issue, prs)
        except Exception as e:
            self.fix_log.append(f"Fix failed for {issue.rule_id}: {e}")
            continue
        # Count only what a fixer actually marked as fixed, matching how
        # generate_report counts fixed issues; an attempt is not a fix.
        if issue.fix_data.get('fixed'):
            self.fix_count += 1
    return self.fix_count
- def _apply_fix(self, slide, issue, prs):
- category = issue.category
- if category == 'layout':
- self._fix_layout(slide, issue)
- elif category == 'visual':
- self._fix_visual(slide, issue)
- elif category == 'content':
- self._fix_content(slide, issue, prs)
- elif category == 'data':
- self._fix_data(slide, issue, prs)
def generate_report(self, issues: list[QualityIssue], iteration: int = 1,
                    total_pages: int = 0) -> str:
    """Render the findings as a plain-text report.

    Issues are grouped by page (ascending index) and ordered by severity
    within a page. Issues with a negative page index are document-level and
    are listed under a "全局" heading instead of a page number. A summary
    with severity counts, the fixed/auto-fixable ratio and the quality
    score closes the report.

    Args:
        issues: Findings to report; the list itself is not modified.
        iteration: Inspection round shown in the header.
        total_pages: Slide count passed to the score (at least 1 is used).

    Returns:
        The report as a single newline-joined string.
    """
    rule = '═' * 50
    lines = [rule, f' PPT 质量自检报告 (第 {iteration} 轮)', rule]
    if not issues:
        lines.append('[PASS] 全部通过!未发现任何质量问题。')
        return '\n'.join(lines)

    sev_order = {'critical': 0, 'major': 1, 'minor': 2}
    sev_icons = {'critical': '[!!]', 'major': '[!]', 'minor': '[-]'}

    by_page = {}
    for iss in issues:
        by_page.setdefault(iss.page_index, []).append(iss)

    for p_idx in sorted(by_page):
        page_issues = sorted(by_page[p_idx],
                             key=lambda x: sev_order.get(x.severity, 3))
        severities = {i.severity for i in page_issues}
        if 'critical' in severities:
            icon = '[CRIT]'
        elif 'major' in severities:
            icon = '[MAJ]'
        else:
            icon = '[OK]'
        # Negative indexes mark document-level findings; there is no page 0.
        where = '全局' if p_idx < 0 else f'第{p_idx+1}页'
        lines.append(f'{icon} {where}: {len(page_issues)} 个问题')
        for iss in page_issues:
            sev_icon = sev_icons.get(iss.severity, '')
            status = ' [FIXED]' if iss.auto_fixable and iss.fix_data.get('fixed') else ''
            lines.append(f' ├─ {sev_icon} {iss.description}{status}')

    lines.append('─' * 50)
    by_sev = Counter(i.severity for i in issues)
    by_cat = Counter(i.category for i in issues)
    fixable_total = sum(1 for i in issues if i.auto_fixable)
    fixed = sum(1 for i in issues if i.auto_fixable and i.fix_data.get('fixed'))
    score = calculate_score(dict(by_sev), dict(by_cat), max(total_pages, 1))
    label = get_quality_label(score)
    lines.append(f'总结: {len(issues)} 个问题 | '
                 f'{by_sev.get("critical", 0)} 严重 + '
                 f'{by_sev.get("major", 0)} 主要 + '
                 f'{by_sev.get("minor", 0)} 次要')
    lines.append(f'自动修复: {fixed}/{fixable_total} 个')
    lines.append(f'最终质量评分: {score}/100 [{label}]')
    lines.append(rule)
    return '\n'.join(lines)
def quality_assured_build(self, build_fn, data, config, output_path,
                          max_iterations=None, _attempt=0) -> tuple:
    """Build a presentation, then inspect and fix it until it is good enough.

    Each round inspects the deck, applies the automatic fixes and inspects
    again. The loop stops when no issues remain, when only minor issues
    remain, or when the iteration budget is used up. If a fixer requested a
    rebuild, the next round regenerates the deck via ``build_fn``; once a
    rebuild has happened, later requests (from round 3 on) are declined.

    Args:
        build_fn: Callable ``build_fn(data, config)`` returning the deck.
        data: Passed through to ``build_fn``.
        config: Provides ``max_fix_iterations`` and ``quality_threshold``
            and is forwarded to the checks.
        output_path: Where the deck is saved.
        max_iterations: Overrides ``config.max_fix_iterations`` when truthy.
        _attempt: Unused; kept for interface compatibility.

    Returns:
        ``(prs, final_issues)``.

    Raises:
        RuntimeError: The score is below the threshold and a critical
            layout issue remains; nothing is saved in that case.
    """
    # The deck is built inside the loop, so at least one round must run.
    max_iterations = max(1, max_iterations or config.max_fix_iterations)
    total_pages = 0
    needs_rebuild = False
    rebuilt_once = False
    prs = None
    iteration = 0

    for iteration in range(1, max_iterations + 1):
        if iteration == 1:
            prs = build_fn(data, config)
            total_pages = len(prs.slides)
        elif needs_rebuild:
            needs_rebuild = False
            if rebuilt_once and iteration > 2:
                # Declined: keep working on the current deck instead of
                # regenerating it again.
                print('[INFO] 已尝试重建,不再继续重建以避免无限循环')
            else:
                print('[REBUILD] 检测到需要重建的页面,触发重新生成...')
                rebuilt_once = True
                prs = build_fn(data, config)
                total_pages = len(prs.slides)

        issues = self.inspect(prs, config)
        if not issues:
            print(f'[PASS] 第 {iteration} 次迭代:无问题,质量达标')
            break

        by_sev = Counter(i.severity for i in issues)
        print(f'[INSPECT] 第 {iteration} 次自检:{by_sev.get("critical",0)} 严重 + '
              f'{by_sev.get("major",0)} 主要 + {by_sev.get("minor",0)} 次要')

        fixable = [i for i in issues if i.auto_fixable]
        self.auto_fix(prs, fixable)
        print(f'[FIX] 自动修复了 {self.fix_count} 个问题')

        if any(i.fix_data.get('needs_rebuild') for i in fixable):
            needs_rebuild = True
            print('[WARN] 检测到内容严重不足,将在下一轮迭代中重建')

        unfixable = [i for i in issues if not i.auto_fixable]
        if unfixable:
            print(f'[WARN] {len(unfixable)} 个问题需人工确认')

        remaining = self.inspect(prs, config)
        if not remaining:
            print(f'[PASS] 第 {iteration} 次修复后:所有问题已解决')
            break
        if not any(i.severity in ('critical', 'major') for i in remaining):
            print(f'[PASS] 第 {iteration} 次修复后:仅剩次要问题,质量达标')
            break

    final_issues = self.inspect(prs, config)
    by_sev = Counter(i.severity for i in final_issues)
    by_cat = Counter(i.category for i in final_issues)
    score = calculate_score(dict(by_sev), dict(by_cat), max(total_pages, 1))
    label = get_quality_label(score)
    report = self.generate_report(final_issues, iteration, total_pages)
    print(report)

    if score >= config.quality_threshold:
        prs.save(output_path)
        print(f'[PASS] 高质量 PPT 已输出: {output_path}')
        return prs, final_issues

    has_critical_final = any(i.severity == 'critical' for i in final_issues)
    has_layout_critical = any(
        i.severity == 'critical' and i.category == 'layout'
        for i in final_issues
    )
    if has_layout_critical:
        raise RuntimeError(
            f'PPT 存在严重布局问题(评分 {score}),无法自动修复。'
            f'请检查页面配置和数据。'
        )
    prs.save(output_path)
    if has_critical_final:
        print(f'[WARN] 质量评分 {score}(低于阈值 {config.quality_threshold}),'
              f'存在 {by_sev.get("critical", 0)} 个严重内容问题,建议补充分析数据后重新生成')
    else:
        print(f'[WARN] 质量评分 {score}(低于阈值 {config.quality_threshold}),已输出但建议复核')
    return prs, final_issues
def _check_layout(self, slide, page_idx) -> list[QualityIssue]:
    """Check shape geometry and template residue on one slide.

    Findings are produced in this order: per-shape boundary and left-margin
    checks, unreplaced ``{...}`` placeholders, empty template artifacts, and
    pairwise overlaps that are not considered intentional.
    """
    found = []
    page_w = int(slide.slide_width) if hasattr(slide, 'slide_width') else SLIDE_WIDTH
    page_h = int(slide.slide_height) if hasattr(slide, 'slide_height') else SLIDE_HEIGHT

    def report(severity, text, rule, data):
        found.append(QualityIssue(severity, 'layout', page_idx, text, rule, True, data))

    for shape in slide.shapes:
        left, top = int(shape.left), int(shape.top)
        width, height = int(shape.width), int(shape.height)
        right, bottom = left + width, top + height

        if left < -100:
            report('critical',
                   f'形状"{_shape_name(shape)}"飞出页面左边界 (left={left})',
                   'L001', {'shape': shape, 'type': 'left'})
        if right > page_w + 500:
            report('critical',
                   f'形状"{_shape_name(shape)}"飞出页面右边界 (right={right}, max={page_w})',
                   'L002', {'shape': shape, 'type': 'right'})
        if top < -100:
            report('critical',
                   f'形状"{_shape_name(shape)}"飞出页面顶部 (top={top})',
                   'L003', {'shape': shape, 'type': 'top'})
        if bottom > page_h + 500:
            report('critical',
                   f'形状"{_shape_name(shape)}"飞出页面底部 (bottom={bottom}, max={page_h})',
                   'L004', {'shape': shape, 'type': 'bottom'})

        # Left-margin check, with exemptions for full-width bands and for
        # shapes sitting in the header or footer strip.
        if not (0 <= left < SAFE_MARGIN):
            continue
        if left == 0 and width >= page_w * 0.8:
            continue
        if top < 0 or bottom < Emu(100000):
            continue
        if top > page_h - Emu(500000):
            continue
        report('minor', f'形状"{_shape_name(shape)}"过于靠近左边缘',
               'L007', {'shape': shape, 'type': 'edge_left'})

    placeholder_pattern = re.compile(r'\{[^}]+\}')
    for shape in slide.shapes:
        if not shape.has_text_frame:
            continue
        text = shape.text_frame.text
        if placeholder_pattern.search(text):
            report('critical', f'发现未替换占位符: "{text[:50]}"',
                   'L006', {'shape': shape, 'type': 'placeholder'})

    for shape in self._find_empty_template_artifacts(slide):
        report('major', f'发现空模板组件残留: "{_shape_name(shape)}"',
               'L008', {'shape': shape, 'type': 'empty_template_artifact'})

    shapes = list(slide.shapes)
    for pos, first in enumerate(shapes):
        for second in shapes[pos + 1:]:
            if not self._shapes_overlap(first, second):
                continue
            if self._is_intentional_overlap(first, second):
                continue
            report('major',
                   f'形状"{_shape_name(first)}"与"{_shape_name(second)}"存在重叠',
                   'L005', {'shape_a': first, 'shape_b': second, 'type': 'overlap'})
    return found
def _check_visual(self, slide, page_idx) -> list[QualityIssue]:
    """Check explicit font sizes and the number of distinct fonts on a slide.

    Runs with an explicit size below 6pt or above 65pt are reported as
    major; more than three distinct explicit font names is a minor finding.
    Runs without an explicit size or name are not considered.
    """
    found = []
    font_usage = {}
    for shape in (s for s in slide.shapes if s.has_text_frame):
        for para in shape.text_frame.paragraphs:
            for run in para.runs:
                size = run.font.size
                if size:
                    size_pt = size / 12700.0  # EMU -> points
                    if size_pt < 6:
                        found.append(QualityIssue(
                            'major', 'visual', page_idx,
                            f'字号过小 ({size_pt:.1f}pt): "{run.text[:20]}"',
                            'V002', True, {'run': run, 'type': 'font_small'}))
                    elif size_pt > 65:
                        found.append(QualityIssue(
                            'major', 'visual', page_idx,
                            f'字号过大 ({size_pt:.1f}pt): "{run.text[:20]}"',
                            'V003', True, {'run': run, 'type': 'font_large'}))
                name = run.font.name
                if name:
                    font_usage[name] = font_usage.get(name, 0) + 1

    if len(font_usage) > 3:
        found.append(QualityIssue(
            'minor', 'visual', page_idx,
            f'字体使用超过3种: {list(font_usage.keys())}',
            'V001', True, {'type': 'font_mixed', 'fonts': font_usage}))
    return found
def _check_content(self, slide, page_idx, config, prs, page_type='content') -> list[QualityIssue]:
    """Check how much content a slide carries, depending on its page type.

    Cover and end pages are only checked for text overflow. Table-of-contents
    pages are only checked for a minimum amount of text (after the dynamic
    page-fit and core-metric checks). Every other page is checked for fill
    ratio, total text length, paragraph length and count, presence of a
    title, text overflow, and charts without accompanying analysis text.
    """
    # Resolve the dynamic content top from the layout context if available.
    content_top_emu = self.layout_context.content_top if self.layout_context else None

    found = []

    def report(severity, text, rule, fixable, data=None):
        found.append(QualityIssue(severity, 'content', page_idx, text, rule, fixable, data))

    def content_text_shapes():
        return [s for s in slide.shapes
                if s.has_text_frame and _is_in_content_area(s)]

    if page_type in ('cover', 'end'):
        found += self._check_text_overflow(slide, page_idx)
        return found

    found += self._check_dynamic_page_fit(page_idx, page_type, config)
    found += self._check_core_metric_presence(slide, page_idx, page_type, config)

    if page_type == 'toc':
        toc_texts = [s.text_frame.text.strip() for s in content_text_shapes()]
        toc_text = '\n'.join(t for t in toc_texts if t)
        if len(toc_text) < 30:
            report('minor', '目录页内容过少', 'C008', False, {'type': 'empty_page'})
        return found

    fill_ratio = calculate_fill_ratio(slide, content_top_emu=content_top_emu)
    analysis_types = ('kpi_overview', 'trend', 'distribution', 'ranking', 'summary')
    if page_type in analysis_types or page_type in FORECAST_PAGE_TYPES:
        if fill_ratio < FILL_RATIO_THRESHOLDS['sparse']:
            report('critical',
                   f'页面内容严重不足,填充率仅 {fill_ratio:.1%},必须补充图表和分析文本',
                   'C001', True, {'type': 'sparse', 'fill_ratio': fill_ratio})
        elif fill_ratio < FILL_RATIO_THRESHOLDS['low']:
            report('major',
                   f'页面留白偏多,填充率 {fill_ratio:.1%},需补充分析内容',
                   'C001', True, {'type': 'sparse', 'fill_ratio': fill_ratio})
    elif fill_ratio < FILL_RATIO_THRESHOLDS['sparse'] / 2:
        report('minor', f'页面填充率过低 {fill_ratio:.1%}', 'C001', False)

    # Text statistics over the text boxes located in the content area.
    content_shapes = content_text_shapes()
    shape_texts = [s.text_frame.text.strip() for s in content_shapes]
    total_content_chars = len('\n'.join(t for t in shape_texts if t))
    text_lengths = [len(p.text.strip()) for s in content_shapes
                    for p in s.text_frame.paragraphs if p.text.strip()]
    # A paragraph counts as an insight block once it reaches the minimum length.
    insight_blocks = sum(1 for n in text_lengths if n >= TEXT_MIN_LENGTH)

    if total_content_chars < PAGE_MIN_TEXT_LENGTH:
        report('critical',
               f'页面内容为空!所有文本框总字数仅 {total_content_chars} 字(要求≥{PAGE_MIN_TEXT_LENGTH}字)',
               'C008', True, {'type': 'empty_page', 'char_count': total_content_chars})
    elif total_content_chars < 200:
        report('major',
               f'页面内容过少,总字数仅 {total_content_chars} 字,分析深度严重不足',
               'C008', True, {'type': 'empty_page', 'char_count': total_content_chars})

    if text_lengths and max(text_lengths) < TEXT_MIN_LENGTH:
        longest = max(text_lengths)
        report('critical',
               f'分析文本过短(最长为 {longest} 字),需撰写≥{TEXT_MIN_LENGTH}字的深度分析',
               'C005', True, {'type': 'short_text', 'max_length': longest})

    if insight_blocks < INSIGHT_MIN_COUNT:
        report('critical',
               f'分析段数不足,仅 {insight_blocks} 段(要求≥{INSIGHT_MIN_COUNT}段)',
               'C007', True, {'type': 'insight_count', 'count': insight_blocks})

    title_keywords = ['概览', '趋势', '分布', '分析', '总结',
                      '排行', '报告', '建议', '告警', '要点']

    def looks_like_title(shape):
        if not shape.has_text_frame:
            return False
        text = shape.text_frame.text
        try:
            top = int(shape.top)
        except Exception:
            top = 99999999
        # Any non-placeholder text inside the title band counts as a title.
        if top < CONTENT_TOP_BASE + Emu(100000) and top > Emu(500000):
            if len(text.strip()) > 0 and not text.startswith('{'):
                return True
        # Keyword text is accepted somewhat lower on the page.
        if any(kw in text for kw in title_keywords):
            if top < CONTENT_TOP_BASE + Emu(400000):
                return True
        return False

    has_title = any(looks_like_title(shape) for shape in slide.shapes)
    if not has_title and page_idx > 0 and page_idx < len(prs.slides) - 1:
        report('critical', '页面缺少标题', 'C006', True, {'type': 'missing_title'})

    found += self._check_text_overflow(slide, page_idx)

    has_chart = any(shape.has_chart for shape in slide.shapes)
    if has_chart and insight_blocks == 0 and page_idx >= 2:
        report('critical',
               '页面有图表但完全缺少分析文本,图表数据需要被解读和说明',
               'C009', True, {'type': 'chart_no_text'})
    return found
def _check_text_overflow(self, slide, page_idx) -> list[QualityIssue]:
    """Report every text shape whose text is estimated to exceed its box."""
    return [
        QualityIssue(
            'major', 'content', page_idx,
            f'文本可能超出文本框边界: "{shape.text_frame.text[:30]}"',
            'C004', True, {'shape': shape, 'type': 'text_overflow'}
        )
        for shape in slide.shapes
        if shape.has_text_frame and self._is_text_overflowing(shape)
    ]
def _check_dynamic_page_fit(self, page_idx, page_type, config) -> list[QualityIssue]:
    """Check that the profiled data can support the requested page type.

    Nothing is checked when ``config.data_profiling`` is missing or empty.
    Trend pages need a time column and a numeric column; distribution and
    ranking pages need a category column and a numeric column. For KPI
    overview pages the number of selected metrics is compared with the
    layout capacity (4 or more: compact layout advised; more than 6: over
    capacity).
    """
    issues = []
    profile = getattr(config, 'data_profiling', None) or {}
    if not profile:
        return issues

    time_cols = profile.get('time_columns', [])
    cat_cols = profile.get('category_columns', [])
    num_cols = profile.get('numeric_columns', [])

    if page_type == 'trend' and (not time_cols or not num_cols):
        issues.append(QualityIssue(
            'critical', 'content', page_idx,
            '趋势页缺少可用时间列或数值列,需要重建或降级为摘要页',
            'C010', True, {'type': 'dynamic_page_not_supported', 'page_type': page_type}
        ))
    elif page_type in ('distribution', 'ranking') and (not cat_cols or not num_cols):
        issues.append(QualityIssue(
            'critical', 'content', page_idx,
            f'{page_type} 页缺少分类维度或数值列,需要重建或降级为摘要页',
            'C010', True, {'type': 'dynamic_page_not_supported', 'page_type': page_type}
        ))
    elif page_type == 'kpi_overview':
        # ``metrics`` may be present but None; treat that as "no metrics"
        # rather than failing while iterating it.
        metrics = getattr(config, 'metrics', None) or []
        selected_count = sum(1 for m in metrics if getattr(m, 'selected', True))
        if selected_count > 6:
            issues.append(QualityIssue(
                'minor', 'content', page_idx,
                f'核心指标数量 {selected_count} 超过 6 个,KPI页应切换为紧凑布局或拆分展示',
                'C011', True, {'type': 'kpi_layout_over_capacity', 'count': selected_count}
            ))
        elif selected_count >= 4:
            issues.append(QualityIssue(
                'minor', 'content', page_idx,
                f'核心指标数量 {selected_count} 较多,建议使用紧凑布局以保留洞察区',
                'C011', True, {'type': 'kpi_layout_compact_needed', 'count': selected_count}
            ))
    return issues
def _check_core_metric_presence(self, slide, page_idx, page_type, config) -> list[QualityIssue]:
    """Check that a KPI overview slide mentions the selected metric labels.

    Only runs for ``page_type == 'kpi_overview'`` with a config. The labels
    of the first six selected metrics are looked up as substrings of the
    slide's combined text; any that are absent are reported in a single
    critical finding.
    """
    issues = []
    if page_type != 'kpi_overview' or not config:
        return issues

    # ``metrics`` may be present but None; treat that as "no metrics".
    metrics = getattr(config, 'metrics', None) or []
    selected_metrics = [m for m in metrics if getattr(m, 'selected', True)]
    if not selected_metrics:
        return issues

    slide_text = '\n'.join(
        shape.text_frame.text for shape in slide.shapes
        if shape.has_text_frame and shape.text_frame.text
    )
    # Metrics without a usable label are skipped instead of raising.
    labels = [getattr(m, 'label', None) for m in selected_metrics[:6]]
    missing = [label for label in labels if label and label not in slide_text]
    if missing:
        issues.append(QualityIssue(
            'critical', 'data', page_idx,
            'KPI概览页缺少已确认核心指标:' + '、'.join(missing),
            'D006', True, {'type': 'core_metric_missing', 'missing': missing}
        ))
    return issues
def _check_data(self, slide, page_idx, prs) -> list[QualityIssue]:
    """Report "current / total" page-number texts whose total is zero.

    The first slide (index 0) is never checked. Only the first
    ``<digits> / <digits>`` match in each text shape is examined.
    """
    found = []
    if page_idx == 0:
        return found

    for shape in slide.shapes:
        if not shape.has_text_frame:
            continue
        text = shape.text_frame.text
        match = re.search(r'(\d+)\s*/\s*(\d+)', text)
        if match is None:
            continue
        if int(match.group(2)) == 0:
            found.append(QualityIssue(
                'major', 'data', page_idx,
                f'页码格式异常: {text.strip()}',
                'D002', True, {'type': 'page_num'}))
    return found
def _fix_layout(self, slide, issue):
    """Apply the layout fix selected by ``issue.fix_data['type']``.

    Handled types: out-of-bounds shapes ('left', 'right', 'top', 'bottom'),
    'overlap', 'placeholder', 'edge_left' and 'empty_template_artifact'.
    A handled issue gets ``fix_data['fixed'] = True``; other types are
    ignored.
    """
    fd = issue.fix_data
    kind = fd.get('type')

    if kind in ('left', 'right', 'top', 'bottom'):
        target = fd.get('shape')
        if target:
            ensure_safe_position(target, SLIDE_WIDTH, SLIDE_HEIGHT)
            fd['fixed'] = True
        return

    if kind == 'overlap':
        first, second = fd.get('shape_a'), fd.get('shape_b')
        if first and second:
            try:
                # Push the second shape to the right of the first one when
                # it starts before the first one's right edge (plus a gap).
                if int(second.left) < int(first.left) + int(first.width) + Emu(50000):
                    second.left = int(first.left) + int(first.width) + Emu(152400)
                    ensure_safe_position(second, SLIDE_WIDTH, SLIDE_HEIGHT)
            except Exception:
                pass
            fd['fixed'] = True
        return

    if kind == 'placeholder':
        target = fd.get('shape')
        if not (target and target.has_text_frame):
            return
        text = target.text_frame.text or ''
        kpi_pattern = re.compile(r'\{kpi\d+_(label|value)\}')
        if kpi_pattern.search(text):
            # KPI placeholder: drop the text shape and any text-less
            # shapes that lie inside a padded region around it.
            self._remove_shape(target)
            try:
                pad = 300000
                min_x = int(target.left) - pad
                min_y = int(target.top) - pad
                max_x = int(target.left) + int(target.width) + pad
                max_y = int(target.top) + int(target.height) + pad
                for other in list(slide.shapes):
                    try:
                        ox, oy = int(other.left), int(other.top)
                        ow, oh = int(other.width), int(other.height)
                        inside = (ox >= min_x and ox + ow <= max_x and
                                  oy >= min_y and oy + oh <= max_y)
                        if not inside or other == target:
                            continue
                        has_text = (other.has_text_frame and
                                    (other.text_frame.text or '').strip())
                        if not has_text:
                            self._remove_shape(other)
                    except Exception:
                        pass
            except Exception:
                pass
        else:
            # Any other placeholder: strip the {...} tokens from the text.
            for para in target.text_frame.paragraphs:
                para.text = re.sub(r'\{[^}]+\}', '', para.text)
        fd['fixed'] = True
        return

    if kind == 'edge_left':
        target = fd.get('shape')
        if target:
            try:
                # Only narrow shapes are moved to the safe margin.
                if int(target.width) < SLIDE_WIDTH * 0.5:
                    target.left = SAFE_MARGIN
            except Exception:
                pass
            fd['fixed'] = True
        return

    if kind == 'empty_template_artifact':
        target = fd.get('shape')
        if target:
            self._remove_shape(target)
            fd['fixed'] = True
def _fix_visual(self, slide, issue):
    """Apply the visual fix selected by ``issue.fix_data['type']``.

    'font_small' / 'font_large' set the offending run's size to
    ``FONT_SIZE_MIN`` / ``FONT_SIZE_MAX``; 'font_mixed' sets every run on
    the slide to ``DEFAULT_FONT``.
    """
    fd = issue.fix_data
    kind = fd.get('type')

    if kind in ('font_small', 'font_large'):
        run = fd.get('run')
        if run:
            run.font.size = FONT_SIZE_MIN if kind == 'font_small' else FONT_SIZE_MAX
            fd['fixed'] = True
        return

    if kind == 'font_mixed':
        text_shapes = (s for s in slide.shapes if s.has_text_frame)
        for shape in text_shapes:
            for para in shape.text_frame.paragraphs:
                for run in para.runs:
                    run.font.name = DEFAULT_FONT
        fd['fixed'] = True
- def _fix_content(self, slide, issue, prs):
- fd = issue.fix_data
- if fd.get('type') == 'sparse':
- fill_ratio = fd.get('fill_ratio', 0)
- if fill_ratio < FILL_RATIO_THRESHOLDS['low']:
- try:
- box = slide.shapes.add_textbox(
- CONTENT_LEFT, Emu(int(FOOTER_TOP) - Emu(1600000)),
- Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT - Emu(200000)), Emu(1500000))
- tf = box.text_frame
- tf.word_wrap = True
- p = tf.paragraphs[0]
- p.text = (
- '[WARNING] 此页面内容不足,需补充深度分析内容。'
- '分析应包含:具体数据引用(含数值和单位)、'
- '与同类/历史/目标的对比分析、'
- '数据背后原因的至少2条解读、'
- '以及可执行的业务行动建议。'
- '请勿使用"要加强"、"进一步优化"等模糊措辞。'
- )
- p.font.size = Pt(12)
- p.font.color.rgb = RGBColor(0xCC, 0x33, 0x00)
- p.font.name = DEFAULT_FONT
- p.font.bold = True
- fd['fixed'] = True
- fd['needs_rebuild'] = True
- except Exception:
- pass
- elif fd.get('type') == 'empty_page':
- fd['needs_rebuild'] = True
- fd['fixed'] = True
- elif fd.get('type') == 'chart_no_text':
- fd['needs_rebuild'] = True
- fd['fixed'] = True
- elif fd.get('type') == 'insight_count':
- fd['needs_rebuild'] = True
- fd['fixed'] = True
- elif fd.get('type') == 'short_text':
- fd['needs_rebuild'] = True
- fd['fixed'] = True
- elif fd.get('type') in ('dynamic_page_not_supported', 'kpi_layout_over_capacity'):
- fd['fixed'] = True
- elif fd.get('type') == 'kpi_layout_compact_needed':
- fd['fixed'] = True
- elif fd.get('type') == 'core_metric_missing':
- fd['needs_rebuild'] = True
- fd['fixed'] = True
- elif fd.get('type') == 'missing_title':
- try:
- box = slide.shapes.add_textbox(
- CONTENT_LEFT, Emu(914400),
- Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT - Emu(200000)), Emu(508000))
- p = box.text_frame.paragraphs[0]
- p.text = '数据详情'
- p.font.size = Pt(24)
- p.font.bold = True
- p.font.color.rgb = RGBColor(0x33, 0x33, 0x33)
- p.font.name = DEFAULT_FONT
- fd['fixed'] = True
- except Exception:
- pass
- elif fd.get('type') == 'text_overflow':
- shape = fd.get('shape')
- if shape and shape.has_text_frame:
- text_len = len(shape.text_frame.text or '')
- try:
- if text_len > 180 or int(shape.top) + int(shape.height) > int(FOOTER_TOP) - Emu(120000):
- fd['needs_rebuild'] = True
- else:
- for para in shape.text_frame.paragraphs:
- for run in para.runs:
- if run.font.size and run.font.size > Pt(9):
- run.font.size = Pt(9)
- except Exception:
- fd['needs_rebuild'] = True
- fd['fixed'] = True
- def _fix_data(self, slide, issue, prs):
- fd = issue.fix_data
- if fd.get('type') == 'page_num':
- fd['fixed'] = True
- def _shapes_overlap(self, a, b) -> bool:
- ax, ay, aw, ah = int(a.left), int(a.top), int(a.width), int(a.height)
- bx, by, bw, bh = int(b.left), int(b.top), int(b.width), int(b.height)
- if ax + aw <= bx or bx + bw <= ax:
- return False
- if ay + ah <= by or by + bh <= ay:
- return False
- return True
- def _is_intentional_overlap(self, a, b) -> bool:
- if hasattr(a, 'is_placeholder') or hasattr(b, 'is_placeholder'):
- return True
- a_area = int(a.width) * int(a.height)
- b_area = int(b.width) * int(b.height)
- if a_area > b_area * 3 or b_area > a_area * 3:
- return True
- return False
- def _is_title_shape(self, shape) -> bool:
- if not shape.has_text_frame:
- return False
- try:
- y = int(shape.top)
- return y < int(CONTENT_TOP_BASE) + Emu(200000)
- except Exception:
- return False
- def _find_empty_template_artifacts(self, slide) -> list:
- artifacts = []
- shapes = list(slide.shapes)
- empty_text_boxes = []
- for shape in shapes:
- if shape.has_text_frame:
- text = (shape.text_frame.text or '').strip()
- if text:
- continue
- if int(shape.width) < Emu(200000) or int(shape.height) < Emu(120000):
- continue
- if int(shape.top) < Emu(900000) or int(shape.top) > int(FOOTER_TOP) - Emu(100000):
- continue
- empty_text_boxes.append(shape)
- artifacts.append(shape)
- for shape in shapes:
- if shape.has_text_frame:
- continue
- try:
- is_large_soft_card = (
- int(shape.width) >= Emu(1000000) and
- int(shape.height) >= Emu(500000) and
- int(shape.top) < int(FOOTER_TOP) - Emu(400000)
- )
- if not is_large_soft_card:
- continue
- overlaps_empty_text = any(self._shapes_overlap(shape, box) for box in empty_text_boxes)
- if overlaps_empty_text:
- artifacts.append(shape)
- except Exception:
- continue
- # Preserve order while de-duplicating.
- seen = set()
- unique = []
- for shape in artifacts:
- key = id(shape)
- if key not in seen:
- unique.append(shape)
- seen.add(key)
- return unique
- def _remove_shape(self, shape):
- el = shape.element
- el.getparent().remove(el)
- def _is_text_overflowing(self, shape) -> bool:
- if not shape.has_text_frame:
- return False
- text = shape.text_frame.text
- if not text.strip():
- return False
- if len(text) > 800:
- return True
- try:
- w = int(shape.width)
- h = int(shape.height)
- width_pt = max(1, w / 12700.0)
- max_font_pt = 10
- para_count = 0
- for para in shape.text_frame.paragraphs:
- if not para.text.strip():
- continue
- para_count += 1
- for run in para.runs:
- if run.font.size:
- max_font_pt = max(max_font_pt, run.font.size / 12700.0)
- chars_per_line = max(8, int(width_pt / (max_font_pt * 1.15)))
- est_lines = max(1, (len(text) + chars_per_line - 1) // chars_per_line)
- est_height = int((est_lines * max_font_pt * 1.2 + para_count * 4) * 12700)
- if est_height > h * 1.15:
- return True
- if h < Emu(200000) and len(text) > 80:
- return True
- except Exception:
- pass
- return False
- def _shape_name(shape):
- try:
- if shape.has_text_frame:
- return shape.text_frame.text[:20].replace('\n', ' ')
- except Exception:
- pass
- try:
- return shape.shape_type
- except Exception:
- pass
- return '无名形状'
- def _is_in_content_area(shape):
- try:
- return int(shape.top) >= int(CONTENT_TOP_BASE)
- except Exception:
- return False
if __name__ == '__main__':
    # Smoke check when run as a script: constructing an inspector with its
    # defaults must succeed; no presentation is opened.
    print("QualityInspector module loaded")
    inspector = QualityInspector()
    print("Ready to inspect PPT files")
|