""" PPT quality inspector and auto-fix engine. Inspects generated PPT for layout, visual, content, and data issues, then auto-fixes them iteratively until quality threshold is met. """ import re from pptx import Presentation from pptx.util import Emu, Pt from pptx.dml.color import RGBColor from collections import Counter from quality_rules import ( QUALITY_RULES, SEVERITY_WEIGHTS, CATEGORY_WEIGHTS, FILL_RATIO_THRESHOLDS, FONT_SIZE_MIN, FONT_SIZE_MAX, TEXT_MIN_LENGTH, INSIGHT_MIN_COUNT, PAGE_MIN_TEXT_LENGTH, SAFE_MARGIN, CONTENT_LEFT, CONTENT_TOP_BASE, FOOTER_TOP, SLIDE_WIDTH, SLIDE_HEIGHT, DEFAULT_FONT, get_quality_label, calculate_score, ) from page_layouts import calculate_fill_ratio, ensure_safe_position class QualityIssue: def __init__(self, severity, category, page_index, description, rule_id='', auto_fixable=True, fix_data=None): self.severity = severity self.category = category self.page_index = page_index self.description = description self.rule_id = rule_id self.auto_fixable = auto_fixable self.fix_data = fix_data or {} def __repr__(self): return f"[{self.severity}] Page {self.page_index+1}: {self.description}" class QualityInspector: def __init__(self, theme_colors: dict = None): self.theme_colors = theme_colors or {} self.fix_count = 0 self.fix_log = [] def inspect(self, prs: Presentation, config=None) -> list[QualityIssue]: issues = [] issues += self._check_confirmation_alignment(prs, config) for page_idx, slide in enumerate(prs.slides): page_type = self._get_page_type(page_idx, config, len(prs.slides)) issues += self._check_layout(slide, page_idx) issues += self._check_visual(slide, page_idx) issues += self._check_content(slide, page_idx, config, prs, page_type) issues += self._check_data(slide, page_idx, prs) return issues def _get_page_type(self, page_idx: int, config, total_slides: int) -> str: if config and hasattr(config, 'pages') and page_idx < len(config.pages): return config.pages[page_idx].page_type if page_idx == 0: return 'cover' if page_idx == total_slides - 1: return 'end' if page_idx == 1: return 'toc' return 'content' def _check_confirmation_alignment(self, prs, config) -> list[QualityIssue]: issues = [] if not config: return issues selected_pages = [p for p in getattr(config, 'pages', []) if getattr(p, 'selected', True)] if getattr(config, 'require_six_confirmations', False): confirmation = getattr(config, 'user_confirmation', None) if confirmation and hasattr(confirmation, 'is_complete') and not confirmation.is_complete(): issues.append(QualityIssue( 'critical', 'data', -1, '六项确认未完成,PPT 不应进入输出阶段', 'D006', False, {'type': 'confirmation_incomplete'} )) if config and getattr(config, 'page_count_range', None) and selected_pages: low, high = config.page_count_range if len(selected_pages) < low or len(selected_pages) > high: issues.append(QualityIssue( 'major', 'data', -1, f'页面数量 {len(selected_pages)} 不在确认范围 {low}-{high} 内', 'D006', False, {'type': 'page_count_range'} )) if config and getattr(config, 'metrics', None) and len(selected_pages) > 0: selected_metrics = [m for m in config.metrics if getattr(m, 'selected', True)] if not selected_metrics: issues.append(QualityIssue( 'critical', 'data', -1, '未找到已确认的核心指标,无法验证输出一致性', 'D006', False, {'type': 'missing_metrics'} )) return issues def auto_fix(self, prs: Presentation, issues: list[QualityIssue]): fixable = [i for i in issues if i.auto_fixable] self.fix_count = 0 self.fix_log = [] for issue in fixable: try: if issue.page_index < 0: continue slide = prs.slides[issue.page_index] self._apply_fix(slide, issue, prs) self.fix_count += 1 except Exception as e: self.fix_log.append(f"Fix failed for {issue.rule_id}: {e}") return self.fix_count def _apply_fix(self, slide, issue, prs): category = issue.category if category == 'layout': self._fix_layout(slide, issue) elif category == 'visual': self._fix_visual(slide, issue) elif category == 'content': self._fix_content(slide, issue, prs) elif category == 'data': self._fix_data(slide, issue, prs) def generate_report(self, issues: list[QualityIssue], iteration: int = 1, total_pages: int = 0) -> str: lines = [] lines.append('═' * 50) lines.append(f' PPT 质量自检报告 (第 {iteration} 轮)') lines.append('═' * 50) if not issues: lines.append('[PASS] 全部通过!未发现任何质量问题。') return '\n'.join(lines) by_page = {} for iss in issues: p = iss.page_index if p not in by_page: by_page[p] = [] by_page[p].append(iss) for p_idx in sorted(by_page.keys()): page_issues = by_page[p_idx] sev_order = {'critical': 0, 'major': 1, 'minor': 2} page_issues.sort(key=lambda x: sev_order.get(x.severity, 3)) has_critical = any(i.severity == 'critical' for i in page_issues) has_major = any(i.severity == 'major' for i in page_issues) if has_critical: icon = '[CRIT]' elif has_major: icon = '[MAJ]' else: icon = '[OK]' lines.append(f'{icon} 第{p_idx+1}页: {len(page_issues)} 个问题') for iss in page_issues: sev_icon = {'critical': '[!!]', 'major': '[!]', 'minor': '[-]'}.get(iss.severity, '') status = ' [FIXED]' if iss.auto_fixable and iss.fix_data.get('fixed') else '' lines.append(f' ├─ {sev_icon} {iss.description}{status}') lines.append('─' * 50) by_sev = Counter(i.severity for i in issues) by_cat = Counter(i.category for i in issues) fixed = sum(1 for i in issues if i.auto_fixable and i.fix_data.get('fixed')) score = calculate_score(dict(by_sev), dict(by_cat), max(total_pages, 1)) label = get_quality_label(score) lines.append(f'总结: {len(issues)} 个问题 | ' f'{by_sev.get("critical", 0)} 严重 + ' f'{by_sev.get("major", 0)} 主要 + ' f'{by_sev.get("minor", 0)} 次要') lines.append(f'自动修复: {fixed}/{sum(1 for i in issues if i.auto_fixable)} 个') lines.append(f'最终质量评分: {score}/100 [{label}]') lines.append('═' * 50) return '\n'.join(lines) def quality_assured_build(self, build_fn, data, config, output_path, max_iterations=None, _attempt=0) -> tuple: max_iterations = max_iterations or config.max_fix_iterations total_pages = 0 needs_rebuild = False rebuilt_once = False prs = None for iteration in range(1, max_iterations + 1): if iteration == 1 or needs_rebuild: if needs_rebuild: if rebuilt_once and iteration > 2: print(f'[INFO] 已尝试重建,不再继续重建以避免无限循环') needs_rebuild = False else: print(f'[REBUILD] 检测到需要重建的页面,触发重新生成...') rebuilt_once = True needs_rebuild = False prs = build_fn(data, config) total_pages = len(prs.slides) issues = self.inspect(prs, config) if not issues: print(f'[PASS] 第 {iteration} 次迭代:无问题,质量达标') break by_sev = Counter(i.severity for i in issues) print(f'[INSPECT] 第 {iteration} 次自检:{by_sev.get("critical",0)} 严重 + ' f'{by_sev.get("major",0)} 主要 + {by_sev.get("minor",0)} 次要') fixable = [i for i in issues if i.auto_fixable] self.auto_fix(prs, fixable) print(f'[FIX] 自动修复了 {self.fix_count} 个问题') for issue in fixable: if issue.fix_data.get('needs_rebuild'): needs_rebuild = True print(f'[WARN] 检测到内容严重不足,将在下一轮迭代中重建') break unfixable = [i for i in issues if not i.auto_fixable] if unfixable: print(f'[WARN] {len(unfixable)} 个问题需人工确认') remaining = self.inspect(prs, config) if not remaining: print(f'[PASS] 第 {iteration} 次修复后:所有问题已解决') break has_critical = any(i.severity == 'critical' for i in remaining) has_major = any(i.severity == 'major' for i in remaining) if not has_critical and not has_major: print(f'[PASS] 第 {iteration} 次修复后:仅剩次要问题,质量达标') break if needs_rebuild and iteration < max_iterations: continue final_issues = self.inspect(prs, config) by_sev = Counter(i.severity for i in final_issues) by_cat = Counter(i.category for i in final_issues) score = calculate_score(dict(by_sev), dict(by_cat), max(total_pages, 1)) label = get_quality_label(score) report = self.generate_report(final_issues, iteration, total_pages) print(report) if score >= config.quality_threshold: prs.save(output_path) print(f'[PASS] 高质量 PPT 已输出: {output_path}') else: has_critical_final = any(i.severity == 'critical' for i in final_issues) has_layout_critical = any( i.severity == 'critical' and i.category == 'layout' for i in final_issues ) if has_layout_critical: raise RuntimeError( f'PPT 存在严重布局问题(评分 {score}),无法自动修复。' f'请检查页面配置和数据。' ) prs.save(output_path) if has_critical_final: print(f'[WARN] 质量评分 {score}(低于阈值 {config.quality_threshold}),' f'存在 {by_sev.get("critical", 0)} 个严重内容问题,建议补充分析数据后重新生成') else: print(f'[WARN] 质量评分 {score}(低于阈值 {config.quality_threshold}),已输出但建议复核') return prs, final_issues def _check_layout(self, slide, page_idx) -> list[QualityIssue]: issues = [] sw = int(slide.slide_width) if hasattr(slide, 'slide_width') else SLIDE_WIDTH sh = int(slide.slide_height) if hasattr(slide, 'slide_height') else SLIDE_HEIGHT for shape in slide.shapes: l, t = int(shape.left), int(shape.top) w, h = int(shape.width), int(shape.height) if l < -100: issues.append(QualityIssue('critical', 'layout', page_idx, f'形状"{_shape_name(shape)}"飞出页面左边界 (left={l})', 'L001', True, {'shape': shape, 'type': 'left'})) if l + w > sw + 500: issues.append(QualityIssue('critical', 'layout', page_idx, f'形状"{_shape_name(shape)}"飞出页面右边界 (right={l+w}, max={sw})', 'L002', True, {'shape': shape, 'type': 'right'})) if t < -100: issues.append(QualityIssue('critical', 'layout', page_idx, f'形状"{_shape_name(shape)}"飞出页面顶部 (top={t})', 'L003', True, {'shape': shape, 'type': 'top'})) if t + h > sh + 500: issues.append(QualityIssue('critical', 'layout', page_idx, f'形状"{_shape_name(shape)}"飞出页面底部 (bottom={t+h}, max={sh})', 'L004', True, {'shape': shape, 'type': 'bottom'})) if l < SAFE_MARGIN and l >= 0: if l == 0 and w >= sw * 0.8: continue if int(shape.top) < 0 or int(shape.top) + int(shape.height) < Emu(100000): continue if int(shape.top) > sh - Emu(500000): continue issues.append(QualityIssue('minor', 'layout', page_idx, f'形状"{_shape_name(shape)}"过于靠近左边缘', 'L007', True, {'shape': shape, 'type': 'edge_left'})) placeholder_pattern = re.compile(r'\{[^}]+\}') for shape in slide.shapes: if shape.has_text_frame: text = shape.text_frame.text if placeholder_pattern.search(text): issues.append(QualityIssue('critical', 'layout', page_idx, f'发现未替换占位符: "{text[:50]}"', 'L006', True, {'shape': shape, 'type': 'placeholder'})) empty_artifacts = self._find_empty_template_artifacts(slide) for shape in empty_artifacts: issues.append(QualityIssue( 'major', 'layout', page_idx, f'发现空模板组件残留: "{_shape_name(shape)}"', 'L008', True, {'shape': shape, 'type': 'empty_template_artifact'} )) shapes_list = list(slide.shapes) for i, a in enumerate(shapes_list): for b in shapes_list[i+1:]: if self._shapes_overlap(a, b): a_name = _shape_name(a) b_name = _shape_name(b) if self._is_intentional_overlap(a, b): continue issues.append(QualityIssue('major', 'layout', page_idx, f'形状"{a_name}"与"{b_name}"存在重叠', 'L005', True, {'shape_a': a, 'shape_b': b, 'type': 'overlap'})) return issues def _check_visual(self, slide, page_idx) -> list[QualityIssue]: issues = [] fonts_seen = {} for shape in slide.shapes: if not shape.has_text_frame: continue for para in shape.text_frame.paragraphs: for run in para.runs: if run.font.size: size_pt = run.font.size / 12700.0 if size_pt < 6: issues.append(QualityIssue('major', 'visual', page_idx, f'字号过小 ({size_pt:.1f}pt): "{run.text[:20]}"', 'V002', True, {'run': run, 'type': 'font_small'})) elif size_pt > 65: issues.append(QualityIssue('major', 'visual', page_idx, f'字号过大 ({size_pt:.1f}pt): "{run.text[:20]}"', 'V003', True, {'run': run, 'type': 'font_large'})) if run.font.name: fonts_seen[run.font.name] = fonts_seen.get(run.font.name, 0) + 1 if len(fonts_seen) > 3: issues.append(QualityIssue('minor', 'visual', page_idx, f'字体使用超过3种: {list(fonts_seen.keys())}', 'V001', True, {'type': 'font_mixed', 'fonts': fonts_seen})) return issues def _check_content(self, slide, page_idx, config, prs, page_type='content') -> list[QualityIssue]: issues = [] if page_type in ('cover', 'end'): return issues issues += self._check_dynamic_page_fit(page_idx, page_type, config) issues += self._check_core_metric_presence(slide, page_idx, page_type, config) if page_type == 'toc': content_shapes = [s for s in slide.shapes if s.has_text_frame and _is_in_content_area(s)] all_content_text = '' for shape in content_shapes: text = shape.text_frame.text.strip() if text: all_content_text += text + '\n' if len(all_content_text.strip()) < 30: issues.append(QualityIssue('minor', 'content', page_idx, '目录页内容过少', 'C008', False, {'type': 'empty_page'})) return issues fill_ratio = calculate_fill_ratio(slide) if page_type in ('kpi_overview', 'trend', 'distribution', 'ranking', 'summary'): if fill_ratio < FILL_RATIO_THRESHOLDS['sparse']: issues.append(QualityIssue('critical', 'content', page_idx, f'页面内容严重不足,填充率仅 {fill_ratio:.1%},必须补充图表和分析文本', 'C001', True, {'type': 'sparse', 'fill_ratio': fill_ratio})) elif fill_ratio < FILL_RATIO_THRESHOLDS['low']: issues.append(QualityIssue('major', 'content', page_idx, f'页面留白偏多,填充率 {fill_ratio:.1%},需补充分析内容', 'C001', True, {'type': 'sparse', 'fill_ratio': fill_ratio})) elif fill_ratio < FILL_RATIO_THRESHOLDS['sparse'] / 2: issues.append(QualityIssue('minor', 'content', page_idx, f'页面填充率过低 {fill_ratio:.1%}', 'C001', False)) content_shapes = [s for s in slide.shapes if s.has_text_frame and _is_in_content_area(s)] all_content_text = '' insight_blocks = 0 for shape in content_shapes: tf = shape.text_frame full_text = tf.text.strip() if not full_text: continue all_content_text += full_text + '\n' for para in tf.paragraphs: para_text = para.text.strip() if para_text and len(para_text) >= TEXT_MIN_LENGTH: insight_blocks += 1 total_content_chars = len(all_content_text.strip()) text_lengths = [len(p.text.strip()) for s in content_shapes for p in s.text_frame.paragraphs if p.text.strip()] if total_content_chars < PAGE_MIN_TEXT_LENGTH: issues.append(QualityIssue('critical', 'content', page_idx, f'页面内容为空!所有文本框总字数仅 {total_content_chars} 字(要求≥{PAGE_MIN_TEXT_LENGTH}字)', 'C008', True, {'type': 'empty_page', 'char_count': total_content_chars})) elif total_content_chars < 200: issues.append(QualityIssue('major', 'content', page_idx, f'页面内容过少,总字数仅 {total_content_chars} 字,分析深度严重不足', 'C008', True, {'type': 'empty_page', 'char_count': total_content_chars})) if text_lengths and max(text_lengths) < TEXT_MIN_LENGTH: issues.append(QualityIssue('critical', 'content', page_idx, f'分析文本过短(最长为 {max(text_lengths)} 字),需撰写≥{TEXT_MIN_LENGTH}字的深度分析', 'C005', True, {'type': 'short_text', 'max_length': max(text_lengths)})) if insight_blocks < INSIGHT_MIN_COUNT: issues.append(QualityIssue('critical', 'content', page_idx, f'分析段数不足,仅 {insight_blocks} 段(要求≥{INSIGHT_MIN_COUNT}段)', 'C007', True, {'type': 'insight_count', 'count': insight_blocks})) has_title = False for shape in slide.shapes: if shape.has_text_frame: text = shape.text_frame.text try: sy = int(shape.top) except Exception: sy = 99999999 if sy < CONTENT_TOP_BASE + Emu(100000) and sy > Emu(500000): if len(text.strip()) > 0 and not text.startswith('{'): has_title = True break if any(kw in text for kw in ['概览', '趋势', '分布', '分析', '总结', '排行', '报告', '建议', '告警', '要点']): if sy < CONTENT_TOP_BASE + Emu(400000): has_title = True break if not has_title and page_idx > 0 and page_idx < len(prs.slides) - 1: issues.append(QualityIssue('critical', 'content', page_idx, '页面缺少标题', 'C006', True, {'type': 'missing_title'})) for shape in slide.shapes: if shape.has_text_frame: if self._is_text_overflowing(shape): issues.append(QualityIssue('major', 'content', page_idx, f'文本可能超出文本框边界: "{shape.text_frame.text[:30]}"', 'C004', True, {'shape': shape, 'type': 'text_overflow'})) has_chart = False for shape in slide.shapes: if shape.has_chart: has_chart = True break if has_chart and insight_blocks == 0 and page_idx >= 2: issues.append(QualityIssue('critical', 'content', page_idx, '页面有图表但完全缺少分析文本,图表数据需要被解读和说明', 'C009', True, {'type': 'chart_no_text'})) return issues def _check_dynamic_page_fit(self, page_idx, page_type, config) -> list[QualityIssue]: issues = [] profile = getattr(config, 'data_profiling', None) or {} if not profile: return issues time_cols = profile.get('time_columns', []) cat_cols = profile.get('category_columns', []) num_cols = profile.get('numeric_columns', []) if page_type == 'trend' and (not time_cols or not num_cols): issues.append(QualityIssue( 'critical', 'content', page_idx, '趋势页缺少可用时间列或数值列,需要重建或降级为摘要页', 'C010', True, {'type': 'dynamic_page_not_supported', 'page_type': page_type} )) elif page_type in ('distribution', 'ranking') and (not cat_cols or not num_cols): issues.append(QualityIssue( 'critical', 'content', page_idx, f'{page_type} 页缺少分类维度或数值列,需要重建或降级为摘要页', 'C010', True, {'type': 'dynamic_page_not_supported', 'page_type': page_type} )) elif page_type == 'kpi_overview': selected_metrics = [m for m in getattr(config, 'metrics', []) if getattr(m, 'selected', True)] if len(selected_metrics) > 6: issues.append(QualityIssue( 'major', 'content', page_idx, f'核心指标数量 {len(selected_metrics)} 超过 6 个,KPI页应拆页或改为紧凑布局', 'C011', True, {'type': 'kpi_layout_over_capacity', 'count': len(selected_metrics)} )) return issues def _check_core_metric_presence(self, slide, page_idx, page_type, config) -> list[QualityIssue]: issues = [] if page_type != 'kpi_overview' or not config: return issues selected_metrics = [m for m in getattr(config, 'metrics', []) if getattr(m, 'selected', True)] if not selected_metrics: return issues slide_text = '\n'.join( shape.text_frame.text for shape in slide.shapes if shape.has_text_frame and shape.text_frame.text ) missing = [m.label for m in selected_metrics[:6] if m.label and m.label not in slide_text] if missing: issues.append(QualityIssue( 'critical', 'data', page_idx, 'KPI概览页缺少已确认核心指标:' + '、'.join(missing), 'D006', True, {'type': 'core_metric_missing', 'missing': missing} )) return issues def _check_data(self, slide, page_idx, prs) -> list[QualityIssue]: issues = [] if page_idx == 0: return issues for shape in slide.shapes: if shape.has_text_frame: text = shape.text_frame.text page_pattern = re.search(r'(\d+)\s*/\s*(\d+)', text) if page_pattern: current = int(page_pattern.group(1)) total = int(page_pattern.group(2)) if total == 0: issues.append(QualityIssue('major', 'data', page_idx, f'页码格式异常: {text.strip()}', 'D002', True, {'type': 'page_num'})) return issues def _fix_layout(self, slide, issue): fd = issue.fix_data if fd.get('type') in ('left', 'right', 'top', 'bottom'): shape = fd.get('shape') if shape: ensure_safe_position(shape, SLIDE_WIDTH, SLIDE_HEIGHT) fd['fixed'] = True elif fd.get('type') == 'overlap': a, b = fd.get('shape_a'), fd.get('shape_b') if a and b: try: if int(b.left) < int(a.left) + int(a.width) + Emu(50000): b.left = int(a.left) + int(a.width) + Emu(152400) ensure_safe_position(b, SLIDE_WIDTH, SLIDE_HEIGHT) except Exception: pass fd['fixed'] = True elif fd.get('type') == 'placeholder': shape = fd.get('shape') if shape and shape.has_text_frame: for para in shape.text_frame.paragraphs: para.text = re.sub(r'\{[^}]+\}', '', para.text) fd['fixed'] = True elif fd.get('type') == 'edge_left': shape = fd.get('shape') if shape: try: w = int(shape.width) if w < SLIDE_WIDTH * 0.5: shape.left = SAFE_MARGIN except Exception: pass fd['fixed'] = True elif fd.get('type') == 'empty_template_artifact': shape = fd.get('shape') if shape: self._remove_shape(shape) fd['fixed'] = True def _fix_visual(self, slide, issue): fd = issue.fix_data if fd.get('type') == 'font_small': run = fd.get('run') if run: run.font.size = FONT_SIZE_MIN fd['fixed'] = True elif fd.get('type') == 'font_large': run = fd.get('run') if run: run.font.size = FONT_SIZE_MAX fd['fixed'] = True elif fd.get('type') == 'font_mixed': for shape in slide.shapes: if shape.has_text_frame: for para in shape.text_frame.paragraphs: for run in para.runs: run.font.name = DEFAULT_FONT fd['fixed'] = True def _fix_content(self, slide, issue, prs): fd = issue.fix_data if fd.get('type') == 'sparse': fill_ratio = fd.get('fill_ratio', 0) if fill_ratio < FILL_RATIO_THRESHOLDS['low']: try: box = slide.shapes.add_textbox( CONTENT_LEFT, Emu(int(FOOTER_TOP) - Emu(1600000)), Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT - Emu(200000)), Emu(1500000)) tf = box.text_frame tf.word_wrap = True p = tf.paragraphs[0] p.text = ( '[WARNING] 此页面内容不足,需补充深度分析内容。' '分析应包含:具体数据引用(含数值和单位)、' '与同类/历史/目标的对比分析、' '数据背后原因的至少2条解读、' '以及可执行的业务行动建议。' '请勿使用"要加强"、"进一步优化"等模糊措辞。' ) p.font.size = Pt(12) p.font.color.rgb = RGBColor(0xCC, 0x33, 0x00) p.font.name = DEFAULT_FONT p.font.bold = True fd['fixed'] = True fd['needs_rebuild'] = True except Exception: pass elif fd.get('type') == 'empty_page': fd['needs_rebuild'] = True fd['fixed'] = True elif fd.get('type') == 'chart_no_text': fd['needs_rebuild'] = True fd['fixed'] = True elif fd.get('type') == 'insight_count': fd['needs_rebuild'] = True fd['fixed'] = True elif fd.get('type') == 'short_text': fd['needs_rebuild'] = True fd['fixed'] = True elif fd.get('type') in ('dynamic_page_not_supported', 'kpi_layout_over_capacity'): fd['needs_rebuild'] = True fd['fixed'] = True elif fd.get('type') == 'core_metric_missing': fd['needs_rebuild'] = True fd['fixed'] = True elif fd.get('type') == 'missing_title': try: box = slide.shapes.add_textbox( CONTENT_LEFT, Emu(914400), Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT - Emu(200000)), Emu(508000)) p = box.text_frame.paragraphs[0] p.text = '数据详情' p.font.size = Pt(24) p.font.bold = True p.font.color.rgb = RGBColor(0x33, 0x33, 0x33) p.font.name = DEFAULT_FONT fd['fixed'] = True except Exception: pass elif fd.get('type') == 'text_overflow': shape = fd.get('shape') if shape and shape.has_text_frame: text_len = len(shape.text_frame.text or '') try: if text_len > 180 or int(shape.top) + int(shape.height) > int(FOOTER_TOP) - Emu(120000): fd['needs_rebuild'] = True else: for para in shape.text_frame.paragraphs: for run in para.runs: if run.font.size and run.font.size > Pt(9): run.font.size = Pt(9) except Exception: fd['needs_rebuild'] = True fd['fixed'] = True def _fix_data(self, slide, issue, prs): fd = issue.fix_data if fd.get('type') == 'page_num': fd['fixed'] = True def _shapes_overlap(self, a, b) -> bool: ax, ay, aw, ah = int(a.left), int(a.top), int(a.width), int(a.height) bx, by, bw, bh = int(b.left), int(b.top), int(b.width), int(b.height) if ax + aw <= bx or bx + bw <= ax: return False if ay + ah <= by or by + bh <= ay: return False return True def _is_intentional_overlap(self, a, b) -> bool: if hasattr(a, 'is_placeholder') or hasattr(b, 'is_placeholder'): return True a_area = int(a.width) * int(a.height) b_area = int(b.width) * int(b.height) if a_area > b_area * 3 or b_area > a_area * 3: return True return False def _is_title_shape(self, shape) -> bool: if not shape.has_text_frame: return False try: y = int(shape.top) return y < int(CONTENT_TOP_BASE) + Emu(200000) except Exception: return False def _find_empty_template_artifacts(self, slide) -> list: artifacts = [] shapes = list(slide.shapes) empty_text_boxes = [] for shape in shapes: if shape.has_text_frame: text = (shape.text_frame.text or '').strip() if text: continue if int(shape.width) < Emu(200000) or int(shape.height) < Emu(120000): continue if int(shape.top) < Emu(900000) or int(shape.top) > int(FOOTER_TOP) - Emu(100000): continue empty_text_boxes.append(shape) artifacts.append(shape) for shape in shapes: if shape.has_text_frame: continue try: is_large_soft_card = ( int(shape.width) >= Emu(1000000) and int(shape.height) >= Emu(500000) and int(shape.top) < int(FOOTER_TOP) - Emu(400000) ) if not is_large_soft_card: continue overlaps_empty_text = any(self._shapes_overlap(shape, box) for box in empty_text_boxes) if overlaps_empty_text: artifacts.append(shape) except Exception: continue # Preserve order while de-duplicating. seen = set() unique = [] for shape in artifacts: key = id(shape) if key not in seen: unique.append(shape) seen.add(key) return unique def _remove_shape(self, shape): el = shape.element el.getparent().remove(el) def _is_text_overflowing(self, shape) -> bool: if not shape.has_text_frame: return False text = shape.text_frame.text if not text.strip(): return False if len(text) > 800: return True try: w = int(shape.width) h = int(shape.height) width_pt = max(1, w / 12700.0) max_font_pt = 10 para_count = 0 for para in shape.text_frame.paragraphs: if not para.text.strip(): continue para_count += 1 for run in para.runs: if run.font.size: max_font_pt = max(max_font_pt, run.font.size / 12700.0) chars_per_line = max(8, int(width_pt / (max_font_pt * 1.15))) est_lines = max(1, (len(text) + chars_per_line - 1) // chars_per_line) est_height = int((est_lines * max_font_pt * 1.2 + para_count * 4) * 12700) if est_height > h * 1.15: return True if h < Emu(200000) and len(text) > 80: return True except Exception: pass return False def _shape_name(shape): try: if shape.has_text_frame: return shape.text_frame.text[:20].replace('\n', ' ') except Exception: pass try: return shape.shape_type except Exception: pass return '无名形状' def _is_in_content_area(shape): try: return int(shape.top) >= int(CONTENT_TOP_BASE) except Exception: return False if __name__ == '__main__': print("QualityInspector module loaded") inspector = QualityInspector() print("Ready to inspect PPT files")