# quality_inspector.py
  1. """
  2. PPT quality inspector and auto-fix engine.
  3. Inspects generated PPT for layout, visual, content, and data issues,
  4. then auto-fixes them iteratively until quality threshold is met.
  5. """
  6. import re
  7. from pptx import Presentation
  8. from pptx.util import Emu, Pt
  9. from pptx.dml.color import RGBColor
  10. from collections import Counter
  11. from quality_rules import (
  12. QUALITY_RULES, SEVERITY_WEIGHTS, CATEGORY_WEIGHTS,
  13. FILL_RATIO_THRESHOLDS, FONT_SIZE_MIN, FONT_SIZE_MAX,
  14. TEXT_MIN_LENGTH, INSIGHT_MIN_COUNT, PAGE_MIN_TEXT_LENGTH,
  15. SAFE_MARGIN, CONTENT_LEFT, CONTENT_TOP_BASE,
  16. FOOTER_TOP, SLIDE_WIDTH, SLIDE_HEIGHT, DEFAULT_FONT,
  17. get_quality_label, calculate_score,
  18. )
  19. from page_layouts import calculate_fill_ratio, ensure_safe_position
  20. class QualityIssue:
  21. def __init__(self, severity, category, page_index, description,
  22. rule_id='', auto_fixable=True, fix_data=None):
  23. self.severity = severity
  24. self.category = category
  25. self.page_index = page_index
  26. self.description = description
  27. self.rule_id = rule_id
  28. self.auto_fixable = auto_fixable
  29. self.fix_data = fix_data or {}
  30. def __repr__(self):
  31. return f"[{self.severity}] Page {self.page_index+1}: {self.description}"
  32. class QualityInspector:
  33. def __init__(self, theme_colors: dict = None):
  34. self.theme_colors = theme_colors or {}
  35. self.fix_count = 0
  36. self.fix_log = []
  37. def inspect(self, prs: Presentation, config=None) -> list[QualityIssue]:
  38. issues = []
  39. issues += self._check_confirmation_alignment(prs, config)
  40. for page_idx, slide in enumerate(prs.slides):
  41. page_type = self._get_page_type(page_idx, config, len(prs.slides))
  42. issues += self._check_layout(slide, page_idx)
  43. issues += self._check_visual(slide, page_idx)
  44. issues += self._check_content(slide, page_idx, config, prs, page_type)
  45. issues += self._check_data(slide, page_idx, prs)
  46. return issues
  47. def _get_page_type(self, page_idx: int, config, total_slides: int) -> str:
  48. if config and hasattr(config, 'pages') and page_idx < len(config.pages):
  49. return config.pages[page_idx].page_type
  50. if page_idx == 0:
  51. return 'cover'
  52. if page_idx == total_slides - 1:
  53. return 'end'
  54. if page_idx == 1:
  55. return 'toc'
  56. return 'content'
  57. def _check_confirmation_alignment(self, prs, config) -> list[QualityIssue]:
  58. issues = []
  59. if not config:
  60. return issues
  61. selected_pages = [p for p in getattr(config, 'pages', []) if getattr(p, 'selected', True)]
  62. if getattr(config, 'require_six_confirmations', False):
  63. confirmation = getattr(config, 'user_confirmation', None)
  64. if confirmation and hasattr(confirmation, 'is_complete') and not confirmation.is_complete():
  65. issues.append(QualityIssue(
  66. 'critical', 'data', -1,
  67. '六项确认未完成,PPT 不应进入输出阶段',
  68. 'D006', False,
  69. {'type': 'confirmation_incomplete'}
  70. ))
  71. if config and getattr(config, 'page_count_range', None) and selected_pages:
  72. low, high = config.page_count_range
  73. if len(selected_pages) < low or len(selected_pages) > high:
  74. issues.append(QualityIssue(
  75. 'major', 'data', -1,
  76. f'页面数量 {len(selected_pages)} 不在确认范围 {low}-{high} 内',
  77. 'D006', False,
  78. {'type': 'page_count_range'}
  79. ))
  80. if config and getattr(config, 'metrics', None) and len(selected_pages) > 0:
  81. selected_metrics = [m for m in config.metrics if getattr(m, 'selected', True)]
  82. if not selected_metrics:
  83. issues.append(QualityIssue(
  84. 'critical', 'data', -1,
  85. '未找到已确认的核心指标,无法验证输出一致性',
  86. 'D006', False,
  87. {'type': 'missing_metrics'}
  88. ))
  89. return issues
  90. def auto_fix(self, prs: Presentation, issues: list[QualityIssue]):
  91. fixable = [i for i in issues if i.auto_fixable]
  92. self.fix_count = 0
  93. self.fix_log = []
  94. for issue in fixable:
  95. try:
  96. if issue.page_index < 0:
  97. continue
  98. slide = prs.slides[issue.page_index]
  99. self._apply_fix(slide, issue, prs)
  100. self.fix_count += 1
  101. except Exception as e:
  102. self.fix_log.append(f"Fix failed for {issue.rule_id}: {e}")
  103. return self.fix_count
  104. def _apply_fix(self, slide, issue, prs):
  105. category = issue.category
  106. if category == 'layout':
  107. self._fix_layout(slide, issue)
  108. elif category == 'visual':
  109. self._fix_visual(slide, issue)
  110. elif category == 'content':
  111. self._fix_content(slide, issue, prs)
  112. elif category == 'data':
  113. self._fix_data(slide, issue, prs)
  114. def generate_report(self, issues: list[QualityIssue], iteration: int = 1,
  115. total_pages: int = 0) -> str:
  116. lines = []
  117. lines.append('═' * 50)
  118. lines.append(f' PPT 质量自检报告 (第 {iteration} 轮)')
  119. lines.append('═' * 50)
  120. if not issues:
  121. lines.append('[PASS] 全部通过!未发现任何质量问题。')
  122. return '\n'.join(lines)
  123. by_page = {}
  124. for iss in issues:
  125. p = iss.page_index
  126. if p not in by_page:
  127. by_page[p] = []
  128. by_page[p].append(iss)
  129. for p_idx in sorted(by_page.keys()):
  130. page_issues = by_page[p_idx]
  131. sev_order = {'critical': 0, 'major': 1, 'minor': 2}
  132. page_issues.sort(key=lambda x: sev_order.get(x.severity, 3))
  133. has_critical = any(i.severity == 'critical' for i in page_issues)
  134. has_major = any(i.severity == 'major' for i in page_issues)
  135. if has_critical:
  136. icon = '[CRIT]'
  137. elif has_major:
  138. icon = '[MAJ]'
  139. else:
  140. icon = '[OK]'
  141. lines.append(f'{icon} 第{p_idx+1}页: {len(page_issues)} 个问题')
  142. for iss in page_issues:
  143. sev_icon = {'critical': '[!!]', 'major': '[!]', 'minor': '[-]'}.get(iss.severity, '')
  144. status = ' [FIXED]' if iss.auto_fixable and iss.fix_data.get('fixed') else ''
  145. lines.append(f' ├─ {sev_icon} {iss.description}{status}')
  146. lines.append('─' * 50)
  147. by_sev = Counter(i.severity for i in issues)
  148. by_cat = Counter(i.category for i in issues)
  149. fixed = sum(1 for i in issues if i.auto_fixable and i.fix_data.get('fixed'))
  150. score = calculate_score(dict(by_sev), dict(by_cat), max(total_pages, 1))
  151. label = get_quality_label(score)
  152. lines.append(f'总结: {len(issues)} 个问题 | '
  153. f'{by_sev.get("critical", 0)} 严重 + '
  154. f'{by_sev.get("major", 0)} 主要 + '
  155. f'{by_sev.get("minor", 0)} 次要')
  156. lines.append(f'自动修复: {fixed}/{sum(1 for i in issues if i.auto_fixable)} 个')
  157. lines.append(f'最终质量评分: {score}/100 [{label}]')
  158. lines.append('═' * 50)
  159. return '\n'.join(lines)
  160. def quality_assured_build(self, build_fn, data, config, output_path,
  161. max_iterations=None, _attempt=0) -> tuple:
  162. max_iterations = max_iterations or config.max_fix_iterations
  163. total_pages = 0
  164. needs_rebuild = False
  165. rebuilt_once = False
  166. prs = None
  167. for iteration in range(1, max_iterations + 1):
  168. if iteration == 1 or needs_rebuild:
  169. if needs_rebuild:
  170. if rebuilt_once and iteration > 2:
  171. print(f'[INFO] 已尝试重建,不再继续重建以避免无限循环')
  172. needs_rebuild = False
  173. else:
  174. print(f'[REBUILD] 检测到需要重建的页面,触发重新生成...')
  175. rebuilt_once = True
  176. needs_rebuild = False
  177. prs = build_fn(data, config)
  178. total_pages = len(prs.slides)
  179. issues = self.inspect(prs, config)
  180. if not issues:
  181. print(f'[PASS] 第 {iteration} 次迭代:无问题,质量达标')
  182. break
  183. by_sev = Counter(i.severity for i in issues)
  184. print(f'[INSPECT] 第 {iteration} 次自检:{by_sev.get("critical",0)} 严重 + '
  185. f'{by_sev.get("major",0)} 主要 + {by_sev.get("minor",0)} 次要')
  186. fixable = [i for i in issues if i.auto_fixable]
  187. self.auto_fix(prs, fixable)
  188. print(f'[FIX] 自动修复了 {self.fix_count} 个问题')
  189. for issue in fixable:
  190. if issue.fix_data.get('needs_rebuild'):
  191. needs_rebuild = True
  192. print(f'[WARN] 检测到内容严重不足,将在下一轮迭代中重建')
  193. break
  194. unfixable = [i for i in issues if not i.auto_fixable]
  195. if unfixable:
  196. print(f'[WARN] {len(unfixable)} 个问题需人工确认')
  197. remaining = self.inspect(prs, config)
  198. if not remaining:
  199. print(f'[PASS] 第 {iteration} 次修复后:所有问题已解决')
  200. break
  201. has_critical = any(i.severity == 'critical' for i in remaining)
  202. has_major = any(i.severity == 'major' for i in remaining)
  203. if not has_critical and not has_major:
  204. print(f'[PASS] 第 {iteration} 次修复后:仅剩次要问题,质量达标')
  205. break
  206. if needs_rebuild and iteration < max_iterations:
  207. continue
  208. final_issues = self.inspect(prs, config)
  209. by_sev = Counter(i.severity for i in final_issues)
  210. by_cat = Counter(i.category for i in final_issues)
  211. score = calculate_score(dict(by_sev), dict(by_cat), max(total_pages, 1))
  212. label = get_quality_label(score)
  213. report = self.generate_report(final_issues, iteration, total_pages)
  214. print(report)
  215. if score >= config.quality_threshold:
  216. prs.save(output_path)
  217. print(f'[PASS] 高质量 PPT 已输出: {output_path}')
  218. else:
  219. has_critical_final = any(i.severity == 'critical' for i in final_issues)
  220. has_layout_critical = any(
  221. i.severity == 'critical' and i.category == 'layout'
  222. for i in final_issues
  223. )
  224. if has_layout_critical:
  225. raise RuntimeError(
  226. f'PPT 存在严重布局问题(评分 {score}),无法自动修复。'
  227. f'请检查页面配置和数据。'
  228. )
  229. prs.save(output_path)
  230. if has_critical_final:
  231. print(f'[WARN] 质量评分 {score}(低于阈值 {config.quality_threshold}),'
  232. f'存在 {by_sev.get("critical", 0)} 个严重内容问题,建议补充分析数据后重新生成')
  233. else:
  234. print(f'[WARN] 质量评分 {score}(低于阈值 {config.quality_threshold}),已输出但建议复核')
  235. return prs, final_issues
  236. def _check_layout(self, slide, page_idx) -> list[QualityIssue]:
  237. issues = []
  238. sw = int(slide.slide_width) if hasattr(slide, 'slide_width') else SLIDE_WIDTH
  239. sh = int(slide.slide_height) if hasattr(slide, 'slide_height') else SLIDE_HEIGHT
  240. for shape in slide.shapes:
  241. l, t = int(shape.left), int(shape.top)
  242. w, h = int(shape.width), int(shape.height)
  243. if l < -100:
  244. issues.append(QualityIssue('critical', 'layout', page_idx,
  245. f'形状"{_shape_name(shape)}"飞出页面左边界 (left={l})',
  246. 'L001', True, {'shape': shape, 'type': 'left'}))
  247. if l + w > sw + 500:
  248. issues.append(QualityIssue('critical', 'layout', page_idx,
  249. f'形状"{_shape_name(shape)}"飞出页面右边界 (right={l+w}, max={sw})',
  250. 'L002', True, {'shape': shape, 'type': 'right'}))
  251. if t < -100:
  252. issues.append(QualityIssue('critical', 'layout', page_idx,
  253. f'形状"{_shape_name(shape)}"飞出页面顶部 (top={t})',
  254. 'L003', True, {'shape': shape, 'type': 'top'}))
  255. if t + h > sh + 500:
  256. issues.append(QualityIssue('critical', 'layout', page_idx,
  257. f'形状"{_shape_name(shape)}"飞出页面底部 (bottom={t+h}, max={sh})',
  258. 'L004', True, {'shape': shape, 'type': 'bottom'}))
  259. if l < SAFE_MARGIN and l >= 0:
  260. if l == 0 and w >= sw * 0.8:
  261. continue
  262. if int(shape.top) < 0 or int(shape.top) + int(shape.height) < Emu(100000):
  263. continue
  264. if int(shape.top) > sh - Emu(500000):
  265. continue
  266. issues.append(QualityIssue('minor', 'layout', page_idx,
  267. f'形状"{_shape_name(shape)}"过于靠近左边缘',
  268. 'L007', True, {'shape': shape, 'type': 'edge_left'}))
  269. placeholder_pattern = re.compile(r'\{[^}]+\}')
  270. for shape in slide.shapes:
  271. if shape.has_text_frame:
  272. text = shape.text_frame.text
  273. if placeholder_pattern.search(text):
  274. issues.append(QualityIssue('critical', 'layout', page_idx,
  275. f'发现未替换占位符: "{text[:50]}"',
  276. 'L006', True, {'shape': shape, 'type': 'placeholder'}))
  277. empty_artifacts = self._find_empty_template_artifacts(slide)
  278. for shape in empty_artifacts:
  279. issues.append(QualityIssue(
  280. 'major', 'layout', page_idx,
  281. f'发现空模板组件残留: "{_shape_name(shape)}"',
  282. 'L008', True, {'shape': shape, 'type': 'empty_template_artifact'}
  283. ))
  284. shapes_list = list(slide.shapes)
  285. for i, a in enumerate(shapes_list):
  286. for b in shapes_list[i+1:]:
  287. if self._shapes_overlap(a, b):
  288. a_name = _shape_name(a)
  289. b_name = _shape_name(b)
  290. if self._is_intentional_overlap(a, b):
  291. continue
  292. issues.append(QualityIssue('major', 'layout', page_idx,
  293. f'形状"{a_name}"与"{b_name}"存在重叠',
  294. 'L005', True, {'shape_a': a, 'shape_b': b, 'type': 'overlap'}))
  295. return issues
  296. def _check_visual(self, slide, page_idx) -> list[QualityIssue]:
  297. issues = []
  298. fonts_seen = {}
  299. for shape in slide.shapes:
  300. if not shape.has_text_frame:
  301. continue
  302. for para in shape.text_frame.paragraphs:
  303. for run in para.runs:
  304. if run.font.size:
  305. size_pt = run.font.size / 12700.0
  306. if size_pt < 6:
  307. issues.append(QualityIssue('major', 'visual', page_idx,
  308. f'字号过小 ({size_pt:.1f}pt): "{run.text[:20]}"',
  309. 'V002', True, {'run': run, 'type': 'font_small'}))
  310. elif size_pt > 65:
  311. issues.append(QualityIssue('major', 'visual', page_idx,
  312. f'字号过大 ({size_pt:.1f}pt): "{run.text[:20]}"',
  313. 'V003', True, {'run': run, 'type': 'font_large'}))
  314. if run.font.name:
  315. fonts_seen[run.font.name] = fonts_seen.get(run.font.name, 0) + 1
  316. if len(fonts_seen) > 3:
  317. issues.append(QualityIssue('minor', 'visual', page_idx,
  318. f'字体使用超过3种: {list(fonts_seen.keys())}',
  319. 'V001', True, {'type': 'font_mixed', 'fonts': fonts_seen}))
  320. return issues
    def _check_content(self, slide, page_idx, config, prs, page_type='content') -> list[QualityIssue]:
        """Check one slide for content problems.

        Cover and end pages are skipped. Table-of-contents pages only get a
        minimum-text check. Every other page is checked for fill ratio
        (C001), total text volume (C008), paragraph length (C005), number of
        analysis paragraphs (C007), a title (C006), text overflow (C004) and
        charts without accompanying text (C009). Findings from the
        dynamic-page-fit and core-metric checks are included as well.

        Args:
            slide: Slide to check.
            page_idx: Zero-based slide index recorded on each finding.
            config: Build configuration forwarded to the sub-checks.
            prs: Deck; only its slide count is used (title rule).
            page_type: Page type as resolved by the caller.

        Returns:
            The content findings for this slide.
        """
        issues = []
        # Cover and end pages are exempt from every content rule.
        if page_type in ('cover', 'end'):
            return issues
        issues += self._check_dynamic_page_fit(page_idx, page_type, config)
        issues += self._check_core_metric_presence(slide, page_idx, page_type, config)
        if page_type == 'toc':
            # A table of contents only needs a minimal amount of text.
            content_shapes = [s for s in slide.shapes
                              if s.has_text_frame and _is_in_content_area(s)]
            all_content_text = ''
            for shape in content_shapes:
                text = shape.text_frame.text.strip()
                if text:
                    all_content_text += text + '\n'
            if len(all_content_text.strip()) < 30:
                issues.append(QualityIssue('minor', 'content', page_idx,
                                           '目录页内容过少',
                                           'C008', False, {'type': 'empty_page'}))
            return issues
        fill_ratio = calculate_fill_ratio(slide)
        # The listed page types get the strict two-level fill check; all
        # other types are only flagged below half the 'sparse' threshold.
        if page_type in ('kpi_overview', 'trend', 'distribution', 'ranking', 'summary'):
            if fill_ratio < FILL_RATIO_THRESHOLDS['sparse']:
                issues.append(QualityIssue('critical', 'content', page_idx,
                                           f'页面内容严重不足,填充率仅 {fill_ratio:.1%},必须补充图表和分析文本',
                                           'C001', True, {'type': 'sparse', 'fill_ratio': fill_ratio}))
            elif fill_ratio < FILL_RATIO_THRESHOLDS['low']:
                issues.append(QualityIssue('major', 'content', page_idx,
                                           f'页面留白偏多,填充率 {fill_ratio:.1%},需补充分析内容',
                                           'C001', True, {'type': 'sparse', 'fill_ratio': fill_ratio}))
        elif fill_ratio < FILL_RATIO_THRESHOLDS['sparse'] / 2:
            issues.append(QualityIssue('minor', 'content', page_idx,
                                       f'页面填充率过低 {fill_ratio:.1%}',
                                       'C001', False))
        content_shapes = [s for s in slide.shapes
                          if s.has_text_frame and _is_in_content_area(s)]
        all_content_text = ''
        insight_blocks = 0
        for shape in content_shapes:
            tf = shape.text_frame
            full_text = tf.text.strip()
            if not full_text:
                continue
            all_content_text += full_text + '\n'
            # A paragraph counts as an insight block once it reaches the
            # minimum text length.
            for para in tf.paragraphs:
                para_text = para.text.strip()
                if para_text and len(para_text) >= TEXT_MIN_LENGTH:
                    insight_blocks += 1
        total_content_chars = len(all_content_text.strip())
        # Lengths of all non-blank paragraphs in the content area.
        text_lengths = [len(p.text.strip()) for s in content_shapes
                        for p in s.text_frame.paragraphs if p.text.strip()]
        if total_content_chars < PAGE_MIN_TEXT_LENGTH:
            issues.append(QualityIssue('critical', 'content', page_idx,
                                       f'页面内容为空!所有文本框总字数仅 {total_content_chars} 字(要求≥{PAGE_MIN_TEXT_LENGTH}字)',
                                       'C008', True, {'type': 'empty_page', 'char_count': total_content_chars}))
        elif total_content_chars < 200:
            issues.append(QualityIssue('major', 'content', page_idx,
                                       f'页面内容过少,总字数仅 {total_content_chars} 字,分析深度严重不足',
                                       'C008', True, {'type': 'empty_page', 'char_count': total_content_chars}))
        if text_lengths and max(text_lengths) < TEXT_MIN_LENGTH:
            issues.append(QualityIssue('critical', 'content', page_idx,
                                       f'分析文本过短(最长为 {max(text_lengths)} 字),需撰写≥{TEXT_MIN_LENGTH}字的深度分析',
                                       'C005', True, {'type': 'short_text', 'max_length': max(text_lengths)}))
        if insight_blocks < INSIGHT_MIN_COUNT:
            issues.append(QualityIssue('critical', 'content', page_idx,
                                       f'分析段数不足,仅 {insight_blocks} 段(要求≥{INSIGHT_MIN_COUNT}段)',
                                       'C007', True, {'type': 'insight_count', 'count': insight_blocks}))
        # Title detection: either non-placeholder text inside the title
        # band, or text with a title keyword placed slightly lower.
        has_title = False
        for shape in slide.shapes:
            if shape.has_text_frame:
                text = shape.text_frame.text
                try:
                    sy = int(shape.top)
                except Exception:
                    # Unreadable position: use a value far below any band.
                    sy = 99999999
                if sy < CONTENT_TOP_BASE + Emu(100000) and sy > Emu(500000):
                    if len(text.strip()) > 0 and not text.startswith('{'):
                        has_title = True
                        break
                if any(kw in text for kw in ['概览', '趋势', '分布', '分析', '总结',
                                             '排行', '报告', '建议', '告警', '要点']):
                    if sy < CONTENT_TOP_BASE + Emu(400000):
                        has_title = True
                        break
        # The first and last slides are never required to have a title.
        if not has_title and page_idx > 0 and page_idx < len(prs.slides) - 1:
            issues.append(QualityIssue('critical', 'content', page_idx,
                                       '页面缺少标题', 'C006', True, {'type': 'missing_title'}))
        for shape in slide.shapes:
            if shape.has_text_frame:
                if self._is_text_overflowing(shape):
                    issues.append(QualityIssue('major', 'content', page_idx,
                                               f'文本可能超出文本框边界: "{shape.text_frame.text[:30]}"',
                                               'C004', True, {'shape': shape, 'type': 'text_overflow'}))
        has_chart = False
        for shape in slide.shapes:
            if shape.has_chart:
                has_chart = True
                break
        # A chart with no qualifying paragraph is flagged from the third
        # slide onwards.
        if has_chart and insight_blocks == 0 and page_idx >= 2:
            issues.append(QualityIssue('critical', 'content', page_idx,
                                       '页面有图表但完全缺少分析文本,图表数据需要被解读和说明',
                                       'C009', True, {'type': 'chart_no_text'}))
        return issues
  423. def _check_dynamic_page_fit(self, page_idx, page_type, config) -> list[QualityIssue]:
  424. issues = []
  425. profile = getattr(config, 'data_profiling', None) or {}
  426. if not profile:
  427. return issues
  428. time_cols = profile.get('time_columns', [])
  429. cat_cols = profile.get('category_columns', [])
  430. num_cols = profile.get('numeric_columns', [])
  431. if page_type == 'trend' and (not time_cols or not num_cols):
  432. issues.append(QualityIssue(
  433. 'critical', 'content', page_idx,
  434. '趋势页缺少可用时间列或数值列,需要重建或降级为摘要页',
  435. 'C010', True, {'type': 'dynamic_page_not_supported', 'page_type': page_type}
  436. ))
  437. elif page_type in ('distribution', 'ranking') and (not cat_cols or not num_cols):
  438. issues.append(QualityIssue(
  439. 'critical', 'content', page_idx,
  440. f'{page_type} 页缺少分类维度或数值列,需要重建或降级为摘要页',
  441. 'C010', True, {'type': 'dynamic_page_not_supported', 'page_type': page_type}
  442. ))
  443. elif page_type == 'kpi_overview':
  444. selected_metrics = [m for m in getattr(config, 'metrics', []) if getattr(m, 'selected', True)]
  445. if len(selected_metrics) > 6:
  446. issues.append(QualityIssue(
  447. 'major', 'content', page_idx,
  448. f'核心指标数量 {len(selected_metrics)} 超过 6 个,KPI页应拆页或改为紧凑布局',
  449. 'C011', True, {'type': 'kpi_layout_over_capacity', 'count': len(selected_metrics)}
  450. ))
  451. return issues
  452. def _check_core_metric_presence(self, slide, page_idx, page_type, config) -> list[QualityIssue]:
  453. issues = []
  454. if page_type != 'kpi_overview' or not config:
  455. return issues
  456. selected_metrics = [m for m in getattr(config, 'metrics', []) if getattr(m, 'selected', True)]
  457. if not selected_metrics:
  458. return issues
  459. slide_text = '\n'.join(
  460. shape.text_frame.text for shape in slide.shapes
  461. if shape.has_text_frame and shape.text_frame.text
  462. )
  463. missing = [m.label for m in selected_metrics[:6] if m.label and m.label not in slide_text]
  464. if missing:
  465. issues.append(QualityIssue(
  466. 'critical', 'data', page_idx,
  467. 'KPI概览页缺少已确认核心指标:' + '、'.join(missing),
  468. 'D006', True, {'type': 'core_metric_missing', 'missing': missing}
  469. ))
  470. return issues
  471. def _check_data(self, slide, page_idx, prs) -> list[QualityIssue]:
  472. issues = []
  473. if page_idx == 0:
  474. return issues
  475. for shape in slide.shapes:
  476. if shape.has_text_frame:
  477. text = shape.text_frame.text
  478. page_pattern = re.search(r'(\d+)\s*/\s*(\d+)', text)
  479. if page_pattern:
  480. current = int(page_pattern.group(1))
  481. total = int(page_pattern.group(2))
  482. if total == 0:
  483. issues.append(QualityIssue('major', 'data', page_idx,
  484. f'页码格式异常: {text.strip()}',
  485. 'D002', True, {'type': 'page_num'}))
  486. return issues
  487. def _fix_layout(self, slide, issue):
  488. fd = issue.fix_data
  489. if fd.get('type') in ('left', 'right', 'top', 'bottom'):
  490. shape = fd.get('shape')
  491. if shape:
  492. ensure_safe_position(shape, SLIDE_WIDTH, SLIDE_HEIGHT)
  493. fd['fixed'] = True
  494. elif fd.get('type') == 'overlap':
  495. a, b = fd.get('shape_a'), fd.get('shape_b')
  496. if a and b:
  497. try:
  498. if int(b.left) < int(a.left) + int(a.width) + Emu(50000):
  499. b.left = int(a.left) + int(a.width) + Emu(152400)
  500. ensure_safe_position(b, SLIDE_WIDTH, SLIDE_HEIGHT)
  501. except Exception:
  502. pass
  503. fd['fixed'] = True
  504. elif fd.get('type') == 'placeholder':
  505. shape = fd.get('shape')
  506. if shape and shape.has_text_frame:
  507. for para in shape.text_frame.paragraphs:
  508. para.text = re.sub(r'\{[^}]+\}', '', para.text)
  509. fd['fixed'] = True
  510. elif fd.get('type') == 'edge_left':
  511. shape = fd.get('shape')
  512. if shape:
  513. try:
  514. w = int(shape.width)
  515. if w < SLIDE_WIDTH * 0.5:
  516. shape.left = SAFE_MARGIN
  517. except Exception:
  518. pass
  519. fd['fixed'] = True
  520. elif fd.get('type') == 'empty_template_artifact':
  521. shape = fd.get('shape')
  522. if shape:
  523. self._remove_shape(shape)
  524. fd['fixed'] = True
  525. def _fix_visual(self, slide, issue):
  526. fd = issue.fix_data
  527. if fd.get('type') == 'font_small':
  528. run = fd.get('run')
  529. if run:
  530. run.font.size = FONT_SIZE_MIN
  531. fd['fixed'] = True
  532. elif fd.get('type') == 'font_large':
  533. run = fd.get('run')
  534. if run:
  535. run.font.size = FONT_SIZE_MAX
  536. fd['fixed'] = True
  537. elif fd.get('type') == 'font_mixed':
  538. for shape in slide.shapes:
  539. if shape.has_text_frame:
  540. for para in shape.text_frame.paragraphs:
  541. for run in para.runs:
  542. run.font.name = DEFAULT_FONT
  543. fd['fixed'] = True
  544. def _fix_content(self, slide, issue, prs):
  545. fd = issue.fix_data
  546. if fd.get('type') == 'sparse':
  547. fill_ratio = fd.get('fill_ratio', 0)
  548. if fill_ratio < FILL_RATIO_THRESHOLDS['low']:
  549. try:
  550. box = slide.shapes.add_textbox(
  551. CONTENT_LEFT, Emu(int(FOOTER_TOP) - Emu(1600000)),
  552. Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT - Emu(200000)), Emu(1500000))
  553. tf = box.text_frame
  554. tf.word_wrap = True
  555. p = tf.paragraphs[0]
  556. p.text = (
  557. '[WARNING] 此页面内容不足,需补充深度分析内容。'
  558. '分析应包含:具体数据引用(含数值和单位)、'
  559. '与同类/历史/目标的对比分析、'
  560. '数据背后原因的至少2条解读、'
  561. '以及可执行的业务行动建议。'
  562. '请勿使用"要加强"、"进一步优化"等模糊措辞。'
  563. )
  564. p.font.size = Pt(12)
  565. p.font.color.rgb = RGBColor(0xCC, 0x33, 0x00)
  566. p.font.name = DEFAULT_FONT
  567. p.font.bold = True
  568. fd['fixed'] = True
  569. fd['needs_rebuild'] = True
  570. except Exception:
  571. pass
  572. elif fd.get('type') == 'empty_page':
  573. fd['needs_rebuild'] = True
  574. fd['fixed'] = True
  575. elif fd.get('type') == 'chart_no_text':
  576. fd['needs_rebuild'] = True
  577. fd['fixed'] = True
  578. elif fd.get('type') == 'insight_count':
  579. fd['needs_rebuild'] = True
  580. fd['fixed'] = True
  581. elif fd.get('type') == 'short_text':
  582. fd['needs_rebuild'] = True
  583. fd['fixed'] = True
  584. elif fd.get('type') in ('dynamic_page_not_supported', 'kpi_layout_over_capacity'):
  585. fd['needs_rebuild'] = True
  586. fd['fixed'] = True
  587. elif fd.get('type') == 'core_metric_missing':
  588. fd['needs_rebuild'] = True
  589. fd['fixed'] = True
  590. elif fd.get('type') == 'missing_title':
  591. try:
  592. box = slide.shapes.add_textbox(
  593. CONTENT_LEFT, Emu(914400),
  594. Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT - Emu(200000)), Emu(508000))
  595. p = box.text_frame.paragraphs[0]
  596. p.text = '数据详情'
  597. p.font.size = Pt(24)
  598. p.font.bold = True
  599. p.font.color.rgb = RGBColor(0x33, 0x33, 0x33)
  600. p.font.name = DEFAULT_FONT
  601. fd['fixed'] = True
  602. except Exception:
  603. pass
  604. elif fd.get('type') == 'text_overflow':
  605. shape = fd.get('shape')
  606. if shape and shape.has_text_frame:
  607. text_len = len(shape.text_frame.text or '')
  608. try:
  609. if text_len > 180 or int(shape.top) + int(shape.height) > int(FOOTER_TOP) - Emu(120000):
  610. fd['needs_rebuild'] = True
  611. else:
  612. for para in shape.text_frame.paragraphs:
  613. for run in para.runs:
  614. if run.font.size and run.font.size > Pt(9):
  615. run.font.size = Pt(9)
  616. except Exception:
  617. fd['needs_rebuild'] = True
  618. fd['fixed'] = True
  619. def _fix_data(self, slide, issue, prs):
  620. fd = issue.fix_data
  621. if fd.get('type') == 'page_num':
  622. fd['fixed'] = True
  623. def _shapes_overlap(self, a, b) -> bool:
  624. ax, ay, aw, ah = int(a.left), int(a.top), int(a.width), int(a.height)
  625. bx, by, bw, bh = int(b.left), int(b.top), int(b.width), int(b.height)
  626. if ax + aw <= bx or bx + bw <= ax:
  627. return False
  628. if ay + ah <= by or by + bh <= ay:
  629. return False
  630. return True
  631. def _is_intentional_overlap(self, a, b) -> bool:
  632. if hasattr(a, 'is_placeholder') or hasattr(b, 'is_placeholder'):
  633. return True
  634. a_area = int(a.width) * int(a.height)
  635. b_area = int(b.width) * int(b.height)
  636. if a_area > b_area * 3 or b_area > a_area * 3:
  637. return True
  638. return False
  639. def _is_title_shape(self, shape) -> bool:
  640. if not shape.has_text_frame:
  641. return False
  642. try:
  643. y = int(shape.top)
  644. return y < int(CONTENT_TOP_BASE) + Emu(200000)
  645. except Exception:
  646. return False
  647. def _find_empty_template_artifacts(self, slide) -> list:
  648. artifacts = []
  649. shapes = list(slide.shapes)
  650. empty_text_boxes = []
  651. for shape in shapes:
  652. if shape.has_text_frame:
  653. text = (shape.text_frame.text or '').strip()
  654. if text:
  655. continue
  656. if int(shape.width) < Emu(200000) or int(shape.height) < Emu(120000):
  657. continue
  658. if int(shape.top) < Emu(900000) or int(shape.top) > int(FOOTER_TOP) - Emu(100000):
  659. continue
  660. empty_text_boxes.append(shape)
  661. artifacts.append(shape)
  662. for shape in shapes:
  663. if shape.has_text_frame:
  664. continue
  665. try:
  666. is_large_soft_card = (
  667. int(shape.width) >= Emu(1000000) and
  668. int(shape.height) >= Emu(500000) and
  669. int(shape.top) < int(FOOTER_TOP) - Emu(400000)
  670. )
  671. if not is_large_soft_card:
  672. continue
  673. overlaps_empty_text = any(self._shapes_overlap(shape, box) for box in empty_text_boxes)
  674. if overlaps_empty_text:
  675. artifacts.append(shape)
  676. except Exception:
  677. continue
  678. # Preserve order while de-duplicating.
  679. seen = set()
  680. unique = []
  681. for shape in artifacts:
  682. key = id(shape)
  683. if key not in seen:
  684. unique.append(shape)
  685. seen.add(key)
  686. return unique
  687. def _remove_shape(self, shape):
  688. el = shape.element
  689. el.getparent().remove(el)
  690. def _is_text_overflowing(self, shape) -> bool:
  691. if not shape.has_text_frame:
  692. return False
  693. text = shape.text_frame.text
  694. if not text.strip():
  695. return False
  696. if len(text) > 800:
  697. return True
  698. try:
  699. w = int(shape.width)
  700. h = int(shape.height)
  701. width_pt = max(1, w / 12700.0)
  702. max_font_pt = 10
  703. para_count = 0
  704. for para in shape.text_frame.paragraphs:
  705. if not para.text.strip():
  706. continue
  707. para_count += 1
  708. for run in para.runs:
  709. if run.font.size:
  710. max_font_pt = max(max_font_pt, run.font.size / 12700.0)
  711. chars_per_line = max(8, int(width_pt / (max_font_pt * 1.15)))
  712. est_lines = max(1, (len(text) + chars_per_line - 1) // chars_per_line)
  713. est_height = int((est_lines * max_font_pt * 1.2 + para_count * 4) * 12700)
  714. if est_height > h * 1.15:
  715. return True
  716. if h < Emu(200000) and len(text) > 80:
  717. return True
  718. except Exception:
  719. pass
  720. return False
  721. def _shape_name(shape):
  722. try:
  723. if shape.has_text_frame:
  724. return shape.text_frame.text[:20].replace('\n', ' ')
  725. except Exception:
  726. pass
  727. try:
  728. return shape.shape_type
  729. except Exception:
  730. pass
  731. return '无名形状'
  732. def _is_in_content_area(shape):
  733. try:
  734. return int(shape.top) >= int(CONTENT_TOP_BASE)
  735. except Exception:
  736. return False
  737. if __name__ == '__main__':
  738. print("QualityInspector module loaded")
  739. inspector = QualityInspector()
  740. print("Ready to inspect PPT files")