# quality_inspector.py
"""
PPT quality inspector and auto-fix engine.
Inspects generated PPT for layout, visual, content, and data issues,
then auto-fixes them iteratively until quality threshold is met.
"""
import re
from collections import Counter, defaultdict

from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.util import Emu, Pt

from page_layouts import calculate_fill_ratio, ensure_safe_position
from quality_rules import (
    QUALITY_RULES, SEVERITY_WEIGHTS, CATEGORY_WEIGHTS,
    FILL_RATIO_THRESHOLDS, FONT_SIZE_MIN, FONT_SIZE_MAX,
    TEXT_MIN_LENGTH, INSIGHT_MIN_COUNT, PAGE_MIN_TEXT_LENGTH,
    SAFE_MARGIN, CONTENT_LEFT, CONTENT_TOP_BASE,
    FOOTER_TOP, SLIDE_WIDTH, SLIDE_HEIGHT, DEFAULT_FONT,
    get_quality_label, calculate_score,
)
  20. FORECAST_PAGE_TYPES = {
  21. 'forecast',
  22. 'prediction',
  23. 'plan',
  24. 'monthly_forecast',
  25. 'monthly_plan',
  26. 'next_month_plan',
  27. 'custom_forecast',
  28. 'custom_prediction',
  29. }
  30. class QualityIssue:
  31. def __init__(self, severity, category, page_index, description,
  32. rule_id='', auto_fixable=True, fix_data=None):
  33. self.severity = severity
  34. self.category = category
  35. self.page_index = page_index
  36. self.description = description
  37. self.rule_id = rule_id
  38. self.auto_fixable = auto_fixable
  39. self.fix_data = fix_data or {}
  40. def __repr__(self):
  41. return f"[{self.severity}] Page {self.page_index+1}: {self.description}"
  42. class QualityInspector:
  43. def __init__(self, theme_colors: dict = None, layout_context=None):
  44. self.theme_colors = theme_colors or {}
  45. self.layout_context = layout_context
  46. self.fix_count = 0
  47. self.fix_log = []
  48. def inspect(self, prs: Presentation, config=None) -> list[QualityIssue]:
  49. issues = []
  50. issues += self._check_confirmation_alignment(prs, config)
  51. for page_idx, slide in enumerate(prs.slides):
  52. page_type = self._get_page_type(page_idx, config, len(prs.slides))
  53. issues += self._check_layout(slide, page_idx)
  54. issues += self._check_visual(slide, page_idx)
  55. issues += self._check_content(slide, page_idx, config, prs, page_type)
  56. issues += self._check_data(slide, page_idx, prs)
  57. return issues
  58. def _get_page_type(self, page_idx: int, config, total_slides: int) -> str:
  59. if config and hasattr(config, 'pages') and page_idx < len(config.pages):
  60. return config.pages[page_idx].page_type
  61. if page_idx == 0:
  62. return 'cover'
  63. if page_idx == total_slides - 1:
  64. return 'end'
  65. if page_idx == 1:
  66. return 'toc'
  67. return 'content'
  68. def _check_confirmation_alignment(self, prs, config) -> list[QualityIssue]:
  69. issues = []
  70. if not config:
  71. return issues
  72. selected_pages = [p for p in getattr(config, 'pages', []) if getattr(p, 'selected', True)]
  73. if getattr(config, 'require_six_confirmations', False):
  74. confirmation = getattr(config, 'user_confirmation', None)
  75. if confirmation and hasattr(confirmation, 'is_complete') and not confirmation.is_complete():
  76. issues.append(QualityIssue(
  77. 'critical', 'data', -1,
  78. '六项确认未完成,PPT 不应进入输出阶段',
  79. 'D006', False,
  80. {'type': 'confirmation_incomplete'}
  81. ))
  82. if config and getattr(config, 'page_count_range', None) and selected_pages:
  83. low, high = config.page_count_range
  84. if len(selected_pages) < low or len(selected_pages) > high:
  85. issues.append(QualityIssue(
  86. 'major', 'data', -1,
  87. f'页面数量 {len(selected_pages)} 不在确认范围 {low}-{high} 内',
  88. 'D006', False,
  89. {'type': 'page_count_range'}
  90. ))
  91. if config and getattr(config, 'metrics', None) and len(selected_pages) > 0:
  92. selected_metrics = [m for m in config.metrics if getattr(m, 'selected', True)]
  93. if not selected_metrics:
  94. issues.append(QualityIssue(
  95. 'critical', 'data', -1,
  96. '未找到已确认的核心指标,无法验证输出一致性',
  97. 'D006', False,
  98. {'type': 'missing_metrics'}
  99. ))
  100. return issues
  101. def auto_fix(self, prs: Presentation, issues: list[QualityIssue]):
  102. fixable = [i for i in issues if i.auto_fixable]
  103. self.fix_count = 0
  104. self.fix_log = []
  105. for issue in fixable:
  106. try:
  107. if issue.page_index < 0:
  108. continue
  109. slide = prs.slides[issue.page_index]
  110. self._apply_fix(slide, issue, prs)
  111. self.fix_count += 1
  112. except Exception as e:
  113. self.fix_log.append(f"Fix failed for {issue.rule_id}: {e}")
  114. return self.fix_count
  115. def _apply_fix(self, slide, issue, prs):
  116. category = issue.category
  117. if category == 'layout':
  118. self._fix_layout(slide, issue)
  119. elif category == 'visual':
  120. self._fix_visual(slide, issue)
  121. elif category == 'content':
  122. self._fix_content(slide, issue, prs)
  123. elif category == 'data':
  124. self._fix_data(slide, issue, prs)
  125. def generate_report(self, issues: list[QualityIssue], iteration: int = 1,
  126. total_pages: int = 0) -> str:
  127. lines = []
  128. lines.append('═' * 50)
  129. lines.append(f' PPT 质量自检报告 (第 {iteration} 轮)')
  130. lines.append('═' * 50)
  131. if not issues:
  132. lines.append('[PASS] 全部通过!未发现任何质量问题。')
  133. return '\n'.join(lines)
  134. by_page = {}
  135. for iss in issues:
  136. p = iss.page_index
  137. if p not in by_page:
  138. by_page[p] = []
  139. by_page[p].append(iss)
  140. for p_idx in sorted(by_page.keys()):
  141. page_issues = by_page[p_idx]
  142. sev_order = {'critical': 0, 'major': 1, 'minor': 2}
  143. page_issues.sort(key=lambda x: sev_order.get(x.severity, 3))
  144. has_critical = any(i.severity == 'critical' for i in page_issues)
  145. has_major = any(i.severity == 'major' for i in page_issues)
  146. if has_critical:
  147. icon = '[CRIT]'
  148. elif has_major:
  149. icon = '[MAJ]'
  150. else:
  151. icon = '[OK]'
  152. lines.append(f'{icon} 第{p_idx+1}页: {len(page_issues)} 个问题')
  153. for iss in page_issues:
  154. sev_icon = {'critical': '[!!]', 'major': '[!]', 'minor': '[-]'}.get(iss.severity, '')
  155. status = ' [FIXED]' if iss.auto_fixable and iss.fix_data.get('fixed') else ''
  156. lines.append(f' ├─ {sev_icon} {iss.description}{status}')
  157. lines.append('─' * 50)
  158. by_sev = Counter(i.severity for i in issues)
  159. by_cat = Counter(i.category for i in issues)
  160. fixed = sum(1 for i in issues if i.auto_fixable and i.fix_data.get('fixed'))
  161. score = calculate_score(dict(by_sev), dict(by_cat), max(total_pages, 1))
  162. label = get_quality_label(score)
  163. lines.append(f'总结: {len(issues)} 个问题 | '
  164. f'{by_sev.get("critical", 0)} 严重 + '
  165. f'{by_sev.get("major", 0)} 主要 + '
  166. f'{by_sev.get("minor", 0)} 次要')
  167. lines.append(f'自动修复: {fixed}/{sum(1 for i in issues if i.auto_fixable)} 个')
  168. lines.append(f'最终质量评分: {score}/100 [{label}]')
  169. lines.append('═' * 50)
  170. return '\n'.join(lines)
  171. def quality_assured_build(self, build_fn, data, config, output_path,
  172. max_iterations=None, _attempt=0) -> tuple:
  173. max_iterations = max_iterations or config.max_fix_iterations
  174. total_pages = 0
  175. needs_rebuild = False
  176. rebuilt_once = False
  177. prs = None
  178. for iteration in range(1, max_iterations + 1):
  179. if iteration == 1 or needs_rebuild:
  180. if needs_rebuild:
  181. if rebuilt_once and iteration > 2:
  182. print(f'[INFO] 已尝试重建,不再继续重建以避免无限循环')
  183. needs_rebuild = False
  184. else:
  185. print(f'[REBUILD] 检测到需要重建的页面,触发重新生成...')
  186. rebuilt_once = True
  187. needs_rebuild = False
  188. prs = build_fn(data, config)
  189. total_pages = len(prs.slides)
  190. issues = self.inspect(prs, config)
  191. if not issues:
  192. print(f'[PASS] 第 {iteration} 次迭代:无问题,质量达标')
  193. break
  194. by_sev = Counter(i.severity for i in issues)
  195. print(f'[INSPECT] 第 {iteration} 次自检:{by_sev.get("critical",0)} 严重 + '
  196. f'{by_sev.get("major",0)} 主要 + {by_sev.get("minor",0)} 次要')
  197. fixable = [i for i in issues if i.auto_fixable]
  198. self.auto_fix(prs, fixable)
  199. print(f'[FIX] 自动修复了 {self.fix_count} 个问题')
  200. for issue in fixable:
  201. if issue.fix_data.get('needs_rebuild'):
  202. needs_rebuild = True
  203. print(f'[WARN] 检测到内容严重不足,将在下一轮迭代中重建')
  204. break
  205. unfixable = [i for i in issues if not i.auto_fixable]
  206. if unfixable:
  207. print(f'[WARN] {len(unfixable)} 个问题需人工确认')
  208. remaining = self.inspect(prs, config)
  209. if not remaining:
  210. print(f'[PASS] 第 {iteration} 次修复后:所有问题已解决')
  211. break
  212. has_critical = any(i.severity == 'critical' for i in remaining)
  213. has_major = any(i.severity == 'major' for i in remaining)
  214. if not has_critical and not has_major:
  215. print(f'[PASS] 第 {iteration} 次修复后:仅剩次要问题,质量达标')
  216. break
  217. if needs_rebuild and iteration < max_iterations:
  218. continue
  219. final_issues = self.inspect(prs, config)
  220. by_sev = Counter(i.severity for i in final_issues)
  221. by_cat = Counter(i.category for i in final_issues)
  222. score = calculate_score(dict(by_sev), dict(by_cat), max(total_pages, 1))
  223. label = get_quality_label(score)
  224. report = self.generate_report(final_issues, iteration, total_pages)
  225. print(report)
  226. if score >= config.quality_threshold:
  227. prs.save(output_path)
  228. print(f'[PASS] 高质量 PPT 已输出: {output_path}')
  229. else:
  230. has_critical_final = any(i.severity == 'critical' for i in final_issues)
  231. has_layout_critical = any(
  232. i.severity == 'critical' and i.category == 'layout'
  233. for i in final_issues
  234. )
  235. if has_layout_critical:
  236. raise RuntimeError(
  237. f'PPT 存在严重布局问题(评分 {score}),无法自动修复。'
  238. f'请检查页面配置和数据。'
  239. )
  240. prs.save(output_path)
  241. if has_critical_final:
  242. print(f'[WARN] 质量评分 {score}(低于阈值 {config.quality_threshold}),'
  243. f'存在 {by_sev.get("critical", 0)} 个严重内容问题,建议补充分析数据后重新生成')
  244. else:
  245. print(f'[WARN] 质量评分 {score}(低于阈值 {config.quality_threshold}),已输出但建议复核')
  246. return prs, final_issues
  247. def _check_layout(self, slide, page_idx) -> list[QualityIssue]:
  248. issues = []
  249. sw = int(slide.slide_width) if hasattr(slide, 'slide_width') else SLIDE_WIDTH
  250. sh = int(slide.slide_height) if hasattr(slide, 'slide_height') else SLIDE_HEIGHT
  251. for shape in slide.shapes:
  252. l, t = int(shape.left), int(shape.top)
  253. w, h = int(shape.width), int(shape.height)
  254. if l < -100:
  255. issues.append(QualityIssue('critical', 'layout', page_idx,
  256. f'形状"{_shape_name(shape)}"飞出页面左边界 (left={l})',
  257. 'L001', True, {'shape': shape, 'type': 'left'}))
  258. if l + w > sw + 500:
  259. issues.append(QualityIssue('critical', 'layout', page_idx,
  260. f'形状"{_shape_name(shape)}"飞出页面右边界 (right={l+w}, max={sw})',
  261. 'L002', True, {'shape': shape, 'type': 'right'}))
  262. if t < -100:
  263. issues.append(QualityIssue('critical', 'layout', page_idx,
  264. f'形状"{_shape_name(shape)}"飞出页面顶部 (top={t})',
  265. 'L003', True, {'shape': shape, 'type': 'top'}))
  266. if t + h > sh + 500:
  267. issues.append(QualityIssue('critical', 'layout', page_idx,
  268. f'形状"{_shape_name(shape)}"飞出页面底部 (bottom={t+h}, max={sh})',
  269. 'L004', True, {'shape': shape, 'type': 'bottom'}))
  270. if l < SAFE_MARGIN and l >= 0:
  271. if l == 0 and w >= sw * 0.8:
  272. continue
  273. if int(shape.top) < 0 or int(shape.top) + int(shape.height) < Emu(100000):
  274. continue
  275. if int(shape.top) > sh - Emu(500000):
  276. continue
  277. issues.append(QualityIssue('minor', 'layout', page_idx,
  278. f'形状"{_shape_name(shape)}"过于靠近左边缘',
  279. 'L007', True, {'shape': shape, 'type': 'edge_left'}))
  280. placeholder_pattern = re.compile(r'\{[^}]+\}')
  281. for shape in slide.shapes:
  282. if shape.has_text_frame:
  283. text = shape.text_frame.text
  284. if placeholder_pattern.search(text):
  285. issues.append(QualityIssue('critical', 'layout', page_idx,
  286. f'发现未替换占位符: "{text[:50]}"',
  287. 'L006', True, {'shape': shape, 'type': 'placeholder'}))
  288. empty_artifacts = self._find_empty_template_artifacts(slide)
  289. for shape in empty_artifacts:
  290. issues.append(QualityIssue(
  291. 'major', 'layout', page_idx,
  292. f'发现空模板组件残留: "{_shape_name(shape)}"',
  293. 'L008', True, {'shape': shape, 'type': 'empty_template_artifact'}
  294. ))
  295. shapes_list = list(slide.shapes)
  296. for i, a in enumerate(shapes_list):
  297. for b in shapes_list[i+1:]:
  298. if self._shapes_overlap(a, b):
  299. a_name = _shape_name(a)
  300. b_name = _shape_name(b)
  301. if self._is_intentional_overlap(a, b):
  302. continue
  303. issues.append(QualityIssue('major', 'layout', page_idx,
  304. f'形状"{a_name}"与"{b_name}"存在重叠',
  305. 'L005', True, {'shape_a': a, 'shape_b': b, 'type': 'overlap'}))
  306. return issues
  307. def _check_visual(self, slide, page_idx) -> list[QualityIssue]:
  308. issues = []
  309. fonts_seen = {}
  310. for shape in slide.shapes:
  311. if not shape.has_text_frame:
  312. continue
  313. for para in shape.text_frame.paragraphs:
  314. for run in para.runs:
  315. if run.font.size:
  316. size_pt = run.font.size / 12700.0
  317. if size_pt < 6:
  318. issues.append(QualityIssue('major', 'visual', page_idx,
  319. f'字号过小 ({size_pt:.1f}pt): "{run.text[:20]}"',
  320. 'V002', True, {'run': run, 'type': 'font_small'}))
  321. elif size_pt > 65:
  322. issues.append(QualityIssue('major', 'visual', page_idx,
  323. f'字号过大 ({size_pt:.1f}pt): "{run.text[:20]}"',
  324. 'V003', True, {'run': run, 'type': 'font_large'}))
  325. if run.font.name:
  326. fonts_seen[run.font.name] = fonts_seen.get(run.font.name, 0) + 1
  327. if len(fonts_seen) > 3:
  328. issues.append(QualityIssue('minor', 'visual', page_idx,
  329. f'字体使用超过3种: {list(fonts_seen.keys())}',
  330. 'V001', True, {'type': 'font_mixed', 'fonts': fonts_seen}))
  331. return issues
  332. def _check_content(self, slide, page_idx, config, prs, page_type='content') -> list[QualityIssue]:
  333. # Resolve dynamic content top from layout context if available
  334. content_top_emu = None
  335. if self.layout_context:
  336. content_top_emu = self.layout_context.content_top
  337. issues = []
  338. if page_type in ('cover', 'end'):
  339. issues += self._check_text_overflow(slide, page_idx)
  340. return issues
  341. issues += self._check_dynamic_page_fit(page_idx, page_type, config)
  342. issues += self._check_core_metric_presence(slide, page_idx, page_type, config)
  343. if page_type == 'toc':
  344. content_shapes = [s for s in slide.shapes
  345. if s.has_text_frame and _is_in_content_area(s)]
  346. all_content_text = ''
  347. for shape in content_shapes:
  348. text = shape.text_frame.text.strip()
  349. if text:
  350. all_content_text += text + '\n'
  351. if len(all_content_text.strip()) < 30:
  352. issues.append(QualityIssue('minor', 'content', page_idx,
  353. '目录页内容过少',
  354. 'C008', False, {'type': 'empty_page'}))
  355. return issues
  356. fill_ratio = calculate_fill_ratio(slide, content_top_emu=content_top_emu)
  357. if page_type in ('kpi_overview', 'trend', 'distribution', 'ranking', 'summary') or page_type in FORECAST_PAGE_TYPES:
  358. if fill_ratio < FILL_RATIO_THRESHOLDS['sparse']:
  359. issues.append(QualityIssue('critical', 'content', page_idx,
  360. f'页面内容严重不足,填充率仅 {fill_ratio:.1%},必须补充图表和分析文本',
  361. 'C001', True, {'type': 'sparse', 'fill_ratio': fill_ratio}))
  362. elif fill_ratio < FILL_RATIO_THRESHOLDS['low']:
  363. issues.append(QualityIssue('major', 'content', page_idx,
  364. f'页面留白偏多,填充率 {fill_ratio:.1%},需补充分析内容',
  365. 'C001', True, {'type': 'sparse', 'fill_ratio': fill_ratio}))
  366. elif fill_ratio < FILL_RATIO_THRESHOLDS['sparse'] / 2:
  367. issues.append(QualityIssue('minor', 'content', page_idx,
  368. f'页面填充率过低 {fill_ratio:.1%}',
  369. 'C001', False))
  370. content_shapes = [s for s in slide.shapes
  371. if s.has_text_frame and _is_in_content_area(s)]
  372. all_content_text = ''
  373. insight_blocks = 0
  374. for shape in content_shapes:
  375. tf = shape.text_frame
  376. full_text = tf.text.strip()
  377. if not full_text:
  378. continue
  379. all_content_text += full_text + '\n'
  380. for para in tf.paragraphs:
  381. para_text = para.text.strip()
  382. if para_text and len(para_text) >= TEXT_MIN_LENGTH:
  383. insight_blocks += 1
  384. total_content_chars = len(all_content_text.strip())
  385. text_lengths = [len(p.text.strip()) for s in content_shapes
  386. for p in s.text_frame.paragraphs if p.text.strip()]
  387. if total_content_chars < PAGE_MIN_TEXT_LENGTH:
  388. issues.append(QualityIssue('critical', 'content', page_idx,
  389. f'页面内容为空!所有文本框总字数仅 {total_content_chars} 字(要求≥{PAGE_MIN_TEXT_LENGTH}字)',
  390. 'C008', True, {'type': 'empty_page', 'char_count': total_content_chars}))
  391. elif total_content_chars < 200:
  392. issues.append(QualityIssue('major', 'content', page_idx,
  393. f'页面内容过少,总字数仅 {total_content_chars} 字,分析深度严重不足',
  394. 'C008', True, {'type': 'empty_page', 'char_count': total_content_chars}))
  395. if text_lengths and max(text_lengths) < TEXT_MIN_LENGTH:
  396. issues.append(QualityIssue('critical', 'content', page_idx,
  397. f'分析文本过短(最长为 {max(text_lengths)} 字),需撰写≥{TEXT_MIN_LENGTH}字的深度分析',
  398. 'C005', True, {'type': 'short_text', 'max_length': max(text_lengths)}))
  399. if insight_blocks < INSIGHT_MIN_COUNT:
  400. issues.append(QualityIssue('critical', 'content', page_idx,
  401. f'分析段数不足,仅 {insight_blocks} 段(要求≥{INSIGHT_MIN_COUNT}段)',
  402. 'C007', True, {'type': 'insight_count', 'count': insight_blocks}))
  403. has_title = False
  404. for shape in slide.shapes:
  405. if shape.has_text_frame:
  406. text = shape.text_frame.text
  407. try:
  408. sy = int(shape.top)
  409. except Exception:
  410. sy = 99999999
  411. if sy < CONTENT_TOP_BASE + Emu(100000) and sy > Emu(500000):
  412. if len(text.strip()) > 0 and not text.startswith('{'):
  413. has_title = True
  414. break
  415. if any(kw in text for kw in ['概览', '趋势', '分布', '分析', '总结',
  416. '排行', '报告', '建议', '告警', '要点']):
  417. if sy < CONTENT_TOP_BASE + Emu(400000):
  418. has_title = True
  419. break
  420. if not has_title and page_idx > 0 and page_idx < len(prs.slides) - 1:
  421. issues.append(QualityIssue('critical', 'content', page_idx,
  422. '页面缺少标题', 'C006', True, {'type': 'missing_title'}))
  423. issues += self._check_text_overflow(slide, page_idx)
  424. has_chart = False
  425. for shape in slide.shapes:
  426. if shape.has_chart:
  427. has_chart = True
  428. break
  429. if has_chart and insight_blocks == 0 and page_idx >= 2:
  430. issues.append(QualityIssue('critical', 'content', page_idx,
  431. '页面有图表但完全缺少分析文本,图表数据需要被解读和说明',
  432. 'C009', True, {'type': 'chart_no_text'}))
  433. return issues
  434. def _check_text_overflow(self, slide, page_idx) -> list[QualityIssue]:
  435. issues = []
  436. for shape in slide.shapes:
  437. if shape.has_text_frame and self._is_text_overflowing(shape):
  438. issues.append(QualityIssue(
  439. 'major', 'content', page_idx,
  440. f'文本可能超出文本框边界: "{shape.text_frame.text[:30]}"',
  441. 'C004', True, {'shape': shape, 'type': 'text_overflow'}
  442. ))
  443. return issues
  444. def _check_dynamic_page_fit(self, page_idx, page_type, config) -> list[QualityIssue]:
  445. issues = []
  446. profile = getattr(config, 'data_profiling', None) or {}
  447. if not profile:
  448. return issues
  449. time_cols = profile.get('time_columns', [])
  450. cat_cols = profile.get('category_columns', [])
  451. num_cols = profile.get('numeric_columns', [])
  452. if page_type == 'trend' and (not time_cols or not num_cols):
  453. issues.append(QualityIssue(
  454. 'critical', 'content', page_idx,
  455. '趋势页缺少可用时间列或数值列,需要重建或降级为摘要页',
  456. 'C010', True, {'type': 'dynamic_page_not_supported', 'page_type': page_type}
  457. ))
  458. elif page_type in ('distribution', 'ranking') and (not cat_cols or not num_cols):
  459. issues.append(QualityIssue(
  460. 'critical', 'content', page_idx,
  461. f'{page_type} 页缺少分类维度或数值列,需要重建或降级为摘要页',
  462. 'C010', True, {'type': 'dynamic_page_not_supported', 'page_type': page_type}
  463. ))
  464. elif page_type == 'kpi_overview':
  465. selected_metrics = [m for m in getattr(config, 'metrics', []) if getattr(m, 'selected', True)]
  466. if len(selected_metrics) > 6:
  467. issues.append(QualityIssue(
  468. 'minor', 'content', page_idx,
  469. f'核心指标数量 {len(selected_metrics)} 超过 6 个,KPI页应切换为紧凑布局或拆分展示',
  470. 'C011', True, {'type': 'kpi_layout_over_capacity', 'count': len(selected_metrics)}
  471. ))
  472. elif len(selected_metrics) >= 4:
  473. issues.append(QualityIssue(
  474. 'minor', 'content', page_idx,
  475. f'核心指标数量 {len(selected_metrics)} 较多,建议使用紧凑布局以保留洞察区',
  476. 'C011', True, {'type': 'kpi_layout_compact_needed', 'count': len(selected_metrics)}
  477. ))
  478. return issues
  479. def _check_core_metric_presence(self, slide, page_idx, page_type, config) -> list[QualityIssue]:
  480. issues = []
  481. if page_type != 'kpi_overview' or not config:
  482. return issues
  483. selected_metrics = [m for m in getattr(config, 'metrics', []) if getattr(m, 'selected', True)]
  484. if not selected_metrics:
  485. return issues
  486. slide_text = '\n'.join(
  487. shape.text_frame.text for shape in slide.shapes
  488. if shape.has_text_frame and shape.text_frame.text
  489. )
  490. missing = [m.label for m in selected_metrics[:6] if m.label and m.label not in slide_text]
  491. if missing:
  492. issues.append(QualityIssue(
  493. 'critical', 'data', page_idx,
  494. 'KPI概览页缺少已确认核心指标:' + '、'.join(missing),
  495. 'D006', True, {'type': 'core_metric_missing', 'missing': missing}
  496. ))
  497. return issues
  498. def _check_data(self, slide, page_idx, prs) -> list[QualityIssue]:
  499. issues = []
  500. if page_idx == 0:
  501. return issues
  502. for shape in slide.shapes:
  503. if shape.has_text_frame:
  504. text = shape.text_frame.text
  505. page_pattern = re.search(r'(\d+)\s*/\s*(\d+)', text)
  506. if page_pattern:
  507. current = int(page_pattern.group(1))
  508. total = int(page_pattern.group(2))
  509. if total == 0:
  510. issues.append(QualityIssue('major', 'data', page_idx,
  511. f'页码格式异常: {text.strip()}',
  512. 'D002', True, {'type': 'page_num'}))
  513. return issues
  514. def _fix_layout(self, slide, issue):
  515. fd = issue.fix_data
  516. if fd.get('type') in ('left', 'right', 'top', 'bottom'):
  517. shape = fd.get('shape')
  518. if shape:
  519. ensure_safe_position(shape, SLIDE_WIDTH, SLIDE_HEIGHT)
  520. fd['fixed'] = True
  521. elif fd.get('type') == 'overlap':
  522. a, b = fd.get('shape_a'), fd.get('shape_b')
  523. if a and b:
  524. try:
  525. if int(b.left) < int(a.left) + int(a.width) + Emu(50000):
  526. b.left = int(a.left) + int(a.width) + Emu(152400)
  527. ensure_safe_position(b, SLIDE_WIDTH, SLIDE_HEIGHT)
  528. except Exception:
  529. pass
  530. fd['fixed'] = True
  531. elif fd.get('type') == 'placeholder':
  532. shape = fd.get('shape')
  533. if shape and shape.has_text_frame:
  534. text = shape.text_frame.text or ''
  535. # For KPI placeholders, remove the entire shape and nearby card backgrounds
  536. kpi_pattern = re.compile(r'\{kpi\d+_(label|value)\}')
  537. if kpi_pattern.search(text):
  538. # Remove this text shape
  539. self._remove_shape(shape)
  540. # Also remove nearby rounded rectangle backgrounds
  541. try:
  542. sx = int(shape.left)
  543. sy = int(shape.top)
  544. sw = int(shape.width)
  545. sh = int(shape.height)
  546. pad = 300000
  547. for other in list(slide.shapes):
  548. try:
  549. ox = int(other.left)
  550. oy = int(other.top)
  551. ow = int(other.width)
  552. oh = int(other.height)
  553. in_region = (
  554. ox >= sx - pad and ox + ow <= sx + sw + pad and
  555. oy >= sy - pad and oy + oh <= sy + sh + pad
  556. )
  557. if in_region and other != shape:
  558. # Check if it's a background shape (no text or empty text)
  559. if not other.has_text_frame or not (other.text_frame.text or '').strip():
  560. self._remove_shape(other)
  561. except Exception:
  562. pass
  563. except Exception:
  564. pass
  565. else:
  566. # For other placeholders, just clear the text
  567. for para in shape.text_frame.paragraphs:
  568. para.text = re.sub(r'\{[^}]+\}', '', para.text)
  569. fd['fixed'] = True
  570. elif fd.get('type') == 'edge_left':
  571. shape = fd.get('shape')
  572. if shape:
  573. try:
  574. w = int(shape.width)
  575. if w < SLIDE_WIDTH * 0.5:
  576. shape.left = SAFE_MARGIN
  577. except Exception:
  578. pass
  579. fd['fixed'] = True
  580. elif fd.get('type') == 'empty_template_artifact':
  581. shape = fd.get('shape')
  582. if shape:
  583. self._remove_shape(shape)
  584. fd['fixed'] = True
  585. def _fix_visual(self, slide, issue):
  586. fd = issue.fix_data
  587. if fd.get('type') == 'font_small':
  588. run = fd.get('run')
  589. if run:
  590. run.font.size = FONT_SIZE_MIN
  591. fd['fixed'] = True
  592. elif fd.get('type') == 'font_large':
  593. run = fd.get('run')
  594. if run:
  595. run.font.size = FONT_SIZE_MAX
  596. fd['fixed'] = True
  597. elif fd.get('type') == 'font_mixed':
  598. for shape in slide.shapes:
  599. if shape.has_text_frame:
  600. for para in shape.text_frame.paragraphs:
  601. for run in para.runs:
  602. run.font.name = DEFAULT_FONT
  603. fd['fixed'] = True
  604. def _fix_content(self, slide, issue, prs):
  605. fd = issue.fix_data
  606. if fd.get('type') == 'sparse':
  607. fill_ratio = fd.get('fill_ratio', 0)
  608. if fill_ratio < FILL_RATIO_THRESHOLDS['low']:
  609. try:
  610. box = slide.shapes.add_textbox(
  611. CONTENT_LEFT, Emu(int(FOOTER_TOP) - Emu(1600000)),
  612. Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT - Emu(200000)), Emu(1500000))
  613. tf = box.text_frame
  614. tf.word_wrap = True
  615. p = tf.paragraphs[0]
  616. p.text = (
  617. '[WARNING] 此页面内容不足,需补充深度分析内容。'
  618. '分析应包含:具体数据引用(含数值和单位)、'
  619. '与同类/历史/目标的对比分析、'
  620. '数据背后原因的至少2条解读、'
  621. '以及可执行的业务行动建议。'
  622. '请勿使用"要加强"、"进一步优化"等模糊措辞。'
  623. )
  624. p.font.size = Pt(12)
  625. p.font.color.rgb = RGBColor(0xCC, 0x33, 0x00)
  626. p.font.name = DEFAULT_FONT
  627. p.font.bold = True
  628. fd['fixed'] = True
  629. fd['needs_rebuild'] = True
  630. except Exception:
  631. pass
  632. elif fd.get('type') == 'empty_page':
  633. fd['needs_rebuild'] = True
  634. fd['fixed'] = True
  635. elif fd.get('type') == 'chart_no_text':
  636. fd['needs_rebuild'] = True
  637. fd['fixed'] = True
  638. elif fd.get('type') == 'insight_count':
  639. fd['needs_rebuild'] = True
  640. fd['fixed'] = True
  641. elif fd.get('type') == 'short_text':
  642. fd['needs_rebuild'] = True
  643. fd['fixed'] = True
  644. elif fd.get('type') in ('dynamic_page_not_supported', 'kpi_layout_over_capacity'):
  645. fd['fixed'] = True
  646. elif fd.get('type') == 'kpi_layout_compact_needed':
  647. fd['fixed'] = True
  648. elif fd.get('type') == 'core_metric_missing':
  649. fd['needs_rebuild'] = True
  650. fd['fixed'] = True
  651. elif fd.get('type') == 'missing_title':
  652. try:
  653. box = slide.shapes.add_textbox(
  654. CONTENT_LEFT, Emu(914400),
  655. Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT - Emu(200000)), Emu(508000))
  656. p = box.text_frame.paragraphs[0]
  657. p.text = '数据详情'
  658. p.font.size = Pt(24)
  659. p.font.bold = True
  660. p.font.color.rgb = RGBColor(0x33, 0x33, 0x33)
  661. p.font.name = DEFAULT_FONT
  662. fd['fixed'] = True
  663. except Exception:
  664. pass
  665. elif fd.get('type') == 'text_overflow':
  666. shape = fd.get('shape')
  667. if shape and shape.has_text_frame:
  668. text_len = len(shape.text_frame.text or '')
  669. try:
  670. if text_len > 180 or int(shape.top) + int(shape.height) > int(FOOTER_TOP) - Emu(120000):
  671. fd['needs_rebuild'] = True
  672. else:
  673. for para in shape.text_frame.paragraphs:
  674. for run in para.runs:
  675. if run.font.size and run.font.size > Pt(9):
  676. run.font.size = Pt(9)
  677. except Exception:
  678. fd['needs_rebuild'] = True
  679. fd['fixed'] = True
  680. def _fix_data(self, slide, issue, prs):
  681. fd = issue.fix_data
  682. if fd.get('type') == 'page_num':
  683. fd['fixed'] = True
  684. def _shapes_overlap(self, a, b) -> bool:
  685. ax, ay, aw, ah = int(a.left), int(a.top), int(a.width), int(a.height)
  686. bx, by, bw, bh = int(b.left), int(b.top), int(b.width), int(b.height)
  687. if ax + aw <= bx or bx + bw <= ax:
  688. return False
  689. if ay + ah <= by or by + bh <= ay:
  690. return False
  691. return True
  692. def _is_intentional_overlap(self, a, b) -> bool:
  693. if hasattr(a, 'is_placeholder') or hasattr(b, 'is_placeholder'):
  694. return True
  695. a_area = int(a.width) * int(a.height)
  696. b_area = int(b.width) * int(b.height)
  697. if a_area > b_area * 3 or b_area > a_area * 3:
  698. return True
  699. return False
  700. def _is_title_shape(self, shape) -> bool:
  701. if not shape.has_text_frame:
  702. return False
  703. try:
  704. y = int(shape.top)
  705. return y < int(CONTENT_TOP_BASE) + Emu(200000)
  706. except Exception:
  707. return False
  708. def _find_empty_template_artifacts(self, slide) -> list:
  709. artifacts = []
  710. shapes = list(slide.shapes)
  711. empty_text_boxes = []
  712. for shape in shapes:
  713. if shape.has_text_frame:
  714. text = (shape.text_frame.text or '').strip()
  715. if text:
  716. continue
  717. if int(shape.width) < Emu(200000) or int(shape.height) < Emu(120000):
  718. continue
  719. if int(shape.top) < Emu(900000) or int(shape.top) > int(FOOTER_TOP) - Emu(100000):
  720. continue
  721. empty_text_boxes.append(shape)
  722. artifacts.append(shape)
  723. for shape in shapes:
  724. if shape.has_text_frame:
  725. continue
  726. try:
  727. is_large_soft_card = (
  728. int(shape.width) >= Emu(1000000) and
  729. int(shape.height) >= Emu(500000) and
  730. int(shape.top) < int(FOOTER_TOP) - Emu(400000)
  731. )
  732. if not is_large_soft_card:
  733. continue
  734. overlaps_empty_text = any(self._shapes_overlap(shape, box) for box in empty_text_boxes)
  735. if overlaps_empty_text:
  736. artifacts.append(shape)
  737. except Exception:
  738. continue
  739. # Preserve order while de-duplicating.
  740. seen = set()
  741. unique = []
  742. for shape in artifacts:
  743. key = id(shape)
  744. if key not in seen:
  745. unique.append(shape)
  746. seen.add(key)
  747. return unique
  748. def _remove_shape(self, shape):
  749. el = shape.element
  750. el.getparent().remove(el)
  751. def _is_text_overflowing(self, shape) -> bool:
  752. if not shape.has_text_frame:
  753. return False
  754. text = shape.text_frame.text
  755. if not text.strip():
  756. return False
  757. if len(text) > 800:
  758. return True
  759. try:
  760. w = int(shape.width)
  761. h = int(shape.height)
  762. width_pt = max(1, w / 12700.0)
  763. max_font_pt = 10
  764. para_count = 0
  765. for para in shape.text_frame.paragraphs:
  766. if not para.text.strip():
  767. continue
  768. para_count += 1
  769. for run in para.runs:
  770. if run.font.size:
  771. max_font_pt = max(max_font_pt, run.font.size / 12700.0)
  772. chars_per_line = max(8, int(width_pt / (max_font_pt * 1.15)))
  773. est_lines = max(1, (len(text) + chars_per_line - 1) // chars_per_line)
  774. est_height = int((est_lines * max_font_pt * 1.2 + para_count * 4) * 12700)
  775. if est_height > h * 1.15:
  776. return True
  777. if h < Emu(200000) and len(text) > 80:
  778. return True
  779. except Exception:
  780. pass
  781. return False
  782. def _shape_name(shape):
  783. try:
  784. if shape.has_text_frame:
  785. return shape.text_frame.text[:20].replace('\n', ' ')
  786. except Exception:
  787. pass
  788. try:
  789. return shape.shape_type
  790. except Exception:
  791. pass
  792. return '无名形状'
  793. def _is_in_content_area(shape):
  794. try:
  795. return int(shape.top) >= int(CONTENT_TOP_BASE)
  796. except Exception:
  797. return False
  798. if __name__ == '__main__':
  799. print("QualityInspector module loaded")
  800. inspector = QualityInspector()
  801. print("Ready to inspect PPT files")