# ppt_builder.py
  1. """
  2. PPT builder: assemble daily/weekly/monthly reports by duplicating master templates
  3. and filling charts, tables, KPI cards, and structured insight text blocks.
  4. Key design principle: Conclusion-first page titles + structured multi-paragraph
  5. insights (title + body per paragraph) aligned with reference PPT style.
  6. """
  7. import copy
  8. import os
  9. import sys
  10. import re as re_module
  11. from pathlib import Path
  12. from datetime import datetime, timedelta
  13. sys.path.insert(0, str(Path(__file__).parent))
  14. from pptx import Presentation
  15. from pptx.util import Emu, Pt
  16. from pptx.dml.color import RGBColor
  17. from pptx.enum.text import PP_ALIGN
  18. from pptx.enum.shapes import MSO_SHAPE
  19. from data_loader import load_generic_excel
  20. from metrics_calculator import (
  21. calc_generic_metrics, calc_generic_trend, calc_generic_distribution,
  22. calc_generic_ranking, generate_generic_insights,
  23. )
  24. from chart_factory import (
  25. add_column_chart, add_bar_chart, add_line_chart, add_doughnut_chart,
  26. add_pie_chart, add_funnel_chart, add_horizontal_bar_chart,
  27. add_grouped_bar_chart, add_table
  28. )
  29. from page_layouts import (
  30. get_kpi_grid, get_chart_left_zone, get_insight_right_zone,
  31. get_full_width_zone, get_two_column_zones, LayoutContext,
  32. )
  33. from template_parser import (
  34. parse_template, get_builtin_template_profile,
  35. PLACEHOLDER_ALIASES, _matches_any_placeholder,
  36. )
  37. from quality_inspector import QualityInspector
  38. from theme_manager import theme_to_rgb_colors, get_theme
  39. from report_config import (
  40. ReportConfig, PageDef, MetricDef, PeriodType, ChartType,
  41. validate_six_confirmations,
  42. )
  43. from quality_rules import SLIDE_WIDTH, SLIDE_HEIGHT, CONTENT_LEFT, CONTENT_TOP_BASE, FOOTER_TOP
  44. # Colors — aligned with reference design theme YAML
  45. C_PRIMARY = RGBColor(0x1E, 0x3A, 0x5F)
  46. C_ACCENT = RGBColor(0x10, 0xB9, 0x81)
  47. C_ACCENT_NEG = RGBColor(0xEF, 0x44, 0x44)
  48. C_SECONDARY = RGBColor(0x64, 0x74, 0x8B)
  49. C_DARK = RGBColor(0x1F, 0x3A, 0x5C)
  50. C_WHITE = RGBColor(0xFF, 0xFF, 0xFF)
  51. C_GRAY_BG = RGBColor(0xF2, 0xF2, 0xF2)
  52. C_TEXT = RGBColor(0x33, 0x33, 0x33)
  53. C_TEXT_GRAY = RGBColor(0x66, 0x66, 0x66)
  54. C_LINE = RGBColor(0xD9, 0xD9, 0xD9)
  55. C_CARD_BG = RGBColor(0xE7, 0xF0, 0xF7)
  56. C_GREEN = RGBColor(0x10, 0xB9, 0x81)
  57. C_RED = RGBColor(0xEF, 0x44, 0x44)
  58. C_ORANGE = RGBColor(0xED, 0x7D, 0x31)
  59. # ==============================================================================
  60. # MASTER / SLIDE HELPERS
  61. # ==============================================================================
  62. def get_master_template(report_type: str) -> str:
  63. """Route report type to corresponding master template."""
  64. base = os.path.join(os.path.dirname(__file__), '..', 'assets')
  65. template_map = {
  66. 'daily': os.path.join(base, 'report-master.pptx'),
  67. 'weekly': os.path.join(base, 'weekly-master.pptx'),
  68. 'monthly': os.path.join(base, 'monthly-master.pptx'),
  69. }
  70. path = template_map.get(report_type, template_map['daily'])
  71. if os.path.exists(path):
  72. return os.path.abspath(path)
  73. # Fallbacks
  74. for fallback in [template_map['daily']]:
  75. if os.path.exists(fallback):
  76. return os.path.abspath(fallback)
  77. raise FileNotFoundError(f"Master template not found for {report_type}")
  78. def _resolve_master_template(config: ReportConfig) -> str:
  79. if getattr(config, 'template_path', ''):
  80. return os.path.abspath(config.template_path)
  81. period_type = getattr(config, 'period_type', None)
  82. report_type = getattr(period_type, 'value', period_type) or 'daily'
  83. return get_master_template(report_type)
  84. def _resolve_template_profile(config: ReportConfig):
  85. """Resolve TemplateProfile from config (cached or parse on demand)."""
  86. if getattr(config, 'template_profile', None):
  87. return config.template_profile
  88. if getattr(config, 'template_path', ''):
  89. return parse_template(config.template_path)
  90. period_type = getattr(config, 'period_type', None)
  91. report_type = getattr(period_type, 'value', period_type) or 'daily'
  92. return get_builtin_template_profile(report_type)
  93. def _resolve_colors(config: ReportConfig, profile) -> dict:
  94. """Three-tier color resolution: user theme > template theme > defaults."""
  95. # If user explicitly configured a theme and opted out of template theme
  96. if config.theme and not getattr(config, 'use_template_theme', True):
  97. return theme_to_rgb_colors(config.theme)
  98. # Try template-extracted theme
  99. from theme_manager import extract_theme_from_template, ThemeConfig
  100. template_theme = extract_theme_from_template(profile)
  101. if template_theme:
  102. return theme_to_rgb_colors(template_theme)
  103. # Fallback to user theme or default
  104. if config.theme:
  105. return theme_to_rgb_colors(config.theme)
  106. # Ultimate fallback: hard-coded defaults packaged as a theme
  107. return theme_to_rgb_colors(ThemeConfig())
  108. def _resolve_fonts(config: ReportConfig, profile) -> dict:
  109. """Three-tier font resolution: user config > template fonts > defaults."""
  110. result = {
  111. 'title_font': '微软雅黑',
  112. 'body_font': '微软雅黑',
  113. 'number_font': 'Arial',
  114. }
  115. # Template fonts
  116. detected = getattr(profile, 'detected_fonts', {})
  117. if detected.get('title_font'):
  118. result['title_font'] = detected['title_font']
  119. if detected.get('body_font'):
  120. result['body_font'] = detected['body_font']
  121. if detected.get('number_font'):
  122. result['number_font'] = detected['number_font']
  123. # User override via theme config
  124. if config.theme:
  125. if getattr(config.theme, 'title_font', ''):
  126. result['title_font'] = config.theme.title_font
  127. if getattr(config.theme, 'body_font', ''):
  128. result['body_font'] = config.theme.body_font
  129. if getattr(config.theme, 'number_font', ''):
  130. result['number_font'] = config.theme.number_font
  131. return result
  132. def _duplicate_master_slide(prs, profile, page_type: str, keep_shapes: bool = False):
  133. """Duplicate the appropriate master slide for the given page_type.
  134. keep_shapes=True: keep layout-inherited placeholders (cover/toc/end pages).
  135. keep_shapes=False: remove layout placeholders and copy from source (content pages).
  136. """
  137. idx = profile.get_master_index_for(page_type)
  138. if 0 <= idx < len(prs.slides):
  139. source = prs.slides[idx]
  140. else:
  141. source = prs.slides[0]
  142. return _duplicate_slide(prs, source, keep_shapes=keep_shapes)
  143. def _is_forecast_page_type(page_type: str) -> bool:
  144. normalized = str(page_type or '').lower()
  145. return normalized in {
  146. 'forecast',
  147. 'prediction',
  148. 'plan',
  149. 'monthly_forecast',
  150. 'monthly_plan',
  151. 'next_month_plan',
  152. 'custom_forecast',
  153. 'custom_prediction',
  154. }
  155. def _detect_content_top(slide) -> int:
  156. """Detect content start Y from a content slide template by reading {page_title} position."""
  157. page_title_bottom = Emu(1422400) # daily default
  158. for shape in slide.shapes:
  159. if shape.has_text_frame and '{page_title}' in shape.text_frame.text:
  160. page_title_bottom = shape.top + shape.height
  161. break
  162. # Gap: generous spacing between page title and content to avoid crowding
  163. gap = Emu(381000)
  164. return int(page_title_bottom) + int(gap)
  165. def _delete_template_slides(prs, count=None):
  166. """Delete original template slides from the presentation.
  167. count: number of original template slides to remove from the beginning.
  168. If None, auto-detect using a heuristic that looks for unreplaced placeholders.
  169. """
  170. if count is None:
  171. # Auto-detect: count leading slides that contain unreplaced placeholders
  172. # or have only template-specific content patterns.
  173. count = 0
  174. for slide in prs.slides:
  175. has_unreplaced_placeholder = False
  176. has_real_content = False
  177. for shape in slide.shapes:
  178. if shape.has_text_frame:
  179. text = shape.text_frame.text.strip()
  180. if text:
  181. if '{' in text and '}' in text:
  182. has_unreplaced_placeholder = True
  183. else:
  184. # Text like copyright, footer, etc. on template slides
  185. # is not "real content" in the report sense
  186. pass
  187. # If slide has unreplaced placeholders, it's an original template slide
  188. if has_unreplaced_placeholder:
  189. count += 1
  190. else:
  191. # Also check if slide is completely empty (some template slides
  192. # may have no placeholders at all)
  193. if len(slide.shapes) == 0:
  194. count += 1
  195. else:
  196. break
  197. # Ensure we don't delete all slides
  198. actual_count = min(count, len(prs.slides) - 1) if len(prs.slides) > 1 else 0
  199. for _ in range(actual_count):
  200. if len(prs.slides) == 0:
  201. break
  202. rId = prs.slides._sldIdLst[0].rId
  203. prs.part.drop_rel(rId)
  204. del prs.slides._sldIdLst[0]
  205. def copy_layout_decorative_shapes(slide, layout):
  206. """Copy non-placeholder decorative shapes from a layout to a slide.
  207. python-pptx's add_slide(layout) does NOT copy layout-level decorative
  208. shapes (gradient rectangles, logos, decorative lines) to the slide's
  209. spTree. PowerPoint renders them from the layout reference, but this
  210. is unreliable across PowerPoint versions.
  211. This function deep-copies all <p:sp> elements from the layout's spTree
  212. that do NOT contain a <p:ph> (placeholder) element into the slide's spTree.
  213. Args:
  214. slide: The slide to add shapes to (from prs.slides.add_slide(layout)).
  215. layout: The SlideLayout whose decorative shapes should be copied.
  216. Returns:
  217. int: Number of shapes copied.
  218. """
  219. from copy import deepcopy
  220. from pptx.oxml.ns import qn
  221. layout_spTree = layout._element.find(qn('p:cSld')).find(qn('p:spTree'))
  222. slide_spTree = slide._element.find(qn('p:cSld')).find(qn('p:spTree'))
  223. count = 0
  224. for child in list(layout_spTree):
  225. tag = child.tag.split('}')[-1] if '}' in child.tag else child.tag
  226. if tag == 'sp':
  227. # Check if this shape is a placeholder (has <p:ph> element)
  228. ph = child.find('.//' + qn('p:ph'))
  229. if ph is None:
  230. new_shape = deepcopy(child)
  231. slide_spTree.append(new_shape)
  232. count += 1
  233. return count
  234. def _duplicate_slide(prs, source_slide, keep_shapes: bool = False):
  235. # Use the SOURCE slide's own layout to preserve:
  236. # - layout-level background (gradient, color, image)
  237. # - layout-level shapes (company logo, decorative icons)
  238. # - theme colors, fonts
  239. # Previously used blank_layout which stripped all of the above.
  240. source_layout = source_slide.slide_layout
  241. new_slide = prs.slides.add_slide(source_layout)
  242. if not keep_shapes:
  243. # Remove layout-default shapes (placeholders) from the new slide —
  244. # they'll be replaced by shapes deep-copied from the source slide.
  245. # Layout-level decorative shapes (logos, backgrounds) are NOT in
  246. # slide.shapes and remain intact via layout inheritance.
  247. for shape in list(new_slide.shapes):
  248. sp = shape._element
  249. sp.getparent().remove(sp)
  250. # Copy slide-level background override if present (rare, but safe)
  251. try:
  252. src_cSld = source_slide._element.cSld
  253. new_cSld = new_slide._element.cSld
  254. if src_cSld.bg is not None:
  255. new_bg = copy.deepcopy(src_cSld.bg)
  256. if new_cSld.bg is not None:
  257. new_cSld.remove(new_cSld.bg)
  258. new_cSld.insert(0, new_bg)
  259. except Exception:
  260. pass
  261. if not keep_shapes:
  262. for shape in source_slide.shapes:
  263. el = shape.element
  264. new_el = copy.deepcopy(el)
  265. new_slide.shapes._spTree.insert_element_before(new_el, 'p:extLst')
  266. return new_slide
  267. def _replace_placeholder(slide, placeholder, new_text, fonts: dict = None):
  268. fonts = fonts or {}
  269. body_font = fonts.get('body_font', '微软雅黑')
  270. replacement = (
  271. _format_kpi_value_for_placeholder(new_text)
  272. if re_module.fullmatch(r'\{kpi\d+_value\}', placeholder)
  273. else str(new_text)
  274. )
  275. # Gather aliases for this placeholder
  276. aliases = PLACEHOLDER_ALIASES.get(placeholder, [])
  277. targets = [placeholder] + [a for a in aliases if a != placeholder]
  278. for shape in slide.shapes:
  279. if not shape.has_text_frame:
  280. continue
  281. for para in shape.text_frame.paragraphs:
  282. for target in targets:
  283. if target in para.text:
  284. para.text = para.text.replace(target, replacement)
  285. for run in para.runs:
  286. run.font.name = body_font
  287. break # only replace once per paragraph
  288. def _replace_all_placeholders(slide, mapping: dict, fonts: dict = None):
  289. for placeholder, new_text in mapping.items():
  290. _replace_placeholder(slide, placeholder, new_text, fonts)
  291. def _remove_shape(shape):
  292. """Remove a python-pptx shape from its parent tree."""
  293. el = shape.element
  294. el.getparent().remove(el)
  295. def _remove_slide(prs, slide):
  296. """Remove a slide from a presentation by its rId."""
  297. try:
  298. for i, s in enumerate(prs.slides):
  299. if s == slide:
  300. rId = prs.slides._sldIdLst[i].rId
  301. prs.part.drop_rel(rId)
  302. del prs.slides._sldIdLst[i]
  303. return True
  304. except Exception:
  305. pass
  306. return False
  307. def _safe_auto_shape_type(shape):
  308. try:
  309. return shape.auto_shape_type
  310. except (AttributeError, ValueError):
  311. return None
  312. def _remove_empty_cover_kpi_placeholders(slide):
  313. """
  314. Remove template KPI cards when generic cover data does not provide values.
  315. This prevents empty rounded rectangles from staying on the cover.
  316. """
  317. kpi_pattern = re_module.compile(r'\{kpi\d+_(label|value)\}')
  318. placeholder_shapes = [
  319. shape for shape in slide.shapes
  320. if shape.has_text_frame and kpi_pattern.search(shape.text_frame.text or '')
  321. ]
  322. if not placeholder_shapes:
  323. return
  324. x_min = min(int(shape.left) for shape in placeholder_shapes)
  325. x_max = max(int(shape.left) + int(shape.width) for shape in placeholder_shapes)
  326. y_min = min(int(shape.top) for shape in placeholder_shapes)
  327. y_max = max(int(shape.top) + int(shape.height) for shape in placeholder_shapes)
  328. pad = Emu(220000)
  329. to_remove = []
  330. for shape in slide.shapes:
  331. sx = int(shape.left)
  332. sy = int(shape.top)
  333. sw = int(shape.width)
  334. sh = int(shape.height)
  335. in_region = (
  336. sx >= x_min - pad and sx + sw <= x_max + pad and
  337. sy >= y_min - pad and sy + sh <= y_max + pad
  338. )
  339. is_text_placeholder = shape in placeholder_shapes
  340. is_empty_kpi_card = (
  341. in_region and
  342. _safe_auto_shape_type(shape) == MSO_SHAPE.ROUNDED_RECTANGLE
  343. )
  344. if is_text_placeholder or is_empty_kpi_card:
  345. to_remove.append(shape)
  346. for shape in to_remove:
  347. _remove_shape(shape)
  348. # ==============================================================================
  349. # NAVIGATION TABS
  350. # ==============================================================================
  351. def _add_nav_tabs(slide, tabs, active_index=0, slide_width=None,
  352. fonts=None, colors=None,
  353. tab_y=Emu(254000), tab_h=Emu(762000), underline_h=Emu(127000)):
  354. colors = colors or {}
  355. C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
  356. C_TEXT_GRAY = colors.get('text_gray', RGBColor(0x66, 0x66, 0x66))
  357. if slide_width is None:
  358. slide_width = slide.shapes._spTree.getparent().getparent().attrib.get('cx')
  359. slide_width = Emu(int(slide_width)) if slide_width else Emu(16256000)
  360. n = len(tabs)
  361. tab_w = Emu(int(slide_width) // n)
  362. for i, label in enumerate(tabs):
  363. x = Emu(i * int(tab_w))
  364. box = slide.shapes.add_textbox(x, tab_y, tab_w, tab_h)
  365. p = box.text_frame.paragraphs[0]
  366. p.text = label
  367. p.font.size = Pt(11)
  368. p.font.name = '微软雅黑'
  369. p.font.color.rgb = C_PRIMARY if i == active_index else C_TEXT_GRAY
  370. p.alignment = PP_ALIGN.CENTER
  371. if i == active_index:
  372. line = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, x, Emu(457200), tab_w, underline_h)
  373. line.fill.solid()
  374. line.fill.fore_color.rgb = C_PRIMARY
  375. line.line.fill.background()
  376. # ==============================================================================
  377. # KPI CARDS
  378. # ==============================================================================
  379. def _add_kpi_cards(slide, kpis, start_x=Emu(762000), start_y=Emu(1651000), fonts=None, colors=None):
  380. fonts = fonts or {}
  381. body_font = fonts.get("body_font", "微软雅黑")
  382. number_font = fonts.get("number_font", "Arial")
  383. colors = colors or {}
  384. C_CARD_BG = colors.get('card_bg', RGBColor(0xE7, 0xF0, 0xF7))
  385. C_TEXT_GRAY = colors.get('text_gray', RGBColor(0x66, 0x66, 0x66))
  386. C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
  387. positions = [
  388. (start_x, start_y),
  389. (Emu(5778500), start_y),
  390. (Emu(10795000), start_y),
  391. (start_x, Emu(start_y + 3429000)),
  392. (Emu(5778500), Emu(start_y + 3429000)),
  393. (Emu(10795000), Emu(start_y + 3429000)),
  394. ]
  395. for i, kpi in enumerate(kpis[:6]):
  396. if i >= len(positions):
  397. break
  398. x, y = positions[i]
  399. w, h = Emu(4699000), Emu(3048000)
  400. card = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, x, y, w, h)
  401. card.fill.solid()
  402. card.fill.fore_color.rgb = C_CARD_BG
  403. card.line.fill.background()
  404. # Label
  405. lbl = slide.shapes.add_textbox(Emu(x + 508000), Emu(y + 228600), Emu(2540000), Emu(406400))
  406. p = lbl.text_frame.paragraphs[0]
  407. p.text = kpi.get('label', '')
  408. p.font.size = Pt(14)
  409. p.font.color.rgb = C_TEXT_GRAY
  410. p.font.name = '微软雅黑'
  411. # Value
  412. val = slide.shapes.add_textbox(Emu(x + 508000), Emu(y + 762000), Emu(2540000), Emu(698500))
  413. p = val.text_frame.paragraphs[0]
  414. p.text = str(kpi.get('value', ''))
  415. p.font.size = Pt(36)
  416. p.font.bold = True
  417. p.font.color.rgb = C_PRIMARY
  418. p.font.name = 'Arial'
  419. # Unit
  420. unit = kpi.get('unit', '')
  421. if unit:
  422. ubox = slide.shapes.add_textbox(Emu(x + 3048000), Emu(y + 1016000), Emu(508000), Emu(381000))
  423. p = ubox.text_frame.paragraphs[0]
  424. p.text = unit
  425. p.font.size = Pt(14)
  426. p.font.color.rgb = C_TEXT_GRAY
  427. p.font.name = '微软雅黑'
  428. # Change badge
  429. chg = kpi.get('change', '')
  430. if chg:
  431. cbox = slide.shapes.add_textbox(Emu(x + 508000), Emu(y + 1778000), Emu(4064000), Emu(304800))
  432. p = cbox.text_frame.paragraphs[0]
  433. p.text = chg
  434. p.font.size = Pt(12)
  435. chg_str = str(chg)
  436. is_positive = chg_str.startswith('+') or any(k in chg_str for k in ['↑', '提升', '增长', '上调', '增加', '大幅', '好', '突破', '达成', '优化'])
  437. is_negative = chg_str.startswith('-') or any(k in chg_str for k in ['↓', '下滑', '下降', '减少', '回落', '滞后', '堆积', '阻塞', '缺口', '延迟'])
  438. if is_negative:
  439. p.font.color.rgb = C_RED
  440. elif is_positive:
  441. p.font.color.rgb = C_GREEN
  442. else:
  443. p.font.color.rgb = C_TEXT_GRAY
  444. p.font.name = '微软雅黑'
  445. # Sub note with semantic background color tag (e.g. "日均51笔")
  446. sub = kpi.get('sub', '')
  447. if sub:
  448. sub_text = _truncate_text(sub, 20)
  449. tag_color = _sentiment_color(sub_text)
  450. tag_x = Emu(x + 508000)
  451. tag_y = Emu(y + 2159000)
  452. tag_w = Emu(min(len(sub_text) * 220000 + 400000, 3600000))
  453. tag_h = Emu(304800)
  454. if tag_color:
  455. tag_bg = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, tag_x, tag_y, tag_w, tag_h)
  456. tag_bg.fill.solid()
  457. tag_bg.fill.fore_color.rgb = tag_color
  458. tag_bg.line.fill.background()
  459. sbox = slide.shapes.add_textbox(tag_x, tag_y, tag_w, tag_h)
  460. p = sbox.text_frame.paragraphs[0]
  461. p.text = sub_text
  462. p.font.size = Pt(11)
  463. p.font.color.rgb = C_TEXT_GRAY
  464. p.font.name = '微软雅黑'
  465. p.alignment = PP_ALIGN.CENTER
  466. def _add_compact_kpi_cards(slide, kpis, start_x=Emu(CONTENT_LEFT), start_y=Emu(1651000),
  467. fonts=None, colors=None,
  468. max_cols=3, card_h=Emu(1780000), gap_x=Emu(254000),
  469. gap_y=Emu(254000)):
  470. colors = colors or {}
  471. C_CARD_BG = colors.get('card_bg', RGBColor(0xE7, 0xF0, 0xF7))
  472. C_TEXT_GRAY = colors.get('text_gray', RGBColor(0x66, 0x66, 0x66))
  473. C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
  474. """Draw compact KPI cards so generic overview pages preserve room for insight text."""
  475. if not kpis:
  476. return 0
  477. content_w = SLIDE_WIDTH - 2 * CONTENT_LEFT
  478. cols = min(max_cols, max(1, len(kpis)))
  479. card_w = int((content_w - (cols - 1) * int(gap_x)) / cols)
  480. rows = (len(kpis) + cols - 1) // cols
  481. for i, kpi in enumerate(kpis):
  482. row = i // cols
  483. col = i % cols
  484. x = int(start_x) + col * (card_w + int(gap_x))
  485. y = int(start_y) + row * (int(card_h) + int(gap_y))
  486. card = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, Emu(x), Emu(y), Emu(card_w), card_h)
  487. card.fill.solid()
  488. card.fill.fore_color.rgb = C_CARD_BG
  489. card.line.fill.background()
  490. label = _truncate_text(kpi.get('label', ''), 14)
  491. lbl = slide.shapes.add_textbox(Emu(x + 280000), Emu(y + 180000), Emu(card_w - 560000), Emu(330000))
  492. p = lbl.text_frame.paragraphs[0]
  493. p.text = label
  494. p.font.size = Pt(11)
  495. p.font.color.rgb = C_TEXT_GRAY
  496. p.font.name = '微软雅黑'
  497. value = _truncate_text(str(kpi.get('value', '')), 16)
  498. val = slide.shapes.add_textbox(Emu(x + 280000), Emu(y + 570000), Emu(card_w - 1000000), Emu(560000))
  499. p = val.text_frame.paragraphs[0]
  500. p.text = value
  501. p.font.size = Pt(24 if len(value) <= 10 else 20)
  502. p.font.bold = True
  503. p.font.color.rgb = C_PRIMARY
  504. p.font.name = 'Arial'
  505. unit = kpi.get('unit', '')
  506. if unit:
  507. ubox = slide.shapes.add_textbox(Emu(x + card_w - 820000), Emu(y + 710000), Emu(540000), Emu(330000))
  508. p = ubox.text_frame.paragraphs[0]
  509. p.text = _truncate_text(str(unit), 4)
  510. p.font.size = Pt(10)
  511. p.font.color.rgb = C_TEXT_GRAY
  512. p.font.name = '微软雅黑'
  513. sub_text = kpi.get('sub') or kpi.get('change') or '核心指标'
  514. sub = slide.shapes.add_textbox(Emu(x + 280000), Emu(y + 1230000), Emu(card_w - 560000), Emu(330000))
  515. p = sub.text_frame.paragraphs[0]
  516. p.text = _truncate_text(str(sub_text), 24)
  517. p.font.size = Pt(9)
  518. p.font.color.rgb = C_TEXT_GRAY
  519. p.font.name = '微软雅黑'
  520. return int(start_y) + rows * int(card_h) + (rows - 1) * int(gap_y)
  521. # ==============================================================================
  522. # TEXT BLOCKS
  523. # ==============================================================================
  524. def _add_text_block(slide, title, body, left, top, width, height,
  525. fonts=None, colors=None,
  526. title_size=Pt(14), body_size=Pt(11), line_space=Pt(6)):
  527. colors = colors or {}
  528. C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
  529. C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33))
  530. """Single text box with title + body."""
  531. box = slide.shapes.add_textbox(left, top, width, height)
  532. tf = box.text_frame
  533. tf.word_wrap = True
  534. p = tf.paragraphs[0]
  535. p.text = title
  536. p.font.size = title_size
  537. p.font.bold = True
  538. p.font.color.rgb = C_PRIMARY if title else C_TEXT
  539. p.font.name = '微软雅黑'
  540. if body:
  541. p2 = tf.add_paragraph()
  542. p2.text = body
  543. p2.font.size = body_size
  544. p2.font.color.rgb = C_TEXT
  545. p2.font.name = '微软雅黑'
  546. p2.space_before = line_space
  547. p2.line_spacing = 1.3
  548. def _estimate_text_height(items, title_size_pt, body_size_pt, width_emu,
  549. line_spacing=1.15, title_extra=1.3):
  550. """Estimate rendered text height in EMU for adaptive font sizing."""
  551. width_pt = width_emu / 12700.0
  552. chars_per_line_body = max(10, int(width_pt / (body_size_pt * 1.15)))
  553. chars_per_line_title = max(10, int(width_pt / (title_size_pt * 1.15)))
  554. line_height_body = int(body_size_pt * line_spacing * 12700)
  555. line_height_title = int(title_size_pt * title_extra * 12700)
  556. total = 0
  557. for item in items:
  558. title = item.get('title', '')
  559. content = item.get('content', '')
  560. title_lines = max(1, (len(title) + chars_per_line_title - 1) // chars_per_line_title)
  561. content_lines = max(1, (len(content) + chars_per_line_body - 1) // chars_per_line_body)
  562. total += title_lines * line_height_title + content_lines * line_height_body + int(6 * 12700)
  563. return total
  564. def _add_structured_insight(slide, items, left, top, width, height,
  565. fonts=None, colors=None,
  566. title_size=Pt(12), body_size=Pt(11),
  567. max_items=None, min_body_size=Pt(9)):
  568. colors = colors or {}
  569. C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
  570. C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33))
  571. """
  572. High-density structured multi-paragraph insight block.
  573. items: list of {'title': str, 'content': str}
  574. Features:
  575. - No truncation; full content rendered
  576. - No max_items limit by default (render all)
  577. - Auto-shrink body font to fit within height (down to min_body_size)
  578. - Compact line spacing (1.15) to maximize density
  579. - Each bullet has emoji + bold title + normal body
  580. """
  581. if not items:
  582. return
  583. # Adaptive font sizing: shrink body_size until it fits
  584. target_height = int(height)
  585. # title_size/body_size may be EMU integers or Pt objects; normalize to pt
  586. _ts = float(title_size) / 12700.0 if float(title_size) > 1000 else float(title_size)
  587. _bs = float(body_size) / 12700.0 if float(body_size) > 1000 else float(body_size)
  588. _min_bs = float(min_body_size) / 12700.0 if float(min_body_size) > 1000 else float(min_body_size)
  589. ts_pt = _ts
  590. bs_pt = _bs
  591. min_bs_pt = _min_bs
  592. # Binary-search-like shrink to fit
  593. while bs_pt > min_bs_pt:
  594. est = _estimate_text_height(items, ts_pt, bs_pt, int(width))
  595. if est <= target_height:
  596. break
  597. bs_pt -= 0.5
  598. ts_pt = max(bs_pt + 1, ts_pt - 0.25)
  599. box = slide.shapes.add_textbox(left, top, width, height)
  600. tf = box.text_frame
  601. tf.word_wrap = True
  602. first = True
  603. for item in items[:max_items] if max_items else items:
  604. if not first:
  605. spacer = tf.add_paragraph()
  606. spacer.text = ''
  607. spacer.space_before = Pt(3)
  608. title = item.get('title', '')
  609. emoji = _emoji_for_item(title)
  610. # Avoid double emoji
  611. if emoji and title.startswith(emoji):
  612. emoji = ''
  613. title_text = f'{emoji} {title}' if emoji else title
  614. p = tf.paragraphs[0] if first else tf.add_paragraph()
  615. p.text = title_text
  616. p.font.size = Pt(ts_pt)
  617. p.font.bold = True
  618. p.font.color.rgb = C_PRIMARY
  619. p.font.name = '微软雅黑'
  620. p.line_spacing = 1.15
  621. first = False
  622. content = item.get('content', '')
  623. if content:
  624. p2 = tf.add_paragraph()
  625. p2.text = content
  626. p2.font.size = Pt(bs_pt)
  627. p2.font.color.rgb = C_TEXT
  628. p2.font.name = '微软雅黑'
  629. p2.line_spacing = 1.15
  630. p2.space_before = Pt(1)
  631. def _ensure_min_insight_items(items, profile=None, metrics=None, min_count=2,
  632. context_label='本页'):
  633. """Guarantee enough long-form insight blocks for quality self-check."""
  634. cleaned = []
  635. for item in items or []:
  636. title = str(item.get('title', '')).strip()
  637. content = str(item.get('content', '')).strip()
  638. if title or content:
  639. cleaned.append({'title': title or '分析说明', 'content': content})
  640. profile = profile or {}
  641. metrics = metrics or {}
  642. total_rows = profile.get('total_rows', 0)
  643. numeric_count = len(profile.get('numeric_columns', []) or [])
  644. category_count = len(profile.get('category_columns', []) or [])
  645. fallback_pool = [
  646. {
  647. 'title': f'{context_label}数据基础',
  648. 'content': f'本页基于当前数据画像进行归纳,覆盖 {total_rows or "若干"} 条记录、'
  649. f'{numeric_count} 个数值指标和 {category_count} 个分类维度。'
  650. f'当原始数据字段较少或业务指标尚未形成充分拆解时,报告优先呈现已经确认的核心指标,'
  651. f'并将可验证的数据范围、维度覆盖和后续分析口径写入页面,避免出现空白页或模板占位内容。',
  652. },
  653. {
  654. 'title': f'{context_label}行动建议',
  655. 'content': f'建议围绕已确认的核心指标建立持续跟踪机制:先核对指标口径与数据字段映射,'
  656. f'再按时间、区域、部门或客户等维度拆解异常变化,最后将发现转化为责任人、截止时间和复盘频率明确的行动项。'
  657. f'如果后续补充历史同期或目标值数据,可进一步增加同比、环比和达成率判断。',
  658. },
  659. {
  660. 'title': f'{context_label}风险提示',
  661. 'content': f'若数据源存在缺失值、合并表头、人工备注列或统计口径变化,自动生成的结论需要结合业务确认进行复核。'
  662. f'建议在报告发布前重点检查核心指标是否全部出现、图表数值是否与原表一致、长文本是否仍在页面安全区域内,'
  663. f'以保证美观度和决策可信度同时达标。',
  664. },
  665. ]
  666. used_titles = {item['title'] for item in cleaned}
  667. for fallback in fallback_pool:
  668. if len(cleaned) >= min_count:
  669. break
  670. if fallback['title'] not in used_titles:
  671. cleaned.append(fallback)
  672. used_titles.add(fallback['title'])
  673. return cleaned
  674. # ==============================================================================
  675. # ALERT / ACTION / ISSUE / GOAL CARDS
  676. # ==============================================================================
  677. def _add_alert_cards(slide, alerts, start_y=Emu(1651000), fonts=None, colors=None):
  678. colors = colors or {}
  679. C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
  680. C_RED = colors.get('red', RGBColor(0xEF, 0x44, 0x44))
  681. C_ORANGE = colors.get('orange', RGBColor(0xED, 0x7D, 0x31))
  682. C_SECONDARY = colors.get('secondary', RGBColor(0x64, 0x74, 0x8B))
  683. C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33))
  684. colors = {'严重': C_RED, '警告': C_ORANGE, '关注': C_PRIMARY, '中度': C_ORANGE, '一般': C_SECONDARY}
  685. positions = [Emu(762000), Emu(5778500), Emu(10795000)]
  686. for i, alert in enumerate(alerts[:3]):
  687. x = positions[i]
  688. y = start_y
  689. lvl = alert.get('level', '关注')
  690. c = colors.get(lvl, C_PRIMARY)
  691. bar = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, x, y, Emu(50800), Emu(2286000))
  692. bar.fill.solid()
  693. bar.fill.fore_color.rgb = c
  694. bar.line.fill.background()
  695. tbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 228600), Emu(4064000), Emu(406400))
  696. p = tbox.text_frame.paragraphs[0]
  697. p.text = alert.get('title', '')
  698. p.font.size = Pt(15)
  699. p.font.bold = True
  700. p.font.color.rgb = C_TEXT
  701. p.font.name = '微软雅黑'
  702. dbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 762000), Emu(4064000), Emu(1270000))
  703. tf = dbox.text_frame
  704. tf.word_wrap = True
  705. p = tf.paragraphs[0]
  706. p.text = alert.get('detail', '')
  707. p.font.size = Pt(11)
  708. p.font.color.rgb = C_TEXT
  709. p.font.name = '微软雅黑'
  710. def _add_action_cards(slide, actions, start_y=Emu(2540000), fonts=None, colors=None):
  711. colors = colors or {}
  712. C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
  713. C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33))
  714. positions = [Emu(762000), Emu(5778500), Emu(10795000)]
  715. for i, act in enumerate(actions[:3]):
  716. x = positions[i]
  717. y = start_y
  718. bar = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, x, y, Emu(50800), Emu(406400))
  719. bar.fill.solid()
  720. bar.fill.fore_color.rgb = C_PRIMARY
  721. bar.line.fill.background()
  722. tbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 952500), Emu(4064000), Emu(406400))
  723. p = tbox.text_frame.paragraphs[0]
  724. p.text = act.get('title', '')
  725. p.font.size = Pt(17)
  726. p.font.bold = True
  727. p.font.color.rgb = C_TEXT
  728. p.font.name = '微软雅黑'
  729. dbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 1524000), Emu(4064000), Emu(3429000))
  730. tf = dbox.text_frame
  731. tf.word_wrap = True
  732. p = tf.paragraphs[0]
  733. p.text = act.get('detail', '')
  734. p.font.size = Pt(11)
  735. p.font.color.rgb = C_TEXT
  736. p.font.name = '微软雅黑'
  737. p.line_spacing = 1.3
  738. def _add_issue_cards(slide, issues, start_y=Emu(1524000), fonts=None, colors=None):
  739. colors = colors or {}
  740. C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
  741. C_RED = colors.get('red', RGBColor(0xEF, 0x44, 0x44))
  742. C_ORANGE = colors.get('orange', RGBColor(0xED, 0x7D, 0x31))
  743. C_SECONDARY = colors.get('secondary', RGBColor(0x64, 0x74, 0x8B))
  744. C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33))
  745. colors = {'严重': C_RED, '中度': C_ORANGE, '轻度': C_PRIMARY, '一般': C_SECONDARY}
  746. for i, issue in enumerate(issues[:3]):
  747. x = Emu(762000)
  748. y = Emu(int(start_y) + i * (1778000 + 254000))
  749. sev = issue.get('severity', '中度')
  750. c = colors.get(sev, C_ORANGE)
  751. bar = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, x, y, Emu(50800), Emu(1778000))
  752. bar.fill.solid()
  753. bar.fill.fore_color.rgb = c
  754. bar.line.fill.background()
  755. sbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 228600), Emu(660400), Emu(304800))
  756. p = sbox.text_frame.paragraphs[0]
  757. p.text = sev
  758. p.font.size = Pt(11)
  759. p.font.bold = True
  760. p.font.color.rgb = c
  761. p.font.name = '微软雅黑'
  762. tbox = slide.shapes.add_textbox(Emu(x + 1778000), Emu(y + 228600), Emu(13462000), Emu(355600))
  763. p = tbox.text_frame.paragraphs[0]
  764. p.text = issue.get('title', '')
  765. p.font.size = Pt(13)
  766. p.font.bold = True
  767. p.font.color.rgb = C_TEXT
  768. p.font.name = '微软雅黑'
  769. dbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 698500), Emu(14224000), Emu(355600))
  770. p = dbox.text_frame.paragraphs[0]
  771. p.text = issue.get('detail', '')
  772. p.font.size = Pt(11)
  773. p.font.color.rgb = C_TEXT
  774. p.font.name = '微软雅黑'
  775. abox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 1193800), Emu(14224000), Emu(609600))
  776. tf = abox.text_frame
  777. tf.word_wrap = True
  778. p = tf.paragraphs[0]
  779. p.text = f"建议措施:{issue.get('action', '')}"
  780. p.font.size = Pt(11)
  781. p.font.color.rgb = C_TEXT_GRAY
  782. p.font.name = '微软雅黑'
  783. def _add_goal_cards(slide, goals, start_y=Emu(1524000), fonts=None, colors=None):
  784. colors = colors or {}
  785. C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
  786. C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33))
  787. C_TEXT_GRAY = colors.get('text_gray', RGBColor(0x66, 0x66, 0x66))
  788. sy = int(start_y)
  789. positions = [
  790. (Emu(762000), Emu(sy)),
  791. (Emu(8318500), Emu(sy)),
  792. (Emu(762000), Emu(sy + 1879600)),
  793. (Emu(8318500), Emu(sy + 1879600)),
  794. ]
  795. icon_chars = ['🎯', '💰', '🚀', '⚡']
  796. for i, goal in enumerate(goals[:4]):
  797. x, y = positions[i]
  798. gid = goal.get('id', f'G{i+1}')
  799. gbox = slide.shapes.add_textbox(x, Emu(y + 101600), Emu(635000), Emu(355600))
  800. p = gbox.text_frame.paragraphs[0]
  801. p.text = f"{icon_chars[i % len(icon_chars)]} {gid}"
  802. p.font.size = Pt(16)
  803. p.font.bold = True
  804. p.font.color.rgb = C_PRIMARY
  805. p.font.name = 'Arial'
  806. tbox = slide.shapes.add_textbox(Emu(x + 863600), Emu(y + 101600), Emu(6096000), Emu(355600))
  807. p = tbox.text_frame.paragraphs[0]
  808. p.text = goal.get('title', '')
  809. p.font.size = Pt(14)
  810. p.font.bold = True
  811. p.font.color.rgb = C_TEXT
  812. p.font.name = '微软雅黑'
  813. dbox = slide.shapes.add_textbox(Emu(x + 228600), Emu(y + 571500), Emu(6731000), Emu(863600))
  814. tf = dbox.text_frame
  815. tf.word_wrap = True
  816. p = tf.paragraphs[0]
  817. p.text = goal.get('detail', '')
  818. p.font.size = Pt(11)
  819. p.font.color.rgb = C_TEXT_GRAY
  820. p.font.name = '微软雅黑'
  821. p.line_spacing = 1.3
  822. def _add_summary_text(slide, text, left=Emu(1016000), top=Emu(5435600), width=Emu(14224000), height=Emu(1270000), fonts=None, colors=None):
  823. colors = colors or {}
  824. C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33))
  825. box = slide.shapes.add_textbox(left, top, width, height)
  826. tf = box.text_frame
  827. tf.word_wrap = True
  828. p = tf.paragraphs[0]
  829. p.text = text
  830. p.font.size = Pt(12)
  831. p.font.color.rgb = C_TEXT
  832. p.font.name = '微软雅黑'
  833. p.line_spacing = 1.3
  834. # ==============================================================================
  835. # TEXT / LAYOUT HELPERS
  836. # ==============================================================================
  837. def _truncate_text(text, max_chars=60):
  838. """Truncate text to max_chars, appending '...' if truncated."""
  839. if not text:
  840. return text
  841. if len(text) > max_chars:
  842. return text[:max_chars - 1] + '...'
  843. return text
  844. def _format_kpi_value_for_placeholder(value, max_chars=16):
  845. """
  846. KPI value placeholders are fixed-size number slots. If upstream passes a
  847. category list, compact it to a count instead of letting it overflow.
  848. """
  849. if value is None:
  850. return ''
  851. text = str(value).strip()
  852. if len(text) <= max_chars:
  853. return text
  854. list_text = text.strip().strip('[]()(){}')
  855. tokens = [
  856. token.strip().strip("'\"“”‘’")
  857. for token in re_module.split(r'[、,,;;\n/]+', list_text)
  858. ]
  859. tokens = [token for token in tokens if token]
  860. if len(tokens) >= 3:
  861. return f'{len(tokens)}项'
  862. return _truncate_text(text, max_chars)
  863. def _sentiment_color(text):
  864. """Return a light background color based on text sentiment."""
  865. if not text:
  866. return None
  867. text = str(text)
  868. positive_words = ['提升', '增长', '上调', '增加', '高', '好', '大幅', '冲刺', '领跑', '上升', '扩大', '优化', '改善', '突破', '达成']
  869. negative_words = ['下滑', '下降', '减少', '低', '差', '回落', '下滑', '滞后', '堆积', '阻塞', '缺口', '延迟', '超期', '逾期', '风险', '警告']
  870. pos_score = sum(1 for w in positive_words if w in text)
  871. neg_score = sum(1 for w in negative_words if w in text)
  872. if neg_score > pos_score:
  873. return RGBColor(0xFE, 0xE2, 0xE2) # light red ~ #EF444420
  874. if pos_score > neg_score:
  875. return RGBColor(0xD1, 0xFA, 0xE5) # light green ~ #10B98120
  876. return None
  877. import re
  878. def _emoji_for_item(title):
  879. """Return an emoji prefix based on title keywords."""
  880. if not title:
  881. return '📈'
  882. title = str(title)
  883. # Skip if title already starts with an emoji
  884. if re.match(r'^[\U0001F300-\U0001F9FF\u2600-\u26FF\u2700-\u27BF]', title):
  885. return ''
  886. if any(k in title for k in ['风险', '警告', '关注', '下滑', '下降', '延迟', '超期', '缺口', '阻塞']):
  887. return '⚠️'
  888. if any(k in title for k in ['建议', '措施', '行动', '协调', '对接']):
  889. return '💡'
  890. if any(k in title for k in ['目标', '计划', '冲刺', '展望', '聚焦']):
  891. return '🎯'
  892. if any(k in title for k in ['增长', '上升', '提升', '峰值', '领跑', '突破', '活跃', '好转']):
  893. return '📈'
  894. return '💡'
  895. def _add_footer_if_missing(slide, footer_text, slide_width=None, fonts=None, colors=None):
  896. colors = colors or {}
  897. C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
  898. C_WHITE = colors.get('white', RGBColor(0xFF, 0xFF, 0xFF))
  899. if slide_width is None:
  900. slide_width = slide.shapes._spTree.getparent().getparent().attrib.get('cx')
  901. slide_width = Emu(int(slide_width)) if slide_width else Emu(16256000)
  902. # Check if footer already exists
  903. has_footer = False
  904. for shape in slide.shapes:
  905. if shape.has_text_frame and '数据来源' in shape.text_frame.text:
  906. has_footer = True
  907. break
  908. if has_footer:
  909. return
  910. bar = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, 0, Emu(8824000), slide_width, Emu(320000))
  911. bar.fill.solid()
  912. bar.fill.fore_color.rgb = C_PRIMARY
  913. bar.line.fill.background()
  914. box = slide.shapes.add_textbox(Emu(762000), Emu(8824000), Emu(14000000), Emu(320000))
  915. p = box.text_frame.paragraphs[0]
  916. p.text = footer_text
  917. p.font.size = Pt(10)
  918. p.font.color.rgb = C_WHITE
  919. p.font.name = '微软雅黑'
  920. def _ensure_word_wrap_all(slide, fonts: dict = None):
  921. """Enable word_wrap on all text frames in a slide."""
  922. fonts = fonts or {}
  923. body_font = fonts.get('body_font', '微软雅黑')
  924. for shape in slide.shapes:
  925. if shape.has_text_frame:
  926. shape.text_frame.word_wrap = True
  927. for para in shape.text_frame.paragraphs:
  928. for run in para.runs:
  929. run.font.name = body_font
  930. # ==============================================================================
  931. # MATH HELPERS
  932. # ==============================================================================
  933. def _pct_val(curr, prev):
  934. if prev and prev != 0:
  935. return (curr - prev) / prev * 100
  936. return None
  937. def _format_pct(pct, with_sign=True, suffix='%', zero_suffix=''):
  938. """Safely format a percentage value. Returns '—' if pct is None."""
  939. if pct is None:
  940. return '—'
  941. sign = '+' if with_sign and pct >= 0 else ''
  942. return f"{sign}{pct:.1f}{suffix}{zero_suffix}"
  943. def _pct_str(curr, prev):
  944. if prev and prev != 0:
  945. pct = round((curr - prev) / prev * 100, 1)
  946. sign = '+' if pct >= 0 else ''
  947. return f"{sign}{pct}% vs 上期"
  948. return "—"
  949. def _safe_div(a, b):
  950. return round(a / b, 1) if b else 0
  951. # ==============================================================================
  952. # DYNAMIC / UNIVERSAL REPORT BUILDER
  953. # ==============================================================================
  954. def build_report(data_file: str, config: ReportConfig, output_path: str) -> str:
  955. master_path = _resolve_master_template(config)
  956. prs = Presentation(master_path)
  957. original_slide_count = len(prs.slides)
  958. df = load_generic_excel(data_file)
  959. if config.require_six_confirmations:
  960. confirmation_issues = validate_six_confirmations(config, list(df.columns))
  961. if confirmation_issues:
  962. raise ValueError('生成前六项确认未通过:\n- ' + '\n- '.join(confirmation_issues))
  963. data_profile = config.data_profiling or {}
  964. # Resolve template profile and dynamic layout context
  965. template_profile = _resolve_template_profile(config)
  966. ctx = LayoutContext.from_template_profile(template_profile)
  967. colors = _resolve_colors(config, template_profile)
  968. fonts = _resolve_fonts(config, template_profile)
  969. metrics = calc_generic_metrics(df, config)
  970. content_top = template_profile.get_content_top('content')
  971. total_pages = len([p for p in config.pages if p.selected])
  972. if total_pages == 0:
  973. total_pages = len(config.pages)
  974. for page_idx, page_def in enumerate(config.pages):
  975. if not page_def.selected:
  976. continue
  977. page_num = page_idx + 1
  978. if page_def.page_type == 'cover':
  979. _build_cover_page(prs, config, colors, fonts, template_profile)
  980. elif page_def.page_type == 'toc':
  981. _build_toc_page(prs, config, colors, fonts, template_profile)
  982. elif page_def.page_type == 'kpi_overview':
  983. _build_kpi_overview_page(prs, config, metrics, colors, fonts, content_top, df, data_profile, ctx)
  984. elif page_def.page_type == 'trend':
  985. if not _build_trend_page(prs, config, df, data_profile, colors, fonts, content_top, ctx):
  986. _build_fallback_analysis_page(prs, config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx)
  987. elif page_def.page_type == 'distribution':
  988. if not _build_distribution_page(prs, config, df, data_profile, colors, fonts, content_top, page_def, ctx):
  989. _build_fallback_analysis_page(prs, config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx)
  990. elif page_def.page_type == 'ranking':
  991. if not _build_ranking_page(prs, config, df, data_profile, colors, fonts, content_top, page_def, ctx):
  992. _build_fallback_analysis_page(prs, config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx)
  993. elif page_def.page_type == 'summary':
  994. _build_summary_page(prs, config, metrics, data_profile, colors, fonts, content_top, page_def, ctx)
  995. elif _is_forecast_page_type(page_def.page_type):
  996. _build_forecast_page(prs, config, df, data_profile, metrics, colors, fonts, content_top, page_def, ctx)
  997. elif page_def.page_type == 'end':
  998. _build_end_page(prs, config, colors, fonts, template_profile)
  999. else:
  1000. raise ValueError(f'不支持的页面类型: {page_def.page_type}(页面: {page_def.title})')
  1001. for slide in prs.slides:
  1002. _ensure_word_wrap_all(slide, fonts)
  1003. _delete_template_slides(prs, original_slide_count)
  1004. prs.save(output_path)
  1005. print(f"Report saved: {output_path}")
  1006. return output_path
  1007. def quality_assured_build(data_file: str, config: ReportConfig,
  1008. output_path: str) -> tuple:
  1009. if config.require_six_confirmations:
  1010. df = load_generic_excel(data_file)
  1011. confirmation_issues = validate_six_confirmations(config, list(df.columns))
  1012. if confirmation_issues:
  1013. raise ValueError('生成前六项确认未通过:\n- ' + '\n- '.join(confirmation_issues))
  1014. template_profile = _resolve_template_profile(config)
  1015. ctx = LayoutContext.from_template_profile(template_profile)
  1016. colors = _resolve_colors(config, template_profile)
  1017. inspector = QualityInspector(colors, ctx)
  1018. return inspector.quality_assured_build(
  1019. build_fn=lambda d, c: _build_without_save(d, c, config),
  1020. data=data_file,
  1021. config=config,
  1022. output_path=output_path,
  1023. )
  1024. def _build_without_save(data_file, temp_config, original_config):
  1025. from pptx import Presentation as Prs
  1026. prs = Prs(_resolve_master_template(original_config))
  1027. original_slide_count = len(prs.slides)
  1028. df = load_generic_excel(data_file)
  1029. data_profile = original_config.data_profiling or {}
  1030. template_profile = _resolve_template_profile(original_config)
  1031. ctx = LayoutContext.from_template_profile(template_profile)
  1032. colors = _resolve_colors(original_config, template_profile)
  1033. fonts = _resolve_fonts(original_config, template_profile)
  1034. metrics = calc_generic_metrics(df, original_config)
  1035. content_top = template_profile.get_content_top('content')
  1036. for page_def in original_config.pages:
  1037. if not page_def.selected:
  1038. continue
  1039. if page_def.page_type == 'cover':
  1040. _build_cover_page(prs, original_config, colors, fonts, template_profile)
  1041. elif page_def.page_type == 'kpi_overview':
  1042. _build_kpi_overview_page(prs, original_config, metrics, colors, fonts, content_top, df, data_profile, ctx)
  1043. elif page_def.page_type == 'trend':
  1044. if not _build_trend_page(prs, original_config, df, data_profile, colors, fonts, content_top, ctx):
  1045. _build_fallback_analysis_page(prs, original_config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx)
  1046. elif page_def.page_type == 'distribution':
  1047. if not _build_distribution_page(prs, original_config, df, data_profile, colors, fonts, content_top, page_def, ctx):
  1048. _build_fallback_analysis_page(prs, original_config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx)
  1049. elif page_def.page_type == 'ranking':
  1050. if not _build_ranking_page(prs, original_config, df, data_profile, colors, fonts, content_top, page_def, ctx):
  1051. _build_fallback_analysis_page(prs, original_config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx)
  1052. elif page_def.page_type == 'summary':
  1053. _build_summary_page(prs, original_config, metrics, data_profile, colors, fonts, content_top, page_def, ctx)
  1054. elif _is_forecast_page_type(page_def.page_type):
  1055. _build_forecast_page(prs, original_config, df, data_profile, metrics, colors, fonts, content_top, page_def, ctx)
  1056. elif page_def.page_type == 'end':
  1057. _build_end_page(prs, original_config, colors, fonts, template_profile)
  1058. elif page_def.page_type == 'toc':
  1059. _build_toc_page(prs, original_config, colors, fonts, template_profile)
  1060. else:
  1061. raise ValueError(f'不支持的页面类型: {page_def.page_type}(页面: {page_def.title})')
  1062. for slide in prs.slides:
  1063. _ensure_word_wrap_all(slide, fonts)
  1064. _delete_template_slides(prs, original_slide_count)
  1065. return prs
  1066. def _build_cover_page(prs, config, colors, fonts, template_profile):
  1067. """Build cover page from template.
  1068. Two-pass strategy:
  1069. 1. Pattern-based: _replace_all_placeholders() for templates with
  1070. {report_title}/{date}/{department} text markers in placeholders.
  1071. 2. Idx-based fallback: for templates where placeholders are empty or
  1072. have template-default text (e.g. Wuling's 封面半版), fill by
  1073. placeholder_format.idx directly.
  1074. IMPORTANT — text color pitfall:
  1075. Many template covers have a decorative gradient/banner that covers only
  1076. the top portion of the slide. The TITLE placeholder may be positioned
  1077. BELOW the colored area (on white background). Using white/light text
  1078. on a white background makes it invisible.
  1079. → Always use dark text (C_PRIMARY) in the idx fallback to avoid this.
  1080. """
  1081. slide = _duplicate_master_slide(prs, template_profile, 'cover', keep_shapes=True)
  1082. # ---- Pass 1: pattern-based replacement ----
  1083. _replace_all_placeholders(slide, {
  1084. '{report_title}': config.title,
  1085. '{report_type}': '数据报告',
  1086. '{date}': config.period_str or config.date_range[0].strftime('%Y年%m月%d日'),
  1087. '{department}': config.source_label,
  1088. '{period}': config.period_str,
  1089. '{gen_time}': datetime.now().strftime('%Y-%m-%d %H:%M'),
  1090. }, fonts)
  1091. _remove_empty_cover_kpi_placeholders(slide)
  1092. # ---- Pass 2: idx-based fallback ----
  1093. # If the template has no {report_title} etc. text markers, pass 1 is a
  1094. # no-op. Detect unfilled placeholders by idx and fill them directly.
  1095. # Common idx mappings (from OOXML spec + Wuling/real-world templates):
  1096. # idx=0 → TITLE placeholder → report title
  1097. # idx=10 → SUBTITLE placeholder → date / subtitle (if idx=21 absent)
  1098. # idx=21 → BODY quarter-size → date / period string
  1099. # idx=22 → BODY quarter-size → department / source
  1100. TEMPLATE_DEFAULT_PATTERNS = {
  1101. '单击此处编辑母版标题样式', '单击此处添加标题',
  1102. '单击此处编辑母版文本样式', '单击此处添加文本',
  1103. '单击此处添加副标题',
  1104. }
  1105. _colors = colors or {}
  1106. _C_PRIMARY = _colors.get('primary', C_PRIMARY)
  1107. _C_TEXT_GRAY = _colors.get('text_gray', C_TEXT_GRAY)
  1108. _title_font = (fonts or {}).get('title_font', '微软雅黑')
  1109. _body_font = (fonts or {}).get('body_font', '微软雅黑')
  1110. date_text = config.period_str or (
  1111. config.date_range[0].strftime('%Y年%m月') if config.date_range else ''
  1112. )
  1113. dept_text = config.source_label or ''
  1114. filled_title = False
  1115. filled_date = False
  1116. for shape in slide.shapes:
  1117. if not shape.is_placeholder or not shape.has_text_frame:
  1118. continue
  1119. ph = shape.placeholder_format
  1120. tf = shape.text_frame
  1121. current_text = tf.text.strip()
  1122. is_unfilled = (
  1123. not current_text
  1124. or current_text in TEMPLATE_DEFAULT_PATTERNS
  1125. or any(tpl in current_text for tpl in TEMPLATE_DEFAULT_PATTERNS)
  1126. )
  1127. # idx=0 TITLE — report title (highest priority)
  1128. if ph.idx == 0 and (is_unfilled or not filled_title):
  1129. p = tf.paragraphs[0]
  1130. _set_para_text(p, config.title, _C_PRIMARY, Pt(36),
  1131. bold=True, font_name=_title_font)
  1132. filled_title = True
  1133. # idx=10 SUBTITLE — date (only if idx=21 was not filled)
  1134. elif ph.idx == 10 and (is_unfilled or not filled_date):
  1135. p = tf.paragraphs[0]
  1136. _set_para_text(p, date_text, _C_PRIMARY, Pt(18),
  1137. font_name=_body_font)
  1138. filled_date = True
  1139. # idx=21 BODY quarter-size — date/period
  1140. elif ph.idx == 21 and (is_unfilled or not filled_date):
  1141. p = tf.paragraphs[0]
  1142. _set_para_text(p, date_text, _C_PRIMARY, Pt(18),
  1143. font_name=_body_font)
  1144. filled_date = True
  1145. # idx=22 BODY quarter-size — department/source
  1146. elif ph.idx == 22 and is_unfilled:
  1147. p = tf.paragraphs[0]
  1148. _set_para_text(p, dept_text, _C_TEXT_GRAY, Pt(12),
  1149. font_name=_body_font)
  1150. total = len([p for p in config.pages if p.selected]) or len(config.pages)
  1151. _add_footer_if_missing(slide, f'数据来源:{config.source_label} | 1/{total}',
  1152. slide_width=prs.slide_width, colors=colors)
  1153. def _set_para_text(para, text, color, size, bold=False, font_name=None):
  1154. """Set paragraph text + formatting, reusing existing run or creating new one."""
  1155. para.text = ''
  1156. if para.runs:
  1157. run = para.runs[0]
  1158. else:
  1159. run = para.add_run()
  1160. run.text = text
  1161. run.font.color.rgb = color
  1162. run.font.size = size
  1163. run.font.bold = bold
  1164. if font_name:
  1165. run.font.name = font_name
  1166. def _build_fallback_analysis_page(prs, config, page_def, df, profile, metrics, colors, fonts, content_top, ctx=None):
  1167. """
  1168. Fallback page builder: generates analysis text from available data
  1169. when the primary page type cannot produce content (e.g. no time columns
  1170. for trend, no category columns for distribution).
  1171. Produces at least 4 deep analysis blocks with data citations.
  1172. """
  1173. slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content")
  1174. page_title = page_def.title if page_def and page_def.title else f'{config.title}数据分析'
  1175. _replace_all_placeholders(slide, {
  1176. '{report_title}': config.title,
  1177. '{date}': config.period_str,
  1178. '{page_title}': page_title,
  1179. '{source}': config.source_label,
  1180. '{period}': '',
  1181. '{page_num}': '',
  1182. }, fonts)
  1183. num_cols = profile.get('numeric_columns', [])
  1184. cat_cols = profile.get('category_columns', [])
  1185. insight_items = []
  1186. if num_cols:
  1187. top_metric = num_cols[0]
  1188. top_name = top_metric.get('inferred_label', top_metric['column_name'])
  1189. top_vals = df[top_metric['column_name']].dropna()
  1190. if len(top_vals) > 0:
  1191. mean_val = top_vals.mean()
  1192. max_val = top_vals.max()
  1193. min_val = top_vals.min()
  1194. median_val = top_vals.median()
  1195. total_val = top_vals.sum()
  1196. insight_items.append({
  1197. 'title': f'{top_name}整体概览',
  1198. 'content': f'报告周期内,{top_name}统计数据共包含 {len(top_vals)} 条有效记录。'
  1199. f'总和为 {total_val:,.0f},平均值为 {mean_val:,.2f},中位数为 {median_val:,.2f}。'
  1200. f'最大值为 {max_val:,.2f},最小值为 {min_val:,.2f}。'
  1201. f'{"数据波动范围较大,最大值与最小值差距显著,说明不同条目间差异明显,建议深入分析极端值成因" if min_val > 0 and max_val / max(min_val, 1) > 100 else "数据整体分布较为均衡,波动性在合理范围内"}。'
  1202. f'中位数与平均值的偏差反映了数据的{"右偏分布(少数大值拉高了均值),说明存在显著头部效应" if median_val < mean_val * 0.8 else "左偏分布" if median_val > mean_val * 1.2 else "较为对称,数据呈正态分布趋势"}。',
  1203. })
  1204. insight_items.append({
  1205. 'title': f'{top_name}分段分析',
  1206. 'content': f'对 {top_name} 进行四分段统计:上四分位数(25%数据高于此值)为 {top_vals.quantile(0.75):,.2f},'
  1207. f'下四分位数(25%数据低于此值)为 {top_vals.quantile(0.25):,.2f},'
  1208. f'四分位距(IQR)为 {top_vals.quantile(0.75) - top_vals.quantile(0.25):,.2f}。'
  1209. f'{"IQR较大,数据分布较为离散,不同类别的表现差异明显,需关注尾部类别的提升空间" if (top_vals.quantile(0.75) - top_vals.quantile(0.25)) > abs(mean_val) * 0.5 else "IQR在合理范围内,数据集中度较好"}。'
  1210. f'建议按四分位将数据分为四组,重点跟踪上四分位组的表现,识别可复制的成功因素。',
  1211. })
  1212. if cat_cols and num_cols:
  1213. cat = cat_cols[0]
  1214. cat_name = cat.get('inferred_label', cat['column_name'])
  1215. num = num_cols[0]
  1216. num_name = num.get('inferred_label', num['column_name'])
  1217. cat_unique = df[cat['column_name']].dropna().nunique()
  1218. insight_items.append({
  1219. 'title': f'{cat_name}分类覆盖分析',
  1220. 'content': f'数据共覆盖 {cat_unique} 个不同的{cat_name},在 {num_name} 维度上呈现差异化分布。'
  1221. f'不同{cat_name}对整体{num_name}的贡献度各异,建议按贡献度大小将{cat_name}进行分类管理。'
  1222. f'高贡献类别应重点维护和深度挖掘,中等贡献类别需持续培育和资源投入,'
  1223. f'低贡献类别可评估其战略价值,适当调整投入节奏。建议建立分类分级管理体系,'
  1224. f'每月跟踪各类别的变化趋势和占比波动。',
  1225. })
  1226. if len(num_cols) >= 2:
  1227. num1 = num_cols[0]
  1228. num2 = num_cols[1]
  1229. ratio = df[num1['column_name']].sum() / max(df[num2['column_name']].sum(), 1)
  1230. insight_items.append({
  1231. 'title': '关键比率与效率指标',
  1232. 'content': f'{num1.get("inferred_label", num1["column_name"])}与{num2.get("inferred_label", num2["column_name"])}的比率为 {ratio:.2f},'
  1233. f'该比率是衡量业务效率的重要参考指标。'
  1234. f'{"比率处于较高水平,表明单位投入产出效率良好" if ratio > 1 else "比率偏低,单位投入的产出效益有限,存在效率提升空间"}。'
  1235. f'建议将此比率纳入定期监控指标,按月环比追踪变化趋势,'
  1236. f'并针对低比率项目制定专项提升计划,分析制约因素和可优化环节。',
  1237. })
  1238. insight_items.append({
  1239. 'title': '数据质量与代表性评估',
  1240. 'content': f'本报告基于共 {len(df)} 条记录进行分析,数据覆盖范围包括上述多个维度。'
  1241. f'建议在后续周期中持续关注数据完整性和及时性,确保分析结果准确反映业务真实情况。'
  1242. f'对于数据量较小或集中度较高的维度,应结合业务判断进行解读,避免以偏概全。'
  1243. f'同时建议补充更多维度的数据(如时间序列数据、竞品对标数据等),'
  1244. f'以支撑更全面的分析视角和更精准的决策建议。',
  1245. })
  1246. if not insight_items:
  1247. insight_items = [{
  1248. 'title': '数据总览',
  1249. 'content': f'当前数据集包含 {len(df)} 条记录,{len(df.columns)} 个字段。'
  1250. f'数值字段 {len(num_cols)} 个,分类字段 {len(cat_cols)} 个。'
  1251. f'建议结合业务场景规划具体的数据分析维度,'
  1252. f'以生成更具洞察力和指导意义的数据报告。',
  1253. }]
  1254. if num_cols and len(df) > 0:
  1255. top_col = num_cols[0]
  1256. chart_zone = get_chart_left_zone(content_top, 0.4, ctx=ctx)
  1257. text_zone = get_insight_right_zone(content_top, 0.4, ctx=ctx)
  1258. sample_vals = df[top_col['column_name']].dropna().head(10).tolist()
  1259. sample_labels = [f'记录{i+1}' for i in range(len(sample_vals))]
  1260. if sample_vals:
  1261. add_bar_chart(slide, sample_labels, sample_vals,
  1262. Emu(chart_zone.x), Emu(chart_zone.y),
  1263. Emu(chart_zone.width), Emu(chart_zone.height),
  1264. series_name=top_col.get('inferred_label', top_col['column_name']),
  1265. color=colors.get('primary'))
  1266. _add_structured_insight(slide, insight_items,
  1267. Emu(text_zone.x), Emu(text_zone.y),
  1268. Emu(text_zone.width), Emu(text_zone.height))
  1269. else:
  1270. zone = get_full_width_zone(content_top, ctx=ctx)
  1271. _add_structured_insight(slide, insight_items,
  1272. Emu(zone.x), Emu(zone.y),
  1273. Emu(zone.width), Emu(zone.height))
  1274. def _build_toc_page(prs, config, colors, fonts, template_profile):
  1275. slide = _duplicate_master_slide(prs, template_profile, 'toc', keep_shapes=True)
  1276. active_pages = [p for p in config.pages if p.selected and p.page_type not in ('cover', 'toc', 'end')]
  1277. _replace_all_placeholders(slide, {
  1278. '{report_title}': config.title,
  1279. '{date}': config.period_str,
  1280. '{page_title}': '目录',
  1281. '{source}': config.source_label,
  1282. '{period}': f'2/{len(config.pages)}',
  1283. '{page_num}': '',
  1284. }, fonts)
  1285. for i, page in enumerate(active_pages[:6], 1):
  1286. _replace_placeholder(slide, f'{{chapter{i}_title}}', page.title, fonts)
  1287. _replace_placeholder(slide, f'{{chapter{i}_desc}}', page.conclusion_title or page.title, fonts)
  1288. def _build_kpi_overview_page(prs, config, metrics, colors, fonts, content_top, df=None, profile=None, ctx=None):
  1289. slide = _duplicate_master_slide(prs, _resolve_template_profile(config), 'content')
  1290. page_title = '核心指标概览'
  1291. _replace_all_placeholders(slide, {
  1292. '{report_title}': config.title,
  1293. '{date}': config.period_str,
  1294. '{page_title}': page_title,
  1295. '{source}': config.source_label,
  1296. '{period}': '',
  1297. '{page_num}': '',
  1298. }, fonts)
  1299. kpi_items = []
  1300. primary_vals = {}
  1301. all_vals = {}
  1302. for md in config.metrics:
  1303. if md.metric_type.value == 'kpi' and md.selected:
  1304. val = metrics.get(md.name, 0)
  1305. display_val = format(val, md.format_spec) if isinstance(val, (int, float)) else str(val)
  1306. kpi_items.append({
  1307. 'label': md.label,
  1308. 'value': display_val,
  1309. 'unit': md.unit,
  1310. 'change': '',
  1311. 'sub': '',
  1312. })
  1313. if md.is_primary:
  1314. primary_vals[md.label] = val
  1315. all_vals[md.label] = val
  1316. if kpi_items:
  1317. kpi_count = len(kpi_items)
  1318. if kpi_count <= 3:
  1319. _add_kpi_cards(slide, kpi_items, start_y=Emu(content_top))
  1320. else:
  1321. shown_kpis = kpi_items[:9]
  1322. compact_card_h = Emu(1780000) if len(shown_kpis) <= 6 else Emu(1600000)
  1323. kpi_bottom = _add_compact_kpi_cards(
  1324. slide,
  1325. shown_kpis,
  1326. start_y=Emu(content_top),
  1327. card_h=compact_card_h,
  1328. gap_y=Emu(220000),
  1329. )
  1330. insight_items = []
  1331. kpi_names = [m.label for m in config.metrics if m.selected]
  1332. kpi_str = "、".join(kpi_names[:6]) if kpi_names else "各指标"
  1333. if len(kpi_names) > 6:
  1334. kpi_str += f'等{len(kpi_names)}项'
  1335. primary_kpis = [m for m in config.metrics if m.is_primary and m.selected]
  1336. if not primary_kpis:
  1337. primary_kpis = [m for m in config.metrics if m.selected][:3]
  1338. kpi_detail_parts = []
  1339. for i, pk in enumerate(primary_kpis):
  1340. val = all_vals.get(pk.label, 0)
  1341. unit_str = pk.unit if pk.unit else ''
  1342. display_val = format(val, pk.format_spec) if isinstance(val, (int, float)) else str(val)
  1343. kpi_detail_parts.append(f'{pk.label}: {display_val}{unit_str}')
  1344. insight_items.append({
  1345. 'title': '核心数据概览',
  1346. 'content': f'本期报告涵盖 {kpi_str} 共 {len(kpi_names)} 项核心指标。'
  1347. f'{";".join(kpi_detail_parts[:4])}。'
  1348. f'其中{"、".join(p.label for p in primary_kpis[:3])}为本次分析的重点关注指标。'
  1349. f'建议将这些指标与历史同期数据进行纵向对比,以及与行业基准进行横向对标,以全面评估当前业务健康度。'
  1350. f'对于波动较大的指标,需深入追溯其背后的业务动因,判断是否为趋势性变化还是季节性波动。',
  1351. })
  1352. cat_cols = profile.get('category_columns', []) if profile else []
  1353. num_cols = profile.get('numeric_columns', []) if profile else []
  1354. total_rows = profile.get('total_rows', 0) if profile else 0
  1355. if cat_cols:
  1356. top_cats = [c.get('inferred_label', c.get('column_name', '')) for c in cat_cols[:3]]
  1357. cat_details = []
  1358. for c in cat_cols[:3]:
  1359. uc = c.get('unique_count', 'N/A')
  1360. cat_details.append(f'{c.get("inferred_label", c.get("column_name", ""))}({uc}类)')
  1361. insight_items.append({
  1362. 'title': '数据覆盖与维度分析',
  1363. 'content': f'数据覆盖 {total_rows:,} 条记录,包含 {", ".join(cat_details)} 等多个分析维度。'
  1364. f'丰富的维度数据支持从 {", ".join(top_cats)} 等角度进行多维度联动分析。'
  1365. f'建议关注各维度下的数据分布特征,识别高贡献或异常的分类群体,'
  1366. f'针对性地分析不同维度的表现差异,为精细化运营和数据驱动决策提供支撑。',
  1367. })
  1368. if len(config.metrics) >= 3:
  1369. compare_items = []
  1370. for a, b in zip(primary_kpis[:2], primary_kpis[1:3]):
  1371. va = all_vals.get(a.label, 0)
  1372. vb = all_vals.get(b.label, 0)
  1373. if va and vb:
  1374. ratio = round(va / vb, 2) if vb else 0
  1375. compare_items.append(f'{a.label}与{b.label}的比值为 {ratio}')
  1376. if compare_items:
  1377. insight_items.append({
  1378. 'title': '指标间关联分析',
  1379. 'content': f'{";".join(compare_items)}。通过指标间的比值关系可以发现数据的内在规律,'
  1380. f'比值异常偏离正常区间时需重点关注。建议进一步计算各指标与核心业务目标之间的相关系数,'
  1381. f'量化不同指标对业务目标的影响力排序,将有限资源聚焦在驱动型指标上。',
  1382. })
  1383. else:
  1384. insight_items.append({
  1385. 'title': '指标间关联分析',
  1386. 'content': f'本期核心指标包括 {", ".join(p.label for p in primary_kpis[:3])}。'
  1387. f'建议通过散点图或相关系数分析探索指标间的线性/非线性关系,识别是否存在协同或对冲效应。'
  1388. f'同时建议按时间序列分析各指标的周期性规律,为资源配置和预测提供依据。',
  1389. })
  1390. insight_items.append({
  1391. 'title': '关键发现与行动建议',
  1392. 'content': f'综合分析 {len(kpi_names)} 项指标,建议重点关注以下方向:'
  1393. f'(1) 定期监控核心指标的趋势变化,建立异常预警机制,当指标偏离正常区间时及时触发排查流程;'
  1394. f'(2) 深化多维度交叉分析,挖掘不同群体间的结构差异,识别增长机会和风险点;'
  1395. f'(3) 结合业务经验和外部数据,验证数据指标的准确性和合理性;'
  1396. f'(4) 将分析结论转化为可执行的具体行动项,明确责任人和时间节点,建立跟踪闭环机制。',
  1397. })
  1398. if kpi_count > 9:
  1399. extra_names = '、'.join(k['label'] for k in kpi_items[9:15])
  1400. insight_items.append({
  1401. 'title': '更多核心指标说明',
  1402. 'content': f'本页优先展示前 9 个核心指标,其余 {kpi_count - 9} 个指标(如 {extra_names})'
  1403. f'已纳入综合分析口径。建议在页面结构确认阶段将核心指标按“结果指标、过程指标、风险指标”分组,'
  1404. f'必要时拆分为多页 KPI 看板,以保证每个指标都有足够的解释空间。',
  1405. })
  1406. if kpi_count <= 3:
  1407. kpi_grid_bottom = int(content_top) + Emu(3048000)
  1408. else:
  1409. kpi_grid_bottom = max(kpi_bottom, int(content_top) + Emu(1780000))
  1410. insight_zone_y = kpi_grid_bottom + Emu(254000)
  1411. remaining_height = int(FOOTER_TOP - insight_zone_y - Emu(140000))
  1412. if remaining_height >= Emu(950000):
  1413. if kpi_count <= 3:
  1414. compact_items = insight_items[:3]
  1415. else:
  1416. compact_items = insight_items[:3] if kpi_count <= 6 else insight_items[:4]
  1417. _add_structured_insight(slide, compact_items,
  1418. Emu(CONTENT_LEFT), Emu(insight_zone_y),
  1419. Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT), Emu(remaining_height),
  1420. title_size=Pt(10), body_size=Pt(9), min_body_size=Pt(8))
  1421. elif kpi_count > 3:
  1422. fallback_top = max(insight_zone_y, int(FOOTER_TOP) - int(Emu(1250000)))
  1423. fallback_height = int(FOOTER_TOP - fallback_top - Emu(120000))
  1424. fallback_items = insight_items[:2]
  1425. _add_structured_insight(slide, fallback_items,
  1426. Emu(CONTENT_LEFT), Emu(fallback_top),
  1427. Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT), Emu(max(fallback_height, Emu(850000))),
  1428. title_size=Pt(9), body_size=Pt(8), min_body_size=Pt(7))
  1429. def _build_trend_page(prs, config, df, profile, colors, fonts, content_top, ctx=None):
  1430. slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content")
  1431. time_cols = profile.get('time_columns', [])
  1432. num_cols = profile.get('numeric_columns', [])
  1433. if not time_cols or not num_cols:
  1434. _remove_slide(prs, slide)
  1435. return False
  1436. time_col = time_cols[0]['column_name']
  1437. metric_col = num_cols[0]['column_name']
  1438. label = num_cols[0].get('inferred_label', metric_col)
  1439. page_title = f'{label}趋势'
  1440. _replace_all_placeholders(slide, {
  1441. '{report_title}': config.title,
  1442. '{date}': config.period_str,
  1443. '{page_title}': page_title,
  1444. '{source}': config.source_label,
  1445. '{period}': '',
  1446. '{page_num}': '',
  1447. }, fonts)
  1448. trend_data = calc_generic_trend(df, time_col, metric_col)
  1449. if trend_data.get('dates'):
  1450. chart_zone = get_chart_left_zone(content_top, 0.6, ctx=ctx)
  1451. text_zone = get_insight_right_zone(content_top, 0.6, ctx=ctx)
  1452. add_line_chart(slide, trend_data['dates'], trend_data['values'],
  1453. Emu(chart_zone.x), Emu(chart_zone.y),
  1454. Emu(chart_zone.width), Emu(chart_zone.height),
  1455. series_name=label, color=colors.get('primary'))
  1456. dates = trend_data['dates']
  1457. vals = trend_data['values']
  1458. n = len(vals)
  1459. first_v, last_v = vals[0], vals[-1]
  1460. change = last_v - first_v
  1461. change_pct = round(change / first_v * 100, 1) if first_v else 0
  1462. max_v = max(vals) if vals else 0
  1463. min_v = min(vals) if vals else 0
  1464. max_idx = vals.index(max_v) if vals else 0
  1465. min_idx = vals.index(min_v) if vals else 0
  1466. peak_date = dates[max_idx] if max_idx < len(dates) else 'N/A'
  1467. trough_date = dates[min_idx] if min_idx < len(dates) else 'N/A'
  1468. direction_text = '上升' if change > 0 else '下降' if change < 0 else '平稳'
  1469. volatility = round((max_v - min_v) / (sum(vals) / n) * 100, 1) if sum(vals) else 0 if vals else 0
  1470. insight_items = [
  1471. {
  1472. 'title': f'{label}整体趋势概况',
  1473. 'content': f'在报告周期内共采集 {n} 个时间点的数据,{label}'
  1474. f'从 {dates[0]} 的 {first_v:,.0f} 变动至 {dates[-1]} 的 {last_v:,.0f},'
  1475. f'整体{direction_text}{abs(change_pct):.1f}%,{direction_text}趋势{"显著" if abs(change_pct) > 20 else "温和" if abs(change_pct) > 5 else "较为平缓"}。'
  1476. f'数据变化轨迹反映出{"持续向好的增长态势" if direction_text == "上升" and abs(change_pct) > 10 else "温和改善的积极信号" if direction_text == "上升" else "回调盘整的阶段性特征" if direction_text == "下降" else "平稳运行的基本状态"},'
  1477. f'建议将当前趋势与业务目标和历史同期数据进行交叉对比,评估达成全年目标的可行性。如需更详尽的趋势分析,建议增加数据采集频度和时间跨度。',
  1478. },
  1479. {
  1480. 'title': '峰值与谷值分析',
  1481. 'content': f'周期内最高值出现在 {peak_date},为 {max_v:,.0f};'
  1482. f'最低值出现在 {trough_date},为 {min_v:,.0f}。'
  1483. f'极值差距 {max_v - min_v:,.0f},波动幅度 {volatility}%,'
  1484. f'{"波动显著,需关注异常节点的驱动因素,建议排查是否受节假日、促销活动、外部政策变化等因素影响" if volatility > 30 else "波动在可控范围内,但仍需对异常波动保持警觉"}{"." if volatility > 30 else ",建立异常值的快速预警和响应机制。"}',
  1485. },
  1486. {
  1487. 'title': '趋势阶段性特征',
  1488. 'content': f'前半程({dates[0]}至{dates[min(n//2, n-1)]})'
  1489. f'{"呈上升态势" if sum(vals[:n//2]) < sum(vals[n//2:]) else "呈下降态势" if sum(vals[:n//2]) > sum(vals[n//2:]) else "基本持平"},'
  1490. f'后半程均值为 {sum(vals[n//2:])/(n-n//2):,.0f}。建议结合业务事件节点深入分析拐点成因,'
  1491. f'重点关注是否存在季节性波动、周期性波动或外部冲击等结构性因素。'
  1492. f'若数据量较少,趋势解读应以业务经验为主,辅以数据验证。',
  1493. },
  1494. {
  1495. 'title': '业务启示',
  1496. 'content': f'综合趋势分析,当前数据反映出{"积极向好的发展态势" if direction_text == "上升" and abs(change_pct) > 10 else "温和稳定的运行动态" if abs(change_pct) <= 10 else "需重点关注的下行风险"}。'
  1497. f'建议{"加大资源投入以把握增长机遇,同时关注增速的可持续性,避免盲目扩张" if direction_text == "上升" else "排查下降原因并制定针对性应对措施,分析是短期波动还是长期趋势转折" if direction_text == "下降" else "保持当前运营节奏,同时关注潜在变化信号,适时调整策略" if direction_text == "平稳" else "继续观察数据走势"}。'
  1498. f'建议将数据与业务KPI目标进行对标分析,定期回顾趋势变化。',
  1499. },
  1500. ]
  1501. _add_structured_insight(slide, insight_items,
  1502. Emu(text_zone.x), Emu(text_zone.y),
  1503. Emu(text_zone.width), Emu(text_zone.height))
  1504. return True
  1505. return False
  1506. def _build_distribution_page(prs, config, df, profile, colors, fonts, content_top, page_def=None, ctx=None):
  1507. slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content")
  1508. cat_cols = profile.get('category_columns', [])
  1509. num_cols = profile.get('numeric_columns', [])
  1510. if not cat_cols:
  1511. _remove_slide(prs, slide)
  1512. return False
  1513. elem = (page_def.elements or [{}])[0] if page_def else {}
  1514. cat_col = elem.get('category') or cat_cols[0]['column_name']
  1515. cat_label = elem.get('category_label') or next(
  1516. (c.get('inferred_label', cat_col) for c in cat_cols if c['column_name'] == cat_col), cat_col)
  1517. metric_col = elem.get('metric') or (num_cols[0]['column_name'] if num_cols else None)
  1518. metric_label = elem.get('metric_label') or (next(
  1519. (c.get('inferred_label', metric_col) for c in num_cols if c['column_name'] == metric_col), metric_col) if metric_col else '')
  1520. page_title = page_def.title if page_def and page_def.title else f'{cat_label}分布'
  1521. _replace_all_placeholders(slide, {
  1522. '{report_title}': config.title,
  1523. '{date}': config.period_str,
  1524. '{page_title}': page_title,
  1525. '{source}': config.source_label,
  1526. '{period}': '',
  1527. '{page_num}': '',
  1528. }, fonts)
  1529. dist = calc_generic_distribution(df, cat_col, metric_col, top_n=8)
  1530. if dist.get('categories'):
  1531. chart_zone = get_chart_left_zone(content_top, 0.55, ctx=ctx)
  1532. text_zone = get_insight_right_zone(content_top, 0.55, ctx=ctx)
  1533. if len(dist['categories']) <= 8:
  1534. add_doughnut_chart(slide, dist['categories'], dist['values'],
  1535. Emu(chart_zone.x), Emu(chart_zone.y),
  1536. Emu(chart_zone.width), Emu(chart_zone.height),
  1537. colors=colors.get('series'))
  1538. else:
  1539. add_bar_chart(slide, dist['categories'], dist['values'],
  1540. Emu(chart_zone.x), Emu(chart_zone.y),
  1541. Emu(chart_zone.width), Emu(chart_zone.height),
  1542. series_name=metric_label, color=colors.get('primary'))
  1543. cats, vals, pcts = dist['categories'], dist['values'], dist['percentages']
  1544. grand_total = sum(vals)
  1545. top3_pct = sum(pcts[:3])
  1546. top1_name, top1_val, top1_pct = cats[0], vals[0], pcts[0]
  1547. metric_suffix = metric_label if metric_label else '数量'
  1548. insight_items = [
  1549. {
  1550. 'title': f'{cat_label}分布概况',
  1551. 'content': f'共有 {len(cats)} 个不同的{cat_label},覆盖范围'
  1552. f'{"广泛" if len(cats) >= 8 else "较为丰富" if len(cats) >= 5 else "相对集中"}。'
  1553. f'前3名合计占比 {top3_pct:.1f}%,集中度'
  1554. f'{"较高,呈现显著的头部集中特征" if top3_pct > 70 else "中等,呈现梯度递减分布" if top3_pct > 50 else "较低,分布较为均衡"}。',
  1555. },
  1556. {
  1557. 'title': f'排名第一: {top1_name}',
  1558. 'content': f'{top1_name}以 {top1_val:,}{metric_suffix}(占比 {top1_pct:.1f}%)位居榜首,'
  1559. f'{"是第二名" + cats[1] + "的" + f"{round(top1_val/vals[1],1)}" + "倍,优势极为显著" if len(cats) > 1 else "是该维度中最重要的类别"}。'
  1560. f'该类别贡献了超过三分之一的{metric_label},是整体业务的基本盘和核心增长极。',
  1561. },
  1562. ]
  1563. if len(vals) >= 3:
  1564. top3_sum = sum(vals[:3])
  1565. tail_sum = sum(vals[3:])
  1566. tail_pct = sum(pcts[3:])
  1567. insight_items.append({
  1568. 'title': '长尾分布特征',
  1569. 'content': f'前三名累计 {top3_sum:,}{metric_suffix}({top3_pct:.1f}%),'
  1570. f'剩余 {len(cats)-3} 个合计 {tail_sum:,}{metric_suffix}({tail_pct:.1f}%),'
  1571. f'属于{"头部集中型分布" if top3_pct > 70 else "相对均衡分布" if top3_pct < 50 else "梯度递减型分布"}。'
  1572. f'头部贡献了绝大部分{metric_label},尾部虽数量众多但单个贡献有限。',
  1573. })
  1574. if len(vals) > 1:
  1575. avg_val = sum(vals) / len(vals)
  1576. cv = round(vals[0] / avg_val, 1) if avg_val else 0
  1577. median_idx = len(vals) // 2
  1578. median_val = vals[median_idx]
  1579. insight_items.append({
  1580. 'title': '差异化与离散度分析',
  1581. 'content': f'排名第一的{cat_label}{top1_name}的{metric_suffix}是全部分类均值的 {cv} 倍,'
  1582. f'中位数分类(第{median_idx+1}名)为 {median_val:,}{metric_suffix},'
  1583. f'表明该维度{"差异化显著,资源集中度较高" if cv > 3 else "差异化适中,各分类间差距可控" if cv > 1.5 else "分布较为均匀"}。'
  1584. f'头部与中位数的差距反映了{cat_label}维度上的分层特征,是运营资源重点倾斜方向。',
  1585. })
  1586. insight_items.append({
  1587. 'title': '业务启示',
  1588. 'content': f'建议重点关注 {cats[0]} 的增量拓展与存量维护,同时深入分析排名中位类别的提升空间。'
  1589. f'对于 {metric_label}贡献较小的尾部类别(如占比低于3%的分类),可评估是否优化资源配置、'
  1590. f'调整运营策略或将资源向高回报类别倾斜。结合{cat_label}维度持续跟踪分布变化,及时把握结构性机会。',
  1591. })
  1592. _add_structured_insight(slide, insight_items,
  1593. Emu(text_zone.x), Emu(text_zone.y),
  1594. Emu(text_zone.width), Emu(text_zone.height))
  1595. return True
  1596. return False
  1597. def _build_ranking_page(prs, config, df, profile, colors, fonts, content_top, page_def=None, ctx=None):
  1598. slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content")
  1599. cat_cols = profile.get('category_columns', [])
  1600. num_cols = profile.get('numeric_columns', [])
  1601. if not cat_cols or not num_cols:
  1602. _remove_slide(prs, slide)
  1603. return False
  1604. elem = (page_def.elements or [{}])[0] if page_def else {}
  1605. rank_col = elem.get('category') or cat_cols[-1]['column_name']
  1606. rank_label = elem.get('category_label') or next(
  1607. (c.get('inferred_label', rank_col) for c in cat_cols if c['column_name'] == rank_col), rank_col)
  1608. metric_col = elem.get('metric') or num_cols[0]['column_name']
  1609. metric_label = elem.get('metric_label') or next(
  1610. (c.get('inferred_label', metric_col) for c in num_cols if c['column_name'] == metric_col), metric_col)
  1611. page_title = page_def.title if page_def and page_def.title else f'{rank_label}TOP排行'
  1612. _replace_all_placeholders(slide, {
  1613. '{report_title}': config.title,
  1614. '{date}': config.period_str,
  1615. '{page_title}': page_title,
  1616. '{source}': config.source_label,
  1617. '{period}': '',
  1618. '{page_num}': '',
  1619. }, fonts)
  1620. ranking = calc_generic_ranking(df, rank_col, metric_col, top_n=15)
  1621. if ranking:
  1622. chart_zone = get_chart_left_zone(content_top, 0.6, ctx=ctx)
  1623. text_zone = get_insight_right_zone(content_top, 0.6, ctx=ctx)
  1624. names = [r['name'] for r in ranking]
  1625. vals = [r['value'] for r in ranking]
  1626. add_bar_chart(slide, names, vals,
  1627. Emu(chart_zone.x), Emu(chart_zone.y),
  1628. Emu(chart_zone.width), Emu(chart_zone.height),
  1629. series_name=metric_label, color=colors.get('primary'))
  1630. total_val = sum(vals)
  1631. top3_names = [r['name'] for r in ranking[:3]]
  1632. top3_vals = [r['value'] for r in ranking[:3]]
  1633. top3_pct = [round(v / total_val * 100, 1) for v in top3_vals] if total_val else [0, 0, 0]
  1634. top1_vs_last = round(vals[0] / vals[-1], 1) if len(vals) > 1 and vals[-1] > 0 else 'N/A'
  1635. insight_items = [
  1636. {
  1637. 'title': f'{rank_label}TOP排行概况',
  1638. 'content': f'共展示 {len(ranking)} 个排名项,前3名分别为 {top3_names[0]}、{top3_names[1]}、'
  1639. f'{top3_names[2]},累计 {sum(top3_vals):,}{metric_label}({sum(top3_pct):.1f}%)。'
  1640. f'前三名合计贡献超过总量的三分之一,表明{rank_label}维度呈现{"显著的头部集中特征" if sum(top3_pct) > 60 else "梯度递减的分布格局" if sum(top3_pct) > 40 else "相对均衡的分布态势"}。',
  1641. },
  1642. {
  1643. 'title': f'榜首分析: {top3_names[0]}',
  1644. 'content': f'{top3_names[0]}以 {top3_vals[0]:,}{metric_label}(占比 {top3_pct[0]:.1f}%)位居榜首,'
  1645. f'{"是第2名" + top3_names[1] + "的" + f"{round(top3_vals[0]/top3_vals[1],1)}倍,领先优势显著" if len(ranking) > 1 and top3_vals[1] > 0 else "优势突出"}。'
  1646. f'作为排名第一的{rank_label},其业绩表现直接影响整体业务大盘,建议重点关注其可持续增长策略。',
  1647. },
  1648. {
  1649. 'title': '头部与尾部差距分析',
  1650. 'content': f'第1名与第{len(ranking)}名差距达 {top1_vs_last} 倍,'
  1651. f'前5名平均 {round(sum(vals[:5])/5):,}{metric_label},'
  1652. f'后5名平均 {round(sum(vals[-5:])/5):,}{metric_label},'
  1653. f'前后差距约 {round((sum(vals[:5])/5)/(sum(vals[-5:])/5),1) if sum(vals[-5:]) > 0 else "N/A"} 倍。'
  1654. f'{"头部效应极为明显,需关注是否因资源分配不均导致" if isinstance(top1_vs_last, float) and top1_vs_last > 10 else "差距较为显著,存在分层优化的空间" if isinstance(top1_vs_last, float) and top1_vs_last > 5 else "梯度分布相对均衡,可针对性提升各层级表现"}。',
  1655. },
  1656. {
  1657. 'title': '累计贡献率与分层分析',
  1658. 'content': f'前5名累计贡献 {sum(vals[:5]):,}{metric_label}({round(sum(vals[:5])/total_val*100,1) if total_val else 0}%),'
  1659. f'前10名累计贡献 {sum(vals[:10]):,}{metric_label}({round(sum(vals[:10])/total_val*100,1) if total_val else 0}%),'
  1660. f'剩余 {len(ranking)-10} 名合计贡献 {sum(vals[10:]):,}{metric_label}({round(sum(vals[10:])/total_val*100,1) if total_val else 0}%)。'
  1661. f'从分层结构来看,可划分为三个梯队:第一梯队(前3名)为业绩核心贡献者,第二梯队(第4-8名)为稳定输出层,'
  1662. f'第三梯队(第9名及以后)为潜力提升层。',
  1663. },
  1664. {
  1665. 'title': '业务建议',
  1666. 'content': f'重点关注 {", ".join(top3_names)} 的发展动态,提炼其成功经验并推广至团队。'
  1667. f'对于排名靠后的{rank_label},可评估其增长潜力与资源匹配度,'
  1668. f'识别可突破的增量空间。建议建立{rank_label}的绩效考核与激励体系,'
  1669. f'通过标杆带动和梯队培养实现整体业绩提升。',
  1670. },
  1671. ]
  1672. _add_structured_insight(slide, insight_items,
  1673. Emu(text_zone.x), Emu(text_zone.y),
  1674. Emu(text_zone.width), Emu(text_zone.height))
  1675. return True
  1676. return False
  1677. def _build_summary_page(prs, config, metrics, profile, colors, fonts, content_top, page_def=None, ctx=None):
  1678. slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content")
  1679. page_title = page_def.title if page_def and page_def.title else '总结与建议'
  1680. _replace_all_placeholders(slide, {
  1681. '{report_title}': config.title,
  1682. '{date}': config.period_str,
  1683. '{page_title}': page_title,
  1684. '{source}': config.source_label,
  1685. '{period}': '',
  1686. '{page_num}': '',
  1687. }, fonts)
  1688. elem = (page_def.elements or [{}])[0] if page_def else {}
  1689. if elem.get('support_status') is not None:
  1690. status = elem['support_status']
  1691. dept = elem.get('support_by_dept', {})
  1692. sc = elem.get('support_count', 0)
  1693. cc = elem.get('closed_count', 0)
  1694. close_rate = round(cc / sc * 100, 1) if sc else 0
  1695. fully_closed = status.get('已闭环', 0)
  1696. partial_closed = status.get('部分闭环', 0)
  1697. not_closed = status.get('未闭环', 0)
  1698. insight_items = [{
  1699. 'title': '支持需求总览',
  1700. 'content': f'本期共产生 {sc} 项跨部门支持需求,其中已闭环 {cc} 项(含完全闭环 {fully_closed} 项、部分闭环 {partial_closed} 项),'
  1701. f'闭环率 {close_rate}%。未闭环需求 {sc - cc} 项(占比 {round((sc-cc)/sc*100,1) if sc else 0}%),'
  1702. f'闭环率{"较高,跨部门协作效率良好" if close_rate >= 60 else "处于中等水平,仍有提升空间" if close_rate >= 30 else "偏低,需重点关注闭环推动"}。'
  1703. f'跨部门支持是保障项目推进的重要环节,高效的闭环机制有助于提升客户满意度和订单转化效率。',
  1704. }]
  1705. if status:
  1706. total_status = sum(status.values())
  1707. fully_pct = round(fully_closed / total_status * 100, 1) if total_status else 0
  1708. partial_pct = round(partial_closed / total_status * 100, 1) if total_status else 0
  1709. not_pct = round(not_closed / total_status * 100, 1) if total_status else 0
  1710. insight_items.append({
  1711. 'title': '闭环状态明细',
  1712. 'content': f'已闭环 {fully_closed} 项({fully_pct}%)、部分闭环 {partial_closed} 项({partial_pct}%)、'
  1713. f'未闭环 {not_closed} 项({not_pct}%)。'
  1714. f'其中完全闭环占比{"超过七成,闭环质量较高" if fully_pct >= 70 else "处于中等水平" if fully_pct >= 40 else "偏低,需提升闭环完整性"}。'
  1715. f'部分闭环表明需求已部分满足但未完全解决,需持续跟踪至彻底闭环。',
  1716. })
  1717. if dept:
  1718. dept_top = list(dept.items())[:5]
  1719. dept_top_sum = sum(v for _, v in dept_top)
  1720. dept_total = sum(dept.values())
  1721. dept_str = '、'.join([f'{k}({v}项)' for k, v in dept_top])
  1722. avg_dept_load = round(dept_total / len(dept), 1) if dept else 0
  1723. max_dept = dept_top[0]
  1724. insight_items.append({
  1725. 'title': '支持部门工作量分布',
  1726. 'content': f'需求覆盖 {len(dept)} 个部门/科室,前5个部门承接 {dept_top_sum} 项({round(dept_top_sum/dept_total*100,1) if dept_total else 0}%)。'
  1727. f'Top部门:{dept_str}。其中{max_dept[0]}承接最多({max_dept[1]}项),'
  1728. f'平均每个部门承接 {avg_dept_load} 项。请关注工作量较大的部门资源分配是否充足,'
  1729. f'同时识别是否有部门长期未被分配需求(可能表明资源未充分利用)。',
  1730. })
  1731. if sc - cc > 0:
  1732. insight_items.append({
  1733. 'title': '未闭环需求跟进建议',
  1734. 'content': f'当前仍有 {sc - cc} 项需求未完成闭环。建议按以下策略推进:第一,按紧急程度和影响范围对未闭环需求进行优先级排序,'
  1735. f'高优需求指定专人负责限期解决;第二,建立周度闭环跟踪机制,定期更新需求处理进展;'
  1736. f'第三,对于跨部门协同的复杂需求,建议指定牵头部门统筹协调推进,'
  1737. f'并建立问题升级机制(当需求超期未解决时自动升级至更高层级协调)。',
  1738. })
  1739. insight_items.append({
  1740. 'title': '闭环效率提升建议',
  1741. 'content': f'为持续提升支持需求闭环效率,建议:一是建立标准化的需求流转流程,明确各环节责任人和响应时限;'
  1742. f'二是定期开展闭环案例复盘,提炼最佳实践并在团队内推广;'
  1743. f'三是建立闭环率考核指标,将闭环时效纳入部门协作评价体系,'
  1744. f'通过制度保障跨部门协作的效率和质量。',
  1745. })
  1746. else:
  1747. insight_items = generate_generic_insights(profile, metrics)
  1748. insight_items = _ensure_min_insight_items(
  1749. insight_items,
  1750. profile=profile,
  1751. metrics=metrics,
  1752. min_count=2,
  1753. context_label='总结页',
  1754. )
  1755. zone = get_full_width_zone(content_top, ctx=ctx)
  1756. _add_structured_insight(slide, insight_items,
  1757. Emu(zone.x), Emu(zone.y),
  1758. Emu(zone.width), Emu(zone.height))
  1759. def _build_end_page(prs, config, colors, fonts, template_profile):
  1760. slide = _duplicate_master_slide(prs, template_profile, "end", keep_shapes=True)
  1761. total = len([p for p in config.pages if p.selected])
  1762. _add_footer_if_missing(slide, f'数据来源:{config.source_label} | {total}/{total}', colors=colors)
  1763. _replace_all_placeholders(slide, {
  1764. '{report_title}': config.title,
  1765. '{date}': config.period_str or '',
  1766. '{department}': config.source_label,
  1767. }, fonts)
  1768. # Remove empty KPI placeholders on end page (same as cover)
  1769. _remove_empty_cover_kpi_placeholders(slide)
  1770. def _find_metric_def_by_column(config, column):
  1771. for metric in getattr(config, 'metrics', []) or []:
  1772. if getattr(metric, 'column', None) == column:
  1773. return metric
  1774. return None
  1775. def _forecast_items_from_page_def(page_def, df, profile, metrics, config):
  1776. elem = (page_def.elements or [{}])[0] if page_def else {}
  1777. items = []
  1778. explicit_items = elem.get('forecast_items') or elem.get('goals')
  1779. if explicit_items:
  1780. for idx, item in enumerate(explicit_items[:6], 1):
  1781. title = item.get('title') or item.get('label') or f'预测项{idx}'
  1782. value = item.get('value') or item.get('number') or item.get('target') or 0
  1783. items.append({'title': str(title), 'number': value})
  1784. return items
  1785. metric_names = elem.get('metrics') or elem.get('metric_names') or []
  1786. for metric_name in metric_names[:6]:
  1787. if metric_name in metrics:
  1788. metric_def = next((m for m in getattr(config, 'metrics', []) if m.name == metric_name), None)
  1789. label = metric_def.label if metric_def else str(metric_name)
  1790. items.append({'title': label, 'number': metrics.get(metric_name, 0)})
  1791. if items:
  1792. return items
  1793. num_cols = profile.get('numeric_columns', []) if profile else []
  1794. keyword_cols = []
  1795. keywords = ('预测', 'forecast', '目标', '计划', 'target', 'plan')
  1796. for col in num_cols:
  1797. col_name = col.get('column_name', '')
  1798. label = col.get('inferred_label', col_name)
  1799. if any(k in str(col_name).lower() or k in str(label).lower() for k in keywords):
  1800. keyword_cols.append(col)
  1801. for col in keyword_cols[:6]:
  1802. col_name = col.get('column_name')
  1803. metric_def = _find_metric_def_by_column(config, col_name)
  1804. label = metric_def.label if metric_def else col.get('inferred_label', col_name)
  1805. if metric_def and metric_def.name in metrics:
  1806. value = metrics.get(metric_def.name, 0)
  1807. elif col_name in df.columns:
  1808. series = df[col_name].dropna()
  1809. value = int(series.sum()) if not series.empty else 0
  1810. else:
  1811. value = 0
  1812. items.append({'title': label, 'number': value})
  1813. return items
  1814. def _generic_forecast_insights(page_def, forecast_items, profile, metrics):
  1815. title = page_def.title if page_def else '预测与行动计划'
  1816. total = sum(float(item.get('number') or 0) for item in forecast_items)
  1817. item_desc = '、'.join(f"{item['title']} {item.get('number', 0):,.0f}" for item in forecast_items[:5])
  1818. if forecast_items:
  1819. return [
  1820. {
  1821. 'title': f'{title}目标概览',
  1822. 'content': f'本页围绕已确认的预测/计划指标展开,当前纳入 {len(forecast_items)} 个量化项,'
  1823. f'合计规模约 {total:,.0f}。主要项目包括:{item_desc}。'
  1824. f'这些指标应与本期实际结果、历史同期和资源约束一起判断,避免只看单点预测值。',
  1825. },
  1826. {
  1827. 'title': '达成路径与风险控制',
  1828. 'content': f'建议将预测目标拆解为“责任人、关键动作、时间节点、风险预案”四类信息。'
  1829. f'如果目标值明显高于本期实际表现,应同步确认新增订单、库存、产能、交付或预算等支撑条件;'
  1830. f'如果目标值低于当前趋势,则需要说明保守假设,防止业务团队误判资源投入强度。',
  1831. },
  1832. ]
  1833. total_rows = profile.get('total_rows', 0) if profile else 0
  1834. return [
  1835. {
  1836. 'title': f'{title}口径说明',
  1837. 'content': f'当前页面未检测到明确的预测或目标数值字段,因此以数据画像和核心指标进行预测口径说明。'
  1838. f'本期数据覆盖 {total_rows or "若干"} 条记录,建议在六项确认阶段明确预测指标、目标字段和统计口径,'
  1839. f'例如下月交付、销售目标、库存消化、需求闭环或风险事件数量。',
  1840. },
  1841. {
  1842. 'title': '补充数据建议',
  1843. 'content': f'为了生成更可靠的预测页,建议在源数据中补充至少一个预测/目标字段,并提供历史实际值用于校准。'
  1844. f'报告生成后应检查预测值是否与图表一致,文字洞察是否说明关键假设、达成路径和偏差处理机制。',
  1845. },
  1846. ]
  1847. def _build_forecast_page(prs, config, df, profile, metrics, colors, content_top, page_def=None):
  1848. slide = _duplicate_slide(prs, prs.slides[1])
  1849. page_title = page_def.title if page_def and page_def.title else '预测与行动计划'
  1850. _replace_all_placeholders(slide, {
  1851. '{report_title}': config.title,
  1852. '{date}': config.period_str,
  1853. '{page_title}': page_title,
  1854. '{source}': config.source_label,
  1855. '{period}': '',
  1856. '{page_num}': '',
  1857. }, fonts)
  1858. forecast_items = _forecast_items_from_page_def(page_def, df, profile, metrics, config)
  1859. if not forecast_items and metrics.get('next_month_goals'):
  1860. forecast_items = [
  1861. {'title': g['title'].split(':')[0], 'number': g.get('number', 0)}
  1862. for g in metrics.get('next_month_goals', [])[:6]
  1863. ]
  1864. chart_zone = get_chart_left_zone(content_top, 0.58, ctx=ctx)
  1865. text_zone = get_insight_right_zone(content_top, 0.58, ctx=ctx)
  1866. if forecast_items:
  1867. names = [item['title'] for item in forecast_items[:6]]
  1868. values = [float(item.get('number') or 0) for item in forecast_items[:6]]
  1869. add_column_chart(slide, names, values,
  1870. Emu(chart_zone.x), Emu(chart_zone.y),
  1871. Emu(chart_zone.width), Emu(min(chart_zone.height, Emu(5100000))),
  1872. series_name='预测/目标值', color=colors.get('accent', C_ACCENT),
  1873. category_axis_title='预测项', value_axis_title='数值')
  1874. insight_items = _generic_forecast_insights(page_def, forecast_items, profile, metrics)
  1875. insight_items = _ensure_min_insight_items(insight_items, profile, metrics, context_label='预测页')
  1876. _add_structured_insight(slide, insight_items,
  1877. Emu(text_zone.x), Emu(text_zone.y),
  1878. Emu(text_zone.width), Emu(text_zone.height))
  1879. # ==============================================================================
  1880. # CLI
  1881. # ==============================================================================
  1882. if __name__ == '__main__':
  1883. import sys
  1884. if len(sys.argv) >= 3:
  1885. from report_config import load_report_config
  1886. data_file = sys.argv[1]
  1887. config_file = sys.argv[2]
  1888. output = sys.argv[3] if len(sys.argv) >= 4 else 'output.pptx'
  1889. config = load_report_config(config_file)
  1890. quality_assured_build(data_file, config, output)
  1891. else:
  1892. print("Usage: python ppt_builder.py <data_file> <config_file> [output_path]")