| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999 |
- """
- PPT builder: assemble daily/weekly/monthly reports by duplicating master templates
- and filling charts, tables, KPI cards, and structured insight text blocks.
- Key design principle: Conclusion-first page titles + structured multi-paragraph
- insights (title + body per paragraph) aligned with reference PPT style.
- """
- import copy
- import os
- import sys
- import re as re_module
- from pathlib import Path
- from datetime import datetime, timedelta
- sys.path.insert(0, str(Path(__file__).parent))
- from pptx import Presentation
- from pptx.util import Emu, Pt
- from pptx.dml.color import RGBColor
- from pptx.enum.text import PP_ALIGN
- from pptx.enum.shapes import MSO_SHAPE
- from data_loader import load_generic_excel
- from metrics_calculator import (
- calc_generic_metrics, calc_generic_trend, calc_generic_distribution,
- calc_generic_ranking, generate_generic_insights,
- )
- from chart_factory import (
- add_column_chart, add_bar_chart, add_line_chart, add_doughnut_chart,
- add_pie_chart, add_funnel_chart, add_horizontal_bar_chart,
- add_grouped_bar_chart, add_table
- )
- from page_layouts import (
- get_kpi_grid, get_chart_left_zone, get_insight_right_zone,
- get_full_width_zone, get_two_column_zones, LayoutContext,
- )
- from template_parser import (
- parse_template, get_builtin_template_profile,
- PLACEHOLDER_ALIASES, _matches_any_placeholder,
- )
- from quality_inspector import QualityInspector
- from theme_manager import theme_to_rgb_colors, get_theme
- from report_config import (
- ReportConfig, PageDef, MetricDef, PeriodType, ChartType,
- validate_six_confirmations,
- )
- from quality_rules import SLIDE_WIDTH, SLIDE_HEIGHT, CONTENT_LEFT, CONTENT_TOP_BASE, FOOTER_TOP
# Colors — aligned with reference design theme YAML.
# Module-level palette constants, written as RRGGBB hex strings.
# C_GREEN repeats the C_ACCENT value and C_RED repeats C_ACCENT_NEG.
C_PRIMARY = RGBColor.from_string('1E3A5F')
C_ACCENT = RGBColor.from_string('10B981')
C_ACCENT_NEG = RGBColor.from_string('EF4444')
C_SECONDARY = RGBColor.from_string('64748B')
C_DARK = RGBColor.from_string('1F3A5C')
C_WHITE = RGBColor.from_string('FFFFFF')
C_GRAY_BG = RGBColor.from_string('F2F2F2')
C_TEXT = RGBColor.from_string('333333')
C_TEXT_GRAY = RGBColor.from_string('666666')
C_LINE = RGBColor.from_string('D9D9D9')
C_CARD_BG = RGBColor.from_string('E7F0F7')
C_GREEN = RGBColor.from_string('10B981')
C_RED = RGBColor.from_string('EF4444')
C_ORANGE = RGBColor.from_string('ED7D31')
- # ==============================================================================
- # MASTER / SLIDE HELPERS
- # ==============================================================================
def get_master_template(report_type: str) -> str:
    """Return the absolute path of the master template for ``report_type``.

    ``'daily'``, ``'weekly'`` and ``'monthly'`` each map to their own file in
    the ``../assets`` directory next to this module; any other value is routed
    to the daily template. When the selected file is missing, the daily
    template is tried as a fallback.

    Raises:
        FileNotFoundError: neither the selected nor the daily template exists.
    """
    assets_dir = os.path.join(os.path.dirname(__file__), '..', 'assets')
    filenames = {
        'daily': 'report-master.pptx',
        'weekly': 'weekly-master.pptx',
        'monthly': 'monthly-master.pptx',
    }
    daily_path = os.path.join(assets_dir, filenames['daily'])
    selected_path = os.path.join(
        assets_dir, filenames.get(report_type, filenames['daily'])
    )
    # Preferred template first, then the daily template as the only fallback.
    for candidate in (selected_path, daily_path):
        if os.path.exists(candidate):
            return os.path.abspath(candidate)
    raise FileNotFoundError(f"Master template not found for {report_type}")
- def _resolve_master_template(config: ReportConfig) -> str:
- if getattr(config, 'template_path', ''):
- return os.path.abspath(config.template_path)
- period_type = getattr(config, 'period_type', None)
- report_type = getattr(period_type, 'value', period_type) or 'daily'
- return get_master_template(report_type)
- def _resolve_template_profile(config: ReportConfig):
- """Resolve TemplateProfile from config (cached or parse on demand)."""
- if getattr(config, 'template_profile', None):
- return config.template_profile
- if getattr(config, 'template_path', ''):
- return parse_template(config.template_path)
- period_type = getattr(config, 'period_type', None)
- report_type = getattr(period_type, 'value', period_type) or 'daily'
- return get_builtin_template_profile(report_type)
def _resolve_colors(config: ReportConfig, profile) -> dict:
    """Resolve the color set for a report.

    Order of precedence:
      1. ``config.theme`` when it is set and ``config.use_template_theme`` is
         explicitly falsy (the attribute defaults to True when absent).
      2. The theme extracted from the template ``profile``, if any.
      3. ``config.theme`` when set.
      4. A default-constructed ``ThemeConfig``.

    The chosen theme is converted with ``theme_to_rgb_colors``.
    """
    user_theme = config.theme
    if user_theme and not getattr(config, 'use_template_theme', True):
        return theme_to_rgb_colors(user_theme)

    from theme_manager import extract_theme_from_template, ThemeConfig

    # `or` chaining keeps ThemeConfig() lazy: it is only built when neither a
    # template theme nor a user theme is available.
    chosen_theme = (
        extract_theme_from_template(profile) or user_theme or ThemeConfig()
    )
    return theme_to_rgb_colors(chosen_theme)
- def _resolve_fonts(config: ReportConfig, profile) -> dict:
- """Three-tier font resolution: user config > template fonts > defaults."""
- result = {
- 'title_font': '微软雅黑',
- 'body_font': '微软雅黑',
- 'number_font': 'Arial',
- }
- # Template fonts
- detected = getattr(profile, 'detected_fonts', {})
- if detected.get('title_font'):
- result['title_font'] = detected['title_font']
- if detected.get('body_font'):
- result['body_font'] = detected['body_font']
- if detected.get('number_font'):
- result['number_font'] = detected['number_font']
- # User override via theme config
- if config.theme:
- if getattr(config.theme, 'title_font', ''):
- result['title_font'] = config.theme.title_font
- if getattr(config.theme, 'body_font', ''):
- result['body_font'] = config.theme.body_font
- if getattr(config.theme, 'number_font', ''):
- result['number_font'] = config.theme.number_font
- return result
def _duplicate_master_slide(prs, profile, page_type: str):
    """Duplicate the master slide that ``profile`` maps to ``page_type``.

    The index comes from ``profile.get_master_index_for``; when it falls
    outside the presentation's slide range, the first slide is copied instead.
    """
    slides = prs.slides
    master_index = profile.get_master_index_for(page_type)
    if not 0 <= master_index < len(slides):
        master_index = 0
    return _duplicate_slide(prs, slides[master_index])
- def _is_forecast_page_type(page_type: str) -> bool:
- normalized = str(page_type or '').lower()
- return normalized in {
- 'forecast',
- 'prediction',
- 'plan',
- 'monthly_forecast',
- 'monthly_plan',
- 'next_month_plan',
- 'custom_forecast',
- 'custom_prediction',
- }
def _detect_content_top(slide) -> int:
    """Return the Y coordinate (EMU) at which page content should start.

    The bottom edge of the first text shape containing ``{page_title}`` is
    used; when no such shape exists, 1422400 EMU (the daily default) is used
    instead. A fixed 381000 EMU gap is added so content does not crowd the
    title.
    """
    title_bottom = next(
        (
            shape.top + shape.height
            for shape in slide.shapes
            if shape.has_text_frame and '{page_title}' in shape.text_frame.text
        ),
        Emu(1422400),  # daily default
    )
    spacing = Emu(381000)
    return int(title_bottom) + int(spacing)
- def _delete_template_slides(prs, count=None):
- """Delete original template slides from the presentation.
-
- count: number of original template slides to remove from the beginning.
- If None, auto-detect using a heuristic that looks for unreplaced placeholders.
- """
- if count is None:
- # Auto-detect: count leading slides that contain unreplaced placeholders
- # or have only template-specific content patterns.
- count = 0
- for slide in prs.slides:
- has_unreplaced_placeholder = False
- has_real_content = False
- for shape in slide.shapes:
- if shape.has_text_frame:
- text = shape.text_frame.text.strip()
- if text:
- if '{' in text and '}' in text:
- has_unreplaced_placeholder = True
- else:
- # Text like copyright, footer, etc. on template slides
- # is not "real content" in the report sense
- pass
- # If slide has unreplaced placeholders, it's an original template slide
- if has_unreplaced_placeholder:
- count += 1
- else:
- # Also check if slide is completely empty (some template slides
- # may have no placeholders at all)
- if len(slide.shapes) == 0:
- count += 1
- else:
- break
-
- # Ensure we don't delete all slides
- actual_count = min(count, len(prs.slides) - 1) if len(prs.slides) > 1 else 0
-
- for _ in range(actual_count):
- if len(prs.slides) == 0:
- break
- rId = prs.slides._sldIdLst[0].rId
- prs.part.drop_rel(rId)
- del prs.slides._sldIdLst[0]
def _duplicate_slide(prs, source_slide):
    """Append a copy of ``source_slide`` to ``prs`` and return the new slide.

    The new slide is created from layout index 6, or from the last layout when
    the template has fewer than seven layouts. The source background element
    and every source shape element are deep-copied into it.
    """
    layouts = prs.slide_layouts
    # Clamp the index so custom templates with few layouts do not raise.
    base_layout = layouts[min(6, len(layouts) - 1)]
    duplicate = prs.slides.add_slide(base_layout)

    # Copy slide background (solid, gradient, image) from source.
    # Best effort: any failure here is ignored and the copy keeps whatever
    # background the layout gave it.
    try:
        source_bg = source_slide._element.cSld.bg
        target_cSld = duplicate._element.cSld
        if source_bg is not None:
            bg_copy = copy.deepcopy(source_bg)
            existing_bg = target_cSld.bg
            if existing_bg is not None:
                target_cSld.remove(existing_bg)
            target_cSld.insert(0, bg_copy)
    except Exception:
        pass

    for shape in source_slide.shapes:
        cloned_element = copy.deepcopy(shape.element)
        duplicate.shapes._spTree.insert_element_before(cloned_element, 'p:extLst')
    return duplicate
def _replace_placeholder(slide, placeholder, new_text, fonts: dict = None):
    """Replace ``placeholder`` (or one of its aliases) in every text shape.

    Args:
        slide: slide whose text shapes are scanned.
        placeholder: token such as ``{page_title}``; aliases are looked up in
            ``PLACEHOLDER_ALIASES``.
        new_text: replacement value. It is stringified, except for
            ``{kpiN_value}`` tokens, which go through
            ``_format_kpi_value_for_placeholder``.
        fonts: optional font mapping; ``body_font`` (default ``微软雅黑``) is
            applied to the runs of every paragraph that was changed.

    Per paragraph only the first matching token (placeholder first, then
    aliases in order) is replaced.
    """
    body_font = (fonts or {}).get('body_font', '微软雅黑')
    if re_module.fullmatch(r'\{kpi\d+_value\}', placeholder):
        replacement = _format_kpi_value_for_placeholder(new_text)
    else:
        replacement = str(new_text)

    # The placeholder itself is tried first, then its aliases.
    candidates = [placeholder]
    candidates.extend(
        alias
        for alias in PLACEHOLDER_ALIASES.get(placeholder, [])
        if alias != placeholder
    )

    for shape in slide.shapes:
        if not shape.has_text_frame:
            continue
        for para in shape.text_frame.paragraphs:
            hit = next((token for token in candidates if token in para.text), None)
            if hit is None:
                continue
            para.text = para.text.replace(hit, replacement)
            for run in para.runs:
                run.font.name = body_font
def _replace_all_placeholders(slide, mapping: dict, fonts: dict = None):
    """Apply ``_replace_placeholder`` for each ``placeholder -> text`` pair in ``mapping``."""
    for token in mapping:
        _replace_placeholder(slide, token, mapping[token], fonts)
- def _remove_shape(shape):
- """Remove a python-pptx shape from its parent tree."""
- el = shape.element
- el.getparent().remove(el)
- def _remove_slide(prs, slide):
- """Remove a slide from a presentation by its rId."""
- try:
- for i, s in enumerate(prs.slides):
- if s == slide:
- rId = prs.slides._sldIdLst[i].rId
- prs.part.drop_rel(rId)
- del prs.slides._sldIdLst[i]
- return True
- except Exception:
- pass
- return False
- def _safe_auto_shape_type(shape):
- try:
- return shape.auto_shape_type
- except (AttributeError, ValueError):
- return None
def _remove_empty_cover_kpi_placeholders(slide):
    """Remove leftover KPI placeholders and their card backgrounds.

    Used when generic cover data provides no KPI values, so that empty rounded
    rectangles do not stay on the cover. Two kinds of shapes are removed:

    * text shapes whose text still contains ``{kpiN_label}`` / ``{kpiN_value}``;
    * rounded-rectangle auto shapes lying entirely inside the bounding box of
      those text shapes, padded by 220000 EMU on every side.

    Nothing happens when no placeholder text shape is found.
    """
    kpi_token = re_module.compile(r'\{kpi\d+_(label|value)\}')
    token_shapes = []
    for shape in slide.shapes:
        if shape.has_text_frame and kpi_token.search(shape.text_frame.text or ''):
            token_shapes.append(shape)
    if not token_shapes:
        return

    # Padded bounding box around all placeholder text shapes.
    pad = Emu(220000)
    region_left = min(int(s.left) for s in token_shapes) - pad
    region_right = max(int(s.left) + int(s.width) for s in token_shapes) + pad
    region_top = min(int(s.top) for s in token_shapes) - pad
    region_bottom = max(int(s.top) + int(s.height) for s in token_shapes) + pad

    # Collect first, delete afterwards, so the shape tree is not mutated
    # while it is being iterated.
    doomed = []
    for shape in slide.shapes:
        left, top = int(shape.left), int(shape.top)
        right, bottom = left + int(shape.width), top + int(shape.height)
        if shape in token_shapes:
            doomed.append(shape)
            continue
        inside_region = (
            region_left <= left and right <= region_right
            and region_top <= top and bottom <= region_bottom
        )
        if inside_region and _safe_auto_shape_type(shape) == MSO_SHAPE.ROUNDED_RECTANGLE:
            doomed.append(shape)

    for shape in doomed:
        _remove_shape(shape)
- # ==============================================================================
- # NAVIGATION TABS
- # ==============================================================================
def _add_nav_tabs(slide, tabs, active_index=0, slide_width=None,
                  fonts=None, colors=None,
                  tab_y=Emu(254000), tab_h=Emu(762000), underline_h=Emu(127000)):
    """Draw a row of equally wide navigation tabs across the slide.

    Args:
        slide: target slide.
        tabs: sequence of tab labels; nothing is drawn when it is empty.
        active_index: index of the highlighted tab, which also gets an
            underline bar.
        slide_width: total width to split between tabs. When None, a ``cx``
            attribute is looked up two levels above the shape tree and
            16256000 EMU is used if it is absent.
        fonts: optional font mapping; ``body_font`` (default ``微软雅黑``) is
            used for the labels.
        colors: optional color mapping; ``primary`` colors the active tab and
            underline, ``text_gray`` the inactive tabs.
        tab_y, tab_h: top and height of the label boxes.
        underline_h: height of the active-tab underline, drawn at a fixed
            Y of 457200 EMU.
    """
    if not tabs:
        # Avoids a division by zero when computing the per-tab width.
        return
    fonts = fonts or {}
    colors = colors or {}
    body_font = fonts.get('body_font', '微软雅黑')
    active_color = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
    idle_color = colors.get('text_gray', RGBColor(0x66, 0x66, 0x66))

    if slide_width is None:
        raw_width = slide.shapes._spTree.getparent().getparent().attrib.get('cx')
        slide_width = Emu(int(raw_width)) if raw_width else Emu(16256000)

    tab_w = Emu(int(slide_width) // len(tabs))
    for i, label in enumerate(tabs):
        is_active = i == active_index
        x = Emu(i * int(tab_w))
        box = slide.shapes.add_textbox(x, tab_y, tab_w, tab_h)
        p = box.text_frame.paragraphs[0]
        p.text = str(label)
        p.font.size = Pt(11)
        p.font.name = body_font
        p.font.color.rgb = active_color if is_active else idle_color
        p.alignment = PP_ALIGN.CENTER
        if is_active:
            line = slide.shapes.add_shape(
                MSO_SHAPE.RECTANGLE, x, Emu(457200), tab_w, underline_h
            )
            line.fill.solid()
            line.fill.fore_color.rgb = active_color
            line.line.fill.background()
- # ==============================================================================
- # KPI CARDS
- # ==============================================================================
def _add_kpi_cards(slide, kpis, start_x=Emu(762000), start_y=Emu(1651000), fonts=None, colors=None):
    """Draw up to six large KPI cards in a 3 x 2 grid.

    Args:
        slide: target slide.
        kpis: sequence of dicts with optional keys ``label``, ``value``,
            ``unit``, ``change`` and ``sub``; only the first six are drawn.
        start_x, start_y: position of the first card. The second and third
            columns sit at fixed X offsets; the second row is 3429000 EMU
            below ``start_y``.
        fonts: optional font mapping; ``body_font`` (default ``微软雅黑``) is
            used for labels and notes, ``number_font`` (default ``Arial``)
            for the value.
        colors: optional color mapping with ``card_bg``, ``text_gray`` and
            ``primary`` entries.

    The change text is colored red when it looks negative (leading ``-`` or a
    negative keyword), green when it looks positive, gray otherwise; negative
    wins when both match.
    """
    fonts = fonts or {}
    colors = colors or {}
    body_font = fonts.get("body_font", "微软雅黑")
    number_font = fonts.get("number_font", "Arial")
    card_bg = colors.get('card_bg', RGBColor(0xE7, 0xF0, 0xF7))
    gray = colors.get('text_gray', RGBColor(0x66, 0x66, 0x66))
    primary = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))

    positive_marks = ('↑', '提升', '增长', '上调', '增加', '大幅', '好', '突破', '达成', '优化')
    negative_marks = ('↓', '下滑', '下降', '减少', '回落', '滞后', '堆积', '阻塞', '缺口', '延迟')

    def put_text(left, top, width, height, text, size, color, font,
                 bold=False, align=None):
        """Add a single-paragraph text box with the given formatting."""
        box = slide.shapes.add_textbox(Emu(left), Emu(top), Emu(width), Emu(height))
        para = box.text_frame.paragraphs[0]
        para.text = text
        para.font.size = Pt(size)
        if bold:
            para.font.bold = True
        para.font.color.rgb = color
        para.font.name = font
        if align is not None:
            para.alignment = align

    # Row-major grid: three columns, two rows.
    column_xs = (int(start_x), 5778500, 10795000)
    row_ys = (int(start_y), int(start_y) + 3429000)
    positions = [(x, y) for y in row_ys for x in column_xs]

    for (x, y), kpi in zip(positions, kpis[:6]):
        card = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, Emu(x), Emu(y), Emu(4699000), Emu(3048000)
        )
        card.fill.solid()
        card.fill.fore_color.rgb = card_bg
        card.line.fill.background()

        # Label
        put_text(x + 508000, y + 228600, 2540000, 406400,
                 str(kpi.get('label') or ''), 14, gray, body_font)
        # Value
        put_text(x + 508000, y + 762000, 2540000, 698500,
                 str(kpi.get('value', '')), 36, primary, number_font, bold=True)
        # Unit
        unit = kpi.get('unit', '')
        if unit:
            put_text(x + 3048000, y + 1016000, 508000, 381000,
                     str(unit), 14, gray, body_font)
        # Change badge
        chg = kpi.get('change', '')
        if chg:
            chg_str = str(chg)
            is_positive = chg_str.startswith('+') or any(k in chg_str for k in positive_marks)
            is_negative = chg_str.startswith('-') or any(k in chg_str for k in negative_marks)
            if is_negative:
                chg_color = C_RED
            elif is_positive:
                chg_color = C_GREEN
            else:
                chg_color = gray
            put_text(x + 508000, y + 1778000, 4064000, 304800,
                     chg_str, 12, chg_color, body_font)
        # Sub note with semantic background color tag (e.g. "日均51笔")
        sub = kpi.get('sub', '')
        if sub:
            sub_text = _truncate_text(sub, 20)
            tag_color = _sentiment_color(sub_text)
            tag_x = x + 508000
            tag_y = y + 2159000
            tag_w = min(len(sub_text) * 220000 + 400000, 3600000)
            tag_h = 304800
            if tag_color:
                tag_bg = slide.shapes.add_shape(
                    MSO_SHAPE.ROUNDED_RECTANGLE,
                    Emu(tag_x), Emu(tag_y), Emu(tag_w), Emu(tag_h),
                )
                tag_bg.fill.solid()
                tag_bg.fill.fore_color.rgb = tag_color
                tag_bg.line.fill.background()
            put_text(tag_x, tag_y, tag_w, tag_h, sub_text, 11, gray, body_font,
                     align=PP_ALIGN.CENTER)
def _add_compact_kpi_cards(slide, kpis, start_x=Emu(CONTENT_LEFT), start_y=Emu(1651000),
                           fonts=None, colors=None,
                           max_cols=3, card_h=Emu(1780000), gap_x=Emu(254000),
                           gap_y=Emu(254000)):
    """Draw compact KPI cards so generic overview pages preserve room for insight text.

    Args:
        slide: target slide.
        kpis: sequence of dicts with optional keys ``label``, ``value``,
            ``unit``, ``sub`` and ``change``.
        start_x, start_y: top-left corner of the first card.
        fonts: optional font mapping; ``body_font`` (default ``微软雅黑``) and
            ``number_font`` (default ``Arial``) are used.
        colors: optional color mapping with ``card_bg``, ``text_gray`` and
            ``primary`` entries.
        max_cols: maximum number of cards per row (treated as at least 1).
        card_h: card height.
        gap_x, gap_y: horizontal and vertical spacing between cards.

    Returns:
        The Y coordinate (EMU, int) of the bottom edge of the last card row,
        or 0 when ``kpis`` is empty.
    """
    if not kpis:
        return 0
    fonts = fonts or {}
    colors = colors or {}
    body_font = fonts.get('body_font', '微软雅黑')
    number_font = fonts.get('number_font', 'Arial')
    card_bg = colors.get('card_bg', RGBColor(0xE7, 0xF0, 0xF7))
    gray = colors.get('text_gray', RGBColor(0x66, 0x66, 0x66))
    primary = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))

    def put_text(left, top, width, height, text, size, color, font, bold=False):
        """Add a single-paragraph text box with the given formatting."""
        box = slide.shapes.add_textbox(Emu(left), Emu(top), Emu(width), Emu(height))
        para = box.text_frame.paragraphs[0]
        para.text = text
        para.font.size = Pt(size)
        if bold:
            para.font.bold = True
        para.font.color.rgb = color
        para.font.name = font

    content_w = SLIDE_WIDTH - 2 * CONTENT_LEFT
    # max(1, ...) keeps the width computation safe for max_cols <= 0.
    cols = max(1, min(max_cols, len(kpis)))
    card_w = int((content_w - (cols - 1) * int(gap_x)) / cols)
    rows = (len(kpis) + cols - 1) // cols

    for i, kpi in enumerate(kpis):
        row, col = divmod(i, cols)
        x = int(start_x) + col * (card_w + int(gap_x))
        y = int(start_y) + row * (int(card_h) + int(gap_y))

        card = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, Emu(x), Emu(y), Emu(card_w), card_h
        )
        card.fill.solid()
        card.fill.fore_color.rgb = card_bg
        card.line.fill.background()

        label = _truncate_text(kpi.get('label', ''), 14)
        put_text(x + 280000, y + 180000, card_w - 560000, 330000,
                 label, 11, gray, body_font)

        value = _truncate_text(str(kpi.get('value', '')), 16)
        # Longer values drop to a smaller size so they stay inside the card.
        put_text(x + 280000, y + 570000, card_w - 1000000, 560000,
                 value, 24 if len(value) <= 10 else 20, primary, number_font,
                 bold=True)

        unit = kpi.get('unit', '')
        if unit:
            put_text(x + card_w - 820000, y + 710000, 540000, 330000,
                     _truncate_text(str(unit), 4), 10, gray, body_font)

        sub_text = kpi.get('sub') or kpi.get('change') or '核心指标'
        put_text(x + 280000, y + 1230000, card_w - 560000, 330000,
                 _truncate_text(str(sub_text), 24), 9, gray, body_font)

    return int(start_y) + rows * int(card_h) + (rows - 1) * int(gap_y)
- # ==============================================================================
- # TEXT BLOCKS
- # ==============================================================================
def _add_text_block(slide, title, body, left, top, width, height,
                    fonts=None, colors=None,
                    title_size=Pt(14), body_size=Pt(11), line_space=Pt(6)):
    """Single text box with title + body.

    Args:
        slide: target slide.
        title: text of the first (bold) paragraph; colored ``primary`` when
            non-empty.
        body: optional text of the second paragraph; skipped when falsy.
        left, top, width, height: geometry of the text box.
        fonts: optional font mapping; ``body_font`` (default ``微软雅黑``) is
            used for both paragraphs.
        colors: optional color mapping with ``primary`` and ``text`` entries.
        title_size, body_size: font sizes of the two paragraphs.
        line_space: space inserted before the body paragraph.
    """
    fonts = fonts or {}
    colors = colors or {}
    body_font = fonts.get('body_font', '微软雅黑')
    primary = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
    text_color = colors.get('text', RGBColor(0x33, 0x33, 0x33))

    box = slide.shapes.add_textbox(left, top, width, height)
    tf = box.text_frame
    tf.word_wrap = True

    heading = tf.paragraphs[0]
    heading.text = title
    heading.font.size = title_size
    heading.font.bold = True
    heading.font.color.rgb = primary if title else text_color
    heading.font.name = body_font

    if body:
        para = tf.add_paragraph()
        para.text = body
        para.font.size = body_size
        para.font.color.rgb = text_color
        para.font.name = body_font
        para.space_before = line_space
        para.line_spacing = 1.3
- def _estimate_text_height(items, title_size_pt, body_size_pt, width_emu,
- line_spacing=1.15, title_extra=1.3):
- """Estimate rendered text height in EMU for adaptive font sizing."""
- width_pt = width_emu / 12700.0
- chars_per_line_body = max(10, int(width_pt / (body_size_pt * 1.15)))
- chars_per_line_title = max(10, int(width_pt / (title_size_pt * 1.15)))
- line_height_body = int(body_size_pt * line_spacing * 12700)
- line_height_title = int(title_size_pt * title_extra * 12700)
- total = 0
- for item in items:
- title = item.get('title', '')
- content = item.get('content', '')
- title_lines = max(1, (len(title) + chars_per_line_title - 1) // chars_per_line_title)
- content_lines = max(1, (len(content) + chars_per_line_body - 1) // chars_per_line_body)
- total += title_lines * line_height_title + content_lines * line_height_body + int(6 * 12700)
- return total
def _add_structured_insight(slide, items, left, top, width, height,
                            fonts=None, colors=None,
                            title_size=Pt(12), body_size=Pt(11),
                            max_items=None, min_body_size=Pt(9)):
    """High-density structured multi-paragraph insight block.

    Args:
        slide: target slide.
        items: list of ``{'title': str, 'content': str}``; nothing is drawn
            when it is empty.
        left, top, width, height: geometry of the text box.
        fonts: optional font mapping; ``body_font`` (default ``微软雅黑``).
        colors: optional color mapping with ``primary`` and ``text`` entries.
        title_size, body_size: starting font sizes, given either in points or
            as EMU lengths (values above 1000 are treated as EMU).
        max_items: render only the first ``max_items`` items; all items when
            falsy.
        min_body_size: lower bound for the auto-shrunk body font.

    Features:
        - No truncation; full content rendered
        - No max_items limit by default (render all)
        - Auto-shrink body font to fit within height (down to min_body_size)
        - Compact line spacing (1.15) to maximize density
        - Each item has an optional emoji prefix + bold title + normal body
    """
    if not items:
        return
    fonts = fonts or {}
    colors = colors or {}
    body_font = fonts.get('body_font', '微软雅黑')
    primary = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
    text_color = colors.get('text', RGBColor(0x33, 0x33, 0x33))

    # Size the fonts against the items that will actually be rendered.
    rendered = items[:max_items] if max_items else items

    def to_points(size):
        # Sizes may be EMU integers or plain point values; normalize to pt.
        size = float(size)
        return size / 12700.0 if size > 1000 else size

    ts_pt = to_points(title_size)
    bs_pt = to_points(body_size)
    min_bs_pt = to_points(min_body_size)
    target_height = int(height)

    # Step the body size down by 0.5pt until the estimate fits, never going
    # below the minimum; the title stays at least 1pt larger than the body.
    while bs_pt > min_bs_pt:
        if _estimate_text_height(rendered, ts_pt, bs_pt, int(width)) <= target_height:
            break
        bs_pt = max(min_bs_pt, bs_pt - 0.5)
        ts_pt = max(bs_pt + 1, ts_pt - 0.25)

    box = slide.shapes.add_textbox(left, top, width, height)
    tf = box.text_frame
    tf.word_wrap = True

    for index, item in enumerate(rendered):
        if index:
            # Blank paragraph separating consecutive items.
            spacer = tf.add_paragraph()
            spacer.text = ''
            spacer.space_before = Pt(3)

        title = item.get('title', '')
        emoji = _emoji_for_item(title)
        # Avoid double emoji
        if emoji and title.startswith(emoji):
            emoji = ''

        heading = tf.add_paragraph() if index else tf.paragraphs[0]
        heading.text = f'{emoji} {title}' if emoji else title
        heading.font.size = Pt(ts_pt)
        heading.font.bold = True
        heading.font.color.rgb = primary
        heading.font.name = body_font
        heading.line_spacing = 1.15

        content = item.get('content', '')
        if content:
            para = tf.add_paragraph()
            para.text = content
            para.font.size = Pt(bs_pt)
            para.font.color.rgb = text_color
            para.font.name = body_font
            para.line_spacing = 1.15
            para.space_before = Pt(1)
- def _ensure_min_insight_items(items, profile=None, metrics=None, min_count=2,
- context_label='本页'):
- """Guarantee enough long-form insight blocks for quality self-check."""
- cleaned = []
- for item in items or []:
- title = str(item.get('title', '')).strip()
- content = str(item.get('content', '')).strip()
- if title or content:
- cleaned.append({'title': title or '分析说明', 'content': content})
- profile = profile or {}
- metrics = metrics or {}
- total_rows = profile.get('total_rows', 0)
- numeric_count = len(profile.get('numeric_columns', []) or [])
- category_count = len(profile.get('category_columns', []) or [])
- fallback_pool = [
- {
- 'title': f'{context_label}数据基础',
- 'content': f'本页基于当前数据画像进行归纳,覆盖 {total_rows or "若干"} 条记录、'
- f'{numeric_count} 个数值指标和 {category_count} 个分类维度。'
- f'当原始数据字段较少或业务指标尚未形成充分拆解时,报告优先呈现已经确认的核心指标,'
- f'并将可验证的数据范围、维度覆盖和后续分析口径写入页面,避免出现空白页或模板占位内容。',
- },
- {
- 'title': f'{context_label}行动建议',
- 'content': f'建议围绕已确认的核心指标建立持续跟踪机制:先核对指标口径与数据字段映射,'
- f'再按时间、区域、部门或客户等维度拆解异常变化,最后将发现转化为责任人、截止时间和复盘频率明确的行动项。'
- f'如果后续补充历史同期或目标值数据,可进一步增加同比、环比和达成率判断。',
- },
- {
- 'title': f'{context_label}风险提示',
- 'content': f'若数据源存在缺失值、合并表头、人工备注列或统计口径变化,自动生成的结论需要结合业务确认进行复核。'
- f'建议在报告发布前重点检查核心指标是否全部出现、图表数值是否与原表一致、长文本是否仍在页面安全区域内,'
- f'以保证美观度和决策可信度同时达标。',
- },
- ]
- used_titles = {item['title'] for item in cleaned}
- for fallback in fallback_pool:
- if len(cleaned) >= min_count:
- break
- if fallback['title'] not in used_titles:
- cleaned.append(fallback)
- used_titles.add(fallback['title'])
- return cleaned
- # ==============================================================================
- # ALERT / ACTION / ISSUE / GOAL CARDS
- # ==============================================================================
def _add_alert_cards(slide, alerts, start_y=Emu(1651000), fonts=None, colors=None):
    """Draw up to three alert cards side by side.

    Each card is a thin colored bar plus a bold title and a wrapped detail
    text. The bar color follows the alert's ``level`` (default ``关注``);
    unknown levels use the primary color.

    Args:
        slide: target slide.
        alerts: sequence of dicts with optional keys ``level``, ``title`` and
            ``detail``; only the first three are drawn.
        start_y: top edge of the cards.
        fonts: optional font mapping; ``body_font`` (default ``微软雅黑``).
        colors: optional color mapping with ``primary``, ``red``, ``orange``,
            ``secondary`` and ``text`` entries.
    """
    fonts = fonts or {}
    colors = colors or {}
    body_font = fonts.get('body_font', '微软雅黑')
    primary = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
    red = colors.get('red', RGBColor(0xEF, 0x44, 0x44))
    orange = colors.get('orange', RGBColor(0xED, 0x7D, 0x31))
    secondary = colors.get('secondary', RGBColor(0x64, 0x74, 0x8B))
    text_color = colors.get('text', RGBColor(0x33, 0x33, 0x33))
    # Kept separate from `colors` so the caller's mapping is not shadowed.
    level_colors = {'严重': red, '警告': orange, '关注': primary, '中度': orange, '一般': secondary}

    column_xs = (762000, 5778500, 10795000)
    y = int(start_y)
    for x, alert in zip(column_xs, alerts[:3]):
        bar_color = level_colors.get(alert.get('level', '关注'), primary)
        bar = slide.shapes.add_shape(
            MSO_SHAPE.RECTANGLE, Emu(x), Emu(y), Emu(50800), Emu(2286000)
        )
        bar.fill.solid()
        bar.fill.fore_color.rgb = bar_color
        bar.line.fill.background()

        title_box = slide.shapes.add_textbox(
            Emu(x + 101600), Emu(y + 228600), Emu(4064000), Emu(406400)
        )
        p = title_box.text_frame.paragraphs[0]
        p.text = alert.get('title', '')
        p.font.size = Pt(15)
        p.font.bold = True
        p.font.color.rgb = text_color
        p.font.name = body_font

        detail_box = slide.shapes.add_textbox(
            Emu(x + 101600), Emu(y + 762000), Emu(4064000), Emu(1270000)
        )
        tf = detail_box.text_frame
        tf.word_wrap = True
        p = tf.paragraphs[0]
        p.text = alert.get('detail', '')
        p.font.size = Pt(11)
        p.font.color.rgb = text_color
        p.font.name = body_font
def _add_action_cards(slide, actions, start_y=Emu(2540000), fonts=None, colors=None):
    """Draw up to three action cards in one horizontal row on ``slide``.

    Every card gets a short primary-coloured bar at ``start_y``, a bold
    title below it and a word-wrapped detail paragraph with 1.3 line
    spacing.

    Args:
        slide: Slide whose ``shapes`` collection receives the new shapes.
        actions: Sequence of dicts; ``title`` and ``detail`` are read with
            ``.get``. Only the first three entries are drawn.
        start_y: Top edge of the accent bars, in EMU.
        fonts: Accepted but not read by this function.
        colors: Optional palette dict; ``primary`` and ``text`` override the
            built-in defaults.
    """
    palette = colors or {}
    bar_color = palette.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
    text_color = palette.get('text', RGBColor(0x33, 0x33, 0x33))
    column_lefts = (Emu(762000), Emu(5778500), Emu(10795000))

    for left, action in zip(column_lefts, actions[:3]):
        marker = slide.shapes.add_shape(
            MSO_SHAPE.RECTANGLE, left, start_y, Emu(50800), Emu(406400))
        marker.fill.solid()
        marker.fill.fore_color.rgb = bar_color
        marker.line.fill.background()

        title_box = slide.shapes.add_textbox(
            Emu(left + 101600), Emu(start_y + 952500), Emu(4064000), Emu(406400))
        heading = title_box.text_frame.paragraphs[0]
        heading.text = action.get('title', '')
        heading.font.size = Pt(17)
        heading.font.bold = True
        heading.font.color.rgb = text_color
        heading.font.name = '微软雅黑'

        detail_box = slide.shapes.add_textbox(
            Emu(left + 101600), Emu(start_y + 1524000), Emu(4064000), Emu(3429000))
        detail_frame = detail_box.text_frame
        detail_frame.word_wrap = True
        body = detail_frame.paragraphs[0]
        body.text = action.get('detail', '')
        body.font.size = Pt(11)
        body.font.color.rgb = text_color
        body.font.name = '微软雅黑'
        body.line_spacing = 1.3
def _add_issue_cards(slide, issues, start_y=Emu(1524000), fonts=None, colors=None):
    """Draw up to three issue cards stacked vertically on ``slide``.

    Each card has a severity-coloured bar, a severity tag, a bold title, a
    detail line and a word-wrapped "建议措施" (suggested action) paragraph.

    Args:
        slide: Slide whose ``shapes`` collection receives the new shapes.
        issues: Sequence of dicts; ``severity``, ``title``, ``detail`` and
            ``action`` are read with ``.get``. Only the first three entries
            are drawn.
        start_y: Top edge of the first card, in EMU.
        fonts: Accepted but not read by this function.
        colors: Optional palette dict; ``primary``, ``red``, ``orange``,
            ``secondary``, ``text`` and ``text_gray`` override the built-in
            defaults.
    """
    palette = colors or {}
    C_PRIMARY = palette.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
    C_RED = palette.get('red', RGBColor(0xEF, 0x44, 0x44))
    C_ORANGE = palette.get('orange', RGBColor(0xED, 0x7D, 0x31))
    C_SECONDARY = palette.get('secondary', RGBColor(0x64, 0x74, 0x8B))
    C_TEXT = palette.get('text', RGBColor(0x33, 0x33, 0x33))
    # Previously this name was never bound inside the function; resolve it
    # from the palette with the same default the goal cards use.
    C_TEXT_GRAY = palette.get('text_gray', RGBColor(0x66, 0x66, 0x66))

    # Severity -> accent colour; unknown severities fall back to orange.
    severity_colors = {'严重': C_RED, '中度': C_ORANGE, '轻度': C_PRIMARY, '一般': C_SECONDARY}

    for i, issue in enumerate(issues[:3]):
        x = Emu(762000)
        # Cards are 1778000 EMU tall and separated by a 254000 EMU gap.
        y = Emu(int(start_y) + i * (1778000 + 254000))
        sev = issue.get('severity', '中度')
        c = severity_colors.get(sev, C_ORANGE)

        bar = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, x, y, Emu(50800), Emu(1778000))
        bar.fill.solid()
        bar.fill.fore_color.rgb = c
        bar.line.fill.background()

        # Severity tag, tinted with the accent colour.
        sbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 228600), Emu(660400), Emu(304800))
        p = sbox.text_frame.paragraphs[0]
        p.text = sev
        p.font.size = Pt(11)
        p.font.bold = True
        p.font.color.rgb = c
        p.font.name = '微软雅黑'

        tbox = slide.shapes.add_textbox(Emu(x + 1778000), Emu(y + 228600), Emu(13462000), Emu(355600))
        p = tbox.text_frame.paragraphs[0]
        p.text = issue.get('title', '')
        p.font.size = Pt(13)
        p.font.bold = True
        p.font.color.rgb = C_TEXT
        p.font.name = '微软雅黑'

        dbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 698500), Emu(14224000), Emu(355600))
        p = dbox.text_frame.paragraphs[0]
        p.text = issue.get('detail', '')
        p.font.size = Pt(11)
        p.font.color.rgb = C_TEXT
        p.font.name = '微软雅黑'

        abox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 1193800), Emu(14224000), Emu(609600))
        tf = abox.text_frame
        tf.word_wrap = True
        p = tf.paragraphs[0]
        p.text = f"建议措施:{issue.get('action', '')}"
        p.font.size = Pt(11)
        p.font.color.rgb = C_TEXT_GRAY
        p.font.name = '微软雅黑'
def _add_goal_cards(slide, goals, start_y=Emu(1524000), fonts=None, colors=None):
    """Draw up to four goal cards in a 2x2 grid on ``slide``.

    Each card shows an icon plus goal id, a bold title and a word-wrapped
    detail paragraph with 1.3 line spacing.

    Args:
        slide: Slide whose ``shapes`` collection receives the new shapes.
        goals: Sequence of dicts; ``id``, ``title`` and ``detail`` are read
            with ``.get``. A missing ``id`` becomes ``G1``..``G4`` by
            position. Only the first four entries are drawn.
        start_y: Top edge of the first grid row, in EMU.
        fonts: Accepted but not read by this function.
        colors: Optional palette dict; ``primary``, ``text`` and
            ``text_gray`` override the built-in defaults.
    """
    palette = colors or {}
    id_color = palette.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
    title_color = palette.get('text', RGBColor(0x33, 0x33, 0x33))
    detail_color = palette.get('text_gray', RGBColor(0x66, 0x66, 0x66))

    top_row = int(start_y)
    bottom_row = top_row + 1879600
    # Row-major grid: top-left, top-right, bottom-left, bottom-right.
    cells = [
        (Emu(762000), Emu(top_row)),
        (Emu(8318500), Emu(top_row)),
        (Emu(762000), Emu(bottom_row)),
        (Emu(8318500), Emu(bottom_row)),
    ]
    icons = ['🎯', '💰', '🚀', '⚡']

    for number, (cell, icon, goal) in enumerate(zip(cells, icons, goals[:4]), start=1):
        left, top = cell
        goal_id = goal.get('id', f'G{number}')

        id_box = slide.shapes.add_textbox(left, Emu(top + 101600), Emu(635000), Emu(355600))
        tag = id_box.text_frame.paragraphs[0]
        tag.text = f"{icon} {goal_id}"
        tag.font.size = Pt(16)
        tag.font.bold = True
        tag.font.color.rgb = id_color
        tag.font.name = 'Arial'

        title_box = slide.shapes.add_textbox(
            Emu(left + 863600), Emu(top + 101600), Emu(6096000), Emu(355600))
        heading = title_box.text_frame.paragraphs[0]
        heading.text = goal.get('title', '')
        heading.font.size = Pt(14)
        heading.font.bold = True
        heading.font.color.rgb = title_color
        heading.font.name = '微软雅黑'

        detail_box = slide.shapes.add_textbox(
            Emu(left + 228600), Emu(top + 571500), Emu(6731000), Emu(863600))
        detail_frame = detail_box.text_frame
        detail_frame.word_wrap = True
        body = detail_frame.paragraphs[0]
        body.text = goal.get('detail', '')
        body.font.size = Pt(11)
        body.font.color.rgb = detail_color
        body.font.name = '微软雅黑'
        body.line_spacing = 1.3
def _add_summary_text(slide, text, left=Emu(1016000), top=Emu(5435600), width=Emu(14224000), height=Emu(1270000), fonts=None, colors=None):
    """Add one word-wrapped summary paragraph to ``slide``.

    Args:
        slide: Slide whose ``shapes`` collection receives the textbox.
        text: Paragraph text.
        left, top, width, height: Textbox geometry in EMU.
        fonts: Accepted but not read by this function.
        colors: Optional palette dict; ``text`` overrides the default
            dark-grey font colour.
    """
    text_color = (colors or {}).get('text', RGBColor(0x33, 0x33, 0x33))

    frame = slide.shapes.add_textbox(left, top, width, height).text_frame
    frame.word_wrap = True

    paragraph = frame.paragraphs[0]
    paragraph.text = text
    paragraph.font.size = Pt(12)
    paragraph.font.color.rgb = text_color
    paragraph.font.name = '微软雅黑'
    paragraph.line_spacing = 1.3
- # ==============================================================================
- # TEXT / LAYOUT HELPERS
- # ==============================================================================
- def _truncate_text(text, max_chars=60):
- """Truncate text to max_chars, appending '...' if truncated."""
- if not text:
- return text
- if len(text) > max_chars:
- return text[:max_chars - 1] + '...'
- return text
- def _format_kpi_value_for_placeholder(value, max_chars=16):
- """
- KPI value placeholders are fixed-size number slots. If upstream passes a
- category list, compact it to a count instead of letting it overflow.
- """
- if value is None:
- return ''
- text = str(value).strip()
- if len(text) <= max_chars:
- return text
- list_text = text.strip().strip('[]()(){}')
- tokens = [
- token.strip().strip("'\"“”‘’")
- for token in re_module.split(r'[、,,;;\n/]+', list_text)
- ]
- tokens = [token for token in tokens if token]
- if len(tokens) >= 3:
- return f'{len(tokens)}项'
- return _truncate_text(text, max_chars)
- def _sentiment_color(text):
- """Return a light background color based on text sentiment."""
- if not text:
- return None
- text = str(text)
- positive_words = ['提升', '增长', '上调', '增加', '高', '好', '大幅', '冲刺', '领跑', '上升', '扩大', '优化', '改善', '突破', '达成']
- negative_words = ['下滑', '下降', '减少', '低', '差', '回落', '下滑', '滞后', '堆积', '阻塞', '缺口', '延迟', '超期', '逾期', '风险', '警告']
- pos_score = sum(1 for w in positive_words if w in text)
- neg_score = sum(1 for w in negative_words if w in text)
- if neg_score > pos_score:
- return RGBColor(0xFE, 0xE2, 0xE2) # light red ~ #EF444420
- if pos_score > neg_score:
- return RGBColor(0xD1, 0xFA, 0xE5) # light green ~ #10B98120
- return None
- import re
- def _emoji_for_item(title):
- """Return an emoji prefix based on title keywords."""
- if not title:
- return '📈'
- title = str(title)
- # Skip if title already starts with an emoji
- if re.match(r'^[\U0001F300-\U0001F9FF\u2600-\u26FF\u2700-\u27BF]', title):
- return ''
- if any(k in title for k in ['风险', '警告', '关注', '下滑', '下降', '延迟', '超期', '缺口', '阻塞']):
- return '⚠️'
- if any(k in title for k in ['建议', '措施', '行动', '协调', '对接']):
- return '💡'
- if any(k in title for k in ['目标', '计划', '冲刺', '展望', '聚焦']):
- return '🎯'
- if any(k in title for k in ['增长', '上升', '提升', '峰值', '领跑', '突破', '活跃', '好转']):
- return '📈'
- return '💡'
def _add_footer_if_missing(slide, footer_text, slide_width=None, fonts=None, colors=None):
    """Add a footer bar with ``footer_text`` unless the slide already has one.

    A slide counts as already having a footer when any shape with a text
    frame contains the substring ``数据来源``. Otherwise a full-width
    rectangle and a textbox are added at y = 8824000 EMU.

    Args:
        slide: Slide to inspect and modify.
        footer_text: Text written into the footer textbox.
        slide_width: Width of the footer bar in EMU. When ``None`` it is
            derived as described below.
        fonts: Accepted but not read by this function.
        colors: Optional palette dict; ``primary`` colours the bar and
            ``white`` the text.
    """
    palette = colors or {}
    bar_color = palette.get('primary', RGBColor(0x1E, 0x3A, 0x5F))
    label_color = palette.get('white', RGBColor(0xFF, 0xFF, 0xFF))

    if slide_width is None:
        # Looks for a 'cx' attribute two XML levels above the shape tree and
        # falls back to 16256000 EMU when it is absent.
        # NOTE(review): confirm that element actually carries 'cx'.
        raw_width = slide.shapes._spTree.getparent().getparent().attrib.get('cx')
        slide_width = Emu(int(raw_width)) if raw_width else Emu(16256000)

    footer_present = any(
        shape.has_text_frame and '数据来源' in shape.text_frame.text
        for shape in slide.shapes
    )
    if footer_present:
        return

    strip = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, 0, Emu(8824000), slide_width, Emu(320000))
    strip.fill.solid()
    strip.fill.fore_color.rgb = bar_color
    strip.line.fill.background()

    label_box = slide.shapes.add_textbox(Emu(762000), Emu(8824000), Emu(14000000), Emu(320000))
    label = label_box.text_frame.paragraphs[0]
    label.text = footer_text
    label.font.size = Pt(10)
    label.font.color.rgb = label_color
    label.font.name = '微软雅黑'
- def _ensure_word_wrap_all(slide, fonts: dict = None):
- """Enable word_wrap on all text frames in a slide."""
- fonts = fonts or {}
- body_font = fonts.get('body_font', '微软雅黑')
- for shape in slide.shapes:
- if shape.has_text_frame:
- shape.text_frame.word_wrap = True
- for para in shape.text_frame.paragraphs:
- for run in para.runs:
- run.font.name = body_font
- # ==============================================================================
- # MATH HELPERS
- # ==============================================================================
- def _pct_val(curr, prev):
- if prev and prev != 0:
- return (curr - prev) / prev * 100
- return None
- def _format_pct(pct, with_sign=True, suffix='%', zero_suffix=''):
- """Safely format a percentage value. Returns '—' if pct is None."""
- if pct is None:
- return '—'
- sign = '+' if with_sign and pct >= 0 else ''
- return f"{sign}{pct:.1f}{suffix}{zero_suffix}"
- def _pct_str(curr, prev):
- if prev and prev != 0:
- pct = round((curr - prev) / prev * 100, 1)
- sign = '+' if pct >= 0 else ''
- return f"{sign}{pct}% vs 上期"
- return "—"
- def _safe_div(a, b):
- return round(a / b, 1) if b else 0
- # ==============================================================================
- # DYNAMIC / UNIVERSAL REPORT BUILDER
- # ==============================================================================
def build_report(data_file: str, config: ReportConfig, output_path: str) -> str:
    """Build the report deck for ``data_file`` and save it to ``output_path``.

    The master template is opened, the data file is loaded and (when
    ``config.require_six_confirmations`` is set) validated, then one page
    builder is invoked for every selected page in ``config.pages``. Trend,
    distribution and ranking pages fall back to the generic analysis page
    when their builder returns a falsy value. Afterwards every slide is
    passed through ``_ensure_word_wrap_all``, ``_delete_template_slides`` is
    called with the template's original slide count, and the deck is saved.

    Args:
        data_file: Path of the data file handed to ``load_generic_excel``.
        config: Report configuration (pages, metrics, template settings).
        output_path: Destination path for the generated presentation.

    Returns:
        ``output_path``.

    Raises:
        ValueError: If the six-confirmation check reports issues, or a
            selected page has an unsupported ``page_type``.
    """
    master_path = _resolve_master_template(config)
    prs = Presentation(master_path)
    # Slide count of the untouched template; handed to
    # _delete_template_slides once the generated pages exist.
    original_slide_count = len(prs.slides)
    df = load_generic_excel(data_file)
    if config.require_six_confirmations:
        confirmation_issues = validate_six_confirmations(config, list(df.columns))
        if confirmation_issues:
            raise ValueError('生成前六项确认未通过:\n- ' + '\n- '.join(confirmation_issues))
    data_profile = config.data_profiling or {}
    # Resolve template profile and dynamic layout context
    template_profile = _resolve_template_profile(config)
    ctx = LayoutContext.from_template_profile(template_profile)
    colors = _resolve_colors(config, template_profile)
    fonts = _resolve_fonts(config, template_profile)
    metrics = calc_generic_metrics(df, config)
    content_top = template_profile.get_content_top('content')
    for page_def in config.pages:
        if not page_def.selected:
            continue
        if page_def.page_type == 'cover':
            _build_cover_page(prs, config, colors, fonts, template_profile)
        elif page_def.page_type == 'toc':
            _build_toc_page(prs, config, colors, fonts, template_profile)
        elif page_def.page_type == 'kpi_overview':
            _build_kpi_overview_page(prs, config, metrics, colors, fonts, content_top, df, data_profile, ctx)
        elif page_def.page_type == 'trend':
            if not _build_trend_page(prs, config, df, data_profile, colors, fonts, content_top, ctx):
                _build_fallback_analysis_page(prs, config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx)
        elif page_def.page_type == 'distribution':
            if not _build_distribution_page(prs, config, df, data_profile, colors, fonts, content_top, page_def, ctx):
                _build_fallback_analysis_page(prs, config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx)
        elif page_def.page_type == 'ranking':
            if not _build_ranking_page(prs, config, df, data_profile, colors, fonts, content_top, page_def, ctx):
                _build_fallback_analysis_page(prs, config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx)
        elif page_def.page_type == 'summary':
            _build_summary_page(prs, config, metrics, data_profile, colors, fonts, content_top, page_def, ctx)
        elif _is_forecast_page_type(page_def.page_type):
            _build_forecast_page(prs, config, df, data_profile, metrics, colors, fonts, content_top, page_def, ctx)
        elif page_def.page_type == 'end':
            _build_end_page(prs, config, colors, fonts, template_profile)
        else:
            raise ValueError(f'不支持的页面类型: {page_def.page_type}(页面: {page_def.title})')
    for slide in prs.slides:
        _ensure_word_wrap_all(slide, fonts)
    _delete_template_slides(prs, original_slide_count)
    prs.save(output_path)
    print(f"Report saved: {output_path}")
    return output_path
def quality_assured_build(data_file: str, config: ReportConfig,
                          output_path: str) -> tuple:
    """Build the report through ``QualityInspector.quality_assured_build``.

    When ``config.require_six_confirmations`` is set, the data file is
    loaded and validated first. The inspector is then created from the
    resolved colours and layout context and given a build callback that
    forwards to ``_build_without_save`` with ``config`` as the original
    configuration.

    Args:
        data_file: Path of the data file.
        config: Report configuration.
        output_path: Destination path passed on to the inspector.

    Returns:
        Whatever ``QualityInspector.quality_assured_build`` returns.

    Raises:
        ValueError: If the six-confirmation check reports issues.
    """
    if config.require_six_confirmations:
        frame = load_generic_excel(data_file)
        problems = validate_six_confirmations(config, list(frame.columns))
        if problems:
            raise ValueError('生成前六项确认未通过:\n- ' + '\n- '.join(problems))

    profile = _resolve_template_profile(config)
    layout_ctx = LayoutContext.from_template_profile(profile)
    palette = _resolve_colors(config, profile)

    def _render(data, candidate_config):
        # The inspector supplies its own config argument; the original
        # config is forwarded alongside it.
        return _build_without_save(data, candidate_config, config)

    inspector = QualityInspector(palette, layout_ctx)
    return inspector.quality_assured_build(
        build_fn=_render,
        data=data_file,
        config=config,
        output_path=output_path,
    )
def _build_without_save(data_file, temp_config, original_config):
    """Build the deck in memory and return it without saving.

    Opens the master template, loads the data file and runs one page builder
    per selected page of ``original_config.pages``. Trend, distribution and
    ranking pages fall back to the generic analysis page when their builder
    returns a falsy value. Finally every slide goes through
    ``_ensure_word_wrap_all`` and ``_delete_template_slides`` is called with
    the template's original slide count.

    Args:
        data_file: Path of the data file handed to ``load_generic_excel``.
        temp_config: Not read by this function.
            NOTE(review): every lookup uses ``original_config``; confirm that
            ignoring this argument is intentional.
        original_config: Report configuration used for all pages.

    Returns:
        The populated presentation object.

    Raises:
        ValueError: If a selected page has an unsupported ``page_type``.
    """
    from pptx import Presentation as Prs

    cfg = original_config
    prs = Prs(_resolve_master_template(cfg))
    template_slide_total = len(prs.slides)
    df = load_generic_excel(data_file)
    data_profile = cfg.data_profiling or {}
    template_profile = _resolve_template_profile(cfg)
    ctx = LayoutContext.from_template_profile(template_profile)
    colors = _resolve_colors(cfg, template_profile)
    fonts = _resolve_fonts(cfg, template_profile)
    metrics = calc_generic_metrics(df, cfg)
    content_top = template_profile.get_content_top('content')

    for page_def in cfg.pages:
        if not page_def.selected:
            continue
        kind = page_def.page_type
        if kind == 'cover':
            _build_cover_page(prs, cfg, colors, fonts, template_profile)
        elif kind == 'kpi_overview':
            _build_kpi_overview_page(prs, cfg, metrics, colors, fonts, content_top, df, data_profile, ctx)
        elif kind in ('trend', 'distribution', 'ranking'):
            # Chart pages report success; a falsy result triggers the
            # generic fallback page.
            if kind == 'trend':
                built = _build_trend_page(prs, cfg, df, data_profile, colors, fonts, content_top, ctx)
            elif kind == 'distribution':
                built = _build_distribution_page(prs, cfg, df, data_profile, colors, fonts, content_top, page_def, ctx)
            else:
                built = _build_ranking_page(prs, cfg, df, data_profile, colors, fonts, content_top, page_def, ctx)
            if not built:
                _build_fallback_analysis_page(prs, cfg, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx)
        elif kind == 'summary':
            _build_summary_page(prs, cfg, metrics, data_profile, colors, fonts, content_top, page_def, ctx)
        elif _is_forecast_page_type(kind):
            _build_forecast_page(prs, cfg, df, data_profile, metrics, colors, fonts, content_top, page_def, ctx)
        elif kind == 'end':
            _build_end_page(prs, cfg, colors, fonts, template_profile)
        elif kind == 'toc':
            _build_toc_page(prs, cfg, colors, fonts, template_profile)
        else:
            raise ValueError(f'不支持的页面类型: {page_def.page_type}(页面: {page_def.title})')

    for slide in prs.slides:
        _ensure_word_wrap_all(slide, fonts)
    _delete_template_slides(prs, template_slide_total)
    return prs
def _build_cover_page(prs, config, colors, fonts, template_profile):
    """Create the cover slide from the template's ``cover`` master.

    Fills the cover placeholders from ``config``, removes empty cover KPI
    placeholders and adds a footer reading ``1/<total>``, where ``total`` is
    the number of selected pages (or all pages when none is selected).

    Args:
        prs: Presentation receiving the slide.
        config: Report configuration supplying title, period and source.
        colors: Palette dict forwarded to the footer helper.
        fonts: Font mapping forwarded to the placeholder replacement.
        template_profile: Template profile used to duplicate the master.
    """
    slide = _duplicate_master_slide(prs, template_profile, 'cover')

    # Prefer the configured period string; otherwise format the first date
    # of the configured range.
    cover_date = config.period_str or config.date_range[0].strftime('%Y年%m月%d日')
    replacements = {
        '{report_title}': config.title,
        '{report_type}': '数据报告',
        '{date}': cover_date,
        '{department}': config.source_label,
        '{period}': config.period_str,
        '{gen_time}': datetime.now().strftime('%Y-%m-%d %H:%M'),
    }
    _replace_all_placeholders(slide, replacements, fonts)
    _remove_empty_cover_kpi_placeholders(slide)

    selected_total = sum(1 for page in config.pages if page.selected)
    total = selected_total or len(config.pages)
    footer = f'数据来源:{config.source_label} | 1/{total}'
    _add_footer_if_missing(slide, footer, slide_width=prs.slide_width, colors=colors)
def _build_fallback_analysis_page(prs, config, page_def, df, profile, metrics, colors, fonts, content_top, ctx=None):
    """
    Fallback page builder: generates analysis text from available data
    when the primary page type cannot produce content (e.g. no time columns
    for trend, no category columns for distribution).

    Insight blocks are assembled from the numeric and category columns listed
    in ``profile``. When a numeric column and at least one row exist, a bar
    chart of the first ten non-null values of the first numeric column is
    placed on the left and the insights on the right; otherwise the insights
    use the full-width zone.

    NOTE(review): the nesting of two statements (marked below) is a
    best-guess reading; confirm against the canonical file.
    """
    slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content")
    # Use the page's own title when it has one, else derive one from the report title.
    page_title = page_def.title if page_def and page_def.title else f'{config.title}数据分析'
    _replace_all_placeholders(slide, {
        '{report_title}': config.title,
        '{date}': config.period_str,
        '{page_title}': page_title,
        '{source}': config.source_label,
        '{period}': '',
        '{page_num}': '',
    }, fonts)
    num_cols = profile.get('numeric_columns', [])
    cat_cols = profile.get('category_columns', [])
    insight_items = []
    # Blocks 1-2: descriptive statistics of the first numeric column.
    if num_cols:
        top_metric = num_cols[0]
        top_name = top_metric.get('inferred_label', top_metric['column_name'])
        top_vals = df[top_metric['column_name']].dropna()
        if len(top_vals) > 0:
            mean_val = top_vals.mean()
            max_val = top_vals.max()
            min_val = top_vals.min()
            median_val = top_vals.median()
            total_val = top_vals.sum()
            insight_items.append({
                'title': f'{top_name}整体概览',
                'content': f'报告周期内,{top_name}统计数据共包含 {len(top_vals)} 条有效记录。'
                           f'总和为 {total_val:,.0f},平均值为 {mean_val:,.2f},中位数为 {median_val:,.2f}。'
                           f'最大值为 {max_val:,.2f},最小值为 {min_val:,.2f}。'
                           f'{"数据波动范围较大,最大值与最小值差距显著,说明不同条目间差异明显,建议深入分析极端值成因" if min_val > 0 and max_val / max(min_val, 1) > 100 else "数据整体分布较为均衡,波动性在合理范围内"}。'
                           f'中位数与平均值的偏差反映了数据的{"右偏分布(少数大值拉高了均值),说明存在显著头部效应" if median_val < mean_val * 0.8 else "左偏分布" if median_val > mean_val * 1.2 else "较为对称,数据呈正态分布趋势"}。',
            })
            insight_items.append({
                'title': f'{top_name}分段分析',
                'content': f'对 {top_name} 进行四分段统计:上四分位数(25%数据高于此值)为 {top_vals.quantile(0.75):,.2f},'
                           f'下四分位数(25%数据低于此值)为 {top_vals.quantile(0.25):,.2f},'
                           f'四分位距(IQR)为 {top_vals.quantile(0.75) - top_vals.quantile(0.25):,.2f}。'
                           f'{"IQR较大,数据分布较为离散,不同类别的表现差异明显,需关注尾部类别的提升空间" if (top_vals.quantile(0.75) - top_vals.quantile(0.25)) > abs(mean_val) * 0.5 else "IQR在合理范围内,数据集中度较好"}。'
                           f'建议按四分位将数据分为四组,重点跟踪上四分位组的表现,识别可复制的成功因素。',
            })
    # Block 3: coverage of the first category column.
    if cat_cols and num_cols:
        cat = cat_cols[0]
        cat_name = cat.get('inferred_label', cat['column_name'])
        num = num_cols[0]
        num_name = num.get('inferred_label', num['column_name'])
        cat_unique = df[cat['column_name']].dropna().nunique()
        insight_items.append({
            'title': f'{cat_name}分类覆盖分析',
            'content': f'数据共覆盖 {cat_unique} 个不同的{cat_name},在 {num_name} 维度上呈现差异化分布。'
                       f'不同{cat_name}对整体{num_name}的贡献度各异,建议按贡献度大小将{cat_name}进行分类管理。'
                       f'高贡献类别应重点维护和深度挖掘,中等贡献类别需持续培育和资源投入,'
                       f'低贡献类别可评估其战略价值,适当调整投入节奏。建议建立分类分级管理体系,'
                       f'每月跟踪各类别的变化趋势和占比波动。',
        })
    # Block 4: ratio of the column sums of the first two numeric columns.
    if len(num_cols) >= 2:
        num1 = num_cols[0]
        num2 = num_cols[1]
        # The denominator is floored at 1, so a zero or negative sum does not divide by zero.
        ratio = df[num1['column_name']].sum() / max(df[num2['column_name']].sum(), 1)
        insight_items.append({
            'title': '关键比率与效率指标',
            'content': f'{num1.get("inferred_label", num1["column_name"])}与{num2.get("inferred_label", num2["column_name"])}的比率为 {ratio:.2f},'
                       f'该比率是衡量业务效率的重要参考指标。'
                       f'{"比率处于较高水平,表明单位投入产出效率良好" if ratio > 1 else "比率偏低,单位投入的产出效益有限,存在效率提升空间"}。'
                       f'建议将此比率纳入定期监控指标,按月环比追踪变化趋势,'
                       f'并针对低比率项目制定专项提升计划,分析制约因素和可优化环节。',
        })
    # Block 5: generic data-quality note.
    # NOTE(review): reconstructed as unconditional, which makes the
    # `if not insight_items` branch below unreachable. If this append
    # belongs inside the `len(num_cols) >= 2` branch, indent it. Confirm.
    insight_items.append({
        'title': '数据质量与代表性评估',
        'content': f'本报告基于共 {len(df)} 条记录进行分析,数据覆盖范围包括上述多个维度。'
                   f'建议在后续周期中持续关注数据完整性和及时性,确保分析结果准确反映业务真实情况。'
                   f'对于数据量较小或集中度较高的维度,应结合业务判断进行解读,避免以偏概全。'
                   f'同时建议补充更多维度的数据(如时间序列数据、竞品对标数据等),'
                   f'以支撑更全面的分析视角和更精准的决策建议。',
    })
    if not insight_items:
        insight_items = [{
            'title': '数据总览',
            'content': f'当前数据集包含 {len(df)} 条记录,{len(df.columns)} 个字段。'
                       f'数值字段 {len(num_cols)} 个,分类字段 {len(cat_cols)} 个。'
                       f'建议结合业务场景规划具体的数据分析维度,'
                       f'以生成更具洞察力和指导意义的数据报告。',
        }]
    if num_cols and len(df) > 0:
        # Chart-left / text-right layout.
        top_col = num_cols[0]
        chart_zone = get_chart_left_zone(content_top, 0.4, ctx=ctx)
        text_zone = get_insight_right_zone(content_top, 0.4, ctx=ctx)
        sample_vals = df[top_col['column_name']].dropna().head(10).tolist()
        sample_labels = [f'记录{i+1}' for i in range(len(sample_vals))]
        if sample_vals:
            add_bar_chart(slide, sample_labels, sample_vals,
                          Emu(chart_zone.x), Emu(chart_zone.y),
                          Emu(chart_zone.width), Emu(chart_zone.height),
                          series_name=top_col.get('inferred_label', top_col['column_name']),
                          color=colors.get('primary'))
        # NOTE(review): reconstructed outside `if sample_vals` so the text is
        # still drawn when the column has no non-null values. Confirm.
        _add_structured_insight(slide, insight_items,
                                Emu(text_zone.x), Emu(text_zone.y),
                                Emu(text_zone.width), Emu(text_zone.height))
    else:
        # No numeric column or no rows: text only, full width.
        zone = get_full_width_zone(content_top, ctx=ctx)
        _add_structured_insight(slide, insight_items,
                                Emu(zone.x), Emu(zone.y),
                                Emu(zone.width), Emu(zone.height))
def _build_toc_page(prs, config, colors, fonts, template_profile):
    """Create the table-of-contents slide from the template's ``toc`` master.

    Fills the header placeholders and up to six chapter entries, one per
    selected page that is not a cover, TOC or end page. The page indicator
    written into ``{period}`` is ``2/<total>``, where ``total`` counts the
    selected pages (all pages when none is selected), matching the cover
    page's footer.

    Args:
        prs: Presentation receiving the slide.
        config: Report configuration supplying title, period, source, pages.
        colors: Accepted but not read by this function.
        fonts: Font mapping forwarded to the placeholder replacement.
        template_profile: Template profile used to duplicate the master.
    """
    slide = _duplicate_master_slide(prs, template_profile, 'toc')
    selected_pages = [p for p in config.pages if p.selected]
    active_pages = [p for p in selected_pages if p.page_type not in ('cover', 'toc', 'end')]
    # Count only pages that are actually generated, so the denominator
    # agrees with the cover footer.
    total = len(selected_pages) or len(config.pages)
    _replace_all_placeholders(slide, {
        '{report_title}': config.title,
        '{date}': config.period_str,
        '{page_title}': '目录',
        '{source}': config.source_label,
        '{period}': f'2/{total}',
        '{page_num}': '',
    }, fonts)
    # Only six chapter entries are filled.
    for i, page in enumerate(active_pages[:6], 1):
        _replace_placeholder(slide, f'{{chapter{i}_title}}', page.title, fonts)
        _replace_placeholder(slide, f'{{chapter{i}_desc}}', page.conclusion_title or page.title, fonts)
def _build_kpi_overview_page(prs, config, metrics, colors, fonts, content_top, df=None, profile=None, ctx=None):
    """Build the KPI overview slide: KPI cards on top, insight text below.

    Selected metrics of type ``kpi`` become cards: the regular card layout
    for up to three KPIs, otherwise the compact layout for the first nine.
    Insight blocks are then composed from the metric values and the data
    profile and placed in the space between the cards and the footer.
    Nothing beyond the placeholder replacement is drawn when no KPI metric
    is selected.

    NOTE(review): the pairing of one ``else`` (marked below) is a
    best-guess reading; confirm against the canonical file.
    """
    slide = _duplicate_master_slide(prs, _resolve_template_profile(config), 'content')
    page_title = '核心指标概览'
    _replace_all_placeholders(slide, {
        '{report_title}': config.title,
        '{date}': config.period_str,
        '{page_title}': page_title,
        '{source}': config.source_label,
        '{period}': '',
        '{page_num}': '',
    }, fonts)
    kpi_items = []
    primary_vals = {}  # filled below but not read again in this function
    all_vals = {}
    for md in config.metrics:
        if md.metric_type.value == 'kpi' and md.selected:
            val = metrics.get(md.name, 0)
            # Numbers use the metric's format spec; anything else is stringified.
            display_val = format(val, md.format_spec) if isinstance(val, (int, float)) else str(val)
            kpi_items.append({
                'label': md.label,
                'value': display_val,
                'unit': md.unit,
                'change': '',
                'sub': '',
            })
            if md.is_primary:
                primary_vals[md.label] = val
            all_vals[md.label] = val
    if kpi_items:
        kpi_count = len(kpi_items)
        if kpi_count <= 3:
            _add_kpi_cards(slide, kpi_items, start_y=Emu(content_top))
        else:
            # The compact grid shows at most nine KPIs; cards get shorter beyond six.
            shown_kpis = kpi_items[:9]
            compact_card_h = Emu(1780000) if len(shown_kpis) <= 6 else Emu(1600000)
            kpi_bottom = _add_compact_kpi_cards(
                slide,
                shown_kpis,
                start_y=Emu(content_top),
                card_h=compact_card_h,
                gap_y=Emu(220000),
            )
        insight_items = []
        kpi_names = [m.label for m in config.metrics if m.selected]
        kpi_str = "、".join(kpi_names[:6]) if kpi_names else "各指标"
        if len(kpi_names) > 6:
            kpi_str += f'等{len(kpi_names)}项'
        primary_kpis = [m for m in config.metrics if m.is_primary and m.selected]
        if not primary_kpis:
            # No metric flagged primary: use the first three selected ones.
            primary_kpis = [m for m in config.metrics if m.selected][:3]
        kpi_detail_parts = []
        for i, pk in enumerate(primary_kpis):
            val = all_vals.get(pk.label, 0)
            unit_str = pk.unit if pk.unit else ''
            display_val = format(val, pk.format_spec) if isinstance(val, (int, float)) else str(val)
            kpi_detail_parts.append(f'{pk.label}: {display_val}{unit_str}')
        insight_items.append({
            'title': '核心数据概览',
            'content': f'本期报告涵盖 {kpi_str} 共 {len(kpi_names)} 项核心指标。'
                       f'{";".join(kpi_detail_parts[:4])}。'
                       f'其中{"、".join(p.label for p in primary_kpis[:3])}为本次分析的重点关注指标。'
                       f'建议将这些指标与历史同期数据进行纵向对比,以及与行业基准进行横向对标,以全面评估当前业务健康度。'
                       f'对于波动较大的指标,需深入追溯其背后的业务动因,判断是否为趋势性变化还是季节性波动。',
        })
        cat_cols = profile.get('category_columns', []) if profile else []
        num_cols = profile.get('numeric_columns', []) if profile else []  # not read again below
        total_rows = profile.get('total_rows', 0) if profile else 0
        if cat_cols:
            top_cats = [c.get('inferred_label', c.get('column_name', '')) for c in cat_cols[:3]]
            cat_details = []
            for c in cat_cols[:3]:
                uc = c.get('unique_count', 'N/A')
                cat_details.append(f'{c.get("inferred_label", c.get("column_name", ""))}({uc}类)')
            insight_items.append({
                'title': '数据覆盖与维度分析',
                'content': f'数据覆盖 {total_rows:,} 条记录,包含 {", ".join(cat_details)} 等多个分析维度。'
                           f'丰富的维度数据支持从 {", ".join(top_cats)} 等角度进行多维度联动分析。'
                           f'建议关注各维度下的数据分布特征,识别高贡献或异常的分类群体,'
                           f'针对性地分析不同维度的表现差异,为精细化运营和数据驱动决策提供支撑。',
            })
        if len(config.metrics) >= 3:
            compare_items = []
            # Pair primary KPI 1 with 2 and 2 with 3; pairs with a falsy value are skipped.
            for a, b in zip(primary_kpis[:2], primary_kpis[1:3]):
                va = all_vals.get(a.label, 0)
                vb = all_vals.get(b.label, 0)
                if va and vb:
                    ratio = round(va / vb, 2) if vb else 0
                    compare_items.append(f'{a.label}与{b.label}的比值为 {ratio}')
            # NOTE(review): this else is reconstructed as the fallback for
            # "no ratio could be computed"; it could instead pair with the
            # outer `len(config.metrics) >= 3` test. Confirm.
            if compare_items:
                insight_items.append({
                    'title': '指标间关联分析',
                    'content': f'{";".join(compare_items)}。通过指标间的比值关系可以发现数据的内在规律,'
                               f'比值异常偏离正常区间时需重点关注。建议进一步计算各指标与核心业务目标之间的相关系数,'
                               f'量化不同指标对业务目标的影响力排序,将有限资源聚焦在驱动型指标上。',
                })
            else:
                insight_items.append({
                    'title': '指标间关联分析',
                    'content': f'本期核心指标包括 {", ".join(p.label for p in primary_kpis[:3])}。'
                               f'建议通过散点图或相关系数分析探索指标间的线性/非线性关系,识别是否存在协同或对冲效应。'
                               f'同时建议按时间序列分析各指标的周期性规律,为资源配置和预测提供依据。',
                })
        insight_items.append({
            'title': '关键发现与行动建议',
            'content': f'综合分析 {len(kpi_names)} 项指标,建议重点关注以下方向:'
                       f'(1) 定期监控核心指标的趋势变化,建立异常预警机制,当指标偏离正常区间时及时触发排查流程;'
                       f'(2) 深化多维度交叉分析,挖掘不同群体间的结构差异,识别增长机会和风险点;'
                       f'(3) 结合业务经验和外部数据,验证数据指标的准确性和合理性;'
                       f'(4) 将分析结论转化为可执行的具体行动项,明确责任人和时间节点,建立跟踪闭环机制。',
        })
        if kpi_count > 9:
            # KPIs beyond the ninth get no card; name up to six of them in the text.
            extra_names = '、'.join(k['label'] for k in kpi_items[9:15])
            insight_items.append({
                'title': '更多核心指标说明',
                'content': f'本页优先展示前 9 个核心指标,其余 {kpi_count - 9} 个指标(如 {extra_names})'
                           f'已纳入综合分析口径。建议在页面结构确认阶段将核心指标按“结果指标、过程指标、风险指标”分组,'
                           f'必要时拆分为多页 KPI 看板,以保证每个指标都有足够的解释空间。',
            })
        # Work out where the card area ends so the insight text starts below it.
        if kpi_count <= 3:
            kpi_grid_bottom = int(content_top) + Emu(3048000)
        else:
            kpi_grid_bottom = max(kpi_bottom, int(content_top) + Emu(1780000))
        insight_zone_y = kpi_grid_bottom + Emu(254000)
        # FOOTER_TOP, CONTENT_LEFT and SLIDE_WIDTH are defined outside this function.
        remaining_height = int(FOOTER_TOP - insight_zone_y - Emu(140000))
        if remaining_height >= Emu(950000):
            # Enough room: three insight blocks, or four when more than six KPIs are shown.
            if kpi_count <= 3:
                compact_items = insight_items[:3]
            else:
                compact_items = insight_items[:3] if kpi_count <= 6 else insight_items[:4]
            _add_structured_insight(slide, compact_items,
                                    Emu(CONTENT_LEFT), Emu(insight_zone_y),
                                    Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT), Emu(remaining_height),
                                    title_size=Pt(10), body_size=Pt(9), min_body_size=Pt(8))
        elif kpi_count > 3:
            # Tight layout: two blocks in smaller type, anchored just above the footer.
            fallback_top = max(insight_zone_y, int(FOOTER_TOP) - int(Emu(1250000)))
            fallback_height = int(FOOTER_TOP - fallback_top - Emu(120000))
            fallback_items = insight_items[:2]
            _add_structured_insight(slide, fallback_items,
                                    Emu(CONTENT_LEFT), Emu(fallback_top),
                                    Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT), Emu(max(fallback_height, Emu(850000))),
                                    title_size=Pt(9), body_size=Pt(8), min_body_size=Pt(7))
def _build_trend_page(prs, config, df, profile, colors, fonts, content_top, ctx=None):
    """Build the trend slide: line chart on the left, insight text on the right.

    The first time column and first numeric column listed in ``profile`` are
    charted. Whenever the page cannot be produced (no time or numeric
    column, or the trend calculation yields no dates or values) the
    duplicated slide is removed again and ``False`` is returned, so the
    caller can add a fallback page without leaving an orphan slide behind.

    Args:
        prs: Presentation receiving the slide.
        config: Report configuration supplying title, period and source.
        df: Data frame handed to ``calc_generic_trend``.
        profile: Data profile dict with ``time_columns`` and
            ``numeric_columns`` lists.
        colors: Palette dict; ``primary`` colours the line series.
        fonts: Font mapping forwarded to the placeholder replacement.
        content_top: Top of the content area, forwarded to the zone helpers.
        ctx: Optional layout context forwarded to the zone helpers.

    Returns:
        ``True`` when the slide was built, ``False`` otherwise.
    """
    slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content")
    time_cols = profile.get('time_columns', [])
    num_cols = profile.get('numeric_columns', [])
    if not time_cols or not num_cols:
        _remove_slide(prs, slide)
        return False
    time_col = time_cols[0]['column_name']
    metric_col = num_cols[0]['column_name']
    label = num_cols[0].get('inferred_label', metric_col)
    page_title = f'{label}趋势'
    _replace_all_placeholders(slide, {
        '{report_title}': config.title,
        '{date}': config.period_str,
        '{page_title}': page_title,
        '{source}': config.source_label,
        '{period}': '',
        '{page_num}': '',
    }, fonts)
    trend_data = calc_generic_trend(df, time_col, metric_col)
    dates = trend_data.get('dates')
    vals = trend_data.get('values')
    if not dates or not vals:
        # Nothing to chart: drop the slide created above, mirroring the
        # early exit, instead of leaving a titled but empty page.
        _remove_slide(prs, slide)
        return False

    chart_zone = get_chart_left_zone(content_top, 0.6, ctx=ctx)
    text_zone = get_insight_right_zone(content_top, 0.6, ctx=ctx)
    add_line_chart(slide, dates, vals,
                   Emu(chart_zone.x), Emu(chart_zone.y),
                   Emu(chart_zone.width), Emu(chart_zone.height),
                   series_name=label, color=colors.get('primary'))
    n = len(vals)
    first_v, last_v = vals[0], vals[-1]
    change = last_v - first_v
    # Relative change first -> last; 0 when the first value is zero.
    change_pct = round(change / first_v * 100, 1) if first_v else 0
    max_v = max(vals) if vals else 0
    min_v = min(vals) if vals else 0
    max_idx = vals.index(max_v) if vals else 0
    min_idx = vals.index(min_v) if vals else 0
    peak_date = dates[max_idx] if max_idx < len(dates) else 'N/A'
    trough_date = dates[min_idx] if min_idx < len(dates) else 'N/A'
    direction_text = '上升' if change > 0 else '下降' if change < 0 else '平稳'
    # Range as a percentage of the mean; 0 when the values sum to zero.
    volatility = round((max_v - min_v) / (sum(vals) / n) * 100, 1) if sum(vals) else 0 if vals else 0
    insight_items = [
        {
            'title': f'{label}整体趋势概况',
            'content': f'在报告周期内共采集 {n} 个时间点的数据,{label}'
                       f'从 {dates[0]} 的 {first_v:,.0f} 变动至 {dates[-1]} 的 {last_v:,.0f},'
                       f'整体{direction_text}{abs(change_pct):.1f}%,{direction_text}趋势{"显著" if abs(change_pct) > 20 else "温和" if abs(change_pct) > 5 else "较为平缓"}。'
                       f'数据变化轨迹反映出{"持续向好的增长态势" if direction_text == "上升" and abs(change_pct) > 10 else "温和改善的积极信号" if direction_text == "上升" else "回调盘整的阶段性特征" if direction_text == "下降" else "平稳运行的基本状态"},'
                       f'建议将当前趋势与业务目标和历史同期数据进行交叉对比,评估达成全年目标的可行性。如需更详尽的趋势分析,建议增加数据采集频度和时间跨度。',
        },
        {
            'title': '峰值与谷值分析',
            'content': f'周期内最高值出现在 {peak_date},为 {max_v:,.0f};'
                       f'最低值出现在 {trough_date},为 {min_v:,.0f}。'
                       f'极值差距 {max_v - min_v:,.0f},波动幅度 {volatility}%,'
                       f'{"波动显著,需关注异常节点的驱动因素,建议排查是否受节假日、促销活动、外部政策变化等因素影响" if volatility > 30 else "波动在可控范围内,但仍需对异常波动保持警觉"}{"." if volatility > 30 else ",建立异常值的快速预警和响应机制。"}',
        },
        {
            'title': '趋势阶段性特征',
            'content': f'前半程({dates[0]}至{dates[min(n//2, n-1)]})'
                       f'{"呈上升态势" if sum(vals[:n//2]) < sum(vals[n//2:]) else "呈下降态势" if sum(vals[:n//2]) > sum(vals[n//2:]) else "基本持平"},'
                       f'后半程均值为 {sum(vals[n//2:])/(n-n//2):,.0f}。建议结合业务事件节点深入分析拐点成因,'
                       f'重点关注是否存在季节性波动、周期性波动或外部冲击等结构性因素。'
                       f'若数据量较少,趋势解读应以业务经验为主,辅以数据验证。',
        },
        {
            'title': '业务启示',
            'content': f'综合趋势分析,当前数据反映出{"积极向好的发展态势" if direction_text == "上升" and abs(change_pct) > 10 else "温和稳定的运行动态" if abs(change_pct) <= 10 else "需重点关注的下行风险"}。'
                       f'建议{"加大资源投入以把握增长机遇,同时关注增速的可持续性,避免盲目扩张" if direction_text == "上升" else "排查下降原因并制定针对性应对措施,分析是短期波动还是长期趋势转折" if direction_text == "下降" else "保持当前运营节奏,同时关注潜在变化信号,适时调整策略" if direction_text == "平稳" else "继续观察数据走势"}。'
                       f'建议将数据与业务KPI目标进行对标分析,定期回顾趋势变化。',
        },
    ]
    _add_structured_insight(slide, insight_items,
                            Emu(text_zone.x), Emu(text_zone.y),
                            Emu(text_zone.width), Emu(text_zone.height))
    return True
def _build_distribution_page(prs, config, df, profile, colors, fonts, content_top, page_def=None, ctx=None):
    """Build the category-distribution slide: chart on the left, insights on the right.

    The category and metric columns come from the first element of ``page_def``
    when present, otherwise from the first category / numeric column in ``profile``.

    Args:
        prs: Presentation the slide is added to.
        config: Report config; ``title``, ``period_str`` and ``source_label`` fill
            the slide placeholders.
        df: Source data passed to ``calc_generic_distribution``.
        profile: Data profile dict with ``category_columns`` / ``numeric_columns``.
        colors: Palette dict; its ``series`` and ``primary`` entries colour the chart.
        fonts: Font settings forwarded to ``_replace_all_placeholders``.
        content_top: Top offset forwarded to the layout-zone helpers.
        page_def: Optional page definition providing ``title`` and ``elements``.
        ctx: Optional layout context forwarded to the layout-zone helpers.

    Returns:
        True when the chart and insights were added. False when the profile has no
        category columns (the duplicated slide is removed) or when the computed
        distribution has no categories (the slide is left in place).
    """
    slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content")
    cat_cols = profile.get('category_columns', [])
    num_cols = profile.get('numeric_columns', [])
    if not cat_cols:
        _remove_slide(prs, slide)
        return False

    elem = (page_def.elements or [{}])[0] if page_def else {}
    cat_col = elem.get('category') or cat_cols[0]['column_name']
    cat_label = elem.get('category_label') or next(
        (c.get('inferred_label', cat_col) for c in cat_cols if c['column_name'] == cat_col), cat_col)
    metric_col = elem.get('metric') or (num_cols[0]['column_name'] if num_cols else None)
    if elem.get('metric_label'):
        metric_label = elem.get('metric_label')
    elif metric_col:
        metric_label = next(
            (c.get('inferred_label', metric_col) for c in num_cols if c['column_name'] == metric_col), metric_col)
    else:
        metric_label = ''
    page_title = page_def.title if page_def and page_def.title else f'{cat_label}分布'
    _replace_all_placeholders(slide, {
        '{report_title}': config.title,
        '{date}': config.period_str,
        '{page_title}': page_title,
        '{source}': config.source_label,
        '{period}': '',
        '{page_num}': '',
    }, fonts)

    dist = calc_generic_distribution(df, cat_col, metric_col, top_n=8)
    if not dist.get('categories'):
        return False

    chart_zone = get_chart_left_zone(content_top, 0.55, ctx=ctx)
    text_zone = get_insight_right_zone(content_top, 0.55, ctx=ctx)
    if len(dist['categories']) <= 8:
        add_doughnut_chart(slide, dist['categories'], dist['values'],
                           Emu(chart_zone.x), Emu(chart_zone.y),
                           Emu(chart_zone.width), Emu(chart_zone.height),
                           colors=colors.get('series'))
    else:
        add_bar_chart(slide, dist['categories'], dist['values'],
                      Emu(chart_zone.x), Emu(chart_zone.y),
                      Emu(chart_zone.width), Emu(chart_zone.height),
                      series_name=metric_label, color=colors.get('primary'))

    cats, vals, pcts = dist['categories'], dist['values'], dist['percentages']
    top3_pct = sum(pcts[:3])
    top1_name, top1_val, top1_pct = cats[0], vals[0], pcts[0]
    # Fall back to a generic unit when no metric column is available.
    metric_suffix = metric_label if metric_label else '数量'

    coverage_text = "广泛" if len(cats) >= 8 else "较为丰富" if len(cats) >= 5 else "相对集中"
    if top3_pct > 70:
        concentration_text = "较高,呈现显著的头部集中特征"
    elif top3_pct > 50:
        concentration_text = "中等,呈现梯度递减分布"
    else:
        concentration_text = "较低,分布较为均衡"

    # Only quote a multiple of the runner-up when it is a positive number;
    # a zero runner-up would otherwise raise ZeroDivisionError.
    if len(cats) > 1 and vals[1] > 0:
        lead_text = f"是第二名{cats[1]}的{round(top1_val / vals[1], 1)}倍,优势极为显著"
    else:
        lead_text = "是该维度中最重要的类别"

    # Only claim "more than one third" when the leader's share supports it.
    if top1_pct > 100 / 3:
        share_text = f'该类别贡献了超过三分之一的{metric_suffix},是整体业务的基本盘和核心增长极。'
    else:
        share_text = f'该类别贡献了 {top1_pct:.1f}% 的{metric_suffix},是当前占比最高的类别。'

    insight_items = [
        {
            'title': f'{cat_label}分布概况',
            'content': f'共有 {len(cats)} 个不同的{cat_label},覆盖范围'
                       f'{coverage_text}。'
                       f'前3名合计占比 {top3_pct:.1f}%,集中度'
                       f'{concentration_text}。',
        },
        {
            'title': f'排名第一: {top1_name}',
            'content': f'{top1_name}以 {top1_val:,}{metric_suffix}(占比 {top1_pct:.1f}%)位居榜首,'
                       f'{lead_text}。'
                       f'{share_text}',
        },
    ]

    if len(vals) >= 3:
        top3_sum = sum(vals[:3])
        tail_sum = sum(vals[3:])
        tail_pct = sum(pcts[3:])
        if top3_pct > 70:
            shape_text = "头部集中型分布"
        elif top3_pct < 50:
            shape_text = "相对均衡分布"
        else:
            shape_text = "梯度递减型分布"
        insight_items.append({
            'title': '长尾分布特征',
            'content': f'前三名累计 {top3_sum:,}{metric_suffix}({top3_pct:.1f}%),'
                       f'剩余 {len(cats)-3} 个合计 {tail_sum:,}{metric_suffix}({tail_pct:.1f}%),'
                       f'属于{shape_text}。'
                       f'头部贡献了绝大部分{metric_label},尾部虽数量众多但单个贡献有限。',
        })

    if len(vals) > 1:
        avg_val = sum(vals) / len(vals)
        # Ratio of the leader to the mean of all categories (0 when the mean is 0).
        top_to_mean = round(vals[0] / avg_val, 1) if avg_val else 0
        median_idx = len(vals) // 2
        median_val = vals[median_idx]
        if top_to_mean > 3:
            spread_text = "差异化显著,资源集中度较高"
        elif top_to_mean > 1.5:
            spread_text = "差异化适中,各分类间差距可控"
        else:
            spread_text = "分布较为均匀"
        insight_items.append({
            'title': '差异化与离散度分析',
            'content': f'排名第一的{cat_label}{top1_name}的{metric_suffix}是全部分类均值的 {top_to_mean} 倍,'
                       f'中位数分类(第{median_idx+1}名)为 {median_val:,}{metric_suffix},'
                       f'表明该维度{spread_text}。'
                       f'头部与中位数的差距反映了{cat_label}维度上的分层特征,是运营资源重点倾斜方向。',
        })

    insight_items.append({
        'title': '业务启示',
        'content': f'建议重点关注 {cats[0]} 的增量拓展与存量维护,同时深入分析排名中位类别的提升空间。'
                   f'对于 {metric_label}贡献较小的尾部类别(如占比低于3%的分类),可评估是否优化资源配置、'
                   f'调整运营策略或将资源向高回报类别倾斜。结合{cat_label}维度持续跟踪分布变化,及时把握结构性机会。',
    })
    _add_structured_insight(slide, insight_items,
                            Emu(text_zone.x), Emu(text_zone.y),
                            Emu(text_zone.width), Emu(text_zone.height))
    return True
def _build_ranking_page(prs, config, df, profile, colors, fonts, content_top, page_def=None, ctx=None):
    """Build the TOP-ranking slide: bar chart on the left, insights on the right.

    The ranked column defaults to the last category column in ``profile`` and the
    metric to the first numeric column, unless the first element of ``page_def``
    overrides them. Insight wording adapts to the number of ranked rows, so
    rankings with fewer than 3, 5 or 10 entries no longer raise or report
    misleading averages / negative remainders.

    Args:
        prs: Presentation the slide is added to.
        config: Report config; ``title``, ``period_str`` and ``source_label`` fill
            the slide placeholders.
        df: Source data passed to ``calc_generic_ranking``.
        profile: Data profile dict with ``category_columns`` / ``numeric_columns``.
        colors: Palette dict; its ``primary`` entry colours the bar chart.
        fonts: Font settings forwarded to ``_replace_all_placeholders``.
        content_top: Top offset forwarded to the layout-zone helpers.
        page_def: Optional page definition providing ``title`` and ``elements``.
        ctx: Optional layout context forwarded to the layout-zone helpers.

    Returns:
        True when the chart and insights were added. False when the profile lacks
        category or numeric columns (the duplicated slide is removed) or when the
        ranking is empty (the slide is left in place).
    """
    slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content")
    cat_cols = profile.get('category_columns', [])
    num_cols = profile.get('numeric_columns', [])
    if not cat_cols or not num_cols:
        _remove_slide(prs, slide)
        return False

    elem = (page_def.elements or [{}])[0] if page_def else {}
    rank_col = elem.get('category') or cat_cols[-1]['column_name']
    rank_label = elem.get('category_label') or next(
        (c.get('inferred_label', rank_col) for c in cat_cols if c['column_name'] == rank_col), rank_col)
    metric_col = elem.get('metric') or num_cols[0]['column_name']
    metric_label = elem.get('metric_label') or next(
        (c.get('inferred_label', metric_col) for c in num_cols if c['column_name'] == metric_col), metric_col)
    page_title = page_def.title if page_def and page_def.title else f'{rank_label}TOP排行'
    _replace_all_placeholders(slide, {
        '{report_title}': config.title,
        '{date}': config.period_str,
        '{page_title}': page_title,
        '{source}': config.source_label,
        '{period}': '',
        '{page_num}': '',
    }, fonts)

    ranking = calc_generic_ranking(df, rank_col, metric_col, top_n=15)
    if not ranking:
        return False

    chart_zone = get_chart_left_zone(content_top, 0.6, ctx=ctx)
    text_zone = get_insight_right_zone(content_top, 0.6, ctx=ctx)
    names = [r['name'] for r in ranking]
    vals = [r['value'] for r in ranking]
    add_bar_chart(slide, names, vals,
                  Emu(chart_zone.x), Emu(chart_zone.y),
                  Emu(chart_zone.width), Emu(chart_zone.height),
                  series_name=metric_label, color=colors.get('primary'))

    count = len(ranking)
    total_val = sum(vals)

    def _share(part):
        # Percentage of the displayed total, 0 when the total is 0.
        return round(part / total_val * 100, 1) if total_val else 0

    # The "top 3" group shrinks when fewer than three rows were ranked.
    top3_names = [str(name) for name in names[:3]]
    top3_vals = vals[:3]
    top3_pct = [_share(v) for v in top3_vals]
    top3_pct_sum = sum(top3_pct)
    top_n = len(top3_names)
    top_group_text = '前三名' if top_n == 3 else f'前{top_n}名'
    top_names_text = '、'.join(top3_names)

    # Only claim "more than one third" when the numbers support it.
    if top3_pct_sum > 100 / 3:
        share_claim = f'{top_group_text}合计贡献超过总量的三分之一'
    else:
        share_claim = f'{top_group_text}合计贡献占总量的 {top3_pct_sum:.1f}%'
    if top3_pct_sum > 60:
        shape_text = "显著的头部集中特征"
    elif top3_pct_sum > 40:
        shape_text = "梯度递减的分布格局"
    else:
        shape_text = "相对均衡的分布态势"

    if count > 1 and top3_vals[1] > 0:
        lead_text = f"是第2名{top3_names[1]}的{round(top3_vals[0] / top3_vals[1], 1)}倍,领先优势显著"
    else:
        lead_text = "优势突出"

    top1_vs_last = round(vals[0] / vals[-1], 1) if count > 1 and vals[-1] > 0 else 'N/A'
    has_ratio = isinstance(top1_vs_last, float)
    if has_ratio and top1_vs_last > 10:
        gap_text = "头部效应极为明显,需关注是否因资源分配不均导致"
    elif has_ratio and top1_vs_last > 5:
        gap_text = "差距较为显著,存在分层优化的空间"
    else:
        gap_text = "梯度分布相对均衡,可针对性提升各层级表现"

    # Average over the rows that actually exist rather than a fixed 5.
    group = min(5, count)
    head_avg = sum(vals[:group]) / group
    tail_sum = sum(vals[-group:])
    tail_avg = tail_sum / group
    avg_gap = round(head_avg / tail_avg, 1) if tail_sum > 0 else "N/A"

    first_cut = min(5, count)
    second_cut = min(10, count)
    remaining = max(count - 10, 0)  # never report a negative remainder
    first_sum = sum(vals[:5])
    second_sum = sum(vals[:10])
    rest_sum = sum(vals[10:])

    insight_items = [
        {
            'title': f'{rank_label}TOP排行概况',
            'content': f'共展示 {count} 个排名项,前{top_n}名分别为 {top_names_text},'
                       f'累计 {sum(top3_vals):,}{metric_label}({top3_pct_sum:.1f}%)。'
                       f'{share_claim},表明{rank_label}维度呈现{shape_text}。',
        },
        {
            'title': f'榜首分析: {top3_names[0]}',
            'content': f'{top3_names[0]}以 {top3_vals[0]:,}{metric_label}(占比 {top3_pct[0]:.1f}%)位居榜首,'
                       f'{lead_text}。'
                       f'作为排名第一的{rank_label},其业绩表现直接影响整体业务大盘,建议重点关注其可持续增长策略。',
        },
        {
            'title': '头部与尾部差距分析',
            'content': f'第1名与第{count}名差距达 {top1_vs_last} 倍,'
                       f'前{group}名平均 {round(head_avg):,}{metric_label},'
                       f'后{group}名平均 {round(tail_avg):,}{metric_label},'
                       f'前后差距约 {avg_gap} 倍。'
                       f'{gap_text}。',
        },
        {
            'title': '累计贡献率与分层分析',
            'content': f'前{first_cut}名累计贡献 {first_sum:,}{metric_label}({_share(first_sum)}%),'
                       f'前{second_cut}名累计贡献 {second_sum:,}{metric_label}({_share(second_sum)}%),'
                       f'剩余 {remaining} 名合计贡献 {rest_sum:,}{metric_label}({_share(rest_sum)}%)。'
                       '从分层结构来看,可划分为三个梯队:第一梯队(前3名)为业绩核心贡献者,第二梯队(第4-8名)为稳定输出层,'
                       '第三梯队(第9名及以后)为潜力提升层。',
        },
        {
            'title': '业务建议',
            'content': f'重点关注 {", ".join(top3_names)} 的发展动态,提炼其成功经验并推广至团队。'
                       f'对于排名靠后的{rank_label},可评估其增长潜力与资源匹配度,'
                       f'识别可突破的增量空间。建议建立{rank_label}的绩效考核与激励体系,'
                       '通过标杆带动和梯队培养实现整体业绩提升。',
        },
    ]
    _add_structured_insight(slide, insight_items,
                            Emu(text_zone.x), Emu(text_zone.y),
                            Emu(text_zone.width), Emu(text_zone.height))
    return True
def _build_summary_page(prs, config, metrics, profile, colors, fonts, content_top, page_def=None, ctx=None):
    """Build the summary slide as one full-width block of structured insights.

    When the first element of ``page_def`` carries ``support_status``, the page
    summarises cross-department support requests (closure rate, status
    breakdown, per-department workload, follow-up advice). Otherwise the
    insights come from ``generate_generic_insights``. Either way the list is
    passed through ``_ensure_min_insight_items`` with ``min_count=2``.

    Args:
        prs: Presentation the slide is added to.
        config: Report config; ``title``, ``period_str`` and ``source_label`` fill
            the slide placeholders.
        metrics: Computed metrics forwarded to the generic insight helpers.
        profile: Data profile forwarded to the generic insight helpers.
        colors: Palette dict (not used by this builder).
        fonts: Font settings forwarded to ``_replace_all_placeholders``.
        content_top: Top offset forwarded to ``get_full_width_zone``.
        page_def: Optional page definition providing ``title`` and ``elements``.
        ctx: Optional layout context forwarded to ``get_full_width_zone``.
    """
    slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content")
    page_title = page_def.title if page_def and page_def.title else '总结与建议'
    _replace_all_placeholders(slide, {
        '{report_title}': config.title,
        '{date}': config.period_str,
        '{page_title}': page_title,
        '{source}': config.source_label,
        '{period}': '',
        '{page_num}': '',
    }, fonts)

    elem = (page_def.elements or [{}])[0] if page_def else {}
    if elem.get('support_status') is not None:
        status = elem['support_status']
        dept = elem.get('support_by_dept', {})
        sc = elem.get('support_count', 0)
        cc = elem.get('closed_count', 0)
        open_count = sc - cc
        close_rate = round(cc / sc * 100, 1) if sc else 0
        open_rate = round(open_count / sc * 100, 1) if sc else 0
        fully_closed = status.get('已闭环', 0)
        partial_closed = status.get('部分闭环', 0)
        not_closed = status.get('未闭环', 0)

        if close_rate >= 60:
            close_rate_text = "较高,跨部门协作效率良好"
        elif close_rate >= 30:
            close_rate_text = "处于中等水平,仍有提升空间"
        else:
            close_rate_text = "偏低,需重点关注闭环推动"

        insight_items = [{
            'title': '支持需求总览',
            'content': f'本期共产生 {sc} 项跨部门支持需求,其中已闭环 {cc} 项(含完全闭环 {fully_closed} 项、部分闭环 {partial_closed} 项),'
                       f'闭环率 {close_rate}%。未闭环需求 {open_count} 项(占比 {open_rate}%),'
                       f'闭环率{close_rate_text}。'
                       '跨部门支持是保障项目推进的重要环节,高效的闭环机制有助于提升客户满意度和订单转化效率。',
        }]

        if status:
            total_status = sum(status.values())

            def _status_pct(part):
                # Share of all status entries, 0 when there are none.
                return round(part / total_status * 100, 1) if total_status else 0

            fully_pct = _status_pct(fully_closed)
            partial_pct = _status_pct(partial_closed)
            not_pct = _status_pct(not_closed)
            if fully_pct >= 70:
                fully_text = "超过七成,闭环质量较高"
            elif fully_pct >= 40:
                fully_text = "处于中等水平"
            else:
                fully_text = "偏低,需提升闭环完整性"
            insight_items.append({
                'title': '闭环状态明细',
                'content': f'已闭环 {fully_closed} 项({fully_pct}%)、部分闭环 {partial_closed} 项({partial_pct}%)、'
                           f'未闭环 {not_closed} 项({not_pct}%)。'
                           f'其中完全闭环占比{fully_text}。'
                           '部分闭环表明需求已部分满足但未完全解决,需持续跟踪至彻底闭环。',
            })

        if dept:
            # Rank departments by workload so the "top" and "most requests"
            # statements hold whatever order the mapping arrives in. The sort
            # is stable, so an already-descending mapping keeps its order.
            dept_top = sorted(dept.items(), key=lambda kv: kv[1], reverse=True)[:5]
            dept_top_sum = sum(v for _, v in dept_top)
            dept_total = sum(dept.values())
            dept_top_pct = round(dept_top_sum / dept_total * 100, 1) if dept_total else 0
            dept_str = '、'.join(f'{k}({v}项)' for k, v in dept_top)
            avg_dept_load = round(dept_total / len(dept), 1)
            max_dept = dept_top[0]
            insight_items.append({
                'title': '支持部门工作量分布',
                'content': f'需求覆盖 {len(dept)} 个部门/科室,前{len(dept_top)}个部门承接 {dept_top_sum} 项({dept_top_pct}%)。'
                           f'Top部门:{dept_str}。其中{max_dept[0]}承接最多({max_dept[1]}项),'
                           f'平均每个部门承接 {avg_dept_load} 项。请关注工作量较大的部门资源分配是否充足,'
                           '同时识别是否有部门长期未被分配需求(可能表明资源未充分利用)。',
            })

        if open_count > 0:
            insight_items.append({
                'title': '未闭环需求跟进建议',
                'content': f'当前仍有 {open_count} 项需求未完成闭环。建议按以下策略推进:第一,按紧急程度和影响范围对未闭环需求进行优先级排序,'
                           '高优需求指定专人负责限期解决;第二,建立周度闭环跟踪机制,定期更新需求处理进展;'
                           '第三,对于跨部门协同的复杂需求,建议指定牵头部门统筹协调推进,'
                           '并建立问题升级机制(当需求超期未解决时自动升级至更高层级协调)。',
            })

        insight_items.append({
            'title': '闭环效率提升建议',
            'content': '为持续提升支持需求闭环效率,建议:一是建立标准化的需求流转流程,明确各环节责任人和响应时限;'
                       '二是定期开展闭环案例复盘,提炼最佳实践并在团队内推广;'
                       '三是建立闭环率考核指标,将闭环时效纳入部门协作评价体系,'
                       '通过制度保障跨部门协作的效率和质量。',
        })
    else:
        insight_items = generate_generic_insights(profile, metrics)

    insight_items = _ensure_min_insight_items(
        insight_items,
        profile=profile,
        metrics=metrics,
        min_count=2,
        context_label='总结页',
    )
    zone = get_full_width_zone(content_top, ctx=ctx)
    _add_structured_insight(slide, insight_items,
                            Emu(zone.x), Emu(zone.y),
                            Emu(zone.width), Emu(zone.height))
def _build_end_page(prs, config, colors, fonts, template_profile):
    """Build the closing slide from the template's "end" master.

    Adds a footer showing the data source and ``N/N`` (N = number of selected
    pages in ``config.pages``) when the slide has none, fills the title / date /
    department placeholders, and strips empty KPI placeholders.
    """
    slide = _duplicate_master_slide(prs, template_profile, "end")

    selected_count = sum(1 for page in config.pages if page.selected)
    footer_text = f'数据来源:{config.source_label} | {selected_count}/{selected_count}'
    _add_footer_if_missing(slide, footer_text, colors=colors)

    placeholder_values = {
        '{report_title}': config.title,
        '{date}': config.period_str or '',
        '{department}': config.source_label,
    }
    _replace_all_placeholders(slide, placeholder_values, fonts)

    # The end page reuses the cover layout, so clear its unused KPI placeholders too.
    _remove_empty_cover_kpi_placeholders(slide)
- def _find_metric_def_by_column(config, column):
- for metric in getattr(config, 'metrics', []) or []:
- if getattr(metric, 'column', None) == column:
- return metric
- return None
- def _forecast_items_from_page_def(page_def, df, profile, metrics, config):
- elem = (page_def.elements or [{}])[0] if page_def else {}
- items = []
- explicit_items = elem.get('forecast_items') or elem.get('goals')
- if explicit_items:
- for idx, item in enumerate(explicit_items[:6], 1):
- title = item.get('title') or item.get('label') or f'预测项{idx}'
- value = item.get('value') or item.get('number') or item.get('target') or 0
- items.append({'title': str(title), 'number': value})
- return items
- metric_names = elem.get('metrics') or elem.get('metric_names') or []
- for metric_name in metric_names[:6]:
- if metric_name in metrics:
- metric_def = next((m for m in getattr(config, 'metrics', []) if m.name == metric_name), None)
- label = metric_def.label if metric_def else str(metric_name)
- items.append({'title': label, 'number': metrics.get(metric_name, 0)})
- if items:
- return items
- num_cols = profile.get('numeric_columns', []) if profile else []
- keyword_cols = []
- keywords = ('预测', 'forecast', '目标', '计划', 'target', 'plan')
- for col in num_cols:
- col_name = col.get('column_name', '')
- label = col.get('inferred_label', col_name)
- if any(k in str(col_name).lower() or k in str(label).lower() for k in keywords):
- keyword_cols.append(col)
- for col in keyword_cols[:6]:
- col_name = col.get('column_name')
- metric_def = _find_metric_def_by_column(config, col_name)
- label = metric_def.label if metric_def else col.get('inferred_label', col_name)
- if metric_def and metric_def.name in metrics:
- value = metrics.get(metric_def.name, 0)
- elif col_name in df.columns:
- series = df[col_name].dropna()
- value = int(series.sum()) if not series.empty else 0
- else:
- value = 0
- items.append({'title': label, 'number': value})
- return items
- def _generic_forecast_insights(page_def, forecast_items, profile, metrics):
- title = page_def.title if page_def else '预测与行动计划'
- total = sum(float(item.get('number') or 0) for item in forecast_items)
- item_desc = '、'.join(f"{item['title']} {item.get('number', 0):,.0f}" for item in forecast_items[:5])
- if forecast_items:
- return [
- {
- 'title': f'{title}目标概览',
- 'content': f'本页围绕已确认的预测/计划指标展开,当前纳入 {len(forecast_items)} 个量化项,'
- f'合计规模约 {total:,.0f}。主要项目包括:{item_desc}。'
- f'这些指标应与本期实际结果、历史同期和资源约束一起判断,避免只看单点预测值。',
- },
- {
- 'title': '达成路径与风险控制',
- 'content': f'建议将预测目标拆解为“责任人、关键动作、时间节点、风险预案”四类信息。'
- f'如果目标值明显高于本期实际表现,应同步确认新增订单、库存、产能、交付或预算等支撑条件;'
- f'如果目标值低于当前趋势,则需要说明保守假设,防止业务团队误判资源投入强度。',
- },
- ]
- total_rows = profile.get('total_rows', 0) if profile else 0
- return [
- {
- 'title': f'{title}口径说明',
- 'content': f'当前页面未检测到明确的预测或目标数值字段,因此以数据画像和核心指标进行预测口径说明。'
- f'本期数据覆盖 {total_rows or "若干"} 条记录,建议在六项确认阶段明确预测指标、目标字段和统计口径,'
- f'例如下月交付、销售目标、库存消化、需求闭环或风险事件数量。',
- },
- {
- 'title': '补充数据建议',
- 'content': f'为了生成更可靠的预测页,建议在源数据中补充至少一个预测/目标字段,并提供历史实际值用于校准。'
- f'报告生成后应检查预测值是否与图表一致,文字洞察是否说明关键假设、达成路径和偏差处理机制。',
- },
- ]
def _build_forecast_page(prs, config, df, profile, metrics, colors, content_top, page_def=None,
                         fonts=None, ctx=None):
    """Build the forecast slide: column chart on the left, insights on the right.

    Forecast items come from ``_forecast_items_from_page_def``; when that yields
    nothing, ``metrics['next_month_goals']`` is used as a fallback. The chart is
    drawn only when there are items; the insight text is always added.

    Args:
        prs: Presentation the slide is added to; the slide is duplicated from
            ``prs.slides[1]``.
        config: Report config; ``title``, ``period_str`` and ``source_label`` fill
            the slide placeholders.
        df: Source data forwarded to the forecast-item lookup.
        profile: Data profile forwarded to the insight helpers.
        metrics: Mapping of computed metrics.
        colors: Palette dict; its ``accent`` entry colours the chart
            (``C_ACCENT`` when absent).
        content_top: Top offset forwarded to the layout-zone helpers.
        page_def: Optional page definition providing ``title`` and ``elements``.
        fonts: Font settings forwarded to ``_replace_all_placeholders``. The body
            referenced ``fonts`` without it being a parameter, which raises
            NameError unless a module global exists.
            NOTE(review): callers should pass ``fonts`` explicitly; confirm that
            ``_replace_all_placeholders`` tolerates ``None``.
        ctx: Optional layout context forwarded to the layout-zone helpers
            (previously referenced without being defined here).
    """
    slide = _duplicate_slide(prs, prs.slides[1])
    page_title = page_def.title if page_def and page_def.title else '预测与行动计划'
    _replace_all_placeholders(slide, {
        '{report_title}': config.title,
        '{date}': config.period_str,
        '{page_title}': page_title,
        '{source}': config.source_label,
        '{period}': '',
        '{page_num}': '',
    }, fonts)

    forecast_items = _forecast_items_from_page_def(page_def, df, profile, metrics, config)
    if not forecast_items and metrics.get('next_month_goals'):
        # Fall back to precomputed goals; only the text before ':' is the label.
        forecast_items = [
            {'title': g['title'].split(':')[0], 'number': g.get('number', 0)}
            for g in metrics.get('next_month_goals', [])[:6]
        ]

    chart_zone = get_chart_left_zone(content_top, 0.58, ctx=ctx)
    text_zone = get_insight_right_zone(content_top, 0.58, ctx=ctx)
    if forecast_items:
        shown_items = forecast_items[:6]
        names = [item['title'] for item in shown_items]
        values = [float(item.get('number') or 0) for item in shown_items]
        add_column_chart(slide, names, values,
                         Emu(chart_zone.x), Emu(chart_zone.y),
                         Emu(chart_zone.width), Emu(min(chart_zone.height, Emu(5100000))),
                         series_name='预测/目标值', color=colors.get('accent', C_ACCENT),
                         category_axis_title='预测项', value_axis_title='数值')

    insight_items = _generic_forecast_insights(page_def, forecast_items, profile, metrics)
    insight_items = _ensure_min_insight_items(insight_items, profile, metrics, context_label='预测页')
    _add_structured_insight(slide, insight_items,
                            Emu(text_zone.x), Emu(text_zone.y),
                            Emu(text_zone.width), Emu(text_zone.height))
- # ==============================================================================
- # CLI
- # ==============================================================================
if __name__ == '__main__':
    # Command-line entry: build a report from a data file and a config file.
    import sys

    if len(sys.argv) < 3:
        print("Usage: python ppt_builder.py <data_file> <config_file> [output_path]")
    else:
        from report_config import load_report_config

        data_file, config_file = sys.argv[1], sys.argv[2]
        # The output path is optional and defaults to the working directory.
        output = 'output.pptx' if len(sys.argv) < 4 else sys.argv[3]
        config = load_report_config(config_file)
        quality_assured_build(data_file, config, output)
|