""" PPT builder: assemble daily/weekly/monthly reports by duplicating master templates and filling charts, tables, KPI cards, and structured insight text blocks. Key design principle: Conclusion-first page titles + structured multi-paragraph insights (title + body per paragraph) aligned with reference PPT style. """ import copy import os import sys import re as re_module from pathlib import Path from datetime import datetime, timedelta sys.path.insert(0, str(Path(__file__).parent)) from pptx import Presentation from pptx.util import Emu, Pt from pptx.dml.color import RGBColor from pptx.enum.text import PP_ALIGN from pptx.enum.shapes import MSO_SHAPE from data_loader import load_generic_excel from metrics_calculator import ( calc_generic_metrics, calc_generic_trend, calc_generic_distribution, calc_generic_ranking, generate_generic_insights, ) from chart_factory import ( add_column_chart, add_bar_chart, add_line_chart, add_doughnut_chart, add_pie_chart, add_funnel_chart, add_horizontal_bar_chart, add_grouped_bar_chart, add_table ) from page_layouts import ( get_kpi_grid, get_chart_left_zone, get_insight_right_zone, get_full_width_zone, get_two_column_zones, LayoutContext, ) from template_parser import ( parse_template, get_builtin_template_profile, PLACEHOLDER_ALIASES, _matches_any_placeholder, ) from quality_inspector import QualityInspector from theme_manager import theme_to_rgb_colors, get_theme from report_config import ( ReportConfig, PageDef, MetricDef, PeriodType, ChartType, validate_six_confirmations, ) from quality_rules import SLIDE_WIDTH, SLIDE_HEIGHT, CONTENT_LEFT, CONTENT_TOP_BASE, FOOTER_TOP # Colors — aligned with reference design theme YAML C_PRIMARY = RGBColor(0x1E, 0x3A, 0x5F) C_ACCENT = RGBColor(0x10, 0xB9, 0x81) C_ACCENT_NEG = RGBColor(0xEF, 0x44, 0x44) C_SECONDARY = RGBColor(0x64, 0x74, 0x8B) C_DARK = RGBColor(0x1F, 0x3A, 0x5C) C_WHITE = RGBColor(0xFF, 0xFF, 0xFF) C_GRAY_BG = RGBColor(0xF2, 0xF2, 0xF2) C_TEXT = RGBColor(0x33, 0x33, 0x33) C_TEXT_GRAY = RGBColor(0x66, 0x66, 0x66) C_LINE = RGBColor(0xD9, 0xD9, 0xD9) C_CARD_BG = RGBColor(0xE7, 0xF0, 0xF7) C_GREEN = RGBColor(0x10, 0xB9, 0x81) C_RED = RGBColor(0xEF, 0x44, 0x44) C_ORANGE = RGBColor(0xED, 0x7D, 0x31) # ============================================================================== # MASTER / SLIDE HELPERS # ============================================================================== def get_master_template(report_type: str) -> str: """Route report type to corresponding master template.""" base = os.path.join(os.path.dirname(__file__), '..', 'assets') template_map = { 'daily': os.path.join(base, 'report-master.pptx'), 'weekly': os.path.join(base, 'weekly-master.pptx'), 'monthly': os.path.join(base, 'monthly-master.pptx'), } path = template_map.get(report_type, template_map['daily']) if os.path.exists(path): return os.path.abspath(path) # Fallbacks for fallback in [template_map['daily']]: if os.path.exists(fallback): return os.path.abspath(fallback) raise FileNotFoundError(f"Master template not found for {report_type}") def _resolve_master_template(config: ReportConfig) -> str: if getattr(config, 'template_path', ''): return os.path.abspath(config.template_path) period_type = getattr(config, 'period_type', None) report_type = getattr(period_type, 'value', period_type) or 'daily' return get_master_template(report_type) def _resolve_template_profile(config: ReportConfig): """Resolve TemplateProfile from config (cached or parse on demand).""" if getattr(config, 'template_profile', None): return config.template_profile if getattr(config, 'template_path', ''): return parse_template(config.template_path) period_type = getattr(config, 'period_type', None) report_type = getattr(period_type, 'value', period_type) or 'daily' return get_builtin_template_profile(report_type) def _resolve_colors(config: ReportConfig, profile) -> dict: """Three-tier color resolution: user theme > template theme > defaults.""" # If user explicitly configured a theme and opted out of template theme if config.theme and not getattr(config, 'use_template_theme', True): return theme_to_rgb_colors(config.theme) # Try template-extracted theme from theme_manager import extract_theme_from_template, ThemeConfig template_theme = extract_theme_from_template(profile) if template_theme: return theme_to_rgb_colors(template_theme) # Fallback to user theme or default if config.theme: return theme_to_rgb_colors(config.theme) # Ultimate fallback: hard-coded defaults packaged as a theme return theme_to_rgb_colors(ThemeConfig()) def _resolve_fonts(config: ReportConfig, profile) -> dict: """Three-tier font resolution: user config > template fonts > defaults.""" result = { 'title_font': '微软雅黑', 'body_font': '微软雅黑', 'number_font': 'Arial', } # Template fonts detected = getattr(profile, 'detected_fonts', {}) if detected.get('title_font'): result['title_font'] = detected['title_font'] if detected.get('body_font'): result['body_font'] = detected['body_font'] if detected.get('number_font'): result['number_font'] = detected['number_font'] # User override via theme config if config.theme: if getattr(config.theme, 'title_font', ''): result['title_font'] = config.theme.title_font if getattr(config.theme, 'body_font', ''): result['body_font'] = config.theme.body_font if getattr(config.theme, 'number_font', ''): result['number_font'] = config.theme.number_font return result def _duplicate_master_slide(prs, profile, page_type: str, keep_shapes: bool = False): """Duplicate the appropriate master slide for the given page_type. keep_shapes=True: keep layout-inherited placeholders (cover/toc/end pages). keep_shapes=False: remove layout placeholders and copy from source (content pages). """ idx = profile.get_master_index_for(page_type) if 0 <= idx < len(prs.slides): source = prs.slides[idx] else: source = prs.slides[0] return _duplicate_slide(prs, source, keep_shapes=keep_shapes) def _is_forecast_page_type(page_type: str) -> bool: normalized = str(page_type or '').lower() return normalized in { 'forecast', 'prediction', 'plan', 'monthly_forecast', 'monthly_plan', 'next_month_plan', 'custom_forecast', 'custom_prediction', } def _detect_content_top(slide) -> int: """Detect content start Y from a content slide template by reading {page_title} position.""" page_title_bottom = Emu(1422400) # daily default for shape in slide.shapes: if shape.has_text_frame and '{page_title}' in shape.text_frame.text: page_title_bottom = shape.top + shape.height break # Gap: generous spacing between page title and content to avoid crowding gap = Emu(381000) return int(page_title_bottom) + int(gap) def _delete_template_slides(prs, count=None): """Delete original template slides from the presentation. count: number of original template slides to remove from the beginning. If None, auto-detect using a heuristic that looks for unreplaced placeholders. """ if count is None: # Auto-detect: count leading slides that contain unreplaced placeholders # or have only template-specific content patterns. count = 0 for slide in prs.slides: has_unreplaced_placeholder = False has_real_content = False for shape in slide.shapes: if shape.has_text_frame: text = shape.text_frame.text.strip() if text: if '{' in text and '}' in text: has_unreplaced_placeholder = True else: # Text like copyright, footer, etc. on template slides # is not "real content" in the report sense pass # If slide has unreplaced placeholders, it's an original template slide if has_unreplaced_placeholder: count += 1 else: # Also check if slide is completely empty (some template slides # may have no placeholders at all) if len(slide.shapes) == 0: count += 1 else: break # Ensure we don't delete all slides actual_count = min(count, len(prs.slides) - 1) if len(prs.slides) > 1 else 0 for _ in range(actual_count): if len(prs.slides) == 0: break rId = prs.slides._sldIdLst[0].rId prs.part.drop_rel(rId) del prs.slides._sldIdLst[0] def copy_layout_decorative_shapes(slide, layout): """Copy non-placeholder decorative shapes from a layout to a slide. python-pptx's add_slide(layout) does NOT copy layout-level decorative shapes (gradient rectangles, logos, decorative lines) to the slide's spTree. PowerPoint renders them from the layout reference, but this is unreliable across PowerPoint versions. This function deep-copies all elements from the layout's spTree that do NOT contain a (placeholder) element into the slide's spTree. Args: slide: The slide to add shapes to (from prs.slides.add_slide(layout)). layout: The SlideLayout whose decorative shapes should be copied. Returns: int: Number of shapes copied. """ from copy import deepcopy from pptx.oxml.ns import qn layout_spTree = layout._element.find(qn('p:cSld')).find(qn('p:spTree')) slide_spTree = slide._element.find(qn('p:cSld')).find(qn('p:spTree')) count = 0 for child in list(layout_spTree): tag = child.tag.split('}')[-1] if '}' in child.tag else child.tag if tag == 'sp': # Check if this shape is a placeholder (has element) ph = child.find('.//' + qn('p:ph')) if ph is None: new_shape = deepcopy(child) slide_spTree.append(new_shape) count += 1 return count def _duplicate_slide(prs, source_slide, keep_shapes: bool = False): # Use the SOURCE slide's own layout to preserve: # - layout-level background (gradient, color, image) # - layout-level shapes (company logo, decorative icons) # - theme colors, fonts # Previously used blank_layout which stripped all of the above. source_layout = source_slide.slide_layout new_slide = prs.slides.add_slide(source_layout) if not keep_shapes: # Remove layout-default shapes (placeholders) from the new slide — # they'll be replaced by shapes deep-copied from the source slide. # Layout-level decorative shapes (logos, backgrounds) are NOT in # slide.shapes and remain intact via layout inheritance. for shape in list(new_slide.shapes): sp = shape._element sp.getparent().remove(sp) # Copy slide-level background override if present (rare, but safe) try: src_cSld = source_slide._element.cSld new_cSld = new_slide._element.cSld if src_cSld.bg is not None: new_bg = copy.deepcopy(src_cSld.bg) if new_cSld.bg is not None: new_cSld.remove(new_cSld.bg) new_cSld.insert(0, new_bg) except Exception: pass if not keep_shapes: for shape in source_slide.shapes: el = shape.element new_el = copy.deepcopy(el) new_slide.shapes._spTree.insert_element_before(new_el, 'p:extLst') return new_slide def _replace_placeholder(slide, placeholder, new_text, fonts: dict = None): fonts = fonts or {} body_font = fonts.get('body_font', '微软雅黑') replacement = ( _format_kpi_value_for_placeholder(new_text) if re_module.fullmatch(r'\{kpi\d+_value\}', placeholder) else str(new_text) ) # Gather aliases for this placeholder aliases = PLACEHOLDER_ALIASES.get(placeholder, []) targets = [placeholder] + [a for a in aliases if a != placeholder] for shape in slide.shapes: if not shape.has_text_frame: continue for para in shape.text_frame.paragraphs: for target in targets: if target in para.text: para.text = para.text.replace(target, replacement) for run in para.runs: run.font.name = body_font break # only replace once per paragraph def _replace_all_placeholders(slide, mapping: dict, fonts: dict = None): for placeholder, new_text in mapping.items(): _replace_placeholder(slide, placeholder, new_text, fonts) def _remove_shape(shape): """Remove a python-pptx shape from its parent tree.""" el = shape.element el.getparent().remove(el) def _remove_slide(prs, slide): """Remove a slide from a presentation by its rId.""" try: for i, s in enumerate(prs.slides): if s == slide: rId = prs.slides._sldIdLst[i].rId prs.part.drop_rel(rId) del prs.slides._sldIdLst[i] return True except Exception: pass return False def _safe_auto_shape_type(shape): try: return shape.auto_shape_type except (AttributeError, ValueError): return None def _remove_empty_cover_kpi_placeholders(slide): """ Remove template KPI cards when generic cover data does not provide values. This prevents empty rounded rectangles from staying on the cover. """ kpi_pattern = re_module.compile(r'\{kpi\d+_(label|value)\}') placeholder_shapes = [ shape for shape in slide.shapes if shape.has_text_frame and kpi_pattern.search(shape.text_frame.text or '') ] if not placeholder_shapes: return x_min = min(int(shape.left) for shape in placeholder_shapes) x_max = max(int(shape.left) + int(shape.width) for shape in placeholder_shapes) y_min = min(int(shape.top) for shape in placeholder_shapes) y_max = max(int(shape.top) + int(shape.height) for shape in placeholder_shapes) pad = Emu(220000) to_remove = [] for shape in slide.shapes: sx = int(shape.left) sy = int(shape.top) sw = int(shape.width) sh = int(shape.height) in_region = ( sx >= x_min - pad and sx + sw <= x_max + pad and sy >= y_min - pad and sy + sh <= y_max + pad ) is_text_placeholder = shape in placeholder_shapes is_empty_kpi_card = ( in_region and _safe_auto_shape_type(shape) == MSO_SHAPE.ROUNDED_RECTANGLE ) if is_text_placeholder or is_empty_kpi_card: to_remove.append(shape) for shape in to_remove: _remove_shape(shape) # ============================================================================== # NAVIGATION TABS # ============================================================================== def _add_nav_tabs(slide, tabs, active_index=0, slide_width=None, fonts=None, colors=None, tab_y=Emu(254000), tab_h=Emu(762000), underline_h=Emu(127000)): colors = colors or {} C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F)) C_TEXT_GRAY = colors.get('text_gray', RGBColor(0x66, 0x66, 0x66)) if slide_width is None: slide_width = slide.shapes._spTree.getparent().getparent().attrib.get('cx') slide_width = Emu(int(slide_width)) if slide_width else Emu(16256000) n = len(tabs) tab_w = Emu(int(slide_width) // n) for i, label in enumerate(tabs): x = Emu(i * int(tab_w)) box = slide.shapes.add_textbox(x, tab_y, tab_w, tab_h) p = box.text_frame.paragraphs[0] p.text = label p.font.size = Pt(11) p.font.name = '微软雅黑' p.font.color.rgb = C_PRIMARY if i == active_index else C_TEXT_GRAY p.alignment = PP_ALIGN.CENTER if i == active_index: line = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, x, Emu(457200), tab_w, underline_h) line.fill.solid() line.fill.fore_color.rgb = C_PRIMARY line.line.fill.background() # ============================================================================== # KPI CARDS # ============================================================================== def _add_kpi_cards(slide, kpis, start_x=Emu(762000), start_y=Emu(1651000), fonts=None, colors=None): fonts = fonts or {} body_font = fonts.get("body_font", "微软雅黑") number_font = fonts.get("number_font", "Arial") colors = colors or {} C_CARD_BG = colors.get('card_bg', RGBColor(0xE7, 0xF0, 0xF7)) C_TEXT_GRAY = colors.get('text_gray', RGBColor(0x66, 0x66, 0x66)) C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F)) positions = [ (start_x, start_y), (Emu(5778500), start_y), (Emu(10795000), start_y), (start_x, Emu(start_y + 3429000)), (Emu(5778500), Emu(start_y + 3429000)), (Emu(10795000), Emu(start_y + 3429000)), ] for i, kpi in enumerate(kpis[:6]): if i >= len(positions): break x, y = positions[i] w, h = Emu(4699000), Emu(3048000) card = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, x, y, w, h) card.fill.solid() card.fill.fore_color.rgb = C_CARD_BG card.line.fill.background() # Label lbl = slide.shapes.add_textbox(Emu(x + 508000), Emu(y + 228600), Emu(2540000), Emu(406400)) p = lbl.text_frame.paragraphs[0] p.text = kpi.get('label', '') p.font.size = Pt(14) p.font.color.rgb = C_TEXT_GRAY p.font.name = '微软雅黑' # Value val = slide.shapes.add_textbox(Emu(x + 508000), Emu(y + 762000), Emu(2540000), Emu(698500)) p = val.text_frame.paragraphs[0] p.text = str(kpi.get('value', '')) p.font.size = Pt(36) p.font.bold = True p.font.color.rgb = C_PRIMARY p.font.name = 'Arial' # Unit unit = kpi.get('unit', '') if unit: ubox = slide.shapes.add_textbox(Emu(x + 3048000), Emu(y + 1016000), Emu(508000), Emu(381000)) p = ubox.text_frame.paragraphs[0] p.text = unit p.font.size = Pt(14) p.font.color.rgb = C_TEXT_GRAY p.font.name = '微软雅黑' # Change badge chg = kpi.get('change', '') if chg: cbox = slide.shapes.add_textbox(Emu(x + 508000), Emu(y + 1778000), Emu(4064000), Emu(304800)) p = cbox.text_frame.paragraphs[0] p.text = chg p.font.size = Pt(12) chg_str = str(chg) is_positive = chg_str.startswith('+') or any(k in chg_str for k in ['↑', '提升', '增长', '上调', '增加', '大幅', '好', '突破', '达成', '优化']) is_negative = chg_str.startswith('-') or any(k in chg_str for k in ['↓', '下滑', '下降', '减少', '回落', '滞后', '堆积', '阻塞', '缺口', '延迟']) if is_negative: p.font.color.rgb = C_RED elif is_positive: p.font.color.rgb = C_GREEN else: p.font.color.rgb = C_TEXT_GRAY p.font.name = '微软雅黑' # Sub note with semantic background color tag (e.g. "日均51笔") sub = kpi.get('sub', '') if sub: sub_text = _truncate_text(sub, 20) tag_color = _sentiment_color(sub_text) tag_x = Emu(x + 508000) tag_y = Emu(y + 2159000) tag_w = Emu(min(len(sub_text) * 220000 + 400000, 3600000)) tag_h = Emu(304800) if tag_color: tag_bg = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, tag_x, tag_y, tag_w, tag_h) tag_bg.fill.solid() tag_bg.fill.fore_color.rgb = tag_color tag_bg.line.fill.background() sbox = slide.shapes.add_textbox(tag_x, tag_y, tag_w, tag_h) p = sbox.text_frame.paragraphs[0] p.text = sub_text p.font.size = Pt(11) p.font.color.rgb = C_TEXT_GRAY p.font.name = '微软雅黑' p.alignment = PP_ALIGN.CENTER def _add_compact_kpi_cards(slide, kpis, start_x=Emu(CONTENT_LEFT), start_y=Emu(1651000), fonts=None, colors=None, max_cols=3, card_h=Emu(1780000), gap_x=Emu(254000), gap_y=Emu(254000)): colors = colors or {} C_CARD_BG = colors.get('card_bg', RGBColor(0xE7, 0xF0, 0xF7)) C_TEXT_GRAY = colors.get('text_gray', RGBColor(0x66, 0x66, 0x66)) C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F)) """Draw compact KPI cards so generic overview pages preserve room for insight text.""" if not kpis: return 0 content_w = SLIDE_WIDTH - 2 * CONTENT_LEFT cols = min(max_cols, max(1, len(kpis))) card_w = int((content_w - (cols - 1) * int(gap_x)) / cols) rows = (len(kpis) + cols - 1) // cols for i, kpi in enumerate(kpis): row = i // cols col = i % cols x = int(start_x) + col * (card_w + int(gap_x)) y = int(start_y) + row * (int(card_h) + int(gap_y)) card = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, Emu(x), Emu(y), Emu(card_w), card_h) card.fill.solid() card.fill.fore_color.rgb = C_CARD_BG card.line.fill.background() label = _truncate_text(kpi.get('label', ''), 14) lbl = slide.shapes.add_textbox(Emu(x + 280000), Emu(y + 180000), Emu(card_w - 560000), Emu(330000)) p = lbl.text_frame.paragraphs[0] p.text = label p.font.size = Pt(11) p.font.color.rgb = C_TEXT_GRAY p.font.name = '微软雅黑' value = _truncate_text(str(kpi.get('value', '')), 16) val = slide.shapes.add_textbox(Emu(x + 280000), Emu(y + 570000), Emu(card_w - 1000000), Emu(560000)) p = val.text_frame.paragraphs[0] p.text = value p.font.size = Pt(24 if len(value) <= 10 else 20) p.font.bold = True p.font.color.rgb = C_PRIMARY p.font.name = 'Arial' unit = kpi.get('unit', '') if unit: ubox = slide.shapes.add_textbox(Emu(x + card_w - 820000), Emu(y + 710000), Emu(540000), Emu(330000)) p = ubox.text_frame.paragraphs[0] p.text = _truncate_text(str(unit), 4) p.font.size = Pt(10) p.font.color.rgb = C_TEXT_GRAY p.font.name = '微软雅黑' sub_text = kpi.get('sub') or kpi.get('change') or '核心指标' sub = slide.shapes.add_textbox(Emu(x + 280000), Emu(y + 1230000), Emu(card_w - 560000), Emu(330000)) p = sub.text_frame.paragraphs[0] p.text = _truncate_text(str(sub_text), 24) p.font.size = Pt(9) p.font.color.rgb = C_TEXT_GRAY p.font.name = '微软雅黑' return int(start_y) + rows * int(card_h) + (rows - 1) * int(gap_y) # ============================================================================== # TEXT BLOCKS # ============================================================================== def _add_text_block(slide, title, body, left, top, width, height, fonts=None, colors=None, title_size=Pt(14), body_size=Pt(11), line_space=Pt(6)): colors = colors or {} C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F)) C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33)) """Single text box with title + body.""" box = slide.shapes.add_textbox(left, top, width, height) tf = box.text_frame tf.word_wrap = True p = tf.paragraphs[0] p.text = title p.font.size = title_size p.font.bold = True p.font.color.rgb = C_PRIMARY if title else C_TEXT p.font.name = '微软雅黑' if body: p2 = tf.add_paragraph() p2.text = body p2.font.size = body_size p2.font.color.rgb = C_TEXT p2.font.name = '微软雅黑' p2.space_before = line_space p2.line_spacing = 1.3 def _estimate_text_height(items, title_size_pt, body_size_pt, width_emu, line_spacing=1.15, title_extra=1.3): """Estimate rendered text height in EMU for adaptive font sizing.""" width_pt = width_emu / 12700.0 chars_per_line_body = max(10, int(width_pt / (body_size_pt * 1.15))) chars_per_line_title = max(10, int(width_pt / (title_size_pt * 1.15))) line_height_body = int(body_size_pt * line_spacing * 12700) line_height_title = int(title_size_pt * title_extra * 12700) total = 0 for item in items: title = item.get('title', '') content = item.get('content', '') title_lines = max(1, (len(title) + chars_per_line_title - 1) // chars_per_line_title) content_lines = max(1, (len(content) + chars_per_line_body - 1) // chars_per_line_body) total += title_lines * line_height_title + content_lines * line_height_body + int(6 * 12700) return total def _add_structured_insight(slide, items, left, top, width, height, fonts=None, colors=None, title_size=Pt(12), body_size=Pt(11), max_items=None, min_body_size=Pt(9)): colors = colors or {} C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F)) C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33)) """ High-density structured multi-paragraph insight block. items: list of {'title': str, 'content': str} Features: - No truncation; full content rendered - No max_items limit by default (render all) - Auto-shrink body font to fit within height (down to min_body_size) - Compact line spacing (1.15) to maximize density - Each bullet has emoji + bold title + normal body """ if not items: return # Adaptive font sizing: shrink body_size until it fits target_height = int(height) # title_size/body_size may be EMU integers or Pt objects; normalize to pt _ts = float(title_size) / 12700.0 if float(title_size) > 1000 else float(title_size) _bs = float(body_size) / 12700.0 if float(body_size) > 1000 else float(body_size) _min_bs = float(min_body_size) / 12700.0 if float(min_body_size) > 1000 else float(min_body_size) ts_pt = _ts bs_pt = _bs min_bs_pt = _min_bs # Binary-search-like shrink to fit while bs_pt > min_bs_pt: est = _estimate_text_height(items, ts_pt, bs_pt, int(width)) if est <= target_height: break bs_pt -= 0.5 ts_pt = max(bs_pt + 1, ts_pt - 0.25) box = slide.shapes.add_textbox(left, top, width, height) tf = box.text_frame tf.word_wrap = True first = True for item in items[:max_items] if max_items else items: if not first: spacer = tf.add_paragraph() spacer.text = '' spacer.space_before = Pt(3) title = item.get('title', '') emoji = _emoji_for_item(title) # Avoid double emoji if emoji and title.startswith(emoji): emoji = '' title_text = f'{emoji} {title}' if emoji else title p = tf.paragraphs[0] if first else tf.add_paragraph() p.text = title_text p.font.size = Pt(ts_pt) p.font.bold = True p.font.color.rgb = C_PRIMARY p.font.name = '微软雅黑' p.line_spacing = 1.15 first = False content = item.get('content', '') if content: p2 = tf.add_paragraph() p2.text = content p2.font.size = Pt(bs_pt) p2.font.color.rgb = C_TEXT p2.font.name = '微软雅黑' p2.line_spacing = 1.15 p2.space_before = Pt(1) def _ensure_min_insight_items(items, profile=None, metrics=None, min_count=2, context_label='本页'): """Guarantee enough long-form insight blocks for quality self-check.""" cleaned = [] for item in items or []: title = str(item.get('title', '')).strip() content = str(item.get('content', '')).strip() if title or content: cleaned.append({'title': title or '分析说明', 'content': content}) profile = profile or {} metrics = metrics or {} total_rows = profile.get('total_rows', 0) numeric_count = len(profile.get('numeric_columns', []) or []) category_count = len(profile.get('category_columns', []) or []) fallback_pool = [ { 'title': f'{context_label}数据基础', 'content': f'本页基于当前数据画像进行归纳,覆盖 {total_rows or "若干"} 条记录、' f'{numeric_count} 个数值指标和 {category_count} 个分类维度。' f'当原始数据字段较少或业务指标尚未形成充分拆解时,报告优先呈现已经确认的核心指标,' f'并将可验证的数据范围、维度覆盖和后续分析口径写入页面,避免出现空白页或模板占位内容。', }, { 'title': f'{context_label}行动建议', 'content': f'建议围绕已确认的核心指标建立持续跟踪机制:先核对指标口径与数据字段映射,' f'再按时间、区域、部门或客户等维度拆解异常变化,最后将发现转化为责任人、截止时间和复盘频率明确的行动项。' f'如果后续补充历史同期或目标值数据,可进一步增加同比、环比和达成率判断。', }, { 'title': f'{context_label}风险提示', 'content': f'若数据源存在缺失值、合并表头、人工备注列或统计口径变化,自动生成的结论需要结合业务确认进行复核。' f'建议在报告发布前重点检查核心指标是否全部出现、图表数值是否与原表一致、长文本是否仍在页面安全区域内,' f'以保证美观度和决策可信度同时达标。', }, ] used_titles = {item['title'] for item in cleaned} for fallback in fallback_pool: if len(cleaned) >= min_count: break if fallback['title'] not in used_titles: cleaned.append(fallback) used_titles.add(fallback['title']) return cleaned # ============================================================================== # ALERT / ACTION / ISSUE / GOAL CARDS # ============================================================================== def _add_alert_cards(slide, alerts, start_y=Emu(1651000), fonts=None, colors=None): colors = colors or {} C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F)) C_RED = colors.get('red', RGBColor(0xEF, 0x44, 0x44)) C_ORANGE = colors.get('orange', RGBColor(0xED, 0x7D, 0x31)) C_SECONDARY = colors.get('secondary', RGBColor(0x64, 0x74, 0x8B)) C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33)) colors = {'严重': C_RED, '警告': C_ORANGE, '关注': C_PRIMARY, '中度': C_ORANGE, '一般': C_SECONDARY} positions = [Emu(762000), Emu(5778500), Emu(10795000)] for i, alert in enumerate(alerts[:3]): x = positions[i] y = start_y lvl = alert.get('level', '关注') c = colors.get(lvl, C_PRIMARY) bar = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, x, y, Emu(50800), Emu(2286000)) bar.fill.solid() bar.fill.fore_color.rgb = c bar.line.fill.background() tbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 228600), Emu(4064000), Emu(406400)) p = tbox.text_frame.paragraphs[0] p.text = alert.get('title', '') p.font.size = Pt(15) p.font.bold = True p.font.color.rgb = C_TEXT p.font.name = '微软雅黑' dbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 762000), Emu(4064000), Emu(1270000)) tf = dbox.text_frame tf.word_wrap = True p = tf.paragraphs[0] p.text = alert.get('detail', '') p.font.size = Pt(11) p.font.color.rgb = C_TEXT p.font.name = '微软雅黑' def _add_action_cards(slide, actions, start_y=Emu(2540000), fonts=None, colors=None): colors = colors or {} C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F)) C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33)) positions = [Emu(762000), Emu(5778500), Emu(10795000)] for i, act in enumerate(actions[:3]): x = positions[i] y = start_y bar = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, x, y, Emu(50800), Emu(406400)) bar.fill.solid() bar.fill.fore_color.rgb = C_PRIMARY bar.line.fill.background() tbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 952500), Emu(4064000), Emu(406400)) p = tbox.text_frame.paragraphs[0] p.text = act.get('title', '') p.font.size = Pt(17) p.font.bold = True p.font.color.rgb = C_TEXT p.font.name = '微软雅黑' dbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 1524000), Emu(4064000), Emu(3429000)) tf = dbox.text_frame tf.word_wrap = True p = tf.paragraphs[0] p.text = act.get('detail', '') p.font.size = Pt(11) p.font.color.rgb = C_TEXT p.font.name = '微软雅黑' p.line_spacing = 1.3 def _add_issue_cards(slide, issues, start_y=Emu(1524000), fonts=None, colors=None): colors = colors or {} C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F)) C_RED = colors.get('red', RGBColor(0xEF, 0x44, 0x44)) C_ORANGE = colors.get('orange', RGBColor(0xED, 0x7D, 0x31)) C_SECONDARY = colors.get('secondary', RGBColor(0x64, 0x74, 0x8B)) C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33)) colors = {'严重': C_RED, '中度': C_ORANGE, '轻度': C_PRIMARY, '一般': C_SECONDARY} for i, issue in enumerate(issues[:3]): x = Emu(762000) y = Emu(int(start_y) + i * (1778000 + 254000)) sev = issue.get('severity', '中度') c = colors.get(sev, C_ORANGE) bar = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, x, y, Emu(50800), Emu(1778000)) bar.fill.solid() bar.fill.fore_color.rgb = c bar.line.fill.background() sbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 228600), Emu(660400), Emu(304800)) p = sbox.text_frame.paragraphs[0] p.text = sev p.font.size = Pt(11) p.font.bold = True p.font.color.rgb = c p.font.name = '微软雅黑' tbox = slide.shapes.add_textbox(Emu(x + 1778000), Emu(y + 228600), Emu(13462000), Emu(355600)) p = tbox.text_frame.paragraphs[0] p.text = issue.get('title', '') p.font.size = Pt(13) p.font.bold = True p.font.color.rgb = C_TEXT p.font.name = '微软雅黑' dbox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 698500), Emu(14224000), Emu(355600)) p = dbox.text_frame.paragraphs[0] p.text = issue.get('detail', '') p.font.size = Pt(11) p.font.color.rgb = C_TEXT p.font.name = '微软雅黑' abox = slide.shapes.add_textbox(Emu(x + 101600), Emu(y + 1193800), Emu(14224000), Emu(609600)) tf = abox.text_frame tf.word_wrap = True p = tf.paragraphs[0] p.text = f"建议措施:{issue.get('action', '')}" p.font.size = Pt(11) p.font.color.rgb = C_TEXT_GRAY p.font.name = '微软雅黑' def _add_goal_cards(slide, goals, start_y=Emu(1524000), fonts=None, colors=None): colors = colors or {} C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F)) C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33)) C_TEXT_GRAY = colors.get('text_gray', RGBColor(0x66, 0x66, 0x66)) sy = int(start_y) positions = [ (Emu(762000), Emu(sy)), (Emu(8318500), Emu(sy)), (Emu(762000), Emu(sy + 1879600)), (Emu(8318500), Emu(sy + 1879600)), ] icon_chars = ['🎯', '💰', '🚀', '⚡'] for i, goal in enumerate(goals[:4]): x, y = positions[i] gid = goal.get('id', f'G{i+1}') gbox = slide.shapes.add_textbox(x, Emu(y + 101600), Emu(635000), Emu(355600)) p = gbox.text_frame.paragraphs[0] p.text = f"{icon_chars[i % len(icon_chars)]} {gid}" p.font.size = Pt(16) p.font.bold = True p.font.color.rgb = C_PRIMARY p.font.name = 'Arial' tbox = slide.shapes.add_textbox(Emu(x + 863600), Emu(y + 101600), Emu(6096000), Emu(355600)) p = tbox.text_frame.paragraphs[0] p.text = goal.get('title', '') p.font.size = Pt(14) p.font.bold = True p.font.color.rgb = C_TEXT p.font.name = '微软雅黑' dbox = slide.shapes.add_textbox(Emu(x + 228600), Emu(y + 571500), Emu(6731000), Emu(863600)) tf = dbox.text_frame tf.word_wrap = True p = tf.paragraphs[0] p.text = goal.get('detail', '') p.font.size = Pt(11) p.font.color.rgb = C_TEXT_GRAY p.font.name = '微软雅黑' p.line_spacing = 1.3 def _add_summary_text(slide, text, left=Emu(1016000), top=Emu(5435600), width=Emu(14224000), height=Emu(1270000), fonts=None, colors=None): colors = colors or {} C_TEXT = colors.get('text', RGBColor(0x33, 0x33, 0x33)) box = slide.shapes.add_textbox(left, top, width, height) tf = box.text_frame tf.word_wrap = True p = tf.paragraphs[0] p.text = text p.font.size = Pt(12) p.font.color.rgb = C_TEXT p.font.name = '微软雅黑' p.line_spacing = 1.3 # ============================================================================== # TEXT / LAYOUT HELPERS # ============================================================================== def _truncate_text(text, max_chars=60): """Truncate text to max_chars, appending '...' if truncated.""" if not text: return text if len(text) > max_chars: return text[:max_chars - 1] + '...' return text def _format_kpi_value_for_placeholder(value, max_chars=16): """ KPI value placeholders are fixed-size number slots. If upstream passes a category list, compact it to a count instead of letting it overflow. """ if value is None: return '' text = str(value).strip() if len(text) <= max_chars: return text list_text = text.strip().strip('[]()(){}') tokens = [ token.strip().strip("'\"“”‘’") for token in re_module.split(r'[、,,;;\n/]+', list_text) ] tokens = [token for token in tokens if token] if len(tokens) >= 3: return f'{len(tokens)}项' return _truncate_text(text, max_chars) def _sentiment_color(text): """Return a light background color based on text sentiment.""" if not text: return None text = str(text) positive_words = ['提升', '增长', '上调', '增加', '高', '好', '大幅', '冲刺', '领跑', '上升', '扩大', '优化', '改善', '突破', '达成'] negative_words = ['下滑', '下降', '减少', '低', '差', '回落', '下滑', '滞后', '堆积', '阻塞', '缺口', '延迟', '超期', '逾期', '风险', '警告'] pos_score = sum(1 for w in positive_words if w in text) neg_score = sum(1 for w in negative_words if w in text) if neg_score > pos_score: return RGBColor(0xFE, 0xE2, 0xE2) # light red ~ #EF444420 if pos_score > neg_score: return RGBColor(0xD1, 0xFA, 0xE5) # light green ~ #10B98120 return None import re def _emoji_for_item(title): """Return an emoji prefix based on title keywords.""" if not title: return '📈' title = str(title) # Skip if title already starts with an emoji if re.match(r'^[\U0001F300-\U0001F9FF\u2600-\u26FF\u2700-\u27BF]', title): return '' if any(k in title for k in ['风险', '警告', '关注', '下滑', '下降', '延迟', '超期', '缺口', '阻塞']): return '⚠️' if any(k in title for k in ['建议', '措施', '行动', '协调', '对接']): return '💡' if any(k in title for k in ['目标', '计划', '冲刺', '展望', '聚焦']): return '🎯' if any(k in title for k in ['增长', '上升', '提升', '峰值', '领跑', '突破', '活跃', '好转']): return '📈' return '💡' def _add_footer_if_missing(slide, footer_text, slide_width=None, fonts=None, colors=None): colors = colors or {} C_PRIMARY = colors.get('primary', RGBColor(0x1E, 0x3A, 0x5F)) C_WHITE = colors.get('white', RGBColor(0xFF, 0xFF, 0xFF)) if slide_width is None: slide_width = slide.shapes._spTree.getparent().getparent().attrib.get('cx') slide_width = Emu(int(slide_width)) if slide_width else Emu(16256000) # Check if footer already exists has_footer = False for shape in slide.shapes: if shape.has_text_frame and '数据来源' in shape.text_frame.text: has_footer = True break if has_footer: return bar = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, 0, Emu(8824000), slide_width, Emu(320000)) bar.fill.solid() bar.fill.fore_color.rgb = C_PRIMARY bar.line.fill.background() box = slide.shapes.add_textbox(Emu(762000), Emu(8824000), Emu(14000000), Emu(320000)) p = box.text_frame.paragraphs[0] p.text = footer_text p.font.size = Pt(10) p.font.color.rgb = C_WHITE p.font.name = '微软雅黑' def _ensure_word_wrap_all(slide, fonts: dict = None): """Enable word_wrap on all text frames in a slide.""" fonts = fonts or {} body_font = fonts.get('body_font', '微软雅黑') for shape in slide.shapes: if shape.has_text_frame: shape.text_frame.word_wrap = True for para in shape.text_frame.paragraphs: for run in para.runs: run.font.name = body_font # ============================================================================== # MATH HELPERS # ============================================================================== def _pct_val(curr, prev): if prev and prev != 0: return (curr - prev) / prev * 100 return None def _format_pct(pct, with_sign=True, suffix='%', zero_suffix=''): """Safely format a percentage value. Returns '—' if pct is None.""" if pct is None: return '—' sign = '+' if with_sign and pct >= 0 else '' return f"{sign}{pct:.1f}{suffix}{zero_suffix}" def _pct_str(curr, prev): if prev and prev != 0: pct = round((curr - prev) / prev * 100, 1) sign = '+' if pct >= 0 else '' return f"{sign}{pct}% vs 上期" return "—" def _safe_div(a, b): return round(a / b, 1) if b else 0 # ============================================================================== # DYNAMIC / UNIVERSAL REPORT BUILDER # ============================================================================== def build_report(data_file: str, config: ReportConfig, output_path: str) -> str: master_path = _resolve_master_template(config) prs = Presentation(master_path) original_slide_count = len(prs.slides) df = load_generic_excel(data_file) if config.require_six_confirmations: confirmation_issues = validate_six_confirmations(config, list(df.columns)) if confirmation_issues: raise ValueError('生成前六项确认未通过:\n- ' + '\n- '.join(confirmation_issues)) data_profile = config.data_profiling or {} # Resolve template profile and dynamic layout context template_profile = _resolve_template_profile(config) ctx = LayoutContext.from_template_profile(template_profile) colors = _resolve_colors(config, template_profile) fonts = _resolve_fonts(config, template_profile) metrics = calc_generic_metrics(df, config) content_top = template_profile.get_content_top('content') total_pages = len([p for p in config.pages if p.selected]) if total_pages == 0: total_pages = len(config.pages) for page_idx, page_def in enumerate(config.pages): if not page_def.selected: continue page_num = page_idx + 1 if page_def.page_type == 'cover': _build_cover_page(prs, config, colors, fonts, template_profile) elif page_def.page_type == 'toc': _build_toc_page(prs, config, colors, fonts, template_profile) elif page_def.page_type == 'kpi_overview': _build_kpi_overview_page(prs, config, metrics, colors, fonts, content_top, df, data_profile, ctx) elif page_def.page_type == 'trend': if not _build_trend_page(prs, config, df, data_profile, colors, fonts, content_top, ctx): _build_fallback_analysis_page(prs, config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx) elif page_def.page_type == 'distribution': if not _build_distribution_page(prs, config, df, data_profile, colors, fonts, content_top, page_def, ctx): _build_fallback_analysis_page(prs, config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx) elif page_def.page_type == 'ranking': if not _build_ranking_page(prs, config, df, data_profile, colors, fonts, content_top, page_def, ctx): _build_fallback_analysis_page(prs, config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx) elif page_def.page_type == 'summary': _build_summary_page(prs, config, metrics, data_profile, colors, fonts, content_top, page_def, ctx) elif _is_forecast_page_type(page_def.page_type): _build_forecast_page(prs, config, df, data_profile, metrics, colors, fonts, content_top, page_def, ctx) elif page_def.page_type == 'end': _build_end_page(prs, config, colors, fonts, template_profile) else: raise ValueError(f'不支持的页面类型: {page_def.page_type}(页面: {page_def.title})') for slide in prs.slides: _ensure_word_wrap_all(slide, fonts) _delete_template_slides(prs, original_slide_count) prs.save(output_path) print(f"Report saved: {output_path}") return output_path def quality_assured_build(data_file: str, config: ReportConfig, output_path: str) -> tuple: if config.require_six_confirmations: df = load_generic_excel(data_file) confirmation_issues = validate_six_confirmations(config, list(df.columns)) if confirmation_issues: raise ValueError('生成前六项确认未通过:\n- ' + '\n- '.join(confirmation_issues)) template_profile = _resolve_template_profile(config) ctx = LayoutContext.from_template_profile(template_profile) colors = _resolve_colors(config, template_profile) inspector = QualityInspector(colors, ctx) return inspector.quality_assured_build( build_fn=lambda d, c: _build_without_save(d, c, config), data=data_file, config=config, output_path=output_path, ) def _build_without_save(data_file, temp_config, original_config): from pptx import Presentation as Prs prs = Prs(_resolve_master_template(original_config)) original_slide_count = len(prs.slides) df = load_generic_excel(data_file) data_profile = original_config.data_profiling or {} template_profile = _resolve_template_profile(original_config) ctx = LayoutContext.from_template_profile(template_profile) colors = _resolve_colors(original_config, template_profile) fonts = _resolve_fonts(original_config, template_profile) metrics = calc_generic_metrics(df, original_config) content_top = template_profile.get_content_top('content') for page_def in original_config.pages: if not page_def.selected: continue if page_def.page_type == 'cover': _build_cover_page(prs, original_config, colors, fonts, template_profile) elif page_def.page_type == 'kpi_overview': _build_kpi_overview_page(prs, original_config, metrics, colors, fonts, content_top, df, data_profile, ctx) elif page_def.page_type == 'trend': if not _build_trend_page(prs, original_config, df, data_profile, colors, fonts, content_top, ctx): _build_fallback_analysis_page(prs, original_config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx) elif page_def.page_type == 'distribution': if not _build_distribution_page(prs, original_config, df, data_profile, colors, fonts, content_top, page_def, ctx): _build_fallback_analysis_page(prs, original_config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx) elif page_def.page_type == 'ranking': if not _build_ranking_page(prs, original_config, df, data_profile, colors, fonts, content_top, page_def, ctx): _build_fallback_analysis_page(prs, original_config, page_def, df, data_profile, metrics, colors, fonts, content_top, ctx) elif page_def.page_type == 'summary': _build_summary_page(prs, original_config, metrics, data_profile, colors, fonts, content_top, page_def, ctx) elif _is_forecast_page_type(page_def.page_type): _build_forecast_page(prs, original_config, df, data_profile, metrics, colors, fonts, content_top, page_def, ctx) elif page_def.page_type == 'end': _build_end_page(prs, original_config, colors, fonts, template_profile) elif page_def.page_type == 'toc': _build_toc_page(prs, original_config, colors, fonts, template_profile) else: raise ValueError(f'不支持的页面类型: {page_def.page_type}(页面: {page_def.title})') for slide in prs.slides: _ensure_word_wrap_all(slide, fonts) _delete_template_slides(prs, original_slide_count) return prs def _build_cover_page(prs, config, colors, fonts, template_profile): """Build cover page from template. Two-pass strategy: 1. Pattern-based: _replace_all_placeholders() for templates with {report_title}/{date}/{department} text markers in placeholders. 2. Idx-based fallback: for templates where placeholders are empty or have template-default text (e.g. Wuling's 封面半版), fill by placeholder_format.idx directly. IMPORTANT — text color pitfall: Many template covers have a decorative gradient/banner that covers only the top portion of the slide. The TITLE placeholder may be positioned BELOW the colored area (on white background). Using white/light text on a white background makes it invisible. → Always use dark text (C_PRIMARY) in the idx fallback to avoid this. """ slide = _duplicate_master_slide(prs, template_profile, 'cover', keep_shapes=True) # ---- Pass 1: pattern-based replacement ---- _replace_all_placeholders(slide, { '{report_title}': config.title, '{report_type}': '数据报告', '{date}': config.period_str or config.date_range[0].strftime('%Y年%m月%d日'), '{department}': config.source_label, '{period}': config.period_str, '{gen_time}': datetime.now().strftime('%Y-%m-%d %H:%M'), }, fonts) _remove_empty_cover_kpi_placeholders(slide) # ---- Pass 2: idx-based fallback ---- # If the template has no {report_title} etc. text markers, pass 1 is a # no-op. Detect unfilled placeholders by idx and fill them directly. # Common idx mappings (from OOXML spec + Wuling/real-world templates): # idx=0 → TITLE placeholder → report title # idx=10 → SUBTITLE placeholder → date / subtitle (if idx=21 absent) # idx=21 → BODY quarter-size → date / period string # idx=22 → BODY quarter-size → department / source TEMPLATE_DEFAULT_PATTERNS = { '单击此处编辑母版标题样式', '单击此处添加标题', '单击此处编辑母版文本样式', '单击此处添加文本', '单击此处添加副标题', } _colors = colors or {} _C_PRIMARY = _colors.get('primary', C_PRIMARY) _C_TEXT_GRAY = _colors.get('text_gray', C_TEXT_GRAY) _title_font = (fonts or {}).get('title_font', '微软雅黑') _body_font = (fonts or {}).get('body_font', '微软雅黑') date_text = config.period_str or ( config.date_range[0].strftime('%Y年%m月') if config.date_range else '' ) dept_text = config.source_label or '' filled_title = False filled_date = False for shape in slide.shapes: if not shape.is_placeholder or not shape.has_text_frame: continue ph = shape.placeholder_format tf = shape.text_frame current_text = tf.text.strip() is_unfilled = ( not current_text or current_text in TEMPLATE_DEFAULT_PATTERNS or any(tpl in current_text for tpl in TEMPLATE_DEFAULT_PATTERNS) ) # idx=0 TITLE — report title (highest priority) if ph.idx == 0 and (is_unfilled or not filled_title): p = tf.paragraphs[0] _set_para_text(p, config.title, _C_PRIMARY, Pt(36), bold=True, font_name=_title_font) filled_title = True # idx=10 SUBTITLE — date (only if idx=21 was not filled) elif ph.idx == 10 and (is_unfilled or not filled_date): p = tf.paragraphs[0] _set_para_text(p, date_text, _C_PRIMARY, Pt(18), font_name=_body_font) filled_date = True # idx=21 BODY quarter-size — date/period elif ph.idx == 21 and (is_unfilled or not filled_date): p = tf.paragraphs[0] _set_para_text(p, date_text, _C_PRIMARY, Pt(18), font_name=_body_font) filled_date = True # idx=22 BODY quarter-size — department/source elif ph.idx == 22 and is_unfilled: p = tf.paragraphs[0] _set_para_text(p, dept_text, _C_TEXT_GRAY, Pt(12), font_name=_body_font) total = len([p for p in config.pages if p.selected]) or len(config.pages) _add_footer_if_missing(slide, f'数据来源:{config.source_label} | 1/{total}', slide_width=prs.slide_width, colors=colors) def _set_para_text(para, text, color, size, bold=False, font_name=None): """Set paragraph text + formatting, reusing existing run or creating new one.""" para.text = '' if para.runs: run = para.runs[0] else: run = para.add_run() run.text = text run.font.color.rgb = color run.font.size = size run.font.bold = bold if font_name: run.font.name = font_name def _build_fallback_analysis_page(prs, config, page_def, df, profile, metrics, colors, fonts, content_top, ctx=None): """ Fallback page builder: generates analysis text from available data when the primary page type cannot produce content (e.g. no time columns for trend, no category columns for distribution). Produces at least 4 deep analysis blocks with data citations. """ slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content") page_title = page_def.title if page_def and page_def.title else f'{config.title}数据分析' _replace_all_placeholders(slide, { '{report_title}': config.title, '{date}': config.period_str, '{page_title}': page_title, '{source}': config.source_label, '{period}': '', '{page_num}': '', }, fonts) num_cols = profile.get('numeric_columns', []) cat_cols = profile.get('category_columns', []) insight_items = [] if num_cols: top_metric = num_cols[0] top_name = top_metric.get('inferred_label', top_metric['column_name']) top_vals = df[top_metric['column_name']].dropna() if len(top_vals) > 0: mean_val = top_vals.mean() max_val = top_vals.max() min_val = top_vals.min() median_val = top_vals.median() total_val = top_vals.sum() insight_items.append({ 'title': f'{top_name}整体概览', 'content': f'报告周期内,{top_name}统计数据共包含 {len(top_vals)} 条有效记录。' f'总和为 {total_val:,.0f},平均值为 {mean_val:,.2f},中位数为 {median_val:,.2f}。' f'最大值为 {max_val:,.2f},最小值为 {min_val:,.2f}。' f'{"数据波动范围较大,最大值与最小值差距显著,说明不同条目间差异明显,建议深入分析极端值成因" if min_val > 0 and max_val / max(min_val, 1) > 100 else "数据整体分布较为均衡,波动性在合理范围内"}。' f'中位数与平均值的偏差反映了数据的{"右偏分布(少数大值拉高了均值),说明存在显著头部效应" if median_val < mean_val * 0.8 else "左偏分布" if median_val > mean_val * 1.2 else "较为对称,数据呈正态分布趋势"}。', }) insight_items.append({ 'title': f'{top_name}分段分析', 'content': f'对 {top_name} 进行四分段统计:上四分位数(25%数据高于此值)为 {top_vals.quantile(0.75):,.2f},' f'下四分位数(25%数据低于此值)为 {top_vals.quantile(0.25):,.2f},' f'四分位距(IQR)为 {top_vals.quantile(0.75) - top_vals.quantile(0.25):,.2f}。' f'{"IQR较大,数据分布较为离散,不同类别的表现差异明显,需关注尾部类别的提升空间" if (top_vals.quantile(0.75) - top_vals.quantile(0.25)) > abs(mean_val) * 0.5 else "IQR在合理范围内,数据集中度较好"}。' f'建议按四分位将数据分为四组,重点跟踪上四分位组的表现,识别可复制的成功因素。', }) if cat_cols and num_cols: cat = cat_cols[0] cat_name = cat.get('inferred_label', cat['column_name']) num = num_cols[0] num_name = num.get('inferred_label', num['column_name']) cat_unique = df[cat['column_name']].dropna().nunique() insight_items.append({ 'title': f'{cat_name}分类覆盖分析', 'content': f'数据共覆盖 {cat_unique} 个不同的{cat_name},在 {num_name} 维度上呈现差异化分布。' f'不同{cat_name}对整体{num_name}的贡献度各异,建议按贡献度大小将{cat_name}进行分类管理。' f'高贡献类别应重点维护和深度挖掘,中等贡献类别需持续培育和资源投入,' f'低贡献类别可评估其战略价值,适当调整投入节奏。建议建立分类分级管理体系,' f'每月跟踪各类别的变化趋势和占比波动。', }) if len(num_cols) >= 2: num1 = num_cols[0] num2 = num_cols[1] ratio = df[num1['column_name']].sum() / max(df[num2['column_name']].sum(), 1) insight_items.append({ 'title': '关键比率与效率指标', 'content': f'{num1.get("inferred_label", num1["column_name"])}与{num2.get("inferred_label", num2["column_name"])}的比率为 {ratio:.2f},' f'该比率是衡量业务效率的重要参考指标。' f'{"比率处于较高水平,表明单位投入产出效率良好" if ratio > 1 else "比率偏低,单位投入的产出效益有限,存在效率提升空间"}。' f'建议将此比率纳入定期监控指标,按月环比追踪变化趋势,' f'并针对低比率项目制定专项提升计划,分析制约因素和可优化环节。', }) insight_items.append({ 'title': '数据质量与代表性评估', 'content': f'本报告基于共 {len(df)} 条记录进行分析,数据覆盖范围包括上述多个维度。' f'建议在后续周期中持续关注数据完整性和及时性,确保分析结果准确反映业务真实情况。' f'对于数据量较小或集中度较高的维度,应结合业务判断进行解读,避免以偏概全。' f'同时建议补充更多维度的数据(如时间序列数据、竞品对标数据等),' f'以支撑更全面的分析视角和更精准的决策建议。', }) if not insight_items: insight_items = [{ 'title': '数据总览', 'content': f'当前数据集包含 {len(df)} 条记录,{len(df.columns)} 个字段。' f'数值字段 {len(num_cols)} 个,分类字段 {len(cat_cols)} 个。' f'建议结合业务场景规划具体的数据分析维度,' f'以生成更具洞察力和指导意义的数据报告。', }] if num_cols and len(df) > 0: top_col = num_cols[0] chart_zone = get_chart_left_zone(content_top, 0.4, ctx=ctx) text_zone = get_insight_right_zone(content_top, 0.4, ctx=ctx) sample_vals = df[top_col['column_name']].dropna().head(10).tolist() sample_labels = [f'记录{i+1}' for i in range(len(sample_vals))] if sample_vals: add_bar_chart(slide, sample_labels, sample_vals, Emu(chart_zone.x), Emu(chart_zone.y), Emu(chart_zone.width), Emu(chart_zone.height), series_name=top_col.get('inferred_label', top_col['column_name']), color=colors.get('primary')) _add_structured_insight(slide, insight_items, Emu(text_zone.x), Emu(text_zone.y), Emu(text_zone.width), Emu(text_zone.height)) else: zone = get_full_width_zone(content_top, ctx=ctx) _add_structured_insight(slide, insight_items, Emu(zone.x), Emu(zone.y), Emu(zone.width), Emu(zone.height)) def _build_toc_page(prs, config, colors, fonts, template_profile): slide = _duplicate_master_slide(prs, template_profile, 'toc', keep_shapes=True) active_pages = [p for p in config.pages if p.selected and p.page_type not in ('cover', 'toc', 'end')] _replace_all_placeholders(slide, { '{report_title}': config.title, '{date}': config.period_str, '{page_title}': '目录', '{source}': config.source_label, '{period}': f'2/{len(config.pages)}', '{page_num}': '', }, fonts) for i, page in enumerate(active_pages[:6], 1): _replace_placeholder(slide, f'{{chapter{i}_title}}', page.title, fonts) _replace_placeholder(slide, f'{{chapter{i}_desc}}', page.conclusion_title or page.title, fonts) def _build_kpi_overview_page(prs, config, metrics, colors, fonts, content_top, df=None, profile=None, ctx=None): slide = _duplicate_master_slide(prs, _resolve_template_profile(config), 'content') page_title = '核心指标概览' _replace_all_placeholders(slide, { '{report_title}': config.title, '{date}': config.period_str, '{page_title}': page_title, '{source}': config.source_label, '{period}': '', '{page_num}': '', }, fonts) kpi_items = [] primary_vals = {} all_vals = {} for md in config.metrics: if md.metric_type.value == 'kpi' and md.selected: val = metrics.get(md.name, 0) display_val = format(val, md.format_spec) if isinstance(val, (int, float)) else str(val) kpi_items.append({ 'label': md.label, 'value': display_val, 'unit': md.unit, 'change': '', 'sub': '', }) if md.is_primary: primary_vals[md.label] = val all_vals[md.label] = val if kpi_items: kpi_count = len(kpi_items) if kpi_count <= 3: _add_kpi_cards(slide, kpi_items, start_y=Emu(content_top)) else: shown_kpis = kpi_items[:9] compact_card_h = Emu(1780000) if len(shown_kpis) <= 6 else Emu(1600000) kpi_bottom = _add_compact_kpi_cards( slide, shown_kpis, start_y=Emu(content_top), card_h=compact_card_h, gap_y=Emu(220000), ) insight_items = [] kpi_names = [m.label for m in config.metrics if m.selected] kpi_str = "、".join(kpi_names[:6]) if kpi_names else "各指标" if len(kpi_names) > 6: kpi_str += f'等{len(kpi_names)}项' primary_kpis = [m for m in config.metrics if m.is_primary and m.selected] if not primary_kpis: primary_kpis = [m for m in config.metrics if m.selected][:3] kpi_detail_parts = [] for i, pk in enumerate(primary_kpis): val = all_vals.get(pk.label, 0) unit_str = pk.unit if pk.unit else '' display_val = format(val, pk.format_spec) if isinstance(val, (int, float)) else str(val) kpi_detail_parts.append(f'{pk.label}: {display_val}{unit_str}') insight_items.append({ 'title': '核心数据概览', 'content': f'本期报告涵盖 {kpi_str} 共 {len(kpi_names)} 项核心指标。' f'{";".join(kpi_detail_parts[:4])}。' f'其中{"、".join(p.label for p in primary_kpis[:3])}为本次分析的重点关注指标。' f'建议将这些指标与历史同期数据进行纵向对比,以及与行业基准进行横向对标,以全面评估当前业务健康度。' f'对于波动较大的指标,需深入追溯其背后的业务动因,判断是否为趋势性变化还是季节性波动。', }) cat_cols = profile.get('category_columns', []) if profile else [] num_cols = profile.get('numeric_columns', []) if profile else [] total_rows = profile.get('total_rows', 0) if profile else 0 if cat_cols: top_cats = [c.get('inferred_label', c.get('column_name', '')) for c in cat_cols[:3]] cat_details = [] for c in cat_cols[:3]: uc = c.get('unique_count', 'N/A') cat_details.append(f'{c.get("inferred_label", c.get("column_name", ""))}({uc}类)') insight_items.append({ 'title': '数据覆盖与维度分析', 'content': f'数据覆盖 {total_rows:,} 条记录,包含 {", ".join(cat_details)} 等多个分析维度。' f'丰富的维度数据支持从 {", ".join(top_cats)} 等角度进行多维度联动分析。' f'建议关注各维度下的数据分布特征,识别高贡献或异常的分类群体,' f'针对性地分析不同维度的表现差异,为精细化运营和数据驱动决策提供支撑。', }) if len(config.metrics) >= 3: compare_items = [] for a, b in zip(primary_kpis[:2], primary_kpis[1:3]): va = all_vals.get(a.label, 0) vb = all_vals.get(b.label, 0) if va and vb: ratio = round(va / vb, 2) if vb else 0 compare_items.append(f'{a.label}与{b.label}的比值为 {ratio}') if compare_items: insight_items.append({ 'title': '指标间关联分析', 'content': f'{";".join(compare_items)}。通过指标间的比值关系可以发现数据的内在规律,' f'比值异常偏离正常区间时需重点关注。建议进一步计算各指标与核心业务目标之间的相关系数,' f'量化不同指标对业务目标的影响力排序,将有限资源聚焦在驱动型指标上。', }) else: insight_items.append({ 'title': '指标间关联分析', 'content': f'本期核心指标包括 {", ".join(p.label for p in primary_kpis[:3])}。' f'建议通过散点图或相关系数分析探索指标间的线性/非线性关系,识别是否存在协同或对冲效应。' f'同时建议按时间序列分析各指标的周期性规律,为资源配置和预测提供依据。', }) insight_items.append({ 'title': '关键发现与行动建议', 'content': f'综合分析 {len(kpi_names)} 项指标,建议重点关注以下方向:' f'(1) 定期监控核心指标的趋势变化,建立异常预警机制,当指标偏离正常区间时及时触发排查流程;' f'(2) 深化多维度交叉分析,挖掘不同群体间的结构差异,识别增长机会和风险点;' f'(3) 结合业务经验和外部数据,验证数据指标的准确性和合理性;' f'(4) 将分析结论转化为可执行的具体行动项,明确责任人和时间节点,建立跟踪闭环机制。', }) if kpi_count > 9: extra_names = '、'.join(k['label'] for k in kpi_items[9:15]) insight_items.append({ 'title': '更多核心指标说明', 'content': f'本页优先展示前 9 个核心指标,其余 {kpi_count - 9} 个指标(如 {extra_names})' f'已纳入综合分析口径。建议在页面结构确认阶段将核心指标按“结果指标、过程指标、风险指标”分组,' f'必要时拆分为多页 KPI 看板,以保证每个指标都有足够的解释空间。', }) if kpi_count <= 3: kpi_grid_bottom = int(content_top) + Emu(3048000) else: kpi_grid_bottom = max(kpi_bottom, int(content_top) + Emu(1780000)) insight_zone_y = kpi_grid_bottom + Emu(254000) remaining_height = int(FOOTER_TOP - insight_zone_y - Emu(140000)) if remaining_height >= Emu(950000): if kpi_count <= 3: compact_items = insight_items[:3] else: compact_items = insight_items[:3] if kpi_count <= 6 else insight_items[:4] _add_structured_insight(slide, compact_items, Emu(CONTENT_LEFT), Emu(insight_zone_y), Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT), Emu(remaining_height), title_size=Pt(10), body_size=Pt(9), min_body_size=Pt(8)) elif kpi_count > 3: fallback_top = max(insight_zone_y, int(FOOTER_TOP) - int(Emu(1250000))) fallback_height = int(FOOTER_TOP - fallback_top - Emu(120000)) fallback_items = insight_items[:2] _add_structured_insight(slide, fallback_items, Emu(CONTENT_LEFT), Emu(fallback_top), Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT), Emu(max(fallback_height, Emu(850000))), title_size=Pt(9), body_size=Pt(8), min_body_size=Pt(7)) def _build_trend_page(prs, config, df, profile, colors, fonts, content_top, ctx=None): slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content") time_cols = profile.get('time_columns', []) num_cols = profile.get('numeric_columns', []) if not time_cols or not num_cols: _remove_slide(prs, slide) return False time_col = time_cols[0]['column_name'] metric_col = num_cols[0]['column_name'] label = num_cols[0].get('inferred_label', metric_col) page_title = f'{label}趋势' _replace_all_placeholders(slide, { '{report_title}': config.title, '{date}': config.period_str, '{page_title}': page_title, '{source}': config.source_label, '{period}': '', '{page_num}': '', }, fonts) trend_data = calc_generic_trend(df, time_col, metric_col) if trend_data.get('dates'): chart_zone = get_chart_left_zone(content_top, 0.6, ctx=ctx) text_zone = get_insight_right_zone(content_top, 0.6, ctx=ctx) add_line_chart(slide, trend_data['dates'], trend_data['values'], Emu(chart_zone.x), Emu(chart_zone.y), Emu(chart_zone.width), Emu(chart_zone.height), series_name=label, color=colors.get('primary')) dates = trend_data['dates'] vals = trend_data['values'] n = len(vals) first_v, last_v = vals[0], vals[-1] change = last_v - first_v change_pct = round(change / first_v * 100, 1) if first_v else 0 max_v = max(vals) if vals else 0 min_v = min(vals) if vals else 0 max_idx = vals.index(max_v) if vals else 0 min_idx = vals.index(min_v) if vals else 0 peak_date = dates[max_idx] if max_idx < len(dates) else 'N/A' trough_date = dates[min_idx] if min_idx < len(dates) else 'N/A' direction_text = '上升' if change > 0 else '下降' if change < 0 else '平稳' volatility = round((max_v - min_v) / (sum(vals) / n) * 100, 1) if sum(vals) else 0 if vals else 0 insight_items = [ { 'title': f'{label}整体趋势概况', 'content': f'在报告周期内共采集 {n} 个时间点的数据,{label}' f'从 {dates[0]} 的 {first_v:,.0f} 变动至 {dates[-1]} 的 {last_v:,.0f},' f'整体{direction_text}{abs(change_pct):.1f}%,{direction_text}趋势{"显著" if abs(change_pct) > 20 else "温和" if abs(change_pct) > 5 else "较为平缓"}。' f'数据变化轨迹反映出{"持续向好的增长态势" if direction_text == "上升" and abs(change_pct) > 10 else "温和改善的积极信号" if direction_text == "上升" else "回调盘整的阶段性特征" if direction_text == "下降" else "平稳运行的基本状态"},' f'建议将当前趋势与业务目标和历史同期数据进行交叉对比,评估达成全年目标的可行性。如需更详尽的趋势分析,建议增加数据采集频度和时间跨度。', }, { 'title': '峰值与谷值分析', 'content': f'周期内最高值出现在 {peak_date},为 {max_v:,.0f};' f'最低值出现在 {trough_date},为 {min_v:,.0f}。' f'极值差距 {max_v - min_v:,.0f},波动幅度 {volatility}%,' f'{"波动显著,需关注异常节点的驱动因素,建议排查是否受节假日、促销活动、外部政策变化等因素影响" if volatility > 30 else "波动在可控范围内,但仍需对异常波动保持警觉"}{"." if volatility > 30 else ",建立异常值的快速预警和响应机制。"}', }, { 'title': '趋势阶段性特征', 'content': f'前半程({dates[0]}至{dates[min(n//2, n-1)]})' f'{"呈上升态势" if sum(vals[:n//2]) < sum(vals[n//2:]) else "呈下降态势" if sum(vals[:n//2]) > sum(vals[n//2:]) else "基本持平"},' f'后半程均值为 {sum(vals[n//2:])/(n-n//2):,.0f}。建议结合业务事件节点深入分析拐点成因,' f'重点关注是否存在季节性波动、周期性波动或外部冲击等结构性因素。' f'若数据量较少,趋势解读应以业务经验为主,辅以数据验证。', }, { 'title': '业务启示', 'content': f'综合趋势分析,当前数据反映出{"积极向好的发展态势" if direction_text == "上升" and abs(change_pct) > 10 else "温和稳定的运行动态" if abs(change_pct) <= 10 else "需重点关注的下行风险"}。' f'建议{"加大资源投入以把握增长机遇,同时关注增速的可持续性,避免盲目扩张" if direction_text == "上升" else "排查下降原因并制定针对性应对措施,分析是短期波动还是长期趋势转折" if direction_text == "下降" else "保持当前运营节奏,同时关注潜在变化信号,适时调整策略" if direction_text == "平稳" else "继续观察数据走势"}。' f'建议将数据与业务KPI目标进行对标分析,定期回顾趋势变化。', }, ] _add_structured_insight(slide, insight_items, Emu(text_zone.x), Emu(text_zone.y), Emu(text_zone.width), Emu(text_zone.height)) return True return False def _build_distribution_page(prs, config, df, profile, colors, fonts, content_top, page_def=None, ctx=None): slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content") cat_cols = profile.get('category_columns', []) num_cols = profile.get('numeric_columns', []) if not cat_cols: _remove_slide(prs, slide) return False elem = (page_def.elements or [{}])[0] if page_def else {} cat_col = elem.get('category') or cat_cols[0]['column_name'] cat_label = elem.get('category_label') or next( (c.get('inferred_label', cat_col) for c in cat_cols if c['column_name'] == cat_col), cat_col) metric_col = elem.get('metric') or (num_cols[0]['column_name'] if num_cols else None) metric_label = elem.get('metric_label') or (next( (c.get('inferred_label', metric_col) for c in num_cols if c['column_name'] == metric_col), metric_col) if metric_col else '') page_title = page_def.title if page_def and page_def.title else f'{cat_label}分布' _replace_all_placeholders(slide, { '{report_title}': config.title, '{date}': config.period_str, '{page_title}': page_title, '{source}': config.source_label, '{period}': '', '{page_num}': '', }, fonts) dist = calc_generic_distribution(df, cat_col, metric_col, top_n=8) if dist.get('categories'): chart_zone = get_chart_left_zone(content_top, 0.55, ctx=ctx) text_zone = get_insight_right_zone(content_top, 0.55, ctx=ctx) if len(dist['categories']) <= 8: add_doughnut_chart(slide, dist['categories'], dist['values'], Emu(chart_zone.x), Emu(chart_zone.y), Emu(chart_zone.width), Emu(chart_zone.height), colors=colors.get('series')) else: add_bar_chart(slide, dist['categories'], dist['values'], Emu(chart_zone.x), Emu(chart_zone.y), Emu(chart_zone.width), Emu(chart_zone.height), series_name=metric_label, color=colors.get('primary')) cats, vals, pcts = dist['categories'], dist['values'], dist['percentages'] grand_total = sum(vals) top3_pct = sum(pcts[:3]) top1_name, top1_val, top1_pct = cats[0], vals[0], pcts[0] metric_suffix = metric_label if metric_label else '数量' insight_items = [ { 'title': f'{cat_label}分布概况', 'content': f'共有 {len(cats)} 个不同的{cat_label},覆盖范围' f'{"广泛" if len(cats) >= 8 else "较为丰富" if len(cats) >= 5 else "相对集中"}。' f'前3名合计占比 {top3_pct:.1f}%,集中度' f'{"较高,呈现显著的头部集中特征" if top3_pct > 70 else "中等,呈现梯度递减分布" if top3_pct > 50 else "较低,分布较为均衡"}。', }, { 'title': f'排名第一: {top1_name}', 'content': f'{top1_name}以 {top1_val:,}{metric_suffix}(占比 {top1_pct:.1f}%)位居榜首,' f'{"是第二名" + cats[1] + "的" + f"{round(top1_val/vals[1],1)}" + "倍,优势极为显著" if len(cats) > 1 else "是该维度中最重要的类别"}。' f'该类别贡献了超过三分之一的{metric_label},是整体业务的基本盘和核心增长极。', }, ] if len(vals) >= 3: top3_sum = sum(vals[:3]) tail_sum = sum(vals[3:]) tail_pct = sum(pcts[3:]) insight_items.append({ 'title': '长尾分布特征', 'content': f'前三名累计 {top3_sum:,}{metric_suffix}({top3_pct:.1f}%),' f'剩余 {len(cats)-3} 个合计 {tail_sum:,}{metric_suffix}({tail_pct:.1f}%),' f'属于{"头部集中型分布" if top3_pct > 70 else "相对均衡分布" if top3_pct < 50 else "梯度递减型分布"}。' f'头部贡献了绝大部分{metric_label},尾部虽数量众多但单个贡献有限。', }) if len(vals) > 1: avg_val = sum(vals) / len(vals) cv = round(vals[0] / avg_val, 1) if avg_val else 0 median_idx = len(vals) // 2 median_val = vals[median_idx] insight_items.append({ 'title': '差异化与离散度分析', 'content': f'排名第一的{cat_label}{top1_name}的{metric_suffix}是全部分类均值的 {cv} 倍,' f'中位数分类(第{median_idx+1}名)为 {median_val:,}{metric_suffix},' f'表明该维度{"差异化显著,资源集中度较高" if cv > 3 else "差异化适中,各分类间差距可控" if cv > 1.5 else "分布较为均匀"}。' f'头部与中位数的差距反映了{cat_label}维度上的分层特征,是运营资源重点倾斜方向。', }) insight_items.append({ 'title': '业务启示', 'content': f'建议重点关注 {cats[0]} 的增量拓展与存量维护,同时深入分析排名中位类别的提升空间。' f'对于 {metric_label}贡献较小的尾部类别(如占比低于3%的分类),可评估是否优化资源配置、' f'调整运营策略或将资源向高回报类别倾斜。结合{cat_label}维度持续跟踪分布变化,及时把握结构性机会。', }) _add_structured_insight(slide, insight_items, Emu(text_zone.x), Emu(text_zone.y), Emu(text_zone.width), Emu(text_zone.height)) return True return False def _build_ranking_page(prs, config, df, profile, colors, fonts, content_top, page_def=None, ctx=None): slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content") cat_cols = profile.get('category_columns', []) num_cols = profile.get('numeric_columns', []) if not cat_cols or not num_cols: _remove_slide(prs, slide) return False elem = (page_def.elements or [{}])[0] if page_def else {} rank_col = elem.get('category') or cat_cols[-1]['column_name'] rank_label = elem.get('category_label') or next( (c.get('inferred_label', rank_col) for c in cat_cols if c['column_name'] == rank_col), rank_col) metric_col = elem.get('metric') or num_cols[0]['column_name'] metric_label = elem.get('metric_label') or next( (c.get('inferred_label', metric_col) for c in num_cols if c['column_name'] == metric_col), metric_col) page_title = page_def.title if page_def and page_def.title else f'{rank_label}TOP排行' _replace_all_placeholders(slide, { '{report_title}': config.title, '{date}': config.period_str, '{page_title}': page_title, '{source}': config.source_label, '{period}': '', '{page_num}': '', }, fonts) ranking = calc_generic_ranking(df, rank_col, metric_col, top_n=15) if ranking: chart_zone = get_chart_left_zone(content_top, 0.6, ctx=ctx) text_zone = get_insight_right_zone(content_top, 0.6, ctx=ctx) names = [r['name'] for r in ranking] vals = [r['value'] for r in ranking] add_bar_chart(slide, names, vals, Emu(chart_zone.x), Emu(chart_zone.y), Emu(chart_zone.width), Emu(chart_zone.height), series_name=metric_label, color=colors.get('primary')) total_val = sum(vals) top3_names = [r['name'] for r in ranking[:3]] top3_vals = [r['value'] for r in ranking[:3]] top3_pct = [round(v / total_val * 100, 1) for v in top3_vals] if total_val else [0, 0, 0] top1_vs_last = round(vals[0] / vals[-1], 1) if len(vals) > 1 and vals[-1] > 0 else 'N/A' insight_items = [ { 'title': f'{rank_label}TOP排行概况', 'content': f'共展示 {len(ranking)} 个排名项,前3名分别为 {top3_names[0]}、{top3_names[1]}、' f'{top3_names[2]},累计 {sum(top3_vals):,}{metric_label}({sum(top3_pct):.1f}%)。' f'前三名合计贡献超过总量的三分之一,表明{rank_label}维度呈现{"显著的头部集中特征" if sum(top3_pct) > 60 else "梯度递减的分布格局" if sum(top3_pct) > 40 else "相对均衡的分布态势"}。', }, { 'title': f'榜首分析: {top3_names[0]}', 'content': f'{top3_names[0]}以 {top3_vals[0]:,}{metric_label}(占比 {top3_pct[0]:.1f}%)位居榜首,' f'{"是第2名" + top3_names[1] + "的" + f"{round(top3_vals[0]/top3_vals[1],1)}倍,领先优势显著" if len(ranking) > 1 and top3_vals[1] > 0 else "优势突出"}。' f'作为排名第一的{rank_label},其业绩表现直接影响整体业务大盘,建议重点关注其可持续增长策略。', }, { 'title': '头部与尾部差距分析', 'content': f'第1名与第{len(ranking)}名差距达 {top1_vs_last} 倍,' f'前5名平均 {round(sum(vals[:5])/5):,}{metric_label},' f'后5名平均 {round(sum(vals[-5:])/5):,}{metric_label},' f'前后差距约 {round((sum(vals[:5])/5)/(sum(vals[-5:])/5),1) if sum(vals[-5:]) > 0 else "N/A"} 倍。' f'{"头部效应极为明显,需关注是否因资源分配不均导致" if isinstance(top1_vs_last, float) and top1_vs_last > 10 else "差距较为显著,存在分层优化的空间" if isinstance(top1_vs_last, float) and top1_vs_last > 5 else "梯度分布相对均衡,可针对性提升各层级表现"}。', }, { 'title': '累计贡献率与分层分析', 'content': f'前5名累计贡献 {sum(vals[:5]):,}{metric_label}({round(sum(vals[:5])/total_val*100,1) if total_val else 0}%),' f'前10名累计贡献 {sum(vals[:10]):,}{metric_label}({round(sum(vals[:10])/total_val*100,1) if total_val else 0}%),' f'剩余 {len(ranking)-10} 名合计贡献 {sum(vals[10:]):,}{metric_label}({round(sum(vals[10:])/total_val*100,1) if total_val else 0}%)。' f'从分层结构来看,可划分为三个梯队:第一梯队(前3名)为业绩核心贡献者,第二梯队(第4-8名)为稳定输出层,' f'第三梯队(第9名及以后)为潜力提升层。', }, { 'title': '业务建议', 'content': f'重点关注 {", ".join(top3_names)} 的发展动态,提炼其成功经验并推广至团队。' f'对于排名靠后的{rank_label},可评估其增长潜力与资源匹配度,' f'识别可突破的增量空间。建议建立{rank_label}的绩效考核与激励体系,' f'通过标杆带动和梯队培养实现整体业绩提升。', }, ] _add_structured_insight(slide, insight_items, Emu(text_zone.x), Emu(text_zone.y), Emu(text_zone.width), Emu(text_zone.height)) return True return False def _build_summary_page(prs, config, metrics, profile, colors, fonts, content_top, page_def=None, ctx=None): slide = _duplicate_master_slide(prs, _resolve_template_profile(config), "content") page_title = page_def.title if page_def and page_def.title else '总结与建议' _replace_all_placeholders(slide, { '{report_title}': config.title, '{date}': config.period_str, '{page_title}': page_title, '{source}': config.source_label, '{period}': '', '{page_num}': '', }, fonts) elem = (page_def.elements or [{}])[0] if page_def else {} if elem.get('support_status') is not None: status = elem['support_status'] dept = elem.get('support_by_dept', {}) sc = elem.get('support_count', 0) cc = elem.get('closed_count', 0) close_rate = round(cc / sc * 100, 1) if sc else 0 fully_closed = status.get('已闭环', 0) partial_closed = status.get('部分闭环', 0) not_closed = status.get('未闭环', 0) insight_items = [{ 'title': '支持需求总览', 'content': f'本期共产生 {sc} 项跨部门支持需求,其中已闭环 {cc} 项(含完全闭环 {fully_closed} 项、部分闭环 {partial_closed} 项),' f'闭环率 {close_rate}%。未闭环需求 {sc - cc} 项(占比 {round((sc-cc)/sc*100,1) if sc else 0}%),' f'闭环率{"较高,跨部门协作效率良好" if close_rate >= 60 else "处于中等水平,仍有提升空间" if close_rate >= 30 else "偏低,需重点关注闭环推动"}。' f'跨部门支持是保障项目推进的重要环节,高效的闭环机制有助于提升客户满意度和订单转化效率。', }] if status: total_status = sum(status.values()) fully_pct = round(fully_closed / total_status * 100, 1) if total_status else 0 partial_pct = round(partial_closed / total_status * 100, 1) if total_status else 0 not_pct = round(not_closed / total_status * 100, 1) if total_status else 0 insight_items.append({ 'title': '闭环状态明细', 'content': f'已闭环 {fully_closed} 项({fully_pct}%)、部分闭环 {partial_closed} 项({partial_pct}%)、' f'未闭环 {not_closed} 项({not_pct}%)。' f'其中完全闭环占比{"超过七成,闭环质量较高" if fully_pct >= 70 else "处于中等水平" if fully_pct >= 40 else "偏低,需提升闭环完整性"}。' f'部分闭环表明需求已部分满足但未完全解决,需持续跟踪至彻底闭环。', }) if dept: dept_top = list(dept.items())[:5] dept_top_sum = sum(v for _, v in dept_top) dept_total = sum(dept.values()) dept_str = '、'.join([f'{k}({v}项)' for k, v in dept_top]) avg_dept_load = round(dept_total / len(dept), 1) if dept else 0 max_dept = dept_top[0] insight_items.append({ 'title': '支持部门工作量分布', 'content': f'需求覆盖 {len(dept)} 个部门/科室,前5个部门承接 {dept_top_sum} 项({round(dept_top_sum/dept_total*100,1) if dept_total else 0}%)。' f'Top部门:{dept_str}。其中{max_dept[0]}承接最多({max_dept[1]}项),' f'平均每个部门承接 {avg_dept_load} 项。请关注工作量较大的部门资源分配是否充足,' f'同时识别是否有部门长期未被分配需求(可能表明资源未充分利用)。', }) if sc - cc > 0: insight_items.append({ 'title': '未闭环需求跟进建议', 'content': f'当前仍有 {sc - cc} 项需求未完成闭环。建议按以下策略推进:第一,按紧急程度和影响范围对未闭环需求进行优先级排序,' f'高优需求指定专人负责限期解决;第二,建立周度闭环跟踪机制,定期更新需求处理进展;' f'第三,对于跨部门协同的复杂需求,建议指定牵头部门统筹协调推进,' f'并建立问题升级机制(当需求超期未解决时自动升级至更高层级协调)。', }) insight_items.append({ 'title': '闭环效率提升建议', 'content': f'为持续提升支持需求闭环效率,建议:一是建立标准化的需求流转流程,明确各环节责任人和响应时限;' f'二是定期开展闭环案例复盘,提炼最佳实践并在团队内推广;' f'三是建立闭环率考核指标,将闭环时效纳入部门协作评价体系,' f'通过制度保障跨部门协作的效率和质量。', }) else: insight_items = generate_generic_insights(profile, metrics) insight_items = _ensure_min_insight_items( insight_items, profile=profile, metrics=metrics, min_count=2, context_label='总结页', ) zone = get_full_width_zone(content_top, ctx=ctx) _add_structured_insight(slide, insight_items, Emu(zone.x), Emu(zone.y), Emu(zone.width), Emu(zone.height)) def _build_end_page(prs, config, colors, fonts, template_profile): slide = _duplicate_master_slide(prs, template_profile, "end", keep_shapes=True) total = len([p for p in config.pages if p.selected]) _add_footer_if_missing(slide, f'数据来源:{config.source_label} | {total}/{total}', colors=colors) _replace_all_placeholders(slide, { '{report_title}': config.title, '{date}': config.period_str or '', '{department}': config.source_label, }, fonts) # Remove empty KPI placeholders on end page (same as cover) _remove_empty_cover_kpi_placeholders(slide) def _find_metric_def_by_column(config, column): for metric in getattr(config, 'metrics', []) or []: if getattr(metric, 'column', None) == column: return metric return None def _forecast_items_from_page_def(page_def, df, profile, metrics, config): elem = (page_def.elements or [{}])[0] if page_def else {} items = [] explicit_items = elem.get('forecast_items') or elem.get('goals') if explicit_items: for idx, item in enumerate(explicit_items[:6], 1): title = item.get('title') or item.get('label') or f'预测项{idx}' value = item.get('value') or item.get('number') or item.get('target') or 0 items.append({'title': str(title), 'number': value}) return items metric_names = elem.get('metrics') or elem.get('metric_names') or [] for metric_name in metric_names[:6]: if metric_name in metrics: metric_def = next((m for m in getattr(config, 'metrics', []) if m.name == metric_name), None) label = metric_def.label if metric_def else str(metric_name) items.append({'title': label, 'number': metrics.get(metric_name, 0)}) if items: return items num_cols = profile.get('numeric_columns', []) if profile else [] keyword_cols = [] keywords = ('预测', 'forecast', '目标', '计划', 'target', 'plan') for col in num_cols: col_name = col.get('column_name', '') label = col.get('inferred_label', col_name) if any(k in str(col_name).lower() or k in str(label).lower() for k in keywords): keyword_cols.append(col) for col in keyword_cols[:6]: col_name = col.get('column_name') metric_def = _find_metric_def_by_column(config, col_name) label = metric_def.label if metric_def else col.get('inferred_label', col_name) if metric_def and metric_def.name in metrics: value = metrics.get(metric_def.name, 0) elif col_name in df.columns: series = df[col_name].dropna() value = int(series.sum()) if not series.empty else 0 else: value = 0 items.append({'title': label, 'number': value}) return items def _generic_forecast_insights(page_def, forecast_items, profile, metrics): title = page_def.title if page_def else '预测与行动计划' total = sum(float(item.get('number') or 0) for item in forecast_items) item_desc = '、'.join(f"{item['title']} {item.get('number', 0):,.0f}" for item in forecast_items[:5]) if forecast_items: return [ { 'title': f'{title}目标概览', 'content': f'本页围绕已确认的预测/计划指标展开,当前纳入 {len(forecast_items)} 个量化项,' f'合计规模约 {total:,.0f}。主要项目包括:{item_desc}。' f'这些指标应与本期实际结果、历史同期和资源约束一起判断,避免只看单点预测值。', }, { 'title': '达成路径与风险控制', 'content': f'建议将预测目标拆解为“责任人、关键动作、时间节点、风险预案”四类信息。' f'如果目标值明显高于本期实际表现,应同步确认新增订单、库存、产能、交付或预算等支撑条件;' f'如果目标值低于当前趋势,则需要说明保守假设,防止业务团队误判资源投入强度。', }, ] total_rows = profile.get('total_rows', 0) if profile else 0 return [ { 'title': f'{title}口径说明', 'content': f'当前页面未检测到明确的预测或目标数值字段,因此以数据画像和核心指标进行预测口径说明。' f'本期数据覆盖 {total_rows or "若干"} 条记录,建议在六项确认阶段明确预测指标、目标字段和统计口径,' f'例如下月交付、销售目标、库存消化、需求闭环或风险事件数量。', }, { 'title': '补充数据建议', 'content': f'为了生成更可靠的预测页,建议在源数据中补充至少一个预测/目标字段,并提供历史实际值用于校准。' f'报告生成后应检查预测值是否与图表一致,文字洞察是否说明关键假设、达成路径和偏差处理机制。', }, ] def _build_forecast_page(prs, config, df, profile, metrics, colors, content_top, page_def=None): slide = _duplicate_slide(prs, prs.slides[1]) page_title = page_def.title if page_def and page_def.title else '预测与行动计划' _replace_all_placeholders(slide, { '{report_title}': config.title, '{date}': config.period_str, '{page_title}': page_title, '{source}': config.source_label, '{period}': '', '{page_num}': '', }, fonts) forecast_items = _forecast_items_from_page_def(page_def, df, profile, metrics, config) if not forecast_items and metrics.get('next_month_goals'): forecast_items = [ {'title': g['title'].split(':')[0], 'number': g.get('number', 0)} for g in metrics.get('next_month_goals', [])[:6] ] chart_zone = get_chart_left_zone(content_top, 0.58, ctx=ctx) text_zone = get_insight_right_zone(content_top, 0.58, ctx=ctx) if forecast_items: names = [item['title'] for item in forecast_items[:6]] values = [float(item.get('number') or 0) for item in forecast_items[:6]] add_column_chart(slide, names, values, Emu(chart_zone.x), Emu(chart_zone.y), Emu(chart_zone.width), Emu(min(chart_zone.height, Emu(5100000))), series_name='预测/目标值', color=colors.get('accent', C_ACCENT), category_axis_title='预测项', value_axis_title='数值') insight_items = _generic_forecast_insights(page_def, forecast_items, profile, metrics) insight_items = _ensure_min_insight_items(insight_items, profile, metrics, context_label='预测页') _add_structured_insight(slide, insight_items, Emu(text_zone.x), Emu(text_zone.y), Emu(text_zone.width), Emu(text_zone.height)) # ============================================================================== # CLI # ============================================================================== if __name__ == '__main__': import sys if len(sys.argv) >= 3: from report_config import load_report_config data_file = sys.argv[1] config_file = sys.argv[2] output = sys.argv[3] if len(sys.argv) >= 4 else 'output.pptx' config = load_report_config(config_file) quality_assured_build(data_file, config, output) else: print("Usage: python ppt_builder.py [output_path]")