|
|
@@ -0,0 +1,587 @@
|
|
|
+"""
|
|
|
+Template parser engine for the universal data report generator.
|
|
|
+Reads any .pptx template and outputs a structured TemplateProfile describing
|
|
|
+master slide types, placeholders, colors, fonts, and layout geometry.
|
|
|
+"""
|
|
|
+from __future__ import annotations
|
|
|
+
|
|
|
+import os
|
|
|
+import re
|
|
|
+from dataclasses import dataclass, field
|
|
|
+from pathlib import Path
|
|
|
+from typing import Optional
|
|
|
+
|
|
|
+from pptx import Presentation
|
|
|
+from pptx.dml.color import RGBColor
|
|
|
+from pptx.util import Emu
|
|
|
+
|
|
|
+
|
|
|
+# ==============================================================================
|
|
|
+# DATA MODELS
|
|
|
+# ==============================================================================
|
|
|
+
|
|
|
@dataclass
class MasterSlideInfo:
    """Per-slide summary produced while parsing a template.

    One instance describes a single slide of the template: where it sits,
    which semantic role it was assigned, and a few layout facts.
    """

    # Zero-based position of the slide inside the presentation.
    slide_index: int
    # Semantic role: 'cover' | 'content' | 'toc' | 'end' | 'unknown'.
    master_type: str
    # Brace-delimited tokens (e.g. "{page_title}") found on the slide.
    placeholders: list[str] = field(default_factory=list)
    # Y coordinate, in EMU, at which body content may start.
    content_top: int = 0
    # Whether footer-style text was detected on the slide.
    has_footer: bool = False
    # Whether a background fill or background-sized shape was detected.
    has_background: bool = False
    # Number of shapes found on the slide.
    shape_count: int = 0
|
|
|
+
|
|
|
+
|
|
|
@dataclass
class TemplateProfile:
    """Structured description of a parsed .pptx template.

    Holds the slide size, one ``MasterSlideInfo`` per template slide, and the
    placeholder / theme / font / margin data gathered across those slides.
    """

    # Absolute path of the parsed template file.
    path: str
    # Whether the template was recognised as one of the bundled templates.
    is_builtin: bool
    # Slide dimensions in EMU.
    slide_width: int
    slide_height: int
    # One entry per template slide, in slide order.
    master_slides: list[MasterSlideInfo] = field(default_factory=list)
    # Placeholder token -> indices of the slides that contain it.
    placeholder_map: dict[str, list[int]] = field(default_factory=dict)
    # Colour role -> "#RRGGBB" string.
    detected_theme: dict[str, str] = field(default_factory=dict)
    # Font role -> font family name.
    detected_fonts: dict[str, str] = field(default_factory=dict)
    # Margin side -> size in EMU.
    safe_margins: dict[str, int] = field(default_factory=dict)

    def get_master_for(self, page_type: str) -> Optional[MasterSlideInfo]:
        """Return the first slide whose ``master_type`` equals *page_type*.

        Returns ``None`` when no slide has that type.
        """
        return next(
            (info for info in self.master_slides if info.master_type == page_type),
            None,
        )

    def get_content_top(self, page_type: str = "content") -> int:
        """Return the content start Y (EMU) for *page_type*.

        Resolution order: the first slide of *page_type* if it has a positive
        ``content_top``; otherwise the first 'content' slide with a positive
        ``content_top``; otherwise the fixed default of 1422400 EMU.
        """
        exact = self.get_master_for(page_type)
        if exact and exact.content_top > 0:
            return exact.content_top

        # Borrow the value from any content slide that has one.
        usable = [
            info.content_top
            for info in self.master_slides
            if info.master_type == "content" and info.content_top > 0
        ]
        if usable:
            return usable[0]

        # Nothing detected anywhere: hard-coded default.
        return int(Emu(1422400))

    def get_master_index_for(self, page_type: str) -> int:
        """Return the slide index to use for *page_type*.

        An exact type match wins. Without one, position-based guesses apply:
        first slide for 'cover', last slide for 'end', third slide for 'toc'
        (when at least three slides exist), otherwise the second slide when
        there are at least two, and finally index 0.
        """
        match = self.get_master_for(page_type)
        if match:
            return match.slide_index

        slides = self.master_slides
        count = len(slides)
        if count and page_type == "cover":
            return slides[0].slide_index
        if count and page_type == "end":
            return slides[-1].slide_index
        if count >= 3 and page_type == "toc":
            return slides[2].slide_index
        if count >= 2:
            return slides[1].slide_index
        return 0
|
|
|
+
|
|
|
+
|
|
|
+# ==============================================================================
|
|
|
+# PLACEHOLDER DETECTION
|
|
|
+# ==============================================================================
|
|
|
+
|
|
|
# Matches one brace-delimited token such as "{page_title}"; nested braces
# are not matched as part of the token.
_PLACEHOLDER_RE = re.compile(r"\{[^{}]+\}")

# Canonical placeholder -> every accepted spelling (the canonical one included).
# NOTE(review): "{source}" appears both as an alias of "{department}" and as a
# canonical key of its own -- confirm which meaning lookups are meant to prefer.
PLACEHOLDER_ALIASES: dict[str, list[str]] = {
    "{report_title}": ["{report_title}", "{标题}", "{title}", "{报告标题}"],
    "{report_type}": ["{report_type}", "{报告类型}", "{type}"],
    "{date}": ["{date}", "{日期}", "{report_date}", "{报告日期}"],
    "{department}": ["{department}", "{部门}", "{source}", "{来源}", "{dept}"],
    "{period}": ["{period}", "{周期}", "{report_period}", "{时间周期}"],
    "{gen_time}": ["{gen_time}", "{生成时间}", "{generated_time}"],
    "{page_title}": ["{page_title}", "{页面标题}", "{subtitle}", "{page_header}"],
    "{source}": ["{source}", "{数据来源}", "{data_source}"],
    "{page_num}": ["{page_num}", "{页码}", "{page_number}"],
}

# Numbered chapter placeholders (1..12) are registered programmatically.
for i in range(1, 13):
    PLACEHOLDER_ALIASES.update(
        {
            f"{{chapter{i}_title}}": [f"{{chapter{i}_title}}", f"{{章节{i}标题}}"],
            f"{{chapter{i}_desc}}": [f"{{chapter{i}_desc}}", f"{{章节{i}描述}}"],
        }
    )

# Numbered KPI placeholders (1..12), registered after all chapter entries.
for i in range(1, 13):
    PLACEHOLDER_ALIASES.update(
        {
            f"{{kpi{i}_label}}": [f"{{kpi{i}_label}}", f"{{kpi{i}_name}}"],
            f"{{kpi{i}_value}}": [f"{{kpi{i}_value}}", f"{{kpi{i}_val}}"],
        }
    )
|
|
|
+
|
|
|
+
|
|
|
def _scan_placeholders(slide) -> list[str]:
    """Collect every distinct ``{xxx}`` token found in the slide's text shapes.

    Only shapes reporting ``has_text_frame`` are inspected. The result is a
    sorted list without duplicates.
    """
    tokens = {
        hit.group(0)
        for shp in slide.shapes
        if shp.has_text_frame
        for hit in _PLACEHOLDER_RE.finditer(shp.text_frame.text or "")
    }
    return sorted(tokens)
|
|
|
+
|
|
|
+
|
|
|
def _normalize_placeholder(raw: str) -> Optional[str]:
    """Map a raw placeholder token to its canonical form.

    Matching is case-insensitive. A token that is itself a canonical key is
    returned as that key even when another entry also lists it as an alias.

    Args:
        raw: Placeholder text including its braces, e.g. ``"{Title}"``.

    Returns:
        The canonical placeholder, or ``None`` when the token is unknown.
    """
    needle = raw.lower()

    # Canonical keys win. Otherwise "{source}", which is also listed as an
    # alias of "{department}", could never resolve to its own entry because
    # the alias scan below reaches "{department}" first.
    for canonical in PLACEHOLDER_ALIASES:
        if canonical.lower() == needle:
            return canonical

    for canonical, aliases in PLACEHOLDER_ALIASES.items():
        if any(alias.lower() == needle for alias in aliases):
            return canonical
    return None
|
|
|
+
|
|
|
+
|
|
|
+# ==============================================================================
|
|
|
+# MASTER SLIDE TYPE DETECTION
|
|
|
+# ==============================================================================
|
|
|
+
|
|
|
# Slide type -> keywords that each add one point to that type's score in
# _detect_master_type. Keywords are compared as lowercase substrings of the
# slide's text, so short entries such as "end" also match inside longer words.
_TYPE_KEYWORDS: dict[str, list[str]] = {
    "cover": [
        "{report_title}",
        "{date}",
        "{department}",
        "{report_type}",
        "{gen_time}",
    ],
    "content": [
        "{page_title}",
        "{source}",
        "{page_num}",
        "{period}",
    ],
    "toc": [
        "{chapter",
        "contents",
        "目录",
        "catalog",
        "agenda",
    ],
    "end": [
        "{report_title}",
        "感谢",
        "thank",
        "结语",
        "尾页",
        "end",
    ],
}
|
|
|
+
|
|
|
+
|
|
|
def _detect_master_type(slide, slide_index: int, total_slides: int) -> str:
    """Guess the semantic role of a template slide.

    Each candidate type collects points from keyword hits, from the slide's
    position in the deck and from a few placeholder combinations; the type
    with the highest score is returned. When every score is zero the result
    depends on position only: 'cover' for the first slide, 'end' for the
    last, 'content' otherwise.

    Args:
        slide: Slide object exposing ``shapes``.
        slide_index: Zero-based index of the slide.
        total_slides: Number of slides in the presentation.

    Returns:
        One of the score keys: 'cover', 'content', 'toc', 'end' or 'unknown'.
    """
    lowered_texts = []
    tokens = []
    for shp in slide.shapes:
        if not shp.has_text_frame:
            continue
        content = (shp.text_frame.text or "").strip()
        if not content:
            continue
        lowered_texts.append(content.lower())
        tokens.extend(_PLACEHOLDER_RE.findall(content))

    text_block = " ".join(lowered_texts)
    ph_block = " ".join(tokens).lower()

    scores: dict[str, int] = dict.fromkeys(
        ("cover", "content", "toc", "end", "unknown"), 0
    )

    # Keyword hits: one point per keyword found in either text block.
    for ptype, keywords in _TYPE_KEYWORDS.items():
        scores[ptype] += sum(
            1
            for kw in keywords
            if kw.lower() in ph_block or kw.lower() in text_block
        )

    # Position in the deck.
    is_first = slide_index == 0
    is_last = slide_index == total_slides - 1
    if is_first:
        scores["cover"] += 2
    if is_last:
        scores["end"] += 2
    if total_slides >= 3 and slide_index == 2:
        scores["toc"] += 1

    # A page title without a report title points at a content page; when both
    # are present the slide is nudged towards 'cover' instead.
    if "{page_title}" in ph_block:
        if "{report_title}" in ph_block:
            scores["cover"] += 1
        else:
            scores["content"] += 3

    # Chapter placeholders are the strongest table-of-contents signal.
    if "{chapter" in ph_block:
        scores["toc"] += 5

    # Date plus department together favour a cover over an end page.
    if "{date}" in ph_block and "{department}" in ph_block:
        scores["cover"] += 2
        scores["end"] -= 1

    # The first KPI label also counts towards 'cover'.
    if "{kpi1_label}" in ph_block:
        scores["cover"] += 2

    best = max(scores, key=scores.get)
    if scores[best] != 0:
        return best

    # No signal at all: decide by position.
    if is_first:
        return "cover"
    if is_last:
        return "end"
    return "content"
|
|
|
+
|
|
|
+
|
|
|
+# ==============================================================================
|
|
|
+# CONTENT TOP DETECTION
|
|
|
+# ==============================================================================
|
|
|
+
|
|
|
def _detect_content_top(slide, default_gap: int = 381000) -> int:
    """Detect the Y coordinate (EMU) at which body content should start.

    Strategy, in order:
      1. Bottom edge of the first text shape that contains a page-title
         placeholder (any alias) and has a known position, plus *default_gap*.
      2. Bottom edge of the first short (< 40 chars), brace-free text shape
         whose top lies strictly between 500000 and 1500000 EMU, plus
         *default_gap*.
      3. The fixed default of 1422400 EMU.

    Shapes whose ``top`` or ``height`` is ``None`` are skipped rather than
    raising, so one position-less shape cannot abort parsing of the template.

    Args:
        slide: Slide object exposing ``shapes``.
        default_gap: Space in EMU added below the detected title shape.

    Returns:
        The content start position in EMU.
    """

    def _bottom_edge(shape) -> Optional[int]:
        # None when the shape carries no explicit geometry.
        if shape.top is None or shape.height is None:
            return None
        return int(shape.top) + int(shape.height)

    # 1. Explicit page-title placeholder.
    for shape in slide.shapes:
        if not shape.has_text_frame:
            continue
        text = shape.text_frame.text or ""
        if _matches_any_placeholder(text, "{page_title}"):
            bottom = _bottom_edge(shape)
            if bottom is not None:
                return bottom + default_gap

    # 2. Any text shape in the upper band that looks like a title.
    for shape in slide.shapes:
        if not shape.has_text_frame:
            continue
        bottom = _bottom_edge(shape)
        if bottom is None:
            continue
        if Emu(500000) < int(shape.top) < Emu(1500000):
            text = (shape.text_frame.text or "").strip()
            if text and len(text) < 40 and "{" not in text:
                return bottom + default_gap

    # 3. Nothing usable found.
    return int(Emu(1422400))
|
|
|
+
|
|
|
+
|
|
|
def _matches_any_placeholder(text: str, canonical: str) -> bool:
    """Return True when *text* contains *canonical* or one of its aliases.

    The comparison is case-insensitive so that it agrees with
    ``_normalize_placeholder``, which also ignores case. A canonical name
    with no entry in ``PLACEHOLDER_ALIASES`` is matched on its own.

    Args:
        text: Text to search, typically the full text of one shape.
        canonical: Canonical placeholder including braces.
    """
    haystack = text.lower()
    aliases = PLACEHOLDER_ALIASES.get(canonical, [canonical])
    return any(alias.lower() in haystack for alias in aliases)
|
|
|
+
|
|
|
+
|
|
|
+# ==============================================================================
|
|
|
+# COLOR EXTRACTION
|
|
|
+# ==============================================================================
|
|
|
+
|
|
|
def _extract_colors(slide) -> dict[str, str]:
    """Extract dominant colors from a slide's theme, shape fills and text.

    Returns:
        A dict of "#RRGGBB" strings. Possible keys are 'primary', 'accent',
        'accent2', 'accent_neg', 'text' and 'background'; a key is present
        only when a value could be determined. Fill- and text-derived values
        overwrite theme-derived ones for 'primary', 'accent' and 'text'.
    """
    colors: dict[str, str] = {}

    # Theme color scheme first. Deliberately best-effort: any failure while
    # reaching the theme simply leaves the theme-derived keys unset.
    # NOTE(review): confirm the installed python-pptx exposes
    # `slide_master.theme`; if it does not, this step is silently skipped.
    try:
        theme = slide.slide_layout.slide_master.theme
        cs = theme.color_scheme
        theme_map = {
            "primary": cs.accent1,
            "accent": cs.accent2,
            "accent2": cs.accent3,
            "accent_neg": cs.accent6,  # often red/orange
            "text": cs.text1,
            "background": cs.background1,
        }
        for key, color_obj in theme_map.items():
            try:
                rgb = color_obj.rgb
                if rgb:
                    colors[key] = _rgb_to_hex(rgb)
            except Exception:
                pass
    except Exception:
        pass

    # Tally fill and text colors over all shapes.
    fill_colors: dict[str, int] = {}
    text_colors: dict[str, int] = {}

    for shape in slide.shapes:
        # Fill colors: one vote per shape plus a bonus proportional to area.
        # Best-effort because not every shape or fill kind exposes an RGB.
        try:
            if hasattr(shape, "fill") and shape.fill.type is not None:
                if hasattr(shape.fill, "fore_color") and shape.fill.fore_color:
                    rgb = getattr(shape.fill.fore_color, "rgb", None)
                    if rgb:
                        hex_str = _rgb_to_hex(rgb)
                        fill_colors[hex_str] = fill_colors.get(hex_str, 0) + 1
                        area = int(shape.width) * int(shape.height)
                        fill_colors[hex_str] += area // 1000000000
        except Exception:
            pass

        # Text colors: one vote per run that has an explicit RGB color.
        try:
            if shape.has_text_frame:
                for para in shape.text_frame.paragraphs:
                    for run in para.runs:
                        if run.font.color and run.font.color.rgb:
                            hex_str = _rgb_to_hex(run.font.color.rgb)
                            text_colors[hex_str] = text_colors.get(hex_str, 0) + 1
        except Exception:
            pass

    # Primary: the highest-scoring dark fill.
    dark_fills = {h: c for h, c in fill_colors.items() if _is_dark_color(h)}
    if dark_fills:
        colors["primary"] = max(dark_fills, key=lambda k: dark_fills[k])

    # Accent: the highest-scoring fill that is colorful and not dark.
    bright_fills = {
        h: c
        for h, c in fill_colors.items()
        if _is_bright_color(h) and not _is_dark_color(h)
    }
    if bright_fills:
        colors["accent"] = max(bright_fills, key=lambda k: bright_fills[k])

    # Text: the most frequent run color, ignoring pure white/black unless it
    # is the only color seen. The tallied keys carry a leading '#', which
    # must be stripped before comparing against the bare hex literals;
    # without that the white/black filter could never match.
    if text_colors:
        text_col = max(text_colors, key=lambda k: text_colors[k])
        is_pure_bw = text_col.lstrip("#").upper() in ("FFFFFF", "000000")
        if not is_pure_bw or len(text_colors) == 1:
            colors["text"] = text_col

    return colors
|
|
|
+
|
|
|
+
|
|
|
def _rgb_to_hex(rgb) -> str:
    """Format a color as an uppercase ``#RRGGBB`` string.

    *rgb* is first treated as an indexable triple of channel values; if that
    fails it is converted with ``int()`` and formatted as one 24-bit number.
    ``None`` and anything that supports neither form yield ``"#333333"``.
    """
    fallback = "#333333"
    if rgb is None:
        return fallback

    # Preferred form: three indexable channels.
    try:
        red, green, blue = rgb[0], rgb[1], rgb[2]
        return "#{:02X}{:02X}{:02X}".format(red, green, blue)
    except Exception:
        pass

    # Second attempt: a single packed integer.
    try:
        return "#{:06X}".format(int(rgb))
    except Exception:
        return fallback
|
|
|
+
|
|
|
+
|
|
|
def _is_dark_color(hex_str: str) -> bool:
    """Return True when the weighted luminance of *hex_str* is below 120.

    Leading '#' characters are ignored. Input that is not exactly six hex
    digits after stripping yields False.
    """
    digits = hex_str.lstrip("#")
    if len(digits) == 6:
        try:
            r, g, b = (int(digits[pos:pos + 2], 16) for pos in (0, 2, 4))
        except Exception:
            return False
        # Weighted sum of the channels, on a 0-255 scale.
        return 0.299 * r + 0.587 * g + 0.114 * b < 120
    return False
|
|
|
+
|
|
|
+
|
|
|
def _is_bright_color(hex_str: str) -> bool:
    """Return True when the color's channel spread exceeds 40.

    The spread is ``max(r, g, b) - min(r, g, b)``, so grays score 0 and count
    as not bright. Leading '#' characters are ignored; input that is not
    exactly six hex digits after stripping yields False.
    """
    digits = hex_str.lstrip("#")
    if len(digits) == 6:
        try:
            channels = [int(digits[pos:pos + 2], 16) for pos in (0, 2, 4)]
        except Exception:
            return False
        return max(channels) - min(channels) > 40
    return False
|
|
|
+
|
|
|
+
|
|
|
+# ==============================================================================
|
|
|
+# FONT EXTRACTION
|
|
|
+# ==============================================================================
|
|
|
+
|
|
|
def _extract_fonts(slide) -> dict[str, str]:
    """Extract the dominant title and body fonts from a slide.

    Runs with an explicit font name are tallied per shape. Shapes whose top
    edge lies above 1,500,000 EMU (about 4.2 cm) count as title area, all
    others as body. Shapes without an explicit position (``top`` is None)
    are skipped, because they cannot be assigned to either area and would
    otherwise raise on ``int(None)``.

    Returns:
        A dict with 'title_font' and 'body_font' when detected, and always
        'number_font', which mirrors 'body_font' or falls back to "Arial".
    """
    title_zone_limit = 1500000  # EMU

    title_fonts: dict[str, int] = {}
    body_fonts: dict[str, int] = {}

    for shape in slide.shapes:
        if not shape.has_text_frame:
            continue
        if shape.top is None:
            continue
        bucket = title_fonts if int(shape.top) < title_zone_limit else body_fonts
        for para in shape.text_frame.paragraphs:
            for run in para.runs:
                font_name = run.font.name
                if font_name:
                    bucket[font_name] = bucket.get(font_name, 0) + 1

    result: dict[str, str] = {}
    if title_fonts:
        result["title_font"] = max(title_fonts, key=title_fonts.get)
    if body_fonts:
        result["body_font"] = max(body_fonts, key=body_fonts.get)
    # Numbers reuse the body font; "Arial" when no body font was found.
    result["number_font"] = result.get("body_font", "Arial")
    return result
|
|
|
+
|
|
|
+
|
|
|
+# ==============================================================================
|
|
|
+# SAFE MARGIN DETECTION
|
|
|
+# ==============================================================================
|
|
|
+
|
|
|
def _extract_safe_margins(slide) -> dict[str, int]:
    """Estimate safe margins (EMU) from the leftmost and topmost shapes.

    Only offsets strictly between 0 and 2,000,000 EMU are considered.
    The right margin mirrors the left one and the bottom margin is a fixed
    default, since neither is measured here. The result always contains all
    four keys: a side that cannot be detected falls back to its default
    (762000 for left/right, 254000 for top/bottom), so callers never
    receive a partially filled dict.

    Returns:
        A dict with the keys 'left', 'right', 'top' and 'bottom'.
    """
    max_offset = 2000000
    default_side = 762000
    default_vertical = 254000

    lefts = []
    tops = []
    for shape in slide.shapes:
        try:
            left = int(shape.left)
            top = int(shape.top)
        except (AttributeError, TypeError, ValueError):
            # Shape without usable position information: ignore it.
            continue
        if 0 < left < max_offset:
            lefts.append(left)
        if 0 < top < max_offset:
            tops.append(top)

    side = min(lefts) if lefts else default_side
    return {
        "left": side,
        "right": side,
        "top": min(tops) if tops else default_vertical,
        "bottom": default_vertical,
    }
|
|
|
+
|
|
|
+
|
|
|
+# ==============================================================================
|
|
|
+# BACKGROUND DETECTION
|
|
|
+# ==============================================================================
|
|
|
+
|
|
|
def _has_background(slide) -> bool:
    """Report whether the slide appears to carry its own background.

    True when the slide's background fill reports a type, or when a shape
    anchored at (0, 0) is wider than 10,000,000 EMU and taller than
    5,000,000 EMU. Errors while inspecting the background or a shape are
    ignored.
    NOTE(review): reading `slide.background.fill` may alter the in-memory
    background in python-pptx -- confirm this is acceptable for callers.
    """
    try:
        fill_kind = slide.background.fill.type
    except Exception:
        fill_kind = None
    if fill_kind is not None:
        return True

    for shp in slide.shapes:
        try:
            anchored = int(shp.left) == 0 and int(shp.top) == 0
            if (
                anchored
                and int(shp.width) > Emu(10000000)
                and int(shp.height) > Emu(5000000)
            ):
                return True
        except Exception:
            continue
    return False
|
|
|
+
|
|
|
+
|
|
|
def _has_footer(slide) -> bool:
    """Report whether the slide has footer-style placeholder text.

    A footer is a text shape whose top edge lies below 8,000,000 EMU and
    whose text contains the source, period or page-number placeholder.
    Aliases of those placeholders are accepted via
    ``_matches_any_placeholder``, matching how ``_detect_content_top``
    recognises the page title.
    NOTE(review): the 8,000,000 EMU threshold is absolute; confirm it suits
    templates with a smaller slide height.
    """
    footer_tokens = ("{source}", "{period}", "{page_num}")

    for shape in slide.shapes:
        if not shape.has_text_frame:
            continue
        try:
            if int(shape.top) <= Emu(8000000):
                continue
            text = (shape.text_frame.text or "").strip()
        except Exception:
            # Best-effort: a shape that cannot be inspected is not a footer.
            continue
        if any(_matches_any_placeholder(text, token) for token in footer_tokens):
            return True
    return False
|
|
|
+
|
|
|
+
|
|
|
+# ==============================================================================
|
|
|
+# MAIN PARSER
|
|
|
+# ==============================================================================
|
|
|
+
|
|
|
def parse_template(path: str) -> TemplateProfile:
    """Parse a .pptx template file and return a TemplateProfile.

    Every slide is classified and summarised as a ``MasterSlideInfo``.
    Placeholder tokens are mapped to the slides containing them (under their
    canonical name when known), and colors and fonts are chosen by majority
    vote across slides. Safe margins come from the first cover/content
    slide, layered over defaults so that all four sides are always present.

    Args:
        path: Path to the template; it is made absolute before loading.

    Returns:
        The populated ``TemplateProfile``.
    """
    abs_path = os.path.abspath(path)
    prs = Presentation(abs_path)

    total_slides = len(prs.slides)
    # NOTE(review): substring heuristic -- any path containing "assets" is
    # treated as built-in; confirm this is the intended rule.
    is_builtin = "assets" in abs_path.replace("\\", "/").lower()

    master_slides: list[MasterSlideInfo] = []
    placeholder_map: dict[str, list[int]] = {}
    color_votes: dict[str, dict[str, int]] = {}
    font_votes: dict[str, dict[str, int]] = {}

    def _tally(votes: dict[str, dict[str, int]], observed: dict[str, str]) -> None:
        # Add one vote for each (role, value) pair seen on a slide.
        for role, value in observed.items():
            bucket = votes.setdefault(role, {})
            bucket[value] = bucket.get(value, 0) + 1

    def _winners(votes: dict[str, dict[str, int]]) -> dict[str, str]:
        # Most-voted value per role; the first value seen wins a tie.
        return {
            role: max(counts, key=counts.get)
            for role, counts in votes.items()
            if counts
        }

    for idx, slide in enumerate(prs.slides):
        mtype = _detect_master_type(slide, idx, total_slides)
        placeholders = _scan_placeholders(slide)
        content_top = _detect_content_top(slide)

        master_slides.append(
            MasterSlideInfo(
                slide_index=idx,
                master_type=mtype,
                placeholders=placeholders,
                content_top=content_top,
                has_footer=_has_footer(slide),
                has_background=_has_background(slide),
                shape_count=len(list(slide.shapes)),
            )
        )

        # Placeholder -> slide indices; unknown tokens keep their raw spelling.
        for ph in placeholders:
            canonical = _normalize_placeholder(ph) or ph
            indices = placeholder_map.setdefault(canonical, [])
            if idx not in indices:
                indices.append(idx)

        _tally(color_votes, _extract_colors(slide))
        _tally(font_votes, _extract_fonts(slide))

    detected_theme = _winners(color_votes)
    detected_fonts = _winners(font_votes)

    # Safe margins: measured on the first cover/content slide and layered
    # over the defaults. The extractor always reports 'bottom', so an
    # "is the dict empty" check could never trigger the fallback and
    # 'left'/'right'/'top' could be left out of the profile.
    default_margins = {
        "left": 762000,
        "right": 762000,
        "top": 254000,
        "bottom": 254000,
    }
    detected_margins: dict[str, int] = {}
    for ms in master_slides:
        if ms.master_type in ("content", "cover"):
            detected_margins = _extract_safe_margins(prs.slides[ms.slide_index])
            break
    safe_margins = {**default_margins, **detected_margins}

    # Slide dimensions, with fixed defaults when the file declares none.
    slide_width = int(prs.slide_width) if prs.slide_width else 16256000
    slide_height = int(prs.slide_height) if prs.slide_height else 9144000

    return TemplateProfile(
        path=abs_path,
        is_builtin=is_builtin,
        slide_width=slide_width,
        slide_height=slide_height,
        master_slides=master_slides,
        placeholder_map=placeholder_map,
        detected_theme=detected_theme,
        detected_fonts=detected_fonts,
        safe_margins=safe_margins,
    )
|
|
|
+
|
|
|
+
|
|
|
def get_builtin_template_profile(report_type: str = "daily") -> TemplateProfile:
    """Parse one of the bundled templates and return its profile.

    *report_type* selects 'daily', 'weekly' or 'monthly'; any other value
    falls back to the daily template. Templates are looked up in the
    ``assets`` directory next to this module's parent directory.
    """
    assets_dir = os.path.join(os.path.dirname(__file__), "..", "assets")
    filenames = {
        "daily": "report-master.pptx",
        "weekly": "weekly-master.pptx",
        "monthly": "monthly-master.pptx",
    }
    chosen = filenames.get(report_type, filenames["daily"])
    return parse_template(os.path.join(assets_dir, chosen))
|
|
|
+
|
|
|
+
|
|
|
+# ==============================================================================
|
|
|
+# DEBUG
|
|
|
+# ==============================================================================
|
|
|
+
|
|
|
if __name__ == "__main__":
    import json

    # Debug aid: dump the profile of every bundled template to stdout.
    for rtype in ("daily", "weekly", "monthly"):
        profile = get_builtin_template_profile(rtype)
        report = [
            f"\n=== {rtype.upper()} TEMPLATE PROFILE ===",
            f" Path: {profile.path}",
            f" Size: {profile.slide_width} x {profile.slide_height}",
            " Masters:",
        ]
        report.extend(
            f" [{ms.slide_index}] {ms.master_type}: placeholders={ms.placeholders}, content_top={ms.content_top}"
            for ms in profile.master_slides
        )
        report.append(f" Theme: {profile.detected_theme}")
        report.append(f" Fonts: {profile.detected_fonts}")
        report.append(f" Margins: {profile.safe_margins}")
        print("\n".join(report))
|