2 месяца назад · cd6afa90f9
--- a/5月6日数据.xlsx
+++ b/5月6日数据.xlsx
--- a/5月6日质检测试_v2.pptx
+++ b/5月6日质检测试_v2.pptx
--- a/generate-data-report-ppt/SKILL.md
+++ b/generate-data-report-ppt/SKILL.md
@@ -1,25 +1,30 @@
 
															 ---
														
 
															 name: generate-data-report-ppt
														
 
															 description: >
														
 
															-  基于 Excel 业务明细数据，自动生成数据日报、周报、月报 PPT。
														
 
															+  通用数据报告 PPT 生成器。输入任意 Excel 数据文件，自动探查数据结构、计算指标、通过六项用户确认后生成高质量 PPT 报告。
														
 
															   图表使用原生 python-pptx 可编辑 Chart 对象（非 matplotlib PNG 插入）。
														
 
															-  当用户请求"生成日报/周报/月报"、"创建数据报告 PPT"、"输出业务报告 PPT"
														
 
															-  或任何涉及周期性数据报告的 PowerPoint 格式需求时触发此技能。
														
 
															+  内置质量自检引擎，自动检测布局/视觉/内容/数据问题并迭代修复至理想效果。
														
 
															 ---
														
 
															-# 生成数据报告 PPT
														
 
															+# 通用数据报告 PPT 生成器
														
 
															-自动生成周期性业务数据报告（日报 / 周报 / 月报）为原生可编辑 PowerPoint 文件。
														
 
															+输入任意 Excel 文件，自动生成高质量可编辑数据报告 PPT。
														
 
															 ## 工作流程
														
 
															-1. **接收输入**：数据文件路径、报告类型、日期/周期参数、可选部门/来源名称。
														
 
															-2. **加载数据**：`scripts/data_loader.py` 按日期范围读取 Excel 工作表，清洗并校验数据。
														
 
															-3. **计算指标**：`scripts/metrics_calculator.py` 根据报告类型计算 KPI、分布、趋势、告警、深度洞察。
														
 
															-4. **生成洞察**：`scripts/deep_insights.py` 生成结构化深度分析文本（标题 + 正文段落）。
														
 
															-5. **构建 PPT**：`scripts/ppt_builder.py` 从模板复制幻灯片，替换占位符，动态绘制导航标签，
														
 
															-   通过 `scripts/chart_factory.py` 插入原生图表、KPI 卡片、告警卡片、洞察文本块。
														
 
															-6. **输出**：保存 `.pptx` 文件。所有图表和表格均可在 PowerPoint 中编辑（右键 → 编辑数据）。
														
 
															+1.  **接收 Excel 文件**：用户提供任意格式的 Excel 数据文件。
														
 
															+2.  **数据探查**：`scripts/data_profiler.py` 自动检测 Schema、统计特征、数据质量及字段语义。
														
 
															+3.  **智能分析与推荐**：`scripts/agent_analyzer.py` 基于探查结果生成指标推荐、页面结构建议及可视化方案。
														
 
															+4.  **用户确认（六项）**：生成 PPT 前必须与用户确认以下内容：
														
 
															+    - 报告周期与页数范围
														
 
															+    - 核心指标集
														
 
															+    - 受众与决策场景
														
 
															+    - 视觉风格与配色方向
														
 
															+    - 页面结构与模板方案
														
 
															+    - 数据范围与字段映射
														
 
															+5.  **生成 PPT**：`scripts/ppt_builder.py` 按用户确认的配置，动态选择布局模板、应用主题配色、插入原生可编辑图表。
														
 
															+6.  **质量自检与修复**：`scripts/quality_inspector.py` 检查四类 25 项质量标准，发现问题自动修复，迭代至阈值达标。
														
 
															+7.  **输出**：保存 `.pptx` 文件。所有图表和表格均可在 PowerPoint 中编辑。
														
 
															 ## 目录结构
														
@@ -27,153 +32,138 @@ description: >
 
															 generate-data-report-ppt/
														
 
															 ├── SKILL.md
														
 
															 ├── scripts/
														
 
															-│   ├── data_loader.py          # Excel 加载与数据清洗
														
 
															-│   ├── metrics_calculator.py   # KPI 计算引擎（含日报/周报/月报指标）
														
 
															-│   ├── deep_insights.py        # 结构化深度洞察生成（周报/月报各页面）
														
 
															-│   ├── chart_factory.py        # 原生可编辑图表创建
														
 
															-│   └── ppt_builder.py          # PPT 组装编排器
														
 
															+│   ├── data_loader.py              # Excel 加载与数据清洗（保留原有兼容）
														
 
															+│   ├── data_profiler.py            # 通用数据探查引擎（新增）
														
 
															+│   ├── report_config.py            # ReportConfig 数据模型定义（新增）
														
 
															+│   ├── theme_manager.py            # 多主题配色与模板管理（新增）
														
 
															+│   ├── agent_analyzer.py           # LLM 智能分析与推荐（新增）
														
 
															+│   ├── metrics_calculator.py       # KPI 计算引擎（新增通用计算函数）
														
 
															+│   ├── chart_factory.py            # 原生可编辑图表创建
														
 
															+│   ├── page_layouts.py             # 动态页面布局引擎（新增）
														
 
															+│   ├── quality_rules.py            # 质量检查规则定义（新增）
														
 
															+│   ├── quality_inspector.py        # 质量自检与自动修复引擎（新增）
														
 
															+│   ├── deep_insights.py            # 结构化深度洞察生成（周报/月报）
														
 
															+│   └── ppt_builder.py              # PPT 组装编排器（新增 build_report / quality_assured_build）
														
 
															 ├── references/
														
 
															-│   ├── data-schema.md          # Excel 字段映射与校验规则
														
 
															-│   ├── report-structures.md    # 日报/周报/月报页面结构
														
 
															-│   ├── chart-specs.md          # 原生图表类型、配色、数据绑定
														
 
															-│   └── visual-style-guide.md   # 布局、字体、配色方案
														
 
															+│   ├── data-schema.md              # Excel 字段映射与校验规则
														
 
															+│   ├── report-structures.md        # 日报/周报/月报页面结构
														
 
															+│   ├── chart-specs.md              # 原生图表类型、配色、数据绑定
														
 
															+│   └── visual-style-guide.md       # 布局、字体、配色方案
														
 
															 └── assets/
														
 
															-    ├── report-master.pptx      # 日报模板（封面、内容页、目录、尾页）
														
 
															-    ├── weekly-master.pptx      # 周报模板
														
 
															-    └── monthly-master.pptx     # 月报模板
														
 
															+    ├── report-master.pptx          # 日报模板（封面、内容页、目录、尾页）
														
 
															+    ├── weekly-master.pptx          # 周报模板
														
 
															+    └── monthly-master.pptx         # 月报模板
														
 
															 ```
														
 
															-## 报告类型
														
 
															+## 新增核心模块
														
 
															+
														
 
															+### ReportConfig（report_config.py）
														
 
															+通用数据模型，定义报告配置的所有要素：
														
 
															+- `ReportConfig`：报告标题、周期、数据来源、主题、页数范围、质量阈值、最大修复迭代次数
														
 
															+- `MetricDef`：指标名称、列映射、聚合方式、数值格式、单位、指标类型
														
 
															+- `PageDef`：页面类型、标题、结论标题、布局模板、可选图表类型、选中状态
														
 
															+- `PeriodType` 枚举：DAILY / WEEKLY / MONTHLY / CUSTOM
														
 
															+- `ChartType` 枚举：BAR / LINE / PIE / DOUGHNUT / FUNNEL / TABLE / AUTO
														
 
															+
														
 
															+### 数据探查（data_profiler.py）
														
 
															+自动分析任意 Excel 数据结构：
														
 
															+- Schema 检测：列名、类型推断、缺失率、唯一值统计
														
 
															+- 统计特征提取：数值列的 min/max/mean/std/分位数，分类列的分布/基数
														
 
															+- 语义推断：自动识别时间列、分类列、数值列、ID 列
														
 
															+- 数据质量评分：完整性、唯一性、合理性三维度评分
														
 
															+
														
 
															+### 多主题管理（theme_manager.py）
														
 
															+- 5 套预设主题：商务经典（默认）、清新简约、科技蓝调、暖橙活力、暗夜深邃
														
 
															+- 每套主题包含：主色、辅色、强调色、背景色、文字色、系列色盘
														
 
															+- 支持自定义配色覆盖
														
 
															+- `theme_to_rgb_colors()` 一键转换为 pptx RGBColor 对象
														
 
															+
														
 
															+### 智能分析（agent_analyzer.py）
														
 
															+- 自动识别可量化的数值指标
														
 
															+- 推荐可视化方案（图表类型、配色、数据准备方式）
														
 
															+- 生成页面结构建议（含结论标题和洞察文案模板）
														
 
															+- 所有推荐需经用户确认后注入 ReportConfig
														
 
															+
														
 
															+### 页面布局引擎（page_layouts.py）
														
 
															+- 预定义布局模板：KPI 网格、图表左+洞察右、两栏、两行、卡片网格、全宽
														
 
															+- `calculate_content_area()` 计算可用内容区域
														
 
															+- `calculate_fill_ratio()` 计算页面内容填充率
														
 
															+- `ensure_safe_position()` 确保元素在页面安全区域内
														
 
															+
														
 
															+### 质量自检（quality_rules.py + quality_inspector.py）
														
 
															+
														
 
															+**四类 25 项检查规则**：
														
 
															+
														
 
															+| 类别 | 检查项 | 自动修复 |
														
 
															+|------|--------|---------|
														
 
															+| layout（布局） | 元素飞出边界（4方向）、图文重叠、占位符未替换、元素紧贴边缘 | ✅ |
														
 
															+| visual（视觉） | 字体不一致、字号过小/过大、颜色对比度不足、图片拉伸变形 | ✅ |
														
 
															+| content（内容） | 页面留白过多、KPI数值为空、图表无数据、文本截断、分析文本过短、缺少标题 | ✅ |
														
 
															+| data（数据） | 图表与文本矛盾、页码错乱、数据来源缺失、表格列宽不合理、图表刻度异常 | 部分 |
														
 
															+
														
 
															+**迭代修复机制**：
														
 
															+1. 生成 PPT → 执行全量检查 → 报告问题
														
 
															+2. 对可自动修复的问题执行修复 → 重新检查
														
 
															+3. 重复至无严重/主要问题或达最大迭代次数
														
 
															+4. 计算质量评分（0-100），评分 ≥ 阈值（默认 85）时输出
														
 
															+
														
 
															+**质量评分计算**：
														
 
															+- 严重问题：-20 分/页
														
 
															+- 主要问题：-10 分/页
														
 
															+- 次要问题：-3 分/页
														
 
															+- 加权归一化到 100 分制
														
 
															+
														
 
															+## 报告类型（原有，保持兼容）
														
 
															 ### 日报
														
 
															-- **结构**：封面 → 核心指标概览 → 近10天趋势 → 订单状态分布 → 负责人分布 → 目的国家 TOP8 → 异常告警 → 今日要点
														
 
															-- **分析维度**：与昨日对比
														
 
															-- **页数**：8
														
 
															-
														
 
															+- 结构：封面 → 核心指标概览 → 近10天趋势 → 订单状态分布 → 负责人分布 → 目的国家 TOP8 → 异常告警 → 今日要点
														
 
															+- 页数：8
														
 
															 ### 周报
														
 
															-- **结构**：封面 → 周汇总 → 7日趋势 → 环比分析 → 区域分布 → 国家排行 → 团队追踪 → 问题与建议 → 下周计划
														
 
															-- **分析维度**：周环比（WoW）、7日移动平均
														
 
															-- **页数**：9
														
 
															-- **导航标签**：周汇总 / 趋势图 / 环比分析 / 区域排行 / 问题建议 / 下周计划
														
 
															-
														
 
															+- 结构：封面 → 周汇总 → 7日趋势 → 环比分析 → 区域分布 → 国家排行 → 团队追踪 → 问题与建议 → 下周计划
														
 
															+- 页数：9
														
 
															 ### 月报
														
 
															-- **结构**：封面 → 目录 → 月度总览 → 订单状态漏斗 → 区域分布 → TOP10 目的国 → 30日追踪趋势 → 团队绩效 → 支持需求分析 → 下月规划 → 尾页
														
 
															-- **分析维度**：环比（MoM）、同比（YoY）、日均值、结构占比
														
 
															-- **页数**：11
														
 
															-- **导航标签**：月度总览 / 订单状态 / 区域趋势 / 团队展望
														
 
															-
														
 
															-## 模板使用
														
 
															-
														
 
															-`assets/` 下包含三种报告模板：
														
 
															-
														
 
															-| 报告类型 | 模板文件 | 包含幻灯片 |
														
 
															-|---------|---------|-----------|
														
 
															-| 日报 | `report-master.pptx` | 封面 / 内容页 / 目录页 / 尾页 |
														
 
															-| 周报 | `weekly-master.pptx` | 封面 / 内容页 / 目录页 / 尾页 |
														
 
															-| 月报 | `monthly-master.pptx` | 封面 / 内容页 / 目录页 / 尾页 |
														
 
															-
														
 
															-**复制机制**：`ppt_builder._duplicate_slide(prs, source_slide)` 深度复制模板幻灯片到输出文稿。
														
 
															-
														
 
															-**导航标签**：由 `ppt_builder._add_nav_tabs()` 在内容页上动态绘制，不内嵌在模板中。
														
 
															-
														
 
															-## 占位符替换
														
 
															-
														
 
															-所有模板形状使用 `{placeholder}` 语法。脚本遍历 `slide.shapes` 匹配段落文本进行替换。
														
 
															-
														
 
															-| 占位符 | 出现位置 | 替换内容 |
														
 
															-|--------|---------|---------|
														
 
															-| `{report_title}` | 封面、内容页眉、尾页 | 如"海外订单数据日报" |
														
 
															-| `{report_type}` | 封面副标题 | 如"数据日报" |
														
 
															-| `{date}` | 封面、页眉、尾页 | 报告日期或周期 |
														
 
															-| `{department}` | 封面、尾页 | 如"海外事业部" |
														
 
															-| `{period}` | 封面、底部来源条 | 数据周期描述 |
														
 
															-| `{gen_time}` | 封面 | 报告生成时间 |
														
 
															-| `{page_title}` | 内容页 | 当前页面标题 |
														
 
															-| `{page_num}` | 右下角 | 如"3 / 8" |
														
 
															-| `{source}` | 底部来源条 | 数据来源 |
														
 
															-| `{kpiN_label}` / `{kpiN_value}` | 封面/尾页卡片 | 第N个指标的标签和数值 |
														
 
															-| `{chapterN_title}` / `{chapterN_desc}` | 目录网格 | 第N章标题和描述 |
														
 
															-
														
 
															-## 图表插入规则
														
 
															-
														
 
															-**严格使用原生图表**，禁止生成 matplotlib PNG 图片。
														
 
															-
														
 
															-| 图表类型 | XL_CHART_TYPE | 使用场景 |
														
 
															-|---------|---------------|---------|
														
 
															-| 簇状柱形图 | COLUMN_CLUSTERED | 区域分布、团队追踪、支持需求分布 |
														
 
															-| 簇状条形图（横向） | BAR_CLUSTERED | 国家排名、负责人排名、状态漏斗 |
														
 
															-| 折线图（带标记） | LINE_MARKERS | 多日趋势（10天/7天/30天） |
														
 
															-| 环形图 | DOUGHNUT | 状态占比、区域占比 |
														
 
															-| 饼图 | PIE | 状态占比、区域占比（替代场景） |
														
 
															-| 表格 | TABLE | 明细列表、TOP列表、状态变化、超期订单 |
														
 
															-
														
 
															-调用 `chart_factory.add_*_chart()` 传入数据数组。图表数据嵌入 PPT 内部 Excel 工作簿，用户可直接编辑。
														
 
															-
														
 
															-## 数据输入要求
														
 
															-
														
 
															-Excel 文件按自然日分 Sheet，工作表命名：`YYYY年MM月DD日`（如 `2026年04月10日`）。
														
 
															-
														
 
															-**必填字段**：`目的国家`、`合同号`、`订单总数量`、`负责人`、`当前状态`、`拟定合同时间`
														
 
															-
														
 
															-**推荐字段**：`今日进度更新`、`是否更新`、`支持需求`、`4月交付`、`5月预测`
														
 
															-
														
 
															-完整字段映射、状态枚举（A-F）及校验规则见 `references/data-schema.md`。
														
 
															-
														
 
															-## 配色方案
														
 
															-
														
 
															-| 角色 | 色值 | 用途 |
														
 
															-|------|------|------|
														
 
															-| 主色 | `#1E3A5F` | 页眉标题、导航标签、强调色、顶部蓝线 |
														
 
															-| 辅色 | `#5B9BD5` | 图表主系列、CONTENTS 标签 |
														
 
															-| 深色背景 | `#1F3A5C` | 封面左侧块 |
														
 
															-| 增长色 | `#10B981` | 上涨指标、正面变化 |
														
 
															-| 下跌色 | `#EF4444` | 下跌指标、负面变化、严重告警 |
														
 
															-| 警告色 | `#ED7D31` | 中度告警、关注提示 |
														
 
															-| 卡片背景 | `#E7F0F7` | KPI 卡片背景 |
														
 
															-| 深灰文字 | `#333333` | 正文、主标题 |
														
 
															-| 中灰文字 | `#666666` | 副标题、次要信息 |
														
 
															-
														
 
															-## 核心功能模块
														
 
															-
														
 
															-### 指标计算（metrics_calculator.py）
														
 
															-- **日报指标**：在跟订单数、订单总量、今日更新、已发运、支持需求、下月预测、单均台数、状态分布、负责人分布、国家 TOP8、超期订单（A阶段>30天）、告警列表
														
 
															-- **周报指标**：周订单量、周车辆数、日均订单、单均台数、7日趋势、状态环比（WoW）、区域分布（含各国 TOP3）、国家排行（含环比变化）、团队绩效（人均产出）、支持需求分类、问题识别、下周目标（G1-G4自动生成）
														
 
															-- **月报指标**：月度合同数、月度车辆数、新签合同、已发运、覆盖国家数、支持需求占比、日均订单、状态漏斗（含阶段分析：前期/中期/后期）、区域分布（含各国 TOP3）、TOP10 国家（含环比变化）、30日趋势（含上中下旬均值、峰值日期）、团队绩效（人均订单/人均台数）、超期订单、下月目标（5项自动生成）、风险列表
														
 
															-
														
 
															-### 深度洞察（deep_insights.py）
														
 
															-为周报和月报各页面生成结构化洞察文本，每条洞察包含标题和正文：
														
 
															-- **周报**：周内节奏分析、周环比趋势偏移、月度进度推演、关键驱动因素、区域引擎识别、结构健康度、转化效率、瓶颈诊断、库存资金占用、发运端效率、漏斗健康度、区域战略矩阵、国家组合健康度、团队人均产出、问题根因分类、目标拆解、风险对冲等
														
 
															-- **月报**：月度节奏、目标达成率、季节性同比、年度进度、漏斗结构诊断、区域投入 ROI、国家增速梯队、团队均衡性、支持需求趋势、下月里程碑等
														
 
															-
														
 
															-### PPT 组件（ppt_builder.py）
														
 
															-- **KPI 卡片**：3×2 网格，支持数值、单位、变化徽章、情感标签（自动着色）
														
 
															-- **告警卡片**：1-3 个横向排列，支持严重/警告/关注三级颜色
														
 
															-- **问题卡片**：纵向堆叠，含严重度、标题、详情、建议措施
														
 
															-- **目标卡片**：2×2 网格，含图标、目标编号、标题、详情
														
 
															-- **结构化洞察文本块**：多段落洞察，自适应字号压缩以适配高度，带 emoji 前缀
														
 
															-- **页脚**：自动添加数据来源条和页码
														
 
															+- 结构：封面 → 目录 → 月度总览 → 订单状态漏斗 → 区域分布 → TOP10 目的国 → 30日追踪趋势 → 团队绩效 → 支持需求分析 → 下月规划 → 尾页
														
 
															+- 页数：11
														
 
															 ## 执行示例
														
 
															 ```python
														
 
															-from scripts.ppt_builder import build_daily_report, build_weekly_report, build_monthly_report
														
 
															-from datetime import datetime
														
 
															+from scripts.ppt_builder import build_daily_report, build_report, quality_assured_build
														
 
															+from scripts.report_config import ReportConfig, PageDef, MetricDef
														
 
															+
														
 
															+# === 原有方式（保持兼容；需先执行 from datetime import datetime）===
														
 
															+build_daily_report('data.xlsx', datetime(2026, 4, 10), 'daily.pptx')
														
 
															+
														
 
															+# === 新通用方式 ===
														
 
															+config = ReportConfig(
														
 
															+    title='销售数据报告',
														
 
															+    period_type='monthly',
														
 
															+    source_label='销售部',
														
 
															+    theme='business_classic',
														
 
															+    quality_threshold=85,
														
 
															+    max_fix_iterations=3,
														
 
															+)
														
 
															+# config 需经用户确认后填充 metrics 和 pages
														
 
															+
														
 
															+build_report('any_data.xlsx', config, 'output.pptx')
														
 
															+
														
 
															+# === 带质量保证的方式（推荐）===
														
 
															+prs, issues = quality_assured_build('any_data.xlsx', config, 'output_qa.pptx')
														
 
															+```
														
 
															-# 日报
														
 
															-build_daily_report('data.xlsx', datetime(2026, 4, 10), 'daily_20260410.pptx')
														
 
															+## V2 generation contract
														
 
															-# 周报（2026年第14周）
														
 
															-build_weekly_report('data.xlsx', 2026, 14, 'weekly_w14.pptx')
														
 
															+Before calling `build_report` or `quality_assured_build`, fill and validate the six user confirmations:
														
 
															-# 月报（2026年4月）
														
 
															-build_monthly_report('data.xlsx', 2026, 4, 'monthly_202604.pptx')
														
 
															-```
														
 
															+1. 报告周期与页数范围
														
 
															+2. 核心指标集
														
 
															+3. 受众与决策场景
														
 
															+4. 视觉风格与配色方向
														
 
															+5. 页面结构与模板方案
														
 
															+6. 数据范围与字段映射
														
 
															+
														
 
															+Use `ConfirmationSpec` on `ReportConfig.user_confirmation` to record completion. Generic builds default to `require_six_confirmations=True`; missing confirmations or invalid metric-to-column mappings must stop generation before any PPT is written.
														
 
															-## 扩展技能
														
 
															+Data profiling serves the confirmed business intent. It should map the confirmed metrics and dimensions to actual Excel columns, then select feasible pages and layouts. It must not invent a different business focus when the user has already confirmed the core metrics.
														
 
															-添加新报告类型（如季报）：
														
 
															-1. 在 `references/report-structures.md` 中添加页面结构定义
														
 
															-2. 在 `scripts/metrics_calculator.py` 中添加指标计算函数
														
 
															-3. 在 `scripts/deep_insights.py` 中添加洞察生成函数
														
 
															-4. 在 `scripts/ppt_builder.py` 中添加构建函数
														
 
															-5. 若内容页布局通用，无需修改模板文件
														
 
															+For visual quality, treat master PPTX files as style assets, not rigid page contracts. If a template placeholder cannot be populated, remove the whole placeholder component. If a KPI grid consumes the available vertical space, do not add bottom insight text; use a later analysis page or a different layout instead.
														
--- a/generate-data-report-ppt/references/chart-specs.md
+++ b/generate-data-report-ppt/references/chart-specs.md
@@ -76,3 +76,28 @@ series = chart.series[0]
 
															 series.marker.size = 7
														
 
															 series.marker.style = XL_MARKER_STYLE.CIRCLE
														
 
															 ```
														
 
															+
														
 
															+
														
 
															+## 通用图表自动选择（新增）
														
 
															+
														
 
															+通用构建器支持自动选择图表类型，根据数据特征和页面场景智能推荐。
														
 
															+
														
 
															+### 智能推荐规则
														
 
															+
														
 
															+| 数据特征 | 推荐图表 | 原因 |
														
 
															+|---------|---------|------|
														
 
															+| 时间序列 + 数值 | LINE_MARKERS | 展示趋势变化 |
														
 
															+| 1个分类列 + 数值列 | BAR_CLUSTERED | 横向对比排名 |
														
 
															+| 1个分类列（含百分比） | DOUGHNUT | 展示占比结构 |
														
 
															+| 多个数值维度 | COLUMN_CLUSTERED | 多维度对比 |
														
 
															+| 需要展示精确数值 | TABLE | 数据明细 |
														
 
															+| 发现数据分层 | FUNNEL | 漏斗转化 |
														
 
															+| 经纬度数据 | 散点图/气泡图 | 地理分布 |
														
 
															+
														
 
															+### 图表质量自检
														
 
															+
														
 
															+| 检查项 | 问题 | 修复 |
														
 
															+|--------|------|------|
														
 
															+| 图表刻度异常 | Y轴从非零开始导致误导 | 将Y轴最小值重置为 0 |
														
 
															+| 图表无数据 | 数据系列为空 | 跳过该图表插入 |
														
 
															+| 表格列宽不合理 | 列宽与内容不匹配 | 按内容自适应列宽 |
														
--- a/generate-data-report-ppt/references/data-schema.md
+++ b/generate-data-report-ppt/references/data-schema.md
@@ -1,61 +1,238 @@
 
															 # 数据源 Schema
														
 
															-数据输入为 Excel 文件，每个工作表（Sheet）代表一个自然日的订单明细数据。
														
 
															-
														
 
															-## 工作表命名规则
														
 
															-
														
 
															-- 日报数据源：`YYYY年MM月DD日`（如 `2026年04月10日`）
														
 
															-- 脚本通过日期字符串匹配对应工作表
														
 
															-
														
 
															-## 字段映射
														
 
															-
														
 
															-| Excel 列名 | 内部字段名 | 数据类型 | 说明 |
														
 
															-|-----------|-----------|---------|------|
														
 
															-| 序号 | `seq` | int | 行序号 |
														
 
															-| 目的国家 | `country` | str | 订单目的国家/地区 |
														
 
															-| 合同号 | `contract_no` | str | 唯一合同编号 |
														
 
															-| 用户名称/公司 | `customer` | str | 客户名称 |
														
 
															-| 意向车型及数量 | `product_info` | str | 车型及台数描述 |
														
 
															-| 订单总数量 | `order_qty` | int | 该合同的车辆总台数 |
														
 
															-| 负责人 | `owner` | str | 跟单负责人姓名 |
														
 
															-| 当前状态 | `status` | str | 订单阶段，见下方状态枚举 |
														
 
															-| 拟定合同时间 | `contract_date` | datetime | 合同拟定日期 |
														
 
															-| 跟单天数 | `tracking_days` | int | 从合同拟定到当前日期的天数 |
														
 
															-| 定金支付时间 | `deposit_date` | datetime | 定金支付日期 |
														
 
															-| 订金认领时间 | `deposit_claim_date` | datetime | 订金认领日期 |
														
 
															-| 订单生成时间 | `order_gen_date` | datetime | 订单在系统生成日期 |
														
 
															-| 价格评审时间 | `price_review_date` | datetime | 价格评审完成日期 |
														
 
															-| 合同评审时间 | `contract_review_date` | datetime | 合同评审完成日期 |
														
 
															-| 合同提交盖章申请时间 | `seal_apply_date` | datetime | 盖章申请日期 |
														
 
															-| 合同盖章时间 | `seal_date` | datetime | 合同盖章完成日期 |
														
 
															-| 车辆下线入库状态 | `inventory_status` | str | 车辆生产/入库状态描述 |
														
 
															-| 尾款支付时间 | `final_pay_date` | datetime | 尾款支付日期 |
														
 
															-| 尾款认领时间 | `final_claim_date` | datetime | 尾款认领日期 |
														
 
															-| 智慧关务信息维护 | `customs_date` | datetime | 关务信息维护日期 |
														
 
															-| 许可证办理时间 | `license_date` | datetime | 进口许可证办理日期 |
														
 
															-| 车辆发运时间 | `ship_date` | datetime | 实际发运日期 |
														
 
															-| 预计开票时间 | `invoice_date` | datetime | 预计开票日期 |
														
 
															-| 今日进度更新 | `progress_update` | str | 当日最新进度描述 |
														
 
															-| 是否更新 | `is_updated` | str (是/否) | 当日是否有进度更新 |
														
 
															-| 支持需求 | `support_request` | str | 需要跨部门支持的需求描述 |
														
 
															-| 4月交付 | `deliver_apr` | int | 标记为4月交付的台数 |
														
 
															-| 5月预测 | `forecast_may` | int | 标记为5月预测交付的台数 |
														
 
															-
														
 
															-## 订单状态枚举
														
 
															-
														
 
															-| 状态代码 | 状态名称 | 说明 |
														
 
															-|---------|---------|------|
														
 
															-| A | 合同拟定中 | 合同尚未盖章确认 |
														
 
															-| B | 已锁定合同待付订金 | 合同已盖章，等待客户支付订金 |
														
 
															-| C | 已付订金待生产 | 订金已到账，等待排产 |
														
 
															-| D | 已生产待付尾款 | 车辆已生产/入库，等待尾款 |
														
 
															-| E | 已付尾款待发运 | 尾款已到账，等待发运安排 |
														
 
															-| F | 已发运 | 车辆已发运 |
														
 
															-
														
 
															-## 数据校验规则
														
 
															-
														
 
															-1. **必填字段**：`country`, `contract_no`, `order_qty`, `owner`, `status`, `contract_date`
														
 
															-2. `order_qty` 必须为正整数
														
 
															-3. `status` 必须为 A-F 中的一个
														
 
															-4. `is_updated` 只能为 "是" 或 "否"
														
 
															-5. 日期字段若为字符串，尝试按 `YYYY-MM-DD` 解析
														
 
															+本报告生成器为 **通用型数据报告引擎**，不依赖固定的 Schema，可接受任意结构的 Excel 文件作为输入。
														
 
															+
														
 
															+## 数据要求
														
 
															+
														
 
															+### 基本要求
														
 
															+
														
 
															+- **格式**：Excel 文件（.xlsx / .xls）或 CSV 文件（.csv）
														
 
															+- **编码**：UTF-8（推荐），CSV 文件自动编码检测（支持 utf-8 / gbk / latin-1 等）
														
 
															+- **文件大小**：建议单文件不超过 50MB
														
 
															+- **行数**：支持数百～数十万行
														
 
															+
														
 
															+### 列类型建议
														
 
															+
														
 
															+引擎通过 `data_profiler.py` 自动检测每列的数据角色，支持以下类型：
														
 
															+
														
 
															+| 角色 | 检测方式 | 适用场景 |
														
 
															+|------|---------|---------|
														
 
															+| **数值列** | dtype 判断 + 列名关键词 + 值分布分析 | 销售额、台数、金额、数量、评分等可计算指标 |
														
 
															+| **分类列** | 唯一值基数 + 列名关键词 + 值内容分析 | 国家、状态、类型、部门、负责人等分组维度 |
														
 
															+| **时间列** | dtype 判断 + 列名关键词 + 日期格式解析 | 日期、时间戳、月份等时间序列维度 |
														
 
															+| **ID 列** | 列名关键词 + 值模式（邮箱/电话/长数字串） | 客户编号、订单号、合同号、邮箱等标识字段 |
														
 
															+| **文本列** | 高基数文本 + 列名关键词 | 备注、描述、地址、摘要等非结构化内容 |
														
 
															+| **布尔列** | 值内容（是/否、true/false、0/1） | 开关状态、是否标记等二值字段 |
														
 
															+
														
 
															+> 引擎支持**值内容驱动的自动识别**：当列名无法明确判断时，通过采样分析列中实际数据内容（如检测到 >50% 的邮箱格式则自动归类为 ID 列，检测到是/否值则归类为布尔列）来提高识别准确率。
														
 
															+
														
 
															+## 自动推断能力
														
 
															+
														
 
															+`data_profiler.py` 是数据探查核心引擎，提供以下自动推断能力：
														
 
															+
														
 
															+### 1. 列类型与角色推断
														
 
															+
														
 
															+| 分析维度 | 检测内容 |
														
 
															+|---------|---------|
														
 
															+| 列类型推断 | 数值列（int/float）、分类列（低基数）、时间列、文本列、ID 列、布尔列 |
														
 
															+| 值模式分析 | 百分比值、二值（是/否）、序数值（高/中/低）、电话格式、邮箱格式、URL、纯数字ID |
														
 
															+| 语义关键词匹配 | 通过列名关键词推断业务含义（支持中英文，覆盖 100+ 关键词） |
														
 
															+
														
 
															+### 2. 统计特征提取
														
 
															+
														
 
															+| 分析维度 | 检测内容 |
														
 
															+|---------|---------|
														
 
															+| 基础统计 | count、sum、mean、median、min、max、std |
														
 
															+| 分位数 | p25（下四分位）、p75（上四分位） |
														
 
															+| 分布形态 | 偏度（skewness）、峰度（kurtosis）、偏态方向、变差系数（CV） |
														
 
															+| 集中度分析 | 高度集中（CV<0.3）/ 中度集中 / 适度分散 / 高度分散 |
														
 
															+
														
 
															+### 3. 分类维度分析
														
 
															+
														
 
															+| 分析维度 | 检测内容 |
														
 
															+|---------|---------|
														
 
															+| 基数统计 | 唯一值数量、占比 |
														
 
															+| 集中度 | Herfindahl-Hirschman 指数（HHI），识别头部集中度 |
														
 
															+| 分布描述 | Top-N 项及其占比 |
														
 
															+
														
 
															+### 4. 数据质量评估
														
 
															+
														
 
															+| 评分维度 | 权重 | 检测内容 |
														
 
															+|---------|------|---------|
														
 
															+| 完整性 | 30% | 缺失率综合评分，高缺失列标识（>30%） |
														
 
															+| 数值健康度 | 25% | 异常值比例、负值检查、零值过多检查 |
														
 
															+| 唯一性 | 20% | ID 列的识别和覆盖度 |
														
 
															+| 时间一致性 | 15% | 时间范围的合法性和有序性 |
														
 
															+| 分类完整性 | 10% | 分类列缺失比例 |
														
 
															+
														
 
															+### 5. 衍生关系检测
														
 
															+
														
 
															+引擎自动检测数值列之间的潜在算术关系：
														
 
															+
														
 
															+| 关系类型 | 示例 | 用途 |
														
 
															+|---------|------|------|
														
 
															+| 减法关系 | `A - B ≈ C` | 发现派生指标（如：总需求 - 已下单 = 未下单） |
														
 
															+| 加法关系 | `A + B ≈ C` | 发现总和关系 |
														
 
															+| 比例关系 | `A / B ≈ 常数` | 发现固定比率（如：转化率、占比） |
														
 
															+
														
 
															+### 6. 数据问题检测
														
 
															+
														
 
															+`detect_data_issues()` 自动扫描：
														
 
															+
														
 
															+- **高缺失率列**（>50%）— 建议排除或补全
														
 
															+- **中度缺失列**（>10%）— 提示关注
														
 
															+- **异常值**（超出 3 倍 IQR 范围）— 标识离群点
														
 
															+- **负值** — 对非负指标列进行标记
														
 
															+- **常量列** — 仅 1 个唯一值，对分析无贡献
														
 
															+
														
 
															+## 数据加载
														
 
															+
														
 
															+`data_loader.py` 提供自动格式检测、编码识别、智能清洗等通用加载能力。
														
 
															+
														
 
															+### 自动格式检测
														
 
															+
														
 
															+`auto_detect_file_format()` 根据扩展名自动识别：
														
 
															+
														
 
															+| 格式 | 扩展名 | 支持说明 |
														
 
															+|------|--------|---------|
														
 
															+| Excel (.xlsx) | `.xlsx` | 标准 Excel 格式，主力支持 |
														
 
															+| Excel 97 (.xls) | `.xls` | 兼容模式，如遇读取错误建议另存为 .xlsx |
														
 
															+| CSV | `.csv` | 自动编码检测（utf-8 → gbk → latin-1 等逐级尝试） |
														
 
															+
														
 
															+### 核心加载函数
														
 
															+
														
 
															+```python
														
 
															+from scripts.data_loader import (
														
 
															+    load_generic_excel,
														
 
															+    load_generic_all_sheets,
														
 
															+    load_generic_csv,
														
 
															+    auto_detect_date_column,
														
 
															+    load_generic_file_info,
														
 
															+    normalize_column_names,
														
 
															+)
														
 
															+
														
 
															+# 加载主表（自动识别 xlsx/xls/csv，自动清洗）
														
 
															+df = load_generic_excel('任意数据文件.xlsx')
														
 
															+df = load_generic_excel('任意数据文件.csv', encoding='gbk')  # CSV 可指定编码
														
 
															+df = load_generic_excel('data.xlsx', sheet_name='Sheet1')    # 指定 sheet
														
 
															+
														
 
															+# 合并所有 Sheet（Excel 文件）
														
 
															+df_all = load_generic_all_sheets('多sheet文件.xlsx')
														
 
															+
														
 
															+# 直接加载 CSV（带自动编码检测）
														
 
															+df_csv = load_generic_csv('data.csv')
														
 
															+
														
 
															+# 轻量文件信息（不加载全量数据）
														
 
															+info = load_generic_file_info('data.xlsx')
														
 
															+# 返回: {format, sheet_names, sheet_count, file_size_mb}
														
 
															+```
														
 
															+
														
 
															+### 智能清洗特性
														
 
															+
														
 
															+`load_generic_excel()` 与 `_clean_generic_dataframe()` 自动执行：
														
 
															+
														
 
															+| 清洗步骤 | 说明 |
														
 
															+|---------|------|
														
 
															+| 去除全空行列 | `dropna(how='all')` 清除完全为空的行和列 |
														
 
															+| 去除 Unnamed 列 | 自动过滤 pandas 自动生成的 Unnamed 列 |
														
 
															+| **列名规范化** | 全角括号→半角、去除首尾空格、统一空白字符 |
														
 
															+| **汇总行自动去除** | 自动检测底部"合计/总计/小计/total/sum"等汇总行并移除 |
														
 
															+| 空首尾行清理 | 检测并裁剪前导和尾随的完全空行 |
														
 
															+| 日期智能解析 | object 类型列尝试 `pd.to_datetime()`，成功 >70% 则转换 |
														
 
															+| 数值智能解析 | object 类型列尝试 `pd.to_numeric()`，成功 >70% 则转换 |
														
 
															+
														
 
															+> 注意：`load_generic_all_sheets()` 会给每行添加 `_source_sheet` 列标记来源 sheet。
														
 
															+
														
 
															+### 列名规范化示例
														
 
															+
														
 
															+```python
														
 
															+from scripts.data_loader import normalize_column_names
														
 
															+
														
 
															+# 全角括号 → 半角：   描述（国家+车型+台数+交期） → 描述(国家+车型+台数+交期)
														
 
															+# 首尾空格去除：         "  客户姓名  "           → "客户姓名"
														
 
															+# 换行符替换：           "客户\n姓名"             → "客户 姓名"
														
 
															+```
														
 
															+
														
 
															+## 数据探查
														
 
															+
														
 
															+```python
														
 
															+from scripts.data_profiler import profile_dataframe, detect_data_issues
														
 
															+
														
 
															+# 自动探查数据结构
														
 
															+profile = profile_dataframe(df)
														
 
															+
														
 
															+# 检测数据问题
														
 
															+issues = detect_data_issues(df)
														
 
															+
														
 
															+# 生成探索摘要
														
 
															+from scripts.data_profiler import generate_summary_text
														
 
															+print(generate_summary_text(profile))
														
 
															+
														
 
															+# 分类分布细化分析
														
 
															+from scripts.data_profiler import profile_category_distribution, profile_numeric_series
														
 
															+dist = profile_category_distribution(df, '客户类型')       # 分类分布（含 HHI 集中度）
														
 
															+stats = profile_numeric_series(df, '总需求台数')           # 数值详细统计（含分布形态）
														
 
															+```
														
 
															+
														
 
															+## 配置驱动的指标计算
														
 
															+
														
 
															+```python
														
 
															+from scripts.metrics_calculator import (
														
 
															+    calc_generic_metrics,
														
 
															+    calc_generic_trend,
														
 
															+    calc_generic_distribution,
														
 
															+    calc_generic_ranking,
														
 
															+    generate_generic_insights,
														
 
															+)
														
 
															+from scripts.report_config import ReportConfig, MetricDef
														
 
															+
														
 
															+# 按配置计算指标
														
 
															+metrics = calc_generic_metrics(df, config)
														
 
															+
														
 
															+# 按指定列计算趋势
														
 
															+trend = calc_generic_trend(df, '日期列', '数值列')
														
 
															+
														
 
															+# 分布分析
														
 
															+dist = calc_generic_distribution(df, '分类列', '数值列')
														
 
															+
														
 
															+# 排行分析
														
 
															+ranking = calc_generic_ranking(df, '排行维度列', '数值列')
														
 
															+
														
 
															+# 智能洞察生成
														
 
															+insights = generate_generic_insights(profile, metrics)
														
 
															+```
														
 
															+
														
 
															+## 推荐配置生成
														
 
															+
														
 
															+```python
														
 
															+from scripts.agent_analyzer import analyze_and_recommend
														
 
															+
														
 
															+# 根据数据探查结果自动推荐指标集和页面结构
														
 
															+recommendations = analyze_and_recommend(profile, period_type)
														
 
															+```
														
 
															+
														
 
															+## 完整工作流示例
														
 
															+
														
 
															+```python
														
 
															+# 1. 加载数据
														
 
															+df = load_generic_excel('data.xlsx')
														
 
															+
														
 
															+# 2. 数据探查
														
 
															+profile = profile_dataframe(df)
														
 
															+
														
 
															+# 3. 自动推荐
														
 
															+recs = analyze_and_recommend(profile)
														
 
															+
														
 
															+# 4. 构建配置（可人工确认调整）
														
 
															+config = ReportConfig(
														
 
															+    title='数据分析报告',
														
 
															+    metrics=[...],  # 从 recs['suggested_metrics'] 选取
														
 
															+    pages=[...],    # 从 recs['suggested_pages'] 选取
														
 
															+)
														
 
															+
														
 
															+# 5. 计算指标
														
 
															+metrics = calc_generic_metrics(df, config)
														
 
															+
														
 
															+# 6. 生成 PPT
														
 
															+output_path, issues = quality_assured_build(DATA_FILE, config, OUTPUT_FILE)
														
 
															+```
														
--- a/generate-data-report-ppt/references/quality-standards.md
+++ b/generate-data-report-ppt/references/quality-standards.md
@@ -0,0 +1,244 @@
 
															+# PPT 生成质量强制规范
														
 
															+
														
 
															+> **核心原则**：生成的每一页 PPT 都必须包含**数据可视化图表 + 深度分析文本**，
														
 
															+> 严禁出现空页、纯图表页、纯文字页。质量检查不合格的页面必须自动修复或回退重建。
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 一、页面内容完整性强制标准
														
 
															+
														
 
															+### 1.1 每页必须包含的元素（MINIMUM_REQUIRED）
														
 
															+
														
 
															+| 页面类型 | 至少图表数 | 至少分析段数 | 每段最少字数 |
														
 
															+|---------|-----------|------------|------------|
														
 
															+| KPI概览页 | 6个KPI卡片 | **4段** | 150字/段 |
														
 
															+| 趋势分析页 | 1个趋势图 | **4段** | 150字/段 |
														
 
															+| 分布分析页 | 1个图表（环形/柱状） | **3段** | 150字/段 |
														
 
															+| 排行分析页 | 1个条形图 | **4段** | 150字/段 |
														
 
															+| 总结/建议页 | 无硬性要求 | **4段** | 150字/段 |
														
 
															+| 封面/目录/尾页 | 无硬性要求 | 1段简介 | 30字/段 |
														
 
															+
														
 
															+### 1.2 分析文本深度要求
														
 
															+
														
 
															+分析文本必须包含**具体数值引用**和**业务洞察建议**，不得是泛泛概括：
														
 
															+
														
 
															+| 分析维度 | 必须包含的内容 |
														
 
															+|---------|-------------|
														
 
															+| 数据引用 | 引用具体数值（含单位），如"XXX 台"、"占比 XX%"、"增长 XX%" |
														
 
															+| 对比分析 | 与同类/历史/目标进行对比，说明高低/好坏 |
														
 
															+| 原因解读 | 对数据背后的原因进行分析（至少 2 条可能原因） |
														
 
															+| 业务建议 | 给出可执行的业务行动建议（不空泛说"加强"、"优化"） |
														
 
															+
														
 
															+### 1.3 页面为空判定（CRITICAL）
														
 
															+
														
 
															+以下任一种情况判定为**页面为空**，严重级别 **critical**：
														
 
															+
														
 
															+- 页面内所有文本框总字数 < 50 字
														
 
															+- 趋势/分布/排行等分析页没有任何图表（shape_type 为 chart 的元素数为 0）；KPI 概览页以 KPI 卡片计，封面/目录/总结/尾页按 1.1 无硬性图表要求，不适用本条
														
 
															+- 页面有图表但无分析文本（图表下方/右侧无 insight 文本块）
														
 
															+- 页面所有文本均为占位符替换后的默认文本（如标题"数据详情"无实质内容）
														
 
															+- 页面仅有一行文字（如仅有标题没有正文）
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 二、图表完整性强制标准
														
 
															+
														
 
															+### 2.1 图表必须可读
														
 
															+
														
 
															+- 图表标题不可为空
														
 
															+- 坐标轴标签必须显示完整（不被截断）
														
 
															+- 数据系列颜色必须与主题配色一致
														
 
															+- 图表尺寸必须占据合理区域（宽度 ≥ 页面宽度的 35%）
														
 
															+
														
 
															+### 2.2 图表与分析文本的关联
														
 
															+
														
 
															+- 图表中的关键数据必须在右侧/下方分析文本中被引用
														
 
															+- 分析文本中的数值必须与图表数据一致
														
 
															+- 图表和分析文本必须在视觉上属于同一页（不能有割裂感）
														
 
															+
														
 
															+### 2.3 图表尺寸下限
														
 
															+
														
 
															+| 图表类型 | 最小宽度 | 最小高度 |
														
 
															+|---------|---------|---------|
														
 
															+| 趋势折线图 | SLIDE_WIDTH × 0.40 | SLIDE_HEIGHT × 0.35 |
														
 
															+| 分布环形图/饼图 | SLIDE_WIDTH × 0.30 | SLIDE_HEIGHT × 0.30 |
														
 
															+| 排行条形图 | SLIDE_WIDTH × 0.45 | SLIDE_HEIGHT × 0.45 |
														
 
															+| KPI 卡片 | SLIDE_WIDTH × 0.28（单张） | SLIDE_HEIGHT × 0.15（单张） |
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 三、布局合理性强制标准
														
 
															+
														
 
															+### 3.1 安全区域
														
 
															+
														
 
															+所有元素必须位于以下安全区域内：
														
 
															+
														
 
															+- 左边界 ≥ CONTENT_LEFT (Emu(762000) ≈ 2cm)
														
 
															+- 右边界 ≤ SLIDE_WIDTH - Emu(762000)
														
 
															+- 上边界 ≥ Emu(1524000)（避开页眉导航栏）
														
 
															+- 下边界 ≤ SLIDE_HEIGHT - Emu(700000)（避开页脚区域）
														
 
															+
														
 
															+### 3.2 图文重叠检测
														
 
															+
														
 
															+- 任意两个形状的重叠面积 > 任一形状面积的 10% 则判定为重叠
														
 
															+- 标题文字与导航标签的重叠豁免
														
 
															+- 必须检测 chart 与 textbox 的重叠
														
 
															+
														
 
															+### 3.3 填充率
														
 
															+
														
 
															+- 内容填充率 < 20% → **critical**（页面基本为空）
														
 
															+- 内容填充率 < 35% → **major**（留白严重）
														
 
															+- 内容填充率 < 50% → **minor**（留白偏多）
														
 
															+- 内容填充率 ≥ 50% → 合格（≥ 65% 为理想水平）
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 四、视觉一致性强制标准
														
 
															+
														
 
															+### 4.1 字体规范
														
 
															+
														
 
															+- 中文字体：微软雅黑
														
 
															+- 数字/英文字体：Arial
														
 
															+- 标题字号：24pt - 32pt
														
 
															+- 正文字号：11pt - 14pt
														
 
															+- KPI 数值字号：28pt - 36pt
														
 
															+- 同一页面字体种类 ≤ 2 种
														
 
															+
														
 
															+### 4.2 颜色规范
														
 
															+
														
 
															+- 主色：主题 primary 色
														
 
															+- 强调色：主题 accent 色
														
 
															+- 图表系列色：主题 series 色板
														
 
															+- 正文色：#333333
														
 
															+- 次要文字色：#666666
														
 
															+- 背景色：#FFFFFF
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 五、六项用户确认的强制校验
														
 
															+
														
 
															+### 5.1 报告周期与页数范围
														
 
															+
														
 
															+| 校验项 | 说明 |
														
 
															+|--------|------|
														
 
															+| 封面日期 | 必须与用户确认的周期一致 |
														
 
															+| 总页数 | 必须在用户确认的范围内（允许 ±1 页） |
														
 
															+| 目录页 | 若页数 ≥ 5 则必须包含目录页 |
														
 
															+
														
 
															+### 5.2 核心指标集
														
 
															+
														
 
															+- 每个 KPI 指标必须在 KPI 概览页出现
														
 
															+- 每个 KPI 的值必须来自数据计算而非硬编码
														
 
															+- KPI 卡片不得超过 6 个（超出则合并或精简）
														
 
															+
														
 
															+### 5.3 受众与决策场景
														
 
															+
														
 
															+- 分析文本的语言风格必须匹配受众（管理层 → 结论优先，执行层 → 细节优先）
														
 
															+- 建议内容必须对应决策场景
														
 
															+
														
 
															+### 5.4 视觉风格与配色方向
														
 
															+
														
 
															+- 每页遵循相同的主题配色
														
 
															+- 不允许出现硬编码的颜色值（必须从 theme 获取）
														
 
															+
														
 
															+### 5.5 页面结构与模板方案
														
 
															+
														
 
															+- 实际生成的页面类型和顺序必须与用户确认的 pages 列表一致
														
 
															+- 不允许跳过任何用户选定的页面（除非数据不支持且已告警）
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 六、自检修复迭代标准
														
 
															+
														
 
															+### 6.1 迭代退出条件
														
 
															+
														
 
															+| 条件 | 说明 |
														
 
															+|------|------|
														
 
															+| 无任何 critical 级别问题 | 必须满足 |
														
 
															+| 无任何 major 级别问题 | 必须满足 |
														
 
															+| minor 级别问题 ≤ 2 个 | 强烈推荐 |
														
 
															+| 质量评分 ≥ 85 | 必须满足 |
														
 
															+
														
 
															+### 6.2 修复策略
														
 
															+
														
 
															+| 问题类型 | 修复方式 |
														
 
															+|---------|---------|
														
 
															+| 页面为空 | **不允许简单添加提示文字"建议补充"**，必须回退重建页面，调用 build 函数重新生成完整分析内容 |
														
 
															+| 图文重叠 | 重新计算位置，向下/右偏移；若空间不足则缩小图表 |
														
 
															+| 飞出页面 | 裁剪到安全区域内，必要时缩放 |
														
 
															+| 分析文本过短 | 扩写分析文本，补充数据引用和业务洞察 |
														
 
															+| 图表无数据 | 检查数据来源，使用备用数据列或降级为表格 |
														
 
															+| 占位符未替换 | 清空未替换的占位符文本 |
														
 
															+
														
 
															+### 6.3 最大迭代次数
														
 
															+
														
 
															+- 默认最大迭代 5 次
														
 
															+- 若达到最大迭代次数后仍有 critical 问题，**必须报错**，不允许输出不合格 PPT
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 七、禁止事项（DO NOT）
														
 
															+
														
 
															+1. **禁止** 生成只有图表没有分析文本的页面
														
 
															+2. **禁止** 生成只有分析文本没有图表的分析页（KPI/趋势/分布/排行页）
														
 
															+3. **禁止** 分析文本中出现"暂无数据"、"数据不足"作为唯一内容（必须挖掘现有数据维度）
														
 
															+4. **禁止** 在页面空白处简单添加"建议补充图表"的提示文字代替实际内容
														
 
															+5. **禁止** 使用 placeholder 文本（如 `{page_title}` 未替换）
														
 
															+6. **禁止** 任何元素飞出或紧贴页面边缘（安全边距≥2cm）
														
 
															+7. **禁止** 在总结页仅列出不足 3 条建议
														
 
															+8. **禁止** 分析文本使用模糊措辞如"要加强"、"进一步优化"（必须具体可执行）
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 八、理想 PPT 页面范例
														
 
															+
														
 
															+### KPI 概览页 范例
														
 
															+
														
 
															+```
														
 
															+┌──────────────────────────────────────────────────────┐
														
 
															+│  KPI1: 总需求 1,250 台   KPI2: 已下单 780 台         │
														
 
															+│  KPI3: 转化率 62.4%      KPI4: 覆盖客户 93 个          │
														
 
															+│  KPI5: 支持闭环率 78%    KPI6: 覆盖国家 96 个          │
														
 
															+├──────────────────────────────────────────────────────┤
														
 
															+│  需求总量与转化结构                                    │
														
 
															+│  本期客户意向项目总需求台数为 1,250 台，其中累计已下单    │
														
 
															+│  780 台（占比 62.4%），未下单 470 台（占比 37.6%），    │
														
 
															+│  下单转化率 62.4%...（≥150字深度分析）                  │
														
 
															+├──────────────────────────────────────────────────────┤
														
 
															+│  客户覆盖与服务广度                                    │
														
 
															+│  本期覆盖客户 93 个，涉及 96 个意向国家...              │
														
 
															+├──────────────────────────────────────────────────────┤
														
 
															+│  跨部门支持闭环效率                                    │
														
 
															+│  支持需求闭环率 78%，...                               │
														
 
															+├──────────────────────────────────────────────────────┤
														
 
															+│  未下单需求跟进策略                                    │
														
 
															+│  当前未下单 470 台，...                                │
														
 
															+└──────────────────────────────────────────────────────┘
														
 
															+```
														
 
															+
														
 
															+### 分布分析页 范例
														
 
															+
														
 
															+```
														
 
															+┌───────────────────────┬──────────────────────────────┐
														
 
															+│                       │  意向级别分布概况              │
														
 
															+│    环形图              │  共有 6 个不同的意向级别...    │
														
 
															+│    (左 55%)           │                              │
														
 
															+│                       │  排名第一: A级                │
														
 
															+│                       │  A级以 450 台（占比 36%）...   │
														
 
															+│                       │                              │
														
 
															+│                       │  长尾分布特征                  │
														
 
															+│                       │  前三名累计占比 72%...         │
														
 
															+└───────────────────────┴──────────────────────────────┘
														
 
															+```
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 九、质检流程集成
														
 
															+
														
 
															+### Agent 在生成 PPT 前必须读取本文档
														
 
															+
														
 
															+Agent 在调用 `quality_assured_build` 之前必须：
														
 
															+1. 读取本文档了解质量标准
														
 
															+2. 将本文档的质量要求映射到 `QualityRules` 参数中
														
 
															+3. 确保 `ReportConfig` 的 `quality_threshold` ≥ 85
														
 
															+4. 确保 `ReportConfig` 的 `max_fix_iterations` ≥ 5
														
--- a/generate-data-report-ppt/references/report-structures.md
+++ b/generate-data-report-ppt/references/report-structures.md
@@ -82,3 +82,29 @@
 
															 | `{page_num}` | 内容页底部 | 页码 |
														
 
															 | `{kpiN_label}` / `{kpiN_value}` | 封面/尾页KPI卡片 | 第N个指标的标签和数值 |
														
 
															 | `{chapterN_title}` / `{chapterN_desc}` | 目录页 | 第N章标题和描述 |
														
 
															+
														
 
															+
														
 
															+## 通用报告页面结构（新增）
														
 
															+
														
 
															+通用构建器支持动态页面结构，通过 `ReportConfig.pages` 配置，无需硬编码。
														
 
															+
														
 
															+### 支持的页面类型
														
 
															+
														
 
															+| page_type | 用途 | 布局模板 |
														
 
															+|-----------|------|---------|
														
 
															+| `cover` | 封面页 | 固定封面布局 |
														
 
															+| `toc` | 目录页 | 章节目录网格 |
														
 
															+| `kpi_overview` | 核心指标概览 | KPI 卡片网格（3×2 / 自定义行列） |
														
 
															+| `trend` | 趋势分析 | 左侧趋势图 + 右侧洞察文本 |
														
 
															+| `distribution` | 分布分析 | 左侧图表 + 右侧洞察文本 |
														
 
															+| `ranking` | 排行分析 | 左侧条形图 + 右侧排行说明 |
														
 
															+| `summary` | 总结与建议 | 全宽洞察文本块 |
														
 
															+| `end` | 结束页 | 固定尾页布局 |
														
 
															+
														
 
															+### 页面确认项
														
 
															+
														
 
															+用户需确认每页的：
														
 
															+1. 页面标题（如"月度销售额趋势"）
														
 
															+2. 结论标题（用于导航标签和洞察总结）
														
 
															+3. 图表类型（BAR / LINE / PIE / DOUGHNUT / TABLE / AUTO）
														
 
															+4. 布局模板（chart_left / two_column / full_width / kpi_grid）
														
--- a/generate-data-report-ppt/references/visual-style-guide.md
+++ b/generate-data-report-ppt/references/visual-style-guide.md
@@ -53,3 +53,38 @@
 
															 - **KPI卡片**：圆角矩形（ROUNDED_RECTANGLE），填充 `#E7F0F7`，无边框
														
 
															 - **告警卡片**：矩形，左侧带 50800 EMU 宽度的色条（严重=红色/警告=橙色/关注=蓝色）
														
 
															 - **分隔线**：高度 0-50800 EMU 的矩形，填充 `#D9D9D9` 或 `#2E5B8B`
														
 
															+
														
 
															+
														
 
															+## 多主题配色方案（新增）
														
 
															+
														
 
															+通用报告支持 5 套预设主题，可通过 `ReportConfig.theme` 选择。
														
 
															+
														
 
															+| 主题ID | 名称 | 主色 | 辅色 | 强调色 | 风格描述 |
														
 
															+|--------|------|------|------|--------|---------|
														
 
															+| `business_classic` | 商务经典 | `#1E3A5F` | `#10B981` | `#5B9BD5` | 深海蓝主调，稳重大气，适合正式汇报 |
														
 
															+| `fresh_minimal` | 清新简约 | `#059669` | `#34D399` | `#F59E0B` | 翠绿主调，清爽现代，适合创新团队 |
														
 
															+| `tech_blue` | 科技蓝调 | `#2563EB` | `#06B6D4` | `#8B5CF6` | 科技蓝主调，年轻动感，适合数字化报告 |
														
 
															+| `warm_orange` | 暖橙活力 | `#EA580C` | `#F97316` | `#EAB308` | 暖橙主调，温暖亲和，适合运营报告 |
														
 
															+| `dark_pro` | 暗夜深邃 | `#0F172A` | `#38BDF8` | `#818CF8` | 深色主调，高端神秘，适合战略报告 |
														
 
															+
														
 
															+### 自定义配色
														
 
															+
														
 
															+```python
														
 
															+from scripts.report_config import ReportConfig
														
 
															+
														
 
															+config = ReportConfig(
														
 
															+    custom_colors={
														
 
															+        'primary': '#4F46E5',
														
 
															+        'secondary': '#EC4899',
														
 
															+        'accent': '#F59E0B',
														
 
															+    }
														
 
															+)
														
 
															+```
														
 
															+
														
 
															+### 质量自检视觉效果
														
 
															+
														
 
															+- 检测字体是否超过 3 种，超过自动统一为微软雅黑
														
 
															+- 字号 < 8pt 时自动提升至 8pt，> 60pt 时自动降至 60pt
														
 
															+- 元素飞出页面边界时自动推回安全区域
														
 
															+- 页面填充率 < 25% 时追加补充建议文本框
														
 
															+- 图文重叠时自动错开位置
														
--- a/generate-data-report-ppt/scripts/agent_analyzer.py
+++ b/generate-data-report-ppt/scripts/agent_analyzer.py
@@ -0,0 +1,360 @@
 
															+"""
														
 
															+Agent analyzer: intelligent analysis of data profile to generate
														
 
															+recommendations for metrics, pages, charts, and overall report structure.
														
 
															+Uses rule-based heuristics for analysis and generates structured recommendations.
														
 
															+"""
														
 
															+from report_config import (
														
 
															+    MetricDef, PageDef, MetricType, AggregationType, ChartType,
														
 
															+    PeriodType, ColumnRole
														
 
															+)
														
 
															+
														
 
															+
														
 
															+def analyze_and_recommend(profile: dict, period_type: PeriodType = PeriodType.MONTHLY) -> dict:
														
 
															+    recommendations = {
														
 
															+        'suggested_metrics': _recommend_metrics(profile),
														
 
															+        'suggested_pages': _recommend_pages(profile, period_type),
														
 
															+        'suggested_period': period_type.value,
														
 
															+        'suggested_page_range': (6, 15),
														
 
															+        'data_summary': _build_summary(profile),
														
 
															+        'chart_mapping': _build_chart_mapping(profile),
														
 
															+        'analysis_notes': _build_analysis_notes(profile),
														
 
															+    }
														
 
															+    recommendations.update(_suggest_period_and_range(profile))
														
 
															+    return recommendations
														
 
															+
														
 
															+
														
 
															+def _recommend_metrics(profile: dict) -> list[dict]:
														
 
															+    metrics = []
														
 
															+    num_cols = profile.get('numeric_columns', [])
														
 
															+    cat_cols = profile.get('category_columns', [])
														
 
															+
														
 
															+    for i, col in enumerate(num_cols):
														
 
															+        ns = col.get('numeric_stats', {}) or {}
														
 
															+        label = col.get('inferred_label', col['column_name'])
														
 
															+        unit = _infer_unit(col['column_name'])
														
 
															+        is_primary = i < 4
														
 
															+
														
 
															+        metrics.append({
														
 
															+            'name': f"{label}_{col['column_name']}",
														
 
															+            'label': label,
														
 
															+            'column': col['column_name'],
														
 
															+            'aggregation': 'sum',
														
 
															+            'metric_type': 'kpi',
														
 
															+            'unit': unit,
														
 
															+            'selected': is_primary,
														
 
															+            'is_primary': is_primary,
														
 
															+            'sample_value': ns.get('sum', 0),
														
 
															+        })
														
 
															+
														
 
															+        if len(num_cols) <= 4 and ns.get('sum', 0) > 100:
														
 
															+            metrics.append({
														
 
															+                'name': f"日均{label}",
														
 
															+                'label': f"日均{label}",
														
 
															+                'column': col['column_name'],
														
 
															+                'aggregation': 'avg',
														
 
															+                'metric_type': 'kpi',
														
 
															+                'unit': unit,
														
 
															+                'selected': False,
														
 
															+                'is_primary': False,
														
 
															+                'sample_value': ns.get('mean', 0),
														
 
															+            })
														
 
															+
														
 
															+    if cat_cols:
														
 
															+        top_cat = cat_cols[0]
														
 
															+        metrics.append({
														
 
															+            'name': f"覆盖{top_cat['inferred_label']}数",
														
 
															+            'label': f"覆盖{top_cat['inferred_label']}数",
														
 
															+            'column': top_cat['column_name'],
														
 
															+            'aggregation': 'distinct_count',
														
 
															+            'metric_type': 'kpi',
														
 
															+            'unit': '个',
														
 
															+            'selected': True,
														
 
															+            'is_primary': False,
														
 
															+            'sample_value': top_cat.get('unique_count', 0),
														
 
															+        })
														
 
															+
														
 
															+    return metrics
														
 
															+
														
 
															+
														
 
															+def _recommend_pages(profile: dict, period_type: PeriodType) -> list[dict]:
														
 
															+    pages = []
														
 
															+    order = 0
														
 
															+
														
 
															+    pages.append({
														
 
															+        'page_id': 'cover',
														
 
															+        'title': '封面',
														
 
															+        'page_type': 'cover',
														
 
															+        'order': order,
														
 
															+        'selected': True,
														
 
															+        'elements': [],
														
 
															+        'conclusion_title': '',
														
 
															+    })
														
 
															+    order += 1
														
 
															+
														
 
															+    num_cols = profile.get('numeric_columns', [])
														
 
															+    if period_type in (PeriodType.MONTHLY, PeriodType.QUARTERLY):
														
 
															+        pages.append({
														
 
															+            'page_id': 'toc',
														
 
															+            'title': '目录',
														
 
															+            'page_type': 'toc',
														
 
															+            'order': order,
														
 
															+            'selected': True,
														
 
															+            'elements': [],
														
 
															+        })
														
 
															+        order += 1
														
 
															+
														
 
															+    pages.append({
														
 
															+        'page_id': 'kpi_overview',
														
 
															+        'title': '核心指标概览',
														
 
															+        'page_type': 'kpi_overview',
														
 
															+        'order': order,
														
 
															+        'selected': True,
														
 
															+        'elements': [{'type': 'kpi_cards', 'count': min(6, len(num_cols))}],
														
 
															+        'conclusion_title': '核心指标概览',
														
 
															+    })
														
 
															+    order += 1
														
 
															+
														
 
															+    time_cols = profile.get('time_columns', [])
														
 
															+    if time_cols and num_cols:
														
 
															+        top_num = num_cols[0]
														
 
															+        pages.append({
														
 
															+            'page_id': 'trend',
														
 
															+            'title': f'{top_num["inferred_label"]}趋势',
														
 
															+            'page_type': 'trend',
														
 
															+            'order': order,
														
 
															+            'selected': True,
														
 
															+            'elements': [
														
 
															+                {'type': 'line_chart', 'metric': top_num['column_name'],
														
 
															+                 'dimension': time_cols[0]['column_name'], 'title': f'{top_num["inferred_label"]}趋势'}
														
 
															+            ],
														
 
															+            'conclusion_title': f'{top_num["inferred_label"]}趋势',
														
 
															+        })
														
 
															+        order += 1
														
 
															+
														
 
															+    cat_cols = profile.get('category_columns', [])
														
 
															+    if cat_cols and num_cols:
														
 
															+        top_cat = cat_cols[0]
														
 
															+        top_num = num_cols[0]
														
 
															+        pages.append({
														
 
															+            'page_id': 'distribution',
														
 
															+            'title': f'{top_cat["inferred_label"]}分布',
														
 
															+            'page_type': 'distribution',
														
 
															+            'order': order,
														
 
															+            'selected': True,
														
 
															+            'elements': [
														
 
															+                {'type': 'doughnut_chart', 'metric': top_num['column_name'],
														
 
															+                 'dimension': top_cat['column_name'], 'title': f'{top_cat["inferred_label"]}占比'}
														
 
															+            ],
														
 
															+            'conclusion_title': f'{top_cat["inferred_label"]}分布',
														
 
															+        })
														
 
															+        order += 1
														
 
															+
														
 
															+        if len(cat_cols) >= 2:
														
 
															+            cat2 = cat_cols[1] if len(cat_cols) > 1 else cat_cols[0]
														
 
															+            pages.append({
														
 
															+                'page_id': 'ranking',
														
 
															+                'title': f'{cat2["inferred_label"]}排行',
														
 
															+                'page_type': 'ranking',
														
 
															+                'order': order,
														
 
															+                'selected': True,
														
 
															+                'elements': [
														
 
															+                    {'type': 'bar_chart', 'metric': num_cols[0]['column_name'],
														
 
															+                     'dimension': cat2['column_name'], 'title': f'{cat2["inferred_label"]}TOP排行'}
														
 
															+                ],
														
 
															+                'conclusion_title': f'{cat2["inferred_label"]}TOP排行',
														
 
															+            })
														
 
															+            order += 1
														
 
															+
														
 
															+    pages.append({
														
 
															+        'page_id': 'summary',
														
 
															+        'title': '总结与建议',
														
 
															+        'page_type': 'summary',
														
 
															+        'order': order,
														
 
															+        'selected': True,
														
 
															+        'elements': [{'type': 'insight_block', 'title': '总结与建议'}],
														
 
															+        'conclusion_title': '总结与建议',
														
 
															+    })
														
 
															+    order += 1
														
 
															+
														
 
															+    pages.append({
														
 
															+        'page_id': 'end',
														
 
															+        'title': '尾页',
														
 
															+        'page_type': 'end',
														
 
															+        'order': order,
														
 
															+        'selected': True,
														
 
															+        'elements': [],
														
 
															+    })
														
 
															+
														
 
															+    return pages
														
 
															+
														
 
															+
														
 
															+def _suggest_period_and_range(profile: dict) -> dict:
														
 
															+    granularity = profile.get('time_granularity', 'monthly')
														
 
															+    dr = profile.get('date_range', (None, None))
														
 
															+
														
 
															+    period_map = {
														
 
															+        'daily': PeriodType.DAILY,
														
 
															+        'weekly': PeriodType.WEEKLY,
														
 
															+        'monthly': PeriodType.MONTHLY,
														
 
															+        'quarterly': PeriodType.QUARTERLY,
														
 
															+        'yearly': PeriodType.MONTHLY,
														
 
															+    }
														
 
															+    suggested = period_map.get(granularity, PeriodType.MONTHLY)
														
 
															+
														
 
															+    page_range_map = {
														
 
															+        'daily': (6, 9),
														
 
															+        'weekly': (7, 11),
														
 
															+        'monthly': (8, 14),
														
 
															+        'quarterly': (10, 18),
														
 
															+        'yearly': (12, 20),
														
 
															+    }
														
 
															+    page_range = page_range_map.get(granularity, (8, 14))
														
 
															+
														
 
															+    return {
														
 
															+        'suggested_period': suggested.value,
														
 
															+        'suggested_page_range': page_range,
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+def _build_chart_mapping(profile: dict) -> list[dict]:
														
 
															+    mapping = []
														
 
															+    num_cols = profile.get('numeric_columns', [])
														
 
															+    time_cols = profile.get('time_columns', [])
														
 
															+    cat_cols = profile.get('category_columns', [])
														
 
															+
														
 
															+    if time_cols and num_cols:
														
 
															+        for nc in num_cols[:3]:
														
 
															+            mapping.append({
														
 
															+                'metric': nc['inferred_label'],
														
 
															+                'metric_col': nc['column_name'],
														
 
															+                'dimension': time_cols[0]['column_name'],
														
 
															+                'dimension_label': '时间',
														
 
															+                'chart_type': ChartType.LINE.value,
														
 
															+                'rationale': f'{nc["inferred_label"]}随时间变化趋势',
														
 
															+            })
														
 
															+
														
 
															+    if cat_cols and num_cols:
														
 
															+        top_num = num_cols[0]
														
 
															+        for cc in cat_cols[:3]:
														
 
															+            chart_type = ChartType.DOUGHNUT.value if cc['unique_count'] <= 8 else ChartType.BAR.value
														
 
															+            mapping.append({
														
 
															+                'metric': top_num['inferred_label'],
														
 
															+                'metric_col': top_num['column_name'],
														
 
															+                'dimension': cc['column_name'],
														
 
															+                'dimension_label': cc['inferred_label'],
														
 
															+                'chart_type': chart_type,
														
 
															+                'rationale': f'{top_num["inferred_label"]}按{cc["inferred_label"]}的分布',
														
 
															+            })
														
 
															+
														
 
															+    return mapping
														
 
															+
														
 
															+
														
 
															+def _build_summary(profile: dict) -> str:
														
 
															+    lines = []
														
 
															+    lines.append(f"数据量: {profile['total_rows']:,} 行 × {profile['total_columns']} 列")
														
 
															+
														
 
															+    num_cols = profile.get('numeric_columns', [])
														
 
															+    cat_cols = profile.get('category_columns', [])
														
 
															+    time_cols = profile.get('time_columns', [])
														
 
															+
														
 
															+    lines.append(f"可计算指标: {len(num_cols)} 个数值列")
														
 
															+    lines.append(f"可分析维度: {len(cat_cols)} 个分类列")
														
 
															+    if time_cols:
														
 
															+        lines.append(f"时间列: {time_cols[0]['column_name']}")
														
 
															+    lines.append(f"数据粒度: {profile.get('time_granularity', 'unknown')}")
														
 
															+
														
 
															+    dr = profile.get('date_range', (None, None))
														
 
															+    if dr[0]:
														
 
															+        lines.append(f"时间范围: {dr[0]} ~ {dr[1]}")
														
 
															+    q = profile.get('data_quality', {})
														
 
															+    lines.append(f"质量评分: {q.get('score', 0)}/100")
														
 
															+
														
 
															+    return '\n'.join(lines)
														
 
															+
														
 
															+
														
 
															+def _build_analysis_notes(profile: dict) -> list[str]:
														
 
															+    notes = []
														
 
															+    num_cols = profile.get('numeric_columns', [])
														
 
															+    cat_cols = profile.get('category_columns', [])
														
 
															+
														
 
															+    if not cat_cols:
														
 
															+        notes.append('数据中缺少分类维度列，报告将以数值汇总为主，建议补充分类字段以增强分析深度。')
														
 
															+
														
 
															+    if len(num_cols) >= 4:
														
 
															+        names = [c['inferred_label'] for c in num_cols[:4]]
														
 
															+        notes.append(f'核心数值指标: {", ".join(names)}')
														
 
															+
														
 
															+    if len(cat_cols) == 1:
														
 
															+        notes.append(f'仅有一个分类维度列 ({cat_cols[0]["inferred_label"]})，报告分析维度较窄。')
														
 
															+    elif len(cat_cols) >= 3:
														
 
															+        names = [c['inferred_label'] for c in cat_cols[:3]]
														
 
															+        notes.append(f'分类维度丰富 ({", ".join(names)})，可支撑多维交叉分析。')
														
 
															+
														
 
															+    q = profile.get('data_quality', {})
														
 
															+    if q.get('score', 100) < 85:
														
 
															+        notes.append(f'数据质量评分偏低 ({q["score"]}/100)，建议在生成前检查缺失值与异常值。')
														
 
															+
														
 
															+    return notes
														
 
															+
														
 
															+
														
 
															+def _infer_unit(col_name: str) -> str:
														
 
															+    col_lower = col_name.lower().strip()
														
 
															+    unit_map = {
														
 
															+        '金额': '元', '销售额': '元', '收入': '元', '利润': '元',
														
 
															+        '成本': '元', '费用': '元', '台数': '台', '件数': '件',
														
 
															+        '数量': '', '人数': '人', '天数': '天', '占比': '%',
														
 
															+        '比率': '%', '比例': '%', '率': '%',
														
 
															+    }
														
 
															+    for kw, unit in unit_map.items():
														
 
															+        if kw in col_lower:
														
 
															+            return unit
														
 
															+    return ''
														
 
															+
														
 
															+
														
 
															+def generate_interaction_prompts(recommendations: dict, profile: dict) -> dict:
														
 
															+    return {
														
 
															+        'period': {
														
 
															+            'question': '报告周期与页数范围',
														
 
															+            'detail': f"建议周期: {recommendations['suggested_period']}报\n建议页数: {recommendations['suggested_page_range'][0]}-{recommendations['suggested_page_range'][1]} 页\n请确认或调整",
														
 
															+        },
														
 
															+        'metrics': {
														
 
															+            'question': '核心指标集',
														
 
															+            'detail': f"检测到 {len(recommendations['suggested_metrics'])} 个可计算指标\n已自动推荐主要的 {min(6, len(recommendations['suggested_metrics']))} 个\n请确认或增删",
														
 
															+        },
														
 
															+        'audience': {
														
 
															+            'question': '受众与决策场景',
														
 
															+            'detail': '请选择: 管理层汇报 | 运营分析会 | 对外客户报告 | 自定义描述',
														
 
															+        },
														
 
															+        'style': {
														
 
															+            'question': '视觉风格与配色方向',
														
 
															+            'detail': '推荐方案: 商务经典(深蓝) | 清新简约(绿色) | 深色专业 | 温暖品牌\n请选择配色方案',
														
 
															+        },
														
 
															+        'pages': {
														
 
															+            'question': '页面结构与模板方案',
														
 
															+            'detail': f'推荐 {len(recommendations["suggested_pages"])} 个页面\n可增删调整页面顺序',
														
 
															+        },
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    profile = {
														
 
															+        'total_rows': 3240,
														
 
															+        'total_columns': 15,
														
 
															+        'numeric_columns': [
														
 
															+            {'column_name': '销售额', 'inferred_label': '销售额', 'numeric_stats': {'sum': 500000, 'mean': 154}},
														
 
															+            {'column_name': '订单量', 'inferred_label': '订单量', 'numeric_stats': {'sum': 3240, 'mean': 1.0}},
														
 
															+            {'column_name': '利润', 'inferred_label': '利润', 'numeric_stats': {'sum': 80000, 'mean': 25}},
														
 
															+        ],
														
 
															+        'category_columns': [
														
 
															+            {'column_name': '区域', 'inferred_label': '区域', 'unique_count': 5},
														
 
															+            {'column_name': '产品', 'inferred_label': '产品', 'unique_count': 12},
														
 
															+        ],
														
 
															+        'time_columns': [{'column_name': '日期', 'inferred_label': '日期'}],
														
 
															+        'time_granularity': 'monthly',
														
 
															+        'date_range': ('2026-01-01', '2026-04-30'),
														
 
															+        'data_quality': {'score': 92},
														
 
															+    }
														
 
															+    recs = analyze_and_recommend(profile, PeriodType.MONTHLY)
														
 
															+    prompts = generate_interaction_prompts(recs, profile)
														
 
															+    for k, v in prompts.items():
														
 
															+        print(f"\n{k}: {v['question']}\n{v['detail']}")
														
--- a/generate-data-report-ppt/scripts/data_loader.py
+++ b/generate-data-report-ppt/scripts/data_loader.py
@@ -1,12 +1,19 @@
 
															 """
														
 
															 Excel data loader for daily/weekly/monthly report generation.
														
 
															+Contains both legacy order-specific loaders and enhanced generic loaders.
														
 
															 """
														
 
															 import pandas as pd
														
 
															 from datetime import datetime, timedelta
														
 
															 import re
														
 
															 import warnings
														
 
															+import os
														
 
															+import io
														
 
															+import csv
														
 
															+
														
 
															+# =====================================================================
														
 
															+# LEGACY SECTION — Order-specific loaders (kept for backward compat)
														
 
															+# =====================================================================
														
 
															-# Field mapping: Excel column name -> internal field name
														
 
															 FIELD_MAP = {
														
 
															     '序号': 'seq',
														
 
															     '目的国家': 'country',
														
@@ -55,7 +62,6 @@ def _normalize_status(val):
 
															     if pd.isna(val):
														
 
															         return None
														
 
															     s = str(val).strip()
														
 
															-    # Match pattern like "A（合同拟定中）" or "A"
														
 
															     m = re.match(r'^([A-F])', s)
														
 
															     if m:
														
 
															         return m.group(1)
														
@@ -131,28 +137,21 @@ def load_weekly(filepath: str, year: int, week_num: int, week_start_day=0) -> tu
 
															     Returns (current_week_df, prev_week_df).
														
 
															     week_start_day: 0=Monday, 6=Sunday
														
 
															     """
														
 
															-    # Find the first day of the given week
														
 
															-    # Simplified: assume data starts from a known reference
														
 
															     meta = load_workbook_metadata(filepath)
														
 
															     first_date, last_date = meta['date_range']
														
 
															     if first_date is None:
														
 
															         raise ValueError("No valid date sheets found")
														
 
															-    # Find the Monday of the target week (using ISO week definition)
														
 
															-    # Jan 4 is always in week 1
														
 
															     jan4 = datetime(year, 1, 4)
														
 
															-    # Adjust to Monday
														
 
															     jan4_monday = jan4 - timedelta(days=jan4.weekday())
														
 
															     target_monday = jan4_monday + timedelta(weeks=week_num - 1)
														
 
															     target_sunday = target_monday + timedelta(days=6)
														
 
															-    # Clamp to available data range
														
 
															     start = max(target_monday, first_date)
														
 
															     end = min(target_sunday, last_date)
														
 
															     current = load_date_range(filepath, start, end)
														
 
															-    # Previous week
														
 
															     prev_start = start - timedelta(days=7)
														
 
															     prev_end = end - timedelta(days=7)
														
 
															     if prev_start >= first_date:
														
@@ -169,7 +168,6 @@ def load_monthly(filepath: str, year: int, month: int) -> tuple:
 
															     Returns (current_month_df, prev_month_df, yoy_month_df).
														
 
															     """
														
 
															     start = datetime(year, month, 1)
														
 
															-    # Last day of month
														
 
															     if month == 12:
														
 
															         end = datetime(year + 1, 1, 1) - timedelta(days=1)
														
 
															     else:
														
@@ -177,7 +175,6 @@ def load_monthly(filepath: str, year: int, month: int) -> tuple:
 
															     current = load_date_range(filepath, start, end)
														
 
															-    # Previous month
														
 
															     if month == 1:
														
 
															         prev_start = datetime(year - 1, 12, 1)
														
 
															         prev_end = datetime(year, 1, 1) - timedelta(days=1)
														
@@ -190,7 +187,6 @@ def load_monthly(filepath: str, year: int, month: int) -> tuple:
 
															     except ValueError:
														
 
															         previous = pd.DataFrame(columns=current.columns)
														
 
															-    # YoY (same month last year)
														
 
															     yoy_start = datetime(year - 1, month, 1)
														
 
															     if month == 12:
														
 
															         yoy_end = datetime(year, 1, 1) - timedelta(days=1)
														
@@ -206,20 +202,16 @@ def load_monthly(filepath: str, year: int, month: int) -> tuple:
 
															 def _clean_dataframe(df: pd.DataFrame) -> pd.DataFrame:
														
 
															-    """Rename columns, parse dates, clean statuses."""
														
 
															-    # Rename known columns
														
 
															+    """Rename columns, parse dates, clean statuses (legacy)."""
														
 
															     rename_map = {k: v for k, v in FIELD_MAP.items() if k in df.columns}
														
 
															     df = df.rename(columns=rename_map)
														
 
															-    # Normalize status
														
 
															     if 'status' in df.columns:
														
 
															         df['status_code'] = df['status'].apply(_normalize_status)
														
 
															-    # Parse numeric fields
														
 
															     if 'order_qty' in df.columns:
														
 
															         df['order_qty'] = pd.to_numeric(df['order_qty'], errors='coerce')
														
 
															-    # Parse date fields
														
 
															     date_fields = ['contract_date', 'deposit_date', 'order_gen_date',
														
 
															                    'price_review_date', 'contract_review_date', 'seal_apply_date',
														
 
															                    'seal_date', 'final_pay_date', 'customs_date', 'license_date',
														
@@ -228,22 +220,293 @@ def _clean_dataframe(df: pd.DataFrame) -> pd.DataFrame:
 
															         if field in df.columns:
														
 
															             df[field] = df[field].apply(_parse_date)
														
 
															-    # Tracking days
														
 
															     if 'tracking_days' in df.columns:
														
 
															         df['tracking_days'] = pd.to_numeric(df['tracking_days'], errors='coerce')
														
 
															-    # Boolean updated
														
 
															     if 'is_updated' in df.columns:
														
 
															         df['is_updated_flag'] = df['is_updated'].astype(str).str.strip() == '是'
														
 
															     return df
														
 
															+# =====================================================================
														
 
															+# GENERIC LOADING SECTION — Universal loaders for any Excel data
														
 
															+# =====================================================================
														
 
															+
														
 
															+# Summary row keywords (Chinese and English) to auto-detect and skip
														
 
															+SUMMARY_KEYWORDS = [
														
 
															+    '合计', '总计', '小计', '汇总', '累计', '总和',
														
 
															+    'total', 'sum', 'subtotal', 'grand total', '合计：', '总计：',
														
 
															+    '平均', 'avg', 'average',
														
 
															+]
														
 
															+
														
 
															+# Encoding priority list for CSV detection
														
 
															+CSV_ENCODINGS = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'gb18030', 'latin-1', 'cp1252']
														
 
															+
														
 
															+
														
 
															+def auto_detect_file_format(filepath: str) -> str:
														
 
															+    """Auto-detect file format: xlsx, xls, csv, or unknown."""
														
 
															+    ext = os.path.splitext(filepath)[1].lower()
														
 
															+    if ext in ('.xlsx', '.xls'):
														
 
															+        return ext[1:]
														
 
															+    if ext == '.csv':
														
 
															+        return 'csv'
														
 
															+    if ext in ('.xlsm', '.xlsb'):
														
 
															+        return ext[1:]
														
 
															+    return 'unknown'
														
 
															+
														
 
															+
														
 
															+def load_generic_csv(filepath: str, encoding=None, **kwargs) -> pd.DataFrame:
														
 
															+    """
														
 
															+    Load a CSV file with auto-encoding detection.
														
 
															+    Tries common encodings until one succeeds.
														
 
															+    """
														
 
															+    if encoding:
														
 
															+        try:
														
 
															+            return pd.read_csv(filepath, encoding=encoding, **kwargs)
														
 
															+        except (UnicodeDecodeError, UnicodeError):
														
 
															+            raise ValueError(f"Failed to decode {filepath} with encoding {encoding}")
														
 
															+
														
 
															+    last_error = None
														
 
															+    for enc in CSV_ENCODINGS:
														
 
															+        try:
														
 
															+            df = pd.read_csv(filepath, encoding=enc, **kwargs)
														
 
															+            if len(df.columns) > 0:
														
 
															+                return df
														
 
															+        except (UnicodeDecodeError, UnicodeError, pd.errors.ParserError) as e:
														
 
															+            last_error = e
														
 
															+            continue
														
 
															+
														
 
															+    raise ValueError(
														
 
															+        f"Unable to decode CSV file {filepath}. Tried encodings: "
														
 
															+        f"{CSV_ENCODINGS}. Last error: {last_error}"
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def _detect_and_skip_footer_rows(df_raw: pd.DataFrame) -> pd.DataFrame:
														
 
															+    """Detect and remove summary/aggregation rows at the end of the data."""
														
 
															+    if df_raw.empty:
														
 
															+        return df_raw
														
 
															+
														
 
															+    rows_to_drop = []
														
 
															+    text_cols = [c for c in df_raw.columns if df_raw[c].dtype == 'object']
														
 
															+
														
 
															+    for idx in range(len(df_raw) - 1, -1, -1):
														
 
															+        row = df_raw.iloc[idx]
														
 
															+        is_summary = False
														
 
															+        for col in text_cols:
														
 
															+            val = str(row.get(col, '')).strip().lower()
														
 
															+            if any(kw in val for kw in SUMMARY_KEYWORDS):
														
 
															+                is_summary = True
														
 
															+                break
														
 
															+        if is_summary:
														
 
															+            rows_to_drop.append(idx)
														
 
															+        else:
														
 
															+            break
														
 
															+
														
 
															+        if len(rows_to_drop) > 20:
														
 
															+            break
														
 
															+
														
 
															+    if rows_to_drop:
														
 
															+        df_raw = df_raw.drop(index=rows_to_drop)
														
 
															+        df_raw = df_raw.reset_index(drop=True)
														
 
															+
														
 
															+    return df_raw
														
 
															+
														
 
															+
														
 
															+def _detect_empty_or_notes_rows(df_raw: pd.DataFrame) -> pd.DataFrame:
														
 
															+    """Remove leading empty rows and trailing fully-empty rows."""
														
 
															+    if df_raw.empty:
														
 
															+        return df_raw
														
 
															+
														
 
															+    non_empty_rows = df_raw.notna().any(axis=1)
														
 
															+    first_valid = non_empty_rows.idxmax() if non_empty_rows.any() else 0
														
 
															+    last_valid = non_empty_rows[non_empty_rows].index[-1] if non_empty_rows.any() else len(df_raw)
														
 
															+
														
 
															+    df_raw = df_raw.iloc[first_valid:last_valid + 1].reset_index(drop=True)
														
 
															+    return df_raw
														
 
															+
														
 
															+
														
 
															+def normalize_column_names(col_name: str) -> str:
														
 
															+    """
														
 
															+    Normalize a single column name: strip whitespace, unify brackets, remove special chars.
														
 
															+    """
														
 
															+    if not isinstance(col_name, str):
														
 
															+        return col_name
														
 
															+    name = col_name.strip()
														
 
															+    name = name.replace('（', '(').replace('）', ')')
														
 
															+    name = name.replace('【', '[').replace('】', ']')
														
 
															+    name = name.replace('\n', ' ').replace('\r', ' ')
														
 
															+    name = re.sub(r'\s+', ' ', name)
														
 
															+    return name
														
 
															+
														
 
															+
														
 
															+def _clean_generic_dataframe(df: pd.DataFrame, skip_summary_rows=True) -> pd.DataFrame:
														
 
															+    """
														
 
															+    Universal DataFrame cleaning:
														
 
															+    - Remove fully empty rows/columns
														
 
															+    - Drop 'Unnamed' columns
														
 
															+    - Normalize column names (strip whitespace, unify brackets)
														
 
															+    - Auto-detect and remove summary/total rows
														
 
															+    - Try to parse date columns
														
 
															+    - Try to parse numeric columns
														
 
															+    """
														
 
															+    if df.empty:
														
 
															+        return df
														
 
															+
														
 
															+    df = df.dropna(how='all').reset_index(drop=True)
														
 
															+    df = df.dropna(axis=1, how='all')
														
 
															+
														
 
															+    df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed', na=False)]
														
 
															+
														
 
															+    df = df.rename(columns=normalize_column_names)
														
 
															+
														
 
															+    if skip_summary_rows:
														
 
															+        df = _detect_and_skip_footer_rows(df)
														
 
															+        df = _detect_empty_or_notes_rows(df)
														
 
															+
														
 
															+    for col in df.columns:
														
 
															+        if df[col].dtype == 'object':
														
 
															+            try:
														
 
															+                parsed = pd.to_datetime(df[col], errors='coerce', infer_datetime_format=True)
														
 
															+                if parsed.notna().sum() > len(df) * 0.7:
														
 
															+                    df[col] = parsed
														
 
															+                    continue
														
 
															+            except Exception:
														
 
															+                pass
														
 
															+            try:
														
 
															+                numeric = pd.to_numeric(df[col], errors='coerce')
														
 
															+                if numeric.notna().sum() > len(df) * 0.7:
														
 
															+                    df[col] = numeric
														
 
															+            except Exception:
														
 
															+                pass
														
 
															+
														
 
															+    return df
														
 
															+
														
 
															+
														
 
															+def load_generic_excel(filepath: str, sheet_name=0, skip_summary_rows=True,
														
 
															+                       encoding=None, dtype_backend=None) -> pd.DataFrame:
														
 
															+    """
														
 
															+    Load any Excel/CSV file into a cleaned DataFrame.
														
 
															+
														
 
															+    Args:
														
 
															+        filepath: Path to the data file (.xlsx, .xls, or .csv)
														
 
															+        sheet_name: Sheet name or index (for Excel). Ignored for CSV.
														
 
															+        skip_summary_rows: Auto-detect and remove summary/total footer rows
														
 
															+        encoding: File encoding (auto-detected for CSV if None)
														
 
															+        dtype_backend: Optional pandas dtype backend ('numpy_nullable', 'pyarrow')
														
 
															+    """
														
 
															+    fmt = auto_detect_file_format(filepath)
														
 
															+
														
 
															+    kwargs = {}
														
 
															+    if dtype_backend:
														
 
															+        kwargs['dtype_backend'] = dtype_backend
														
 
															+
														
 
															+    if fmt == 'csv':
														
 
															+        df = load_generic_csv(filepath, encoding=encoding, **kwargs)
														
 
															+    else:
														
 
															+        try:
														
 
															+            df = pd.read_excel(filepath, sheet_name=sheet_name, **kwargs)
														
 
															+        except Exception as e:
														
 
															+            if fmt == 'xls':
														
 
															+                raise ValueError(
														
 
															+                    f"Failed to read .xls file. Try converting to .xlsx format. "
														
 
															+                    f"Error: {e}"
														
 
															+                )
														
 
															+            raise
														
 
															+
														
 
															+    return _clean_generic_dataframe(df, skip_summary_rows=skip_summary_rows)
														
 
															+
														
 
															+
														
 
															+def load_generic_all_sheets(filepath: str, skip_summary_rows=True) -> pd.DataFrame:
														
 
															+    """
														
 
															+    Load all sheets from an Excel file, merge into a single DataFrame.
														
 
															+    Adds '_source_sheet' column to track the source sheet.
														
 
															+    """
														
 
															+    fmt = auto_detect_file_format(filepath)
														
 
															+    if fmt == 'csv':
														
 
															+        return load_generic_excel(filepath, skip_summary_rows=skip_summary_rows)
														
 
															+
														
 
															+    xl = pd.ExcelFile(filepath)
														
 
															+    if len(xl.sheet_names) == 1:
														
 
															+        df = pd.read_excel(filepath, sheet_name=xl.sheet_names[0])
														
 
															+        return _clean_generic_dataframe(df, skip_summary_rows=skip_summary_rows)
														
 
															+
														
 
															+    frames = []
														
 
															+    for sheet in xl.sheet_names:
														
 
															+        try:
														
 
															+            df = pd.read_excel(filepath, sheet_name=sheet)
														
 
															+            df['_source_sheet'] = sheet
														
 
															+            frames.append(df)
														
 
															+        except Exception:
														
 
															+            continue
														
 
															+    if not frames:
														
 
															+        raise ValueError(f"No valid sheets found in {filepath}")
														
 
															+    combined = pd.concat(frames, ignore_index=True)
														
 
															+    return _clean_generic_dataframe(combined, skip_summary_rows=skip_summary_rows)
														
 
															+
														
 
															+
														
 
															+def auto_detect_date_column(df: pd.DataFrame) -> str:
														
 
															+    """Auto-detect the primary date/time column in a DataFrame."""
														
 
															+    date_keywords = ['日期', '时间', 'date', 'time', '年', '月', '日']
														
 
															+    for col in df.columns:
														
 
															+        col_str = str(col).lower().strip()
														
 
															+        if any(kw in col_str for kw in date_keywords):
														
 
															+            parsed = pd.to_datetime(df[col], errors='coerce')
														
 
															+            if parsed.notna().sum() > len(df) * 0.5:
														
 
															+                return col
														
 
															+    return ''
														
 
															+
														
 
															+
														
 
															+def auto_parse_single_sheet(filepath: str, sheet_name=0) -> pd.DataFrame:
														
 
															+    """Load and clean a single sheet (shortcut for load_generic_excel)."""
														
 
															+    return load_generic_excel(filepath, sheet_name=sheet_name)
														
 
															+
														
 
															+
														
 
															+def load_generic_file_info(filepath: str) -> dict:
														
 
															+    """
														
 
															+    Return file metadata without full data loading.
														
 
															+    Useful for quick inspection before deciding how to load.
														
 
															+    """
														
 
															+    info = {'filepath': filepath, 'format': auto_detect_file_format(filepath)}
														
 
															+
														
 
															+    if info['format'] == 'csv':
														
 
															+        try:
														
 
															+            with open(filepath, 'r', encoding='utf-8-sig') as f:
														
 
															+                sample = f.read(8192)
														
 
															+            dialect = csv.Sniffer().sniff(sample[:4096])
														
 
															+            info['delimiter'] = dialect.delimiter
														
 
															+            info['has_header'] = csv.Sniffer().has_header(sample)
														
 
															+            info['approx_rows'] = sample.count('\n')
														
 
															+        except Exception:
														
 
															+            info['delimiter'] = ','
														
 
															+    else:
														
 
															+        try:
														
 
															+            xl = pd.ExcelFile(filepath)
														
 
															+            info['sheet_names'] = xl.sheet_names
														
 
															+            info['sheet_count'] = len(xl.sheet_names)
														
 
															+        except Exception as e:
														
 
															+            info['error'] = str(e)
														
 
															+
														
 
															+    info['file_size_mb'] = round(os.path.getsize(filepath) / (1024 * 1024), 2)
														
 
															+    return info
														
 
															+
														
 
															+
														
 
															 if __name__ == '__main__':
														
 
															     import sys
														
 
															     if len(sys.argv) > 1:
														
 
															         fp = sys.argv[1]
														
 
															-        meta = load_workbook_metadata(fp)
														
 
															-        print(f"Sheets: {meta['sheets'][:5]}...")
														
 
															-        print(f"Date range: {meta['date_range'][0]} ~ {meta['date_range'][1]}")
														
 
															-        print(f"Total days: {meta['total_days']}")
														
 
															+        fmt = auto_detect_file_format(fp)
														
 
															+        print(f"File: {fp}")
														
 
															+        print(f"Format: {fmt}")
														
 
															+
														
 
															+        file_info = load_generic_file_info(fp)
														
 
															+        print(f"Size: {file_info.get('file_size_mb', '?')} MB")
														
 
															+        if 'sheet_names' in file_info:
														
 
															+            print(f"Sheets ({file_info['sheet_count']}): {file_info['sheet_names'][:5]}...")
														
 
															+
														
 
															+        df = load_generic_excel(fp)
														
 
															+        date_col = auto_detect_date_column(df)
														
 
															+        print(f"Generic load: {len(df)} rows x {len(df.columns)} cols, "
														
 
															+              f"date column: {date_col}")
														
 
															+        print(f"Columns: {list(df.columns)}")
														
--- a/generate-data-report-ppt/scripts/data_profiler.py
+++ b/generate-data-report-ppt/scripts/data_profiler.py
@@ -0,0 +1,838 @@
 
															+"""
														
 
															+Universal data profiling engine: auto-detect schema, statistical features,
														
 
															+and semantic inference from arbitrary Excel data.
														
 
															+Enhanced with content-based value analysis, distribution shape analysis,
														
 
															+derived metric detection, and multi-dimensional quality scoring.
														
 
															+"""
														
 
															+import pandas as pd
														
 
															+import numpy as np
														
 
															+from datetime import datetime, date
														
 
															+from collections import Counter
														
 
															+import re
														
 
															+import math
														
 
															+
														
 
															+from report_config import ColumnProfile, ColumnRole
														
 
															+
														
 
															+
														
 
															+# =====================================================================
														
 
															+# DATE PATTERNS — expanded for broader format coverage
														
 
															+# =====================================================================
														
 
															+DATE_PATTERNS = [
														
 
															+    re.compile(r'^\d{4}年\d{1,2}月\d{1,2}日$'),
														
 
															+    re.compile(r'^\d{4}-\d{2}-\d{2}$'),
														
 
															+    re.compile(r'^\d{4}/\d{1,2}/\d{1,2}$'),
														
 
															+    re.compile(r'^\d{4}\.\d{1,2}\.\d{1,2}$'),
														
 
															+    re.compile(r'^\d{4}年\d{1,2}月$'),
														
 
															+    re.compile(r'^\d{4}-\d{2}$'),
														
 
															+    re.compile(r'^\d{2}-\d{2}$'),
														
 
															+    re.compile(r'^\d{2}/\d{2}$'),
														
 
															+    re.compile(r'^\d{8}$'),  # YYYYMMDD
														
 
															+]
														
 
															+
														
 
															+TIME_KEYWORDS = [
														
 
															+    '日期', '时间', 'date', 'time', '年', '月', '日', '周', '期', '季度',
														
 
															+    'period', 'month', 'year', 'quarter', 'week', 'day', 'timestamp',
														
 
															+    '月份', '年份', '周期', '时段', 'time', 'datetime',
														
 
															+    'date', 'created', 'updated', 'modified', '发生', '录入', '创建',
														
 
															+]
														
 
															+
														
 
															+NUMERIC_KEYWORDS = [
														
 
															+    '金额', '数量', '台数', '件数', '元', '价格', '收入', '支出',
														
 
															+    '利润', '成本', '费用', '销量', '销售额', '总数', '合计',
														
 
															+    'amount', 'price', 'qty', 'quantity', 'revenue', 'cost',
														
 
															+    'sales', 'volume', 'value', 'total', 'sum', 'count',
														
 
															+    '单数', '笔数', '人数', '天数', '比率', '占比', '比例', '率',
														
 
															+    '预算', 'budget', '花费', 'spend', 'fee', '金额', '单价',
														
 
															+    'unit', '得分', 'score', 'rating', '评分',
														
 
															+]
														
 
															+
														
 
															+CATEGORY_KEYWORDS = [
														
 
															+    '国家', '区域', '地区', '城市', '省份', '状态', '类型', '类别',
														
 
															+    '分类', '部门', '组', '等级', '级别', '品牌', '渠道',
														
 
															+    'country', 'region', 'city', 'status', 'type', 'category',
														
 
															+    'department', 'group', 'level', 'brand', 'channel',
														
 
															+    '负责人', 'owner', 'manager', '产品', 'product', '阶段',
														
 
															+    '供应商', 'supplier', '客户', 'customer', '行业', 'industry',
														
 
															+    '性别', 'gender', '职位', 'title', '角色', 'role', '标签', 'tag',
														
 
															+    '科目', 'account', '方向', 'direction', '方式', 'method',
														
 
															+    '意向', 'intent', 'intention', 'priority', '优先级',
														
 
															+]
														
 
															+
														
 
															+ID_KEYWORDS = [
														
 
															+    'id', '编号', '序号', '代码', 'code', 'no', '编码', '合同号',
														
 
															+    '订单号', '工单号', '流水号', '单号', '标识', 'key',
														
 
															+    'uuid', 'guid', 'sn', '序列号', '身份证', 'phone', '手机',
														
 
															+    '邮箱', 'email', '电话', 'tel', 'mobile',
														
 
															+]
														
 
															+
														
 
															+TEXT_KEYWORDS = [
														
 
															+    '备注', '描述', '说明', '详情', '内容', '意见', '建议', '进度更新',
														
 
															+    'note', 'description', 'detail', 'remark', 'comment', 'memo',
														
 
															+    '地址', 'address', '介绍', '摘要', 'summary', '附注',
														
 
															+    '反馈', 'feedback', '理由', 'reason', '原因', 'cause',
														
 
															+]
														
 
															+
														
 
															+RATE_KEYWORDS = [
														
 
															+    '率', 'ratio', 'rate', '占比', '比例', 'percentage', 'pct',
														
 
															+    'percent', 'conversion', '转化率', '完成率', '增长率',
														
 
															+]
														
 
															+
														
 
															+# Patterns for value-based content detection
														
 
															+PHONE_PATTERN = re.compile(r'^[\+]?[\d\-\(\)\s]{6,20}$')
														
 
															+EMAIL_PATTERN = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')
														
 
															+URL_PATTERN = re.compile(r'^https?://', re.IGNORECASE)
														
 
															+YEAR_PATTERN = re.compile(r'^\d{4}$')
														
 
															+
														
 
															+
														
 
															+def _parse_date(val):
														
 
															+    if pd.isna(val):
														
 
															+        return None
														
 
															+    if isinstance(val, (datetime, date)):
														
 
															+        return val
														
 
															+    if isinstance(val, (int, float)) and not math.isnan(val):
														
 
															+        try:
														
 
															+            return pd.Timestamp(val).to_pydatetime()
														
 
															+        except (ValueError, OverflowError):
														
 
															+            pass
														
 
															+    s = str(val).strip()
														
 
															+    for pattern in DATE_PATTERNS:
														
 
															+        if pattern.match(s):
														
 
															+            for fmt in ('%Y年%m月%d日', '%Y-%m-%d', '%Y/%m/%d',
														
 
															+                        '%Y.%m.%d', '%Y年%m月', '%Y-%m',
														
 
															+                        '%m-%d', '%m/%d', '%Y%m%d'):
														
 
															+                try:
														
 
															+                    return datetime.strptime(s, fmt)
														
 
															+                except ValueError:
														
 
															+                    continue
														
 
															+    return None
														
 
															+
														
 
															+
														
 
															+# =====================================================================
														
 
															+# VALUE-BASED CONTENT ANALYSIS
														
 
															+# =====================================================================
														
 
															+
														
 
															+def _analyze_value_patterns(series: pd.Series, sample_count: int = 100) -> dict:
														
 
															+    """Analyze actual data values to detect patterns and content types."""
														
 
															+    non_null = series.dropna().astype(str).head(sample_count)
														
 
															+    if len(non_null) == 0:
														
 
															+        return {}
														
 
															+
														
 
															+    patterns = {}
														
 
															+
														
 
															+    # Check if values look like percentages
														
 
															+    # Only flag as percentage if: ends with % OR is a decimal fraction (0.0-1.0)
														
 
															+    pct_like = sum(1 for v in non_null if v.endswith('%') or
														
 
															+                   (v.replace('.', '', 1).lstrip('-').isdigit() and
														
 
															+                    0 < float(v) <= 1 and not v.isdigit()))
														
 
															+    patterns['pct_ratio'] = pct_like / len(non_null)
														
 
															+
														
 
															+    # Check for yes/no or true/false patterns
														
 
															+    yn_vals = {'是', '否', 'yes', 'no', 'true', 'false', 'y', 'n', 't', 'f',
														
 
															+               '有', '无', '0', '1'}
														
 
															+    yn_like = sum(1 for v in non_null if v.lower() in yn_vals)
														
 
															+    patterns['binary_ratio'] = yn_like / len(non_null)
														
 
															+
														
 
															+    # Check for ordinal/categorical short text
														
 
															+    short_text = sum(1 for v in non_null if len(v) <= 20)
														
 
															+    patterns['short_text_ratio'] = short_text / len(non_null)
														
 
															+
														
 
															+    # Check for phone-like patterns
														
 
															+    phone_like = sum(1 for v in non_null if PHONE_PATTERN.match(v))
														
 
															+    patterns['phone_ratio'] = phone_like / len(non_null)
														
 
															+
														
 
															+    # Check for email-like patterns
														
 
															+    email_like = sum(1 for v in non_null if EMAIL_PATTERN.match(v))
														
 
															+    patterns['email_ratio'] = email_like / len(non_null)
														
 
															+
														
 
															+    # Check for URL-like patterns
														
 
															+    url_like = sum(1 for v in non_null if URL_PATTERN.match(v))
														
 
															+    patterns['url_ratio'] = url_like / len(non_null)
														
 
															+
														
 
															+    # Check for pure digit strings (possible IDs)
														
 
															+    digit_only = sum(1 for v in non_null if v.isdigit() and len(v) >= 6)
														
 
															+    patterns['digit_id_ratio'] = digit_only / len(non_null)
														
 
															+
														
 
															+    # Check for year-like values
														
 
															+    year_like = sum(1 for v in non_null if YEAR_PATTERN.match(v))
														
 
															+    patterns['year_ratio'] = year_like / len(non_null)
														
 
															+
														
 
															+    # Detect ordinal levels
														
 
															+    ordinal_sets = [
														
 
															+        {'高', '中', '低', 'A', 'B', 'C', '甲', '乙', '丙'},
														
 
															+        {'一级', '二级', '三级', '四级', 'level 1', 'level 2', 'level 3'},
														
 
															+        {'critical', 'major', 'minor', 'high', 'medium', 'low'},
														
 
															+    ]
														
 
															+    for oset in ordinal_sets:
														
 
															+        ord_like = sum(1 for v in non_null if v in oset)
														
 
															+        if ord_like / len(non_null) > 0.3:
														
 
															+            patterns['ordinal'] = True
														
 
															+            break
														
 
															+    else:
														
 
															+        patterns['ordinal'] = False
														
 
															+
														
 
															+    # Average text length
														
 
															+    patterns['avg_text_len'] = round(non_null.str.len().mean(), 1)
														
 
															+
														
 
															+    # Unique ratio
														
 
															+    unique_ratio = series.nunique() / max(len(non_null), 1)
														
 
															+    patterns['unique_ratio'] = round(unique_ratio, 4)
														
 
															+
														
 
															+    return patterns
														
 
															+
														
 
															+
														
 
															+def _infer_role_from_values(value_patterns: dict, col_name: str,
														
 
															+                            dtype_str: str, unique_count: int, total_rows: int) -> str:
														
 
															+    """Infer column role based on value content analysis results."""
														
 
															+    up = value_patterns
														
 
															+
														
 
															+    # High ratio of email patterns
														
 
															+    if up.get('email_ratio', 0) > 0.5:
														
 
															+        return 'id'
														
 
															+
														
 
															+    # High ratio of phone patterns
														
 
															+    if up.get('phone_ratio', 0) > 0.5:
														
 
															+        return 'id'
														
 
															+
														
 
															+    # High ratio of URL patterns
														
 
															+    if up.get('url_ratio', 0) > 0.5:
														
 
															+        return 'text'
														
 
															+
														
 
															+    # Mostly binary values (yes/no)
														
 
															+    if up.get('binary_ratio', 0) > 0.6:
														
 
															+        return 'category'
														
 
															+
														
 
															+    # Mostly percentage values
														
 
															+    if up.get('pct_ratio', 0) > 0.5:
														
 
															+        return 'numeric'
														
 
															+
														
 
															+    # High ratio of digit-only long strings (likely IDs)
														
 
															+    if up.get('digit_id_ratio', 0) > 0.5 and unique_count > total_rows * 0.5:
														
 
															+        return 'id'
														
 
															+
														
 
															+    # Ordinal level detected
														
 
															+    if up.get('ordinal', False):
														
 
															+        return 'category'
														
 
															+
														
 
															+    return None  # No clear signal from values
														
 
															+
														
 
															+
														
 
															+# =====================================================================
														
 
															+# SEMANTIC KEYWORD-BASED ROLE INFERENCE
														
 
															+# =====================================================================
														
 
															+
														
 
															+def _infer_column_role(col_name: str, dtype_str: str, sample_values: list,
														
 
															+                       null_rate: float, unique_count: int, total_rows: int,
														
 
															+                       value_patterns: dict = None) -> ColumnRole:
														
 
															+    col_lower = col_name.lower().strip()
														
 
															+
														
 
															+    # 1) Value-based inference first (stronger signal)
														
 
															+    if value_patterns:
														
 
															+        value_role = _infer_role_from_values(value_patterns, col_name,
														
 
															+                                              dtype_str, unique_count, total_rows)
														
 
															+        if value_role:
														
 
															+            return ColumnRole(value_role)
														
 
															+
														
 
															+    # 2) Keyword-based inference (expanded)
														
 
															+    if any(kw in col_lower for kw in ID_KEYWORDS):
														
 
															+        return ColumnRole.ID
														
 
															+
														
 
															+    if any(kw in col_lower for kw in TIME_KEYWORDS):
														
 
															+        return ColumnRole.TIME
														
 
															+
														
 
															+    if any(kw in col_lower for kw in NUMERIC_KEYWORDS):
														
 
															+        return ColumnRole.NUMERIC
														
 
															+
														
 
															+    if any(kw in col_lower for kw in CATEGORY_KEYWORDS):
														
 
															+        return ColumnRole.CATEGORY
														
 
															+
														
 
															+    if any(kw in col_lower for kw in TEXT_KEYWORDS):
														
 
															+        return ColumnRole.TEXT
														
 
															+
														
 
															+    # 3) dtype-based fallback
														
 
															+    if 'int' in dtype_str or 'float' in dtype_str:
														
 
															+        if unique_count <= 15 and total_rows > 20:
														
 
															+            return ColumnRole.CATEGORY
														
 
															+        return ColumnRole.NUMERIC
														
 
															+
														
 
															+    if 'bool' in dtype_str:
														
 
															+        return ColumnRole.CATEGORY
														
 
															+
														
 
															+    if 'datetime' in dtype_str:
														
 
															+        return ColumnRole.TIME
														
 
															+
														
 
															+    # 4) Cardinality-based inference
														
 
															+    if total_rows > 0:
														
 
															+        cardinality_ratio = unique_count / total_rows
														
 
															+        if cardinality_ratio > 0.8 and unique_count > 20:
														
 
															+            return ColumnRole.TEXT
														
 
															+        if cardinality_ratio < 0.3 and unique_count <= 30:
														
 
															+            return ColumnRole.CATEGORY
														
 
															+
														
 
															+    return ColumnRole.TEXT
														
 
															+
														
 
															+
														
 
															+def _infer_metric_label(col_name: str, role: ColumnRole, value_patterns: dict = None) -> str:
														
 
															+    col_lower = col_name.lower().strip()
														
 
															+
														
 
															+    # If values are percentage-like, mark as '比率'
														
 
															+    if value_patterns and value_patterns.get('pct_ratio', 0) > 0.5:
														
 
															+        for kw in ['率', '转化', '占比', '比例']:
														
 
															+            if kw in col_lower:
														
 
															+                return col_name
														
 
															+        return col_name + '(占比)'
														
 
															+
														
 
															+    label_map = {
														
 
															+        '金额': '金额', '销售额': '销售额', '收入': '收入', '利润': '利润',
														
 
															+        '数量': '数量', '台数': '台数', '件数': '件数', '订单数': '订单数',
														
 
															+        '成本': '成本', '费用': '费用', '销量': '销量', '占比': '占比',
														
 
															+        '天数': '天数', '人数': '人数', '比率': '比率', '转化率': '转化率',
														
 
															+        '增长率': '增长率', '完成率': '完成率', '单价': '单价',
														
 
															+        '价格': '价格', '得分': '得分', '评分': '评分',
														
 
															+    }
														
 
															+    for kw, label in label_map.items():
														
 
															+        if kw in col_lower:
														
 
															+            return label
														
 
															+
														
 
															+    # Check for rate-related keywords
														
 
															+    if any(kw in col_lower for kw in RATE_KEYWORDS):
														
 
															+        return '比率'
														
 
															+
														
 
															+    if role == ColumnRole.NUMERIC:
														
 
															+        return col_name
														
 
															+    elif role == ColumnRole.TIME:
														
 
															+        return '日期'
														
 
															+    elif role == ColumnRole.CATEGORY:
														
 
															+        return col_name
														
 
															+    return col_name
														
 
															+
														
 
															+
														
 
															+def _infer_unit(col_name: str, value_patterns: dict = None) -> str:
														
 
															+    col_lower = col_name.lower().strip()
														
 
															+
														
 
															+    # If values are percentage-like
														
 
															+    if value_patterns and value_patterns.get('pct_ratio', 0) > 0.5:
														
 
															+        return '%'
														
 
															+
														
 
															+    unit_map = {
														
 
															+        '金额': '元', '销售额': '元', '收入': '元', '利润': '元',
														
 
															+        '成本': '元', '费用': '元', '台数': '台', '件数': '件',
														
 
															+        '数量': '', '人数': '人', '天数': '天', '占比': '%',
														
 
															+        '比率': '%', '比例': '%', '率': '%', '转化率': '%',
														
 
															+        '增长率': '%', '完成率': '%', '单价': '元', '价格': '元',
														
 
															+        '得分': '分', '评分': '分',
														
 
															+    }
														
 
															+    for kw, unit in unit_map.items():
														
 
															+        if kw in col_lower:
														
 
															+            return unit
														
 
															+    return ''
														
 
															+
														
 
															+
														
 
															+# =====================================================================
														
 
															+# DISTRIBUTION SHAPE ANALYSIS
														
 
															+# =====================================================================
														
 
															+
														
 
															+def _calc_distribution_shape(series: pd.Series) -> dict:
														
 
															+    """Compute skewness, kurtosis and distribution type for numeric series."""
														
 
															+    try:
														
 
															+        s = series.dropna()
														
 
															+        if len(s) < 4:
														
 
															+            return {}
														
 
															+        skew = round(float(s.skew()), 3)
														
 
															+        kurt = round(float(s.kurtosis()), 3)
														
 
															+
														
 
															+        # Determine distribution type
														
 
															+        abs_skew = abs(skew)
														
 
															+        if abs_skew < 0.5:
														
 
															+            skew_type = '近似对称'
														
 
															+        elif abs_skew < 1.0:
														
 
															+            skew_type = '轻度偏态'
														
 
															+        else:
														
 
															+            skew_type = '显著偏态'
														
 
															+
														
 
															+        if skew > 0.5:
														
 
															+            skew_dir = '右偏（长尾在右侧，大部分值偏小）'
														
 
															+        elif skew < -0.5:
														
 
															+            skew_dir = '左偏（长尾在左侧，大部分值偏大）'
														
 
															+        else:
														
 
															+            skew_dir = '基本对称'
														
 
															+
														
 
															+        # Concentration analysis
														
 
															+        cv = float(s.std()) / float(s.mean()) if float(s.mean()) != 0 else 0
														
 
															+        if cv < 0.3:
														
 
															+            concentration = '高度集中'
														
 
															+        elif cv < 0.7:
														
 
															+            concentration = '中度集中'
														
 
															+        elif cv < 1.2:
														
 
															+            concentration = '适度分散'
														
 
															+        else:
														
 
															+            concentration = '高度分散'
														
 
															+
														
 
															+        return {
														
 
															+            'skewness': skew,
														
 
															+            'kurtosis': kurt,
														
 
															+            'skew_type': skew_type,
														
 
															+            'skew_direction': skew_dir,
														
 
															+            'cv': round(cv, 3),
														
 
															+            'concentration': concentration,
														
 
															+        }
														
 
															+    except Exception:
														
 
															+        return {}
														
 
															+
														
 
															+
														
 
															+# =====================================================================
														
 
															+# DERIVED METRIC DETECTION
														
 
															+# =====================================================================
														
 
															+
														
 
															+def _detect_derived_relations(df: pd.DataFrame, numeric_cols: list) -> list[dict]:
														
 
															+    """
														
 
															+    Detect potential derived relationships among numeric columns.
														
 
															+    E.g., A - B = C, A + B = C, A / B = C (approx.)
														
 
															+    """
														
 
															+    relations = []
														
 
															+    num_names = [c['column_name'] for c in numeric_cols]
														
 
															+    if len(num_names) < 3:
														
 
															+        return relations
														
 
															+
														
 
															+    sample = df[num_names].dropna().head(500)
														
 
															+
														
 
															+    for i, a_name in enumerate(num_names):
														
 
															+        for j, b_name in enumerate(num_names):
														
 
															+            if j <= i:
														
 
															+                continue
														
 
															+            a = sample[a_name]
														
 
															+            b = sample[b_name]
														
 
															+
														
 
															+            # Check subtraction: a - b ≈ c or b - a ≈ c
														
 
															+            for diff_name in num_names:
														
 
															+                if diff_name in (a_name, b_name):
														
 
															+                    continue
														
 
															+                d = sample[diff_name]
														
 
															+                diff_ab = (a - b - d).abs().mean()
														
 
															+                diff_ba = (b - a - d).abs().mean()
														
 
															+                threshold = max(d.mean(), 1) * 0.1
														
 
															+                if diff_ab < threshold:
														
 
															+                    relations.append({
														
 
															+                        'type': 'subtraction',
														
 
															+                        'expression': f'{a_name} - {b_name} ≈ {diff_name}',
														
 
															+                        'accuracy': round(float(1 - diff_ab / max(float(d.mean()), 1)), 3),
														
 
															+                        'formula': f'{diff_name} = {a_name} - {b_name}',
														
 
															+                    })
														
 
															+                    break
														
 
															+                elif diff_ba < threshold:
														
 
															+                    relations.append({
														
 
															+                        'type': 'subtraction',
														
 
															+                        'expression': f'{b_name} - {a_name} ≈ {diff_name}',
														
 
															+                        'accuracy': round(float(1 - diff_ba / max(float(d.mean()), 1)), 3),
														
 
															+                        'formula': f'{diff_name} = {b_name} - {a_name}',
														
 
															+                    })
														
 
															+                    break
														
 
															+
														
 
															+            # Check addition: a + b ≈ c
														
 
															+            for sum_name in num_names:
														
 
															+                if sum_name in (a_name, b_name):
														
 
															+                    continue
														
 
															+                s = sample[sum_name]
														
 
															+                sum_ab = (a + b - s).abs().mean()
														
 
															+                threshold = max(s.mean(), 1) * 0.1
														
 
															+                if sum_ab < threshold:
														
 
															+                    relations.append({
														
 
															+                        'type': 'addition',
														
 
															+                        'expression': f'{a_name} + {b_name} ≈ {sum_name}',
														
 
															+                        'accuracy': round(float(1 - sum_ab / max(float(s.mean()), 1)), 3),
														
 
															+                        'formula': f'{sum_name} = {a_name} + {b_name}',
														
 
															+                    })
														
 
															+                    break
														
 
															+
														
 
															+    # Also check for ratio relations
														
 
															+    if len(num_names) >= 2:
														
 
															+        for i, a_name in enumerate(num_names):
														
 
															+            for j, b_name in enumerate(num_names):
														
 
															+                if j <= i:
														
 
															+                    continue
														
 
															+                a = sample[a_name]
														
 
															+                b = sample[b_name]
														
 
															+                ratio = (a / b.replace(0, np.nan)).dropna()
														
 
															+                if len(ratio) > 0:
														
 
															+                    ratio_std = float(ratio.std())
														
 
															+                    ratio_mean = float(ratio.mean())
														
 
															+                    if ratio_mean > 0 and ratio_std / ratio_mean < 0.1:
														
 
															+                        # Consistent ratio found
														
 
															+                        relations.append({
														
 
															+                            'type': 'ratio',
														
 
															+                            'expression': f'{a_name} / {b_name} ≈ {ratio_mean:.3f}',
														
 
															+                            'accuracy': round(float(1 - ratio_std / ratio_mean), 3),
														
 
															+                            'formula': f'{a_name} = {b_name} × {ratio_mean:.2f}',
														
 
															+                        })
														
 
															+
														
 
															+    return relations
														
 
															+
														
 
															+
														
 
															+# =====================================================================
														
 
															+# MAIN PROFILING FUNCTION
														
 
															+# =====================================================================
														
 
															+
														
 
															+def profile_dataframe(df: pd.DataFrame) -> dict:
														
 
															+    total_rows = len(df)
														
 
															+    columns = []
														
 
															+
														
 
															+    for col in df.columns:
														
 
															+        series = df[col]
														
 
															+        dtype_str = str(series.dtype)
														
 
															+        null_count = int(series.isna().sum())
														
 
															+        null_rate = round(null_count / total_rows, 4) if total_rows else 0.0
														
 
															+
														
 
															+        non_null = series.dropna()
														
 
															+        unique_count = int(non_null.nunique())
														
 
															+
														
 
															+        sample_values = non_null.head(5).tolist()
														
 
															+        sample_values = [str(v) for v in sample_values]
														
 
															+
														
 
															+        # Enhanced: Value pattern analysis
														
 
															+        value_patterns = _analyze_value_patterns(series)
														
 
															+
														
 
															+        # Enhanced: Distribution shape analysis for numeric columns
														
 
															+        distribution_shape = None
														
 
															+        numeric_stats = None
														
 
															+        if pd.api.types.is_numeric_dtype(series) and not pd.api.types.is_bool_dtype(series):
														
 
															+            try:
														
 
															+                numeric_stats = {
														
 
															+                    'mean': round(float(series.mean()), 2) if not pd.isna(series.mean()) else 0,
														
 
															+                    'median': round(float(series.median()), 2) if not pd.isna(series.median()) else 0,
														
 
															+                    'min': round(float(series.min()), 2) if not pd.isna(series.min()) else 0,
														
 
															+                    'max': round(float(series.max()), 2) if not pd.isna(series.max()) else 0,
														
 
															+                    'std': round(float(series.std()), 2) if not pd.isna(series.std()) else 0,
														
 
															+                    'sum': round(float(series.sum()), 2) if not pd.isna(series.sum()) else 0,
														
 
															+                    'p25': round(float(series.quantile(0.25)), 2) if not pd.isna(series.quantile(0.25)) else 0,
														
 
															+                    'p75': round(float(series.quantile(0.75)), 2) if not pd.isna(series.quantile(0.75)) else 0,
														
 
															+                }
														
 
															+                distribution_shape = _calc_distribution_shape(series)
														
 
															+            except Exception:
														
 
															+                numeric_stats = None
														
 
															+
														
 
															+        # Enhanced role inference with value patterns
														
 
															+        role = _infer_column_role(col, dtype_str, sample_values, null_rate,
														
 
															+                                  unique_count, total_rows, value_patterns)
														
 
															+        label = _infer_metric_label(col, role, value_patterns)
														
 
															+        unit = _infer_unit(col, value_patterns)
														
 
															+
														
 
															+        # Enhanced: detect if column is a high-cardinality ID
														
 
															+        is_high_cardinality_id = (role == ColumnRole.TEXT and
														
 
															+                                  unique_count / max(total_rows, 1) > 0.8 and
														
 
															+                                  unique_count > 20)
														
 
															+        if is_high_cardinality_id:
														
 
															+            role = ColumnRole.ID
														
 
															+
														
 
															+        columns.append(ColumnProfile(
														
 
															+            column_name=col,
														
 
															+            dtype=dtype_str,
														
 
															+            role=role,
														
 
															+            null_count=null_count,
														
 
															+            null_rate=null_rate,
														
 
															+            unique_count=unique_count,
														
 
															+            sample_values=sample_values,
														
 
															+            numeric_stats=numeric_stats,
														
 
															+            inferred_label=label,
														
 
															+        ))
														
 
															+
														
 
															+        # Append extra metadata not in ColumnProfile
														
 
															+        columns[-1]._unit = unit
														
 
															+        columns[-1]._distribution_shape = distribution_shape
														
 
															+        columns[-1]._value_patterns = value_patterns
														
 
															+
														
 
															+    time_cols = [c for c in columns if c.role == ColumnRole.TIME]
														
 
															+    numeric_cols = [c for c in columns if c.role == ColumnRole.NUMERIC]
														
 
															+    category_cols = [c for c in columns if c.role == ColumnRole.CATEGORY]
														
 
															+    text_cols = [c for c in columns if c.role == ColumnRole.TEXT]
														
 
															+    id_cols = [c for c in columns if c.role == ColumnRole.ID]
														
 
															+
														
 
															+    # Date range inference
														
 
															+    date_range = (None, None)
														
 
															+    time_granularity = 'unknown'
														
 
															+    if time_cols:
														
 
															+        series = df[time_cols[0].column_name].dropna()
														
 
															+        parsed = series.apply(_parse_date).dropna()
														
 
															+        if len(parsed) > 0:
														
 
															+            date_range = (parsed.min(), parsed.max())
														
 
															+            if len(parsed) >= 2:
														
 
															+                diff = (parsed.max() - parsed.min()).days
														
 
															+                if diff <= 1:
														
 
															+                    time_granularity = 'daily'
														
 
															+                elif diff <= 7:
														
 
															+                    time_granularity = 'weekly'
														
 
															+                elif diff <= 31:
														
 
															+                    time_granularity = 'monthly'
														
 
															+                elif diff <= 92:
														
 
															+                    time_granularity = 'quarterly'
														
 
															+                else:
														
 
															+                    time_granularity = 'yearly'
														
 
															+
														
 
															+    # Enhanced: Detect derived relations among numeric columns
														
 
															+    derived_relations = _detect_derived_relations(df, [c.__dict__ for c in numeric_cols])
														
 
															+
														
 
															+    # Enhanced: Multi-dimensional quality scoring
														
 
															+    quality_score, quality_details = _calc_quality_score(
														
 
															+        df, columns, numeric_cols, date_range
														
 
															+    )
														
 
															+
														
 
															+    # Outlier detection (Enhanced: with CV-based filtering)
														
 
															+    outlier_columns = []
														
 
															+    for c in numeric_cols:
														
 
															+        ns = c.numeric_stats
														
 
															+        if ns and ns.get('std', 0) > 0 and ns.get('mean', 0) > 0:
														
 
															+            cv = ns['std'] / ns['mean']
														
 
															+            if cv > 3:
														
 
															+                outlier_columns.append(c.column_name)
														
 
															+
														
 
															+    return {
														
 
															+        'total_rows': total_rows,
														
 
															+        'total_columns': len(columns),
														
 
															+        'columns': [c.__dict__ for c in columns],
														
 
															+        'time_columns': [c.__dict__ for c in time_cols],
														
 
															+        'numeric_columns': [c.__dict__ for c in numeric_cols],
														
 
															+        'category_columns': [c.__dict__ for c in category_cols],
														
 
															+        'text_columns': [c.__dict__ for c in text_cols],
														
 
															+        'id_columns': [c.__dict__ for c in id_cols],
														
 
															+        'date_range': (
														
 
															+            date_range[0].strftime('%Y-%m-%d') if date_range[0] else None,
														
 
															+            date_range[1].strftime('%Y-%m-%d') if date_range[1] else None,
														
 
															+        ),
														
 
															+        'time_granularity': time_granularity,
														
 
															+        'data_quality': {
														
 
															+            'score': quality_score,
														
 
															+            'details': quality_details,
														
 
															+            'high_null_columns': [c.column_name for c in columns if c.null_rate > 0.3],
														
 
															+            'outlier_columns': outlier_columns,
														
 
															+        },
														
 
															+        'derived_relations': derived_relations,
														
 
															+        'column_stats': [{
														
 
															+            'column_name': col.column_name,
														
 
															+            'role': col.role.value,
														
 
															+            'dtype': col.dtype,
														
 
															+            'null_rate': col.null_rate,
														
 
															+            'unique_count': col.unique_count,
														
 
															+            'distribution_shape': getattr(col, '_distribution_shape', None),
														
 
															+            'inferred_label': col.inferred_label,
														
 
															+            'unit': getattr(col, '_unit', ''),
														
 
															+            'numeric_stats': col.numeric_stats,
														
 
															+        } for col in columns],
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+# =====================================================================
														
 
															+# ENHANCED QUALITY SCORING
														
 
															+# =====================================================================
														
 
															+
														
 
															+def _calc_quality_score(df: pd.DataFrame, columns: list,
														
 
															+                        numeric_cols: list, date_range: tuple) -> tuple:
														
 
															+    """Multi-dimensional quality scoring (0-100)."""
														
 
															+    score = 100
														
 
															+    details = {}
														
 
															+
														
 
															+    # 1) Completeness (30%) — null rates
														
 
															+    avg_null_rate = np.mean([c.null_rate for c in columns]) if columns else 0
														
 
															+    completeness_penalty = min(30, avg_null_rate * 100 * 2)
														
 
															+    completeness = max(0, 30 - completeness_penalty)
														
 
															+    details['completeness'] = round(completeness, 1)
														
 
															+
														
 
															+    # 2) Uniqueness (20%) — presence of ID columns or unique identifiers
														
 
															+    id_ratio = len([c for c in columns if c.role == ColumnRole.ID]) / max(len(columns), 1)
														
 
															+    uniqueness = min(20, 10 + id_ratio * 10)
														
 
															+    details['uniqueness'] = round(uniqueness, 1)
														
 
															+
														
 
															+    # 3) Numeric health (25%) — outliers, zeros, negative values
														
 
															+    numeric_health = 25
														
 
															+    for c in numeric_cols:
														
 
															+        series = df[c.column_name].dropna()
														
 
															+        if len(series) == 0:
														
 
															+            continue
														
 
															+        # Check for negative values in non-negative expected columns
														
 
															+        if c.inferred_label in ('台数', '数量', '金额', '人数'):
														
 
															+            neg_ratio = (series < 0).sum() / len(series)
														
 
															+            if neg_ratio > 0.05:
														
 
															+                numeric_health -= 5
														
 
															+        # Check for excessive zeros
														
 
															+        zero_ratio = (series == 0).sum() / len(series)
														
 
															+        if zero_ratio > 0.5:
														
 
															+            numeric_health -= 3
														
 
															+    details['numeric_health'] = max(0, numeric_health)
														
 
															+
														
 
															+    # 4) Temporal consistency (15%) — if time columns exist, check date ordering
														
 
															+    temporal = 15
														
 
															+    if date_range[0] and date_range[1]:
														
 
															+        if date_range[0] <= date_range[1]:
														
 
															+            temporal = 15
														
 
															+        else:
														
 
															+            temporal = 5
														
 
															+    details['temporal_consistency'] = temporal
														
 
															+
														
 
															+    # 5) Completeness of categorical data (10%)
														
 
															+    cat_health = 10
														
 
															+    for c in columns:
														
 
															+        if c.role == ColumnRole.CATEGORY and c.null_rate > 0.2:
														
 
															+            cat_health -= 2
														
 
															+    details['categorical_health'] = max(0, cat_health)
														
 
															+
														
 
															+    score = completeness + uniqueness + numeric_health + temporal + cat_health
														
 
															+    score = max(0, min(100, round(score)))
														
 
															+
														
 
															+    return score, details
														
 
															+
														
 
															+
														
 
															+# =====================================================================
														
 
															+# HELPER FUNCTIONS (enhanced)
														
 
															+# =====================================================================
														
 
															+
														
 
															+def profile_category_distribution(df: pd.DataFrame, col_name: str, top_n: int = 15) -> dict:
														
 
															+    if col_name not in df.columns:
														
 
															+        return {}
														
 
															+    counts = df[col_name].value_counts().head(top_n).to_dict()
														
 
															+    total = df[col_name].notna().sum()
														
 
															+    # Calculate concentration (Herfindahl index)
														
 
															+    pcts = [v / total for v in counts.values()] if total else []
														
 
															+    hhi = sum(p * p for p in pcts) if pcts else 0
														
 
															+
														
 
															+    return {
														
 
															+        'total_categories': df[col_name].nunique(),
														
 
															+        'top_items': {str(k): {'count': int(v), 'pct': round(v / total * 100, 1) if total else 0}
														
 
															+                      for k, v in counts.items()},
														
 
															+        'concentration_hhi': round(hhi, 4),
														
 
															+        'concentration_label': '高度集中' if hhi > 0.5 else '中度集中' if hhi > 0.2 else '分散',
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+def profile_numeric_series(df: pd.DataFrame, col_name: str) -> dict:
														
 
															+    if col_name not in df.columns:
														
 
															+        return {}
														
 
															+    series = df[col_name].dropna()
														
 
															+    if len(series) == 0:
														
 
															+        return {}
														
 
															+
														
 
															+    shape = _calc_distribution_shape(series)
														
 
															+    result = {
														
 
															+        'count': len(series),
														
 
															+        'sum': round(float(series.sum()), 2),
														
 
															+        'mean': round(float(series.mean()), 2),
														
 
															+        'median': round(float(series.median()), 2),
														
 
															+        'min': round(float(series.min()), 2),
														
 
															+        'max': round(float(series.max()), 2),
														
 
															+        'std': round(float(series.std()), 2),
														
 
															+    }
														
 
															+    if shape:
														
 
															+        result.update(shape)
														
 
															+    return result
														
 
															+
														
 
															+
														
 
															+def detect_data_issues(df: pd.DataFrame) -> list[dict]:
														
 
															+    issues = []
														
 
															+    for col in df.columns:
														
 
															+        null_rate = df[col].isna().mean()
														
 
															+        if null_rate > 0.5:
														
 
															+            issues.append({
														
 
															+                'column': col,
														
 
															+                'type': 'high_missing',
														
 
															+                'severity': 'major',
														
 
															+                'message': f'列"{col}"缺失率{null_rate:.1%}，建议排除或补全',
														
 
															+            })
														
 
															+        elif null_rate > 0.1:
														
 
															+            issues.append({
														
 
															+                'column': col,
														
 
															+                'type': 'moderate_missing',
														
 
															+                'severity': 'minor',
														
 
															+                'message': f'列"{col}"缺失率{null_rate:.1%}',
														
 
															+            })
														
 
															+
														
 
															+        if pd.api.types.is_numeric_dtype(df[col]):
														
 
															+            series = df[col].dropna()
														
 
															+            if len(series) > 0:
														
 
															+                q1, q3 = series.quantile(0.25), series.quantile(0.75)
														
 
															+                iqr = q3 - q1
														
 
															+                lower, upper = q1 - 3 * iqr, q3 + 3 * iqr
														
 
															+                outlier_count = ((series < lower) | (series > upper)).sum()
														
 
															+                if outlier_count > len(series) * 0.1:
														
 
															+                    issues.append({
														
 
															+                        'column': col,
														
 
															+                        'type': 'outliers',
														
 
															+                        'severity': 'major',
														
 
															+                        'message': f'列"{col}"存在{outlier_count}个异常值（{outlier_count/len(series):.1%}）',
														
 
															+                    })
														
 
															+
														
 
															+                # Check for negative values
														
 
															+                neg_count = (series < 0).sum()
														
 
															+                if neg_count > 0:
														
 
															+                    issues.append({
														
 
															+                        'column': col,
														
 
															+                        'type': 'negative_values',
														
 
															+                        'severity': 'minor',
														
 
															+                        'message': f'列"{col}"存在{neg_count}个负值',
														
 
															+                    })
														
 
															+
														
 
															+        # Check for constant columns
														
 
															+        if df[col].nunique() <= 1 and null_rate < 1.0:
														
 
															+            issues.append({
														
 
															+                'column': col,
														
 
															+                'type': 'constant_column',
														
 
															+                'severity': 'minor',
														
 
															+                'message': f'列"{col}"为常量列（仅1个唯一值），对分析无贡献',
														
 
															+            })
														
 
															+
														
 
															+    return issues
														
 
															+
														
 
															+
														
 
															+def generate_summary_text(profile: dict) -> str:
														
 
															+    lines = []
														
 
															+    lines.append(f"共 {profile['total_rows']:,} 行 × {profile['total_columns']} 列")
														
 
															+    num_cols = profile.get('numeric_columns', [])
														
 
															+    cat_cols = profile.get('category_columns', [])
														
 
															+    time_cols = profile.get('time_columns', [])
														
 
															+
														
 
															+    lines.append(f"数值列: {len(num_cols)} 个 | 分类列: {len(cat_cols)} 个 | 时间列: {len(time_cols)} 个")
														
 
															+
														
 
															+    dr = profile.get('date_range', (None, None))
														
 
															+    if dr[0]:
														
 
															+        lines.append(f"时间范围: {dr[0]} ~ {dr[1]}")
														
 
															+    lines.append(f"时间粒度: {profile.get('time_granularity', 'unknown')}")
														
 
															+
														
 
															+    q = profile.get('data_quality', {})
														
 
															+    lines.append(f"数据质量评分: {q.get('score', 0)}/100")
														
 
															+    if q.get('details'):
														
 
															+        det = q['details']
														
 
															+        lines.append(f"  完整性: {det.get('completeness', 0)}/30 | "
														
 
															+                     f"数值健康: {det.get('numeric_health', 0)}/25 | "
														
 
															+                     f"时间一致性: {det.get('temporal_consistency', 0)}/15")
														
 
															+    if q.get('high_null_columns'):
														
 
															+        lines.append(f"高缺失列: {', '.join(q['high_null_columns'])}")
														
 
															+
														
 
															+    # Enhanced: derived relations
														
 
															+    derived = profile.get('derived_relations', [])
														
 
															+    if derived:
														
 
															+        lines.append(f"检测到 {len(derived)} 个数值关系:")
														
 
															+        for rel in derived[:5]:
														
 
															+            lines.append(f"  {rel['formula']} (置信度: {rel['accuracy']:.0%})")
														
 
															+
														
 
															+    # Distribution shape summary for numeric columns
														
 
															+    shape_cols = []
														
 
															+    for nc in num_cols[:3]:
														
 
															+        shape = nc.get('distribution_shape')
														
 
															+        if shape:
														
 
															+            shape_cols.append(f"{nc.get('inferred_label', nc['column_name'])}[{shape.get('concentration', 'N/A')}]")
														
 
															+    if shape_cols:
														
 
															+        lines.append(f"分布特征: {' | '.join(shape_cols)}")
														
 
															+
														
 
															+    return '\n'.join(lines)
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    import sys
														
 
															+    if len(sys.argv) > 1:
														
 
															+        fp = sys.argv[1]
														
 
															+        try:
														
 
															+            df = pd.read_excel(fp)
														
 
															+        except Exception:
														
 
															+            df = pd.read_excel(fp, sheet_name=0)
														
 
															+        profile = profile_dataframe(df)
														
 
															+        print(generate_summary_text(profile))
														
 
															+        issues = detect_data_issues(df)
														
 
															+        if issues:
														
 
															+            print(f"\n数据问题 ({len(issues)}):")
														
 
															+            for iss in issues:
														
 
															+                print(f"  [{iss['severity']}] {iss['message']}")
														
--- a/generate-data-report-ppt/scripts/metrics_calculator.py
+++ b/generate-data-report-ppt/scripts/metrics_calculator.py
@@ -1136,6 +1136,198 @@ def avg(lst):
 
															     return sum(lst) / len(lst) if lst else 0
														
 
															+def calc_generic_metrics(df: pd.DataFrame, config) -> dict:
														
 
															+    metrics = {}
														
 
															+
														
 
															+    for metric_def in config.metrics:
														
 
															+        col = metric_def.column
														
 
															+        if col not in df.columns:
														
 
															+            metrics[metric_def.name] = 0
														
 
															+            continue
														
 
															+
														
 
															+        series = df[col].dropna()
														
 
															+        agg = metric_def.aggregation
														
 
															+
														
 
															+        if agg == 'sum':
														
 
															+            val = int(series.sum()) if pd.api.types.is_numeric_dtype(series) else len(series)
														
 
															+        elif agg == 'count':
														
 
															+            val = int(series.count())
														
 
															+        elif agg == 'avg':
														
 
															+            val = round(float(series.mean()), 1) if pd.api.types.is_numeric_dtype(series) else 0
														
 
															+        elif agg == 'max':
														
 
															+            val = round(float(series.max()), 1) if pd.api.types.is_numeric_dtype(series) else 0
														
 
															+        elif agg == 'min':
														
 
															+            val = round(float(series.min()), 1) if pd.api.types.is_numeric_dtype(series) else 0
														
 
															+        elif agg == 'distinct_count':
														
 
															+            val = int(series.nunique())
														
 
															+        else:
														
 
															+            val = len(series)
														
 
															+
														
 
															+        metrics[metric_def.name] = val
														
 
															+        metrics[f'{metric_def.name}_label'] = metric_def.label
														
 
															+        metrics[f'{metric_def.name}_unit'] = metric_def.unit
														
 
															+
														
 
															+    if hasattr(config, 'comparison') and config.comparison:
														
 
															+        pass
														
 
															+
														
 
															+    return metrics
														
 
															+
														
 
															+
														
 
															+def calc_generic_trend(df: pd.DataFrame, time_col: str, metric_col: str,
														
 
															+                       aggregation: str = 'sum') -> dict:
														
 
															+    if time_col not in df.columns or metric_col not in df.columns:
														
 
															+        return {}
														
 
															+
														
 
															+    if aggregation == 'sum':
														
 
															+        trend = df.groupby(time_col)[metric_col].sum().sort_index()
														
 
															+    elif aggregation == 'count':
														
 
															+        trend = df.groupby(time_col)[metric_col].count().sort_index()
														
 
															+    else:
														
 
															+        trend = df.groupby(time_col)[metric_col].mean().sort_index()
														
 
															+
														
 
															+    dates = []
														
 
															+    for d in trend.index:
														
 
															+        try:
														
 
															+            dates.append(pd.Timestamp(d).strftime('%m/%d'))
														
 
															+        except Exception:
														
 
															+            dates.append(str(d))
														
 
															+
														
 
															+    return {
														
 
															+        'dates': dates,
														
 
															+        'values': [int(v) if aggregation != 'avg' else round(float(v), 1) for v in trend.values],
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+def calc_generic_distribution(df: pd.DataFrame, cat_col: str, metric_col: str = None,
														
 
															+                              aggregation: str = 'sum', top_n: int = 10) -> dict:
														
 
															+    if cat_col not in df.columns:
														
 
															+        return {}
														
 
															+
														
 
															+    if metric_col and metric_col in df.columns:
														
 
															+        if aggregation == 'sum':
														
 
															+            dist = df.groupby(cat_col)[metric_col].sum().sort_values(ascending=False).head(top_n)
														
 
															+        elif aggregation == 'count':
														
 
															+            dist = df.groupby(cat_col)[metric_col].count().sort_values(ascending=False).head(top_n)
														
 
															+        else:
														
 
															+            dist = df.groupby(cat_col)[metric_col].mean().sort_values(ascending=False).head(top_n)
														
 
															+    else:
														
 
															+        dist = df[cat_col].value_counts().head(top_n)
														
 
															+
														
 
															+    total = sum(dist.values)
														
 
															+    return {
														
 
															+        'categories': [str(k) for k in dist.index],
														
 
															+        'values': [int(v) for v in dist.values],
														
 
															+        'percentages': [round(v / total * 100, 1) if total else 0 for v in dist.values],
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+def calc_generic_ranking(df: pd.DataFrame, rank_col: str, metric_col: str,
														
 
															+                         aggregation: str = 'sum', top_n: int = 15) -> list[dict]:
														
 
															+    if rank_col not in df.columns or metric_col not in df.columns:
														
 
															+        return []
														
 
															+
														
 
															+    if aggregation == 'sum':
														
 
															+        ranked = df.groupby(rank_col)[metric_col].sum().sort_values(ascending=False).head(top_n)
														
 
															+    elif aggregation == 'count':
														
 
															+        ranked = df.groupby(rank_col)[metric_col].count().sort_values(ascending=False).head(top_n)
														
 
															+    else:
														
 
															+        ranked = df.groupby(rank_col)[metric_col].mean().sort_values(ascending=False).head(top_n)
														
 
															+
														
 
															+    return [{'name': str(k), 'value': int(v), 'rank': i + 1}
														
 
															+            for i, (k, v) in enumerate(ranked.items())]
														
 
															+
														
 
															+
														
 
															+def generate_generic_insights(data_profile: dict, metrics: dict) -> list[dict]:
														
 
															+    items = []
														
 
															+
														
 
															+    num_cols = data_profile.get('numeric_columns', [])
														
 
															+    cat_cols = data_profile.get('category_columns', [])
														
 
															+    time_cols = data_profile.get('time_columns', [])
														
 
															+    q = data_profile.get('data_quality', {})
														
 
															+    score = q.get('score', 100)
														
 
															+
														
 
															+    if metrics:
														
 
															+        metric_details = []
														
 
															+        for k, v in metrics.items():
														
 
															+            if isinstance(v, (int, float)):
														
 
															+                metric_details.append(f'{k}: {v:,.0f}')
														
 
															+        if metric_details:
														
 
															+            items.append({
														
 
															+                'title': '核心指标总览',
														
 
															+                'content': f'本期关键指标：{"；".join(metric_details[:6])}。'
														
 
															+                           f'综合来看，业务运行态势可通过这些核心数据进行量化评估，'
														
 
															+                           f'建议结合业务目标与实际值的差距进行针对性分析。',
														
 
															+            })
														
 
															+
														
 
															+    if num_cols:
														
 
															+        for nc in num_cols[:2]:
														
 
															+            ns = nc.get('numeric_stats', {}) or {}
														
 
															+            col_name = nc.get('inferred_label', nc['column_name'])
														
 
															+            stats_parts = []
														
 
															+            if 'sum' in ns and ns['sum']:
														
 
															+                stats_parts.append(f'总量 {ns["sum"]:,.0f}')
														
 
															+            if 'mean' in ns and ns['mean']:
														
 
															+                stats_parts.append(f'均值 {ns["mean"]:,.1f}')
														
 
															+            if 'max' in ns and ns['max']:
														
 
															+                stats_parts.append(f'峰值 {ns["max"]:,.0f}')
														
 
															+            if 'min' in ns and ns['min']:
														
 
															+                stats_parts.append(f'最低 {ns["min"]:,.0f}')
														
 
															+            if stats_parts:
														
 
															+                items.append({
														
 
															+                    'title': f'{col_name}数据特征',
														
 
															+                    'content': f'指标"{col_name}"的统计特征：{"，".join(stats_parts)}。'
														
 
															+                               f'标准差 {ns.get("std", "N/A")}，数据波动幅度'
														
 
															+                               f'{"较大" if isinstance(ns.get("std"), (int,float)) and ns["std"] > ns.get("mean", 1) * 0.5 else "适中"}。',
														
 
															+                })
														
 
															+
														
 
															+    if cat_cols:
														
 
															+        for cc in cat_cols[:2]:
														
 
															+            uc = cc.get('unique_count', 0)
														
 
															+            items.append({
														
 
															+                'title': f'{cc.get("inferred_label", cc["column_name"])}维度分析',
														
 
															+                'content': f'数据覆盖 {uc} 个不同的{cc.get("inferred_label", cc["column_name"])}类别，'
														
 
															+                           f'丰富的分类维度支持多角度交叉分析。'
														
 
															+                           f'建议重点关注主要类别的集中度与分布均衡性，'
														
 
															+                           f'识别高价值类别与低效类别之间的差异特征。',
														
 
															+            })
														
 
															+
														
 
															+    if time_cols:
														
 
															+        tc = time_cols[0]
														
 
															+        items.append({
														
 
															+            'title': '时间维度覆盖',
														
 
															+            'content': f'数据包含时间列"{tc.get("inferred_label", tc["column_name"])}"，'
														
 
															+                       f'支持按时间维度进行趋势分析。通过对时间序列数据的分解，'
														
 
															+                       f'可识别周期性波动、趋势变化及异常时间节点，为预测与规划提供依据。',
														
 
															+        })
														
 
															+
														
 
															+    items.append({
														
 
															+        'title': '数据质量评估',
														
 
															+        'content': f'数据质量评分 {score}/100，'
														
 
															+                   f'{"数据完整可靠，" if score >= 90 else "数据质量良好，建议关注缺失值" if score >= 80 else "数据需重点关注质量控制"}'
														
 
															+                   f'缺失率 {q.get("null_rate", 0)*100:.1f}%。'
														
 
															+                   f'本报告中的分析与图表均基于现有数据进行自动化生成，确保数据准确性。',
														
 
															+    })
														
 
															+
														
 
															+    high_null = q.get('high_null_columns', [])
														
 
															+    if high_null:
														
 
															+        items.append({
														
 
															+            'title': '数据完整性说明',
														
 
															+            'content': f'以下列缺失值比例较高：{", ".join(high_null[:5])}。'
														
 
															+                       f'在分析涉及这些列时已进行空值排除处理，'
														
 
															+                       f'建议后续数据录入环节关注这些字段的完整填写，以提升分析精度。',
														
 
															+        })
														
 
															+
														
 
															+    total_rows = data_profile.get('total_rows', 0)
														
 
															+    if total_rows:
														
 
															+        items.append({
														
 
															+            'title': '数据规模概述',
														
 
															+            'content': f'本期报告基于 {total_rows} 条数据记录进行分析，'
														
 
															+                       f'样本量{"充足，统计结果具有较好的代表性" if total_rows >= 100 else "适中，统计结果可作为参考" if total_rows >= 30 else "有限，分析结果仅供参考"}。',
														
 
															+        })
														
 
															+
														
 
															+    return items
														
 
															+
														
 
															+
														
 
															 if __name__ == '__main__':
														
 
															     import sys
														
 
															     if len(sys.argv) > 1:
														
--- a/generate-data-report-ppt/scripts/page_layouts.py
+++ b/generate-data-report-ppt/scripts/page_layouts.py
@@ -0,0 +1,223 @@
 
															+"""
														
 
															+Dynamic page layout engine for the universal data report generator.
														
 
															+Provides pre-defined layout templates and layout calculation utilities.
														
 
															+"""
														
 
															+from pptx.util import Emu, Pt
														
 
															+from pptx.dml.color import RGBColor
														
 
															+from dataclasses import dataclass
														
 
															+from typing import Optional
														
 
															+
														
 
															+
														
 
# Slide geometry shared by all layout helpers. All values are EMU; the slide
# is 16256000 x 9144000, i.e. a 16:9 canvas.
SLIDE_WIDTH = 16256000
SLIDE_HEIGHT = 9144000
# Horizontal margins applied on both sides of the content area.
MARGIN_LEFT = Emu(762000)
MARGIN_RIGHT = Emu(762000)
MARGIN_TOP = Emu(254000)
# Default top edge of the content area, used when a caller passes no
# explicit content top.
CONTENT_TOP_BASE = Emu(1600200)
# Footer band: FOOTER_TOP + FOOTER_HEIGHT equals SLIDE_HEIGHT, so the footer
# sits flush with the bottom edge of the slide.
FOOTER_TOP = Emu(8824000)
FOOTER_HEIGHT = Emu(320000)
# Width left for content once both horizontal margins are removed.
CONTENT_WIDTH = SLIDE_WIDTH - MARGIN_LEFT - MARGIN_RIGHT
														
 
															+
														
 
															+
														
 
@dataclass
class LayoutZone:
    """Rectangular region of a slide produced by the layout helpers."""

    # Left edge of the zone (the helpers in this module pass EMU values).
    x: int
    # Top edge of the zone.
    y: int
    # Horizontal extent of the zone.
    width: int
    # Vertical extent of the zone.
    height: int
    # Free-form tag naming the role of the zone, e.g. 'content_area',
    # 'kpi_card' or 'chart_left'.
    zone_type: str
														
 
															+
														
 
															+
														
 
															+def calculate_content_area(content_top_emu: int = None) -> LayoutZone:
														
 
															+    top = content_top_emu or int(CONTENT_TOP_BASE)
														
 
															+    height = FOOTER_TOP - top - Emu(100000)
														
 
															+    return LayoutZone(
														
 
															+        x=int(MARGIN_LEFT),
														
 
															+        y=top,
														
 
															+        width=int(CONTENT_WIDTH),
														
 
															+        height=int(height),
														
 
															+        zone_type='content_area',
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def get_kpi_grid(content_top_emu: int = None, cols: int = 3, rows: int = 2,
														
 
															+                 card_width_emu: int = 4699000, card_height_emu: int = 3048000,
														
 
															+                 gap_x_emu: int = 444500, gap_y_emu: int = 381000) -> list[LayoutZone]:
														
 
															+    start_y = max(int(CONTENT_TOP_BASE), content_top_emu or int(CONTENT_TOP_BASE))
														
 
															+    zones = []
														
 
															+    for row in range(rows):
														
 
															+        for col in range(cols):
														
 
															+            x = int(MARGIN_LEFT) + col * (card_width_emu + gap_x_emu)
														
 
															+            y = start_y + row * (card_height_emu + gap_y_emu)
														
 
															+            zones.append(LayoutZone(x=x, y=y, width=card_width_emu, height=card_height_emu, zone_type='kpi_card'))
														
 
															+    return zones
														
 
															+
														
 
															+
														
 
															+def get_chart_left_zone(content_top_emu: int = None, chart_ratio: float = 0.6) -> LayoutZone:
														
 
															+    content = calculate_content_area(content_top_emu)
														
 
															+    chart_w = int(content.width * chart_ratio) - Emu(200000)
														
 
															+    return LayoutZone(
														
 
															+        x=content.x,
														
 
															+        y=content.y,
														
 
															+        width=chart_w,
														
 
															+        height=content.height,
														
 
															+        zone_type='chart_left',
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def get_insight_right_zone(content_top_emu: int = None, chart_ratio: float = 0.6) -> LayoutZone:
														
 
															+    content = calculate_content_area(content_top_emu)
														
 
															+    chart_w = int(content.width * chart_ratio)
														
 
															+    text_left = content.x + chart_w + Emu(200000)
														
 
															+    text_w = content.x + content.width - text_left
														
 
															+    return LayoutZone(
														
 
															+        x=text_left,
														
 
															+        y=content.y,
														
 
															+        width=text_w,
														
 
															+        height=content.height,
														
 
															+        zone_type='insight_right',
														
 
															+    )
														
 
															+
														
 
															+
														
 
def get_full_width_zone(content_top_emu: int = None) -> LayoutZone:
    """Return the whole content area as a single zone.

    Thin alias of ``calculate_content_area``; the returned zone keeps the
    ``'content_area'`` zone type.
    """
    return calculate_content_area(content_top_emu)
														
 
															+
														
 
															+
														
 
															+def get_two_column_zones(content_top_emu: int = None, gap_emu: int = 381000) -> tuple[LayoutZone, LayoutZone]:
														
 
															+    content = calculate_content_area(content_top_emu)
														
 
															+    half_w = (content.width - gap_emu) // 2
														
 
															+    left = LayoutZone(x=content.x, y=content.y, width=half_w, height=content.height, zone_type='column_left')
														
 
															+    right = LayoutZone(x=content.x + half_w + gap_emu, y=content.y, width=half_w, height=content.height, zone_type='column_right')
														
 
															+    return left, right
														
 
															+
														
 
															+
														
 
															+def get_two_row_zones(content_top_emu: int = None, gap_emu: int = 381000,
														
 
															+                      top_ratio: float = 0.55) -> tuple[LayoutZone, LayoutZone]:
														
 
															+    content = calculate_content_area(content_top_emu)
														
 
															+    top_h = int(content.height * top_ratio)
														
 
															+    top = LayoutZone(x=content.x, y=content.y, width=content.width, height=top_h, zone_type='row_top')
														
 
															+    bottom = LayoutZone(
														
 
															+        x=content.x,
														
 
															+        y=content.y + top_h + gap_emu,
														
 
															+        width=content.width,
														
 
															+        height=content.height - top_h - gap_emu,
														
 
															+        zone_type='row_bottom',
														
 
															+    )
														
 
															+    return top, bottom
														
 
															+
														
 
															+
														
 
															+def get_card_grid(n: int, content_top_emu: int = None, max_cols: int = 3) -> list[LayoutZone]:
														
 
															+    content = calculate_content_area(content_top_emu)
														
 
															+    cols = min(max_cols, n)
														
 
															+    rows = (n + cols - 1) // cols
														
 
															+    card_w = (content.width - (cols - 1) * Emu(254000)) // cols
														
 
															+    card_h = (content.height - (rows - 1) * Emu(254000)) // rows
														
 
															+
														
 
															+    zones = []
														
 
															+    for i in range(n):
														
 
															+        col = i % cols
														
 
															+        row = i // cols
														
 
															+        x = content.x + col * (card_w + Emu(254000))
														
 
															+        y = content.y + row * (card_h + Emu(254000))
														
 
															+        zones.append(LayoutZone(x=x, y=y, width=card_w, height=card_h, zone_type=f'card_{i}'))
														
 
															+    return zones
														
 
															+
														
 
															+
														
 
															+def get_alert_card_zones(n: int, content_top_emu: int = None) -> list[LayoutZone]:
														
 
															+    content = calculate_content_area(content_top_emu)
														
 
															+    card_h = Emu(2286000)
														
 
															+    gap = Emu(254000)
														
 
															+    return get_card_grid(n, content_top_emu, max_cols=3)
														
 
															+
														
 
															+
														
 
															+def get_issue_card_zones(n: int, content_top_emu: int = None) -> list[LayoutZone]:
														
 
															+    content = calculate_content_area(content_top_emu)
														
 
															+    card_h = Emu(2032000)
														
 
															+    gap = Emu(254000)
														
 
															+    start_y = content.y
														
 
															+    zones = []
														
 
															+    for i in range(min(n, 3)):
														
 
															+        y = start_y + i * (card_h + gap)
														
 
															+        zones.append(LayoutZone(x=content.x, y=y, width=content.width, height=card_h, zone_type=f'issue_{i}'))
														
 
															+    return zones
														
 
															+
														
 
															+
														
 
															+def get_table_zone(content_top_emu: int = None, ratio: float = 0.5) -> LayoutZone:
														
 
															+    content = calculate_content_area(content_top_emu)
														
 
															+    return LayoutZone(
														
 
															+        x=content.x,
														
 
															+        y=content.y + int(content.height * ratio) + Emu(200000),
														
 
															+        width=content.width,
														
 
															+        height=int(content.height * (1 - ratio)),
														
 
															+        zone_type='table_bottom',
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def detect_layout_slots(slide) -> dict:
														
 
															+    slots = {
														
 
															+        'has_header': False,
														
 
															+        'has_footer': False,
														
 
															+        'has_page_title': False,
														
 
															+        'content_top': int(CONTENT_TOP_BASE),
														
 
															+        'content_width': int(CONTENT_WIDTH),
														
 
															+        'content_height': FOOTER_TOP - int(CONTENT_TOP_BASE) - Emu(100000),
														
 
															+    }
														
 
															+    for shape in slide.shapes:
														
 
															+        if shape.has_text_frame:
														
 
															+            text = shape.text_frame.text
														
 
															+            if 'page_title' in text or '报告' in text:
														
 
															+                slots['has_page_title'] = True
														
 
															+            if '数据来源' in text:
														
 
															+                slots['has_footer'] = True
														
 
															+                slots['footer_top'] = int(shape.top)
														
 
															+        if shape.top + shape.height < Emu(1300000):
														
 
															+            slots['has_header'] = True
														
 
															+    return slots
														
 
															+
														
 
															+
														
 
															+def ensure_safe_position(shape, slide_width: int, slide_height: int) -> bool:
														
 
															+    margin = Emu(254000)
														
 
															+    adjusted = False
														
 
															+    if shape.left < 0:
														
 
															+        shape.left = margin
														
 
															+        adjusted = True
														
 
															+    if shape.top < 0:
														
 
															+        shape.top = margin
														
 
															+        adjusted = True
														
 
															+    if shape.left + shape.width > slide_width:
														
 
															+        shape.left = slide_width - shape.width - margin
														
 
															+        adjusted = True
														
 
															+    if shape.top + shape.height > slide_height:
														
 
															+        shape.top = slide_height - shape.height - margin
														
 
															+        adjusted = True
														
 
															+    return adjusted
														
 
															+
														
 
															+
														
 
															+def calculate_fill_ratio(slide, content_top_emu: int = None) -> float:
														
 
															+    content = calculate_content_area(content_top_emu)
														
 
															+    total_area = content.width * content.height
														
 
															+    if total_area <= 0:
														
 
															+        return 0.0
														
 
															+    filled_area = 0
														
 
															+    for shape in slide.shapes:
														
 
															+        sx = int(shape.left)
														
 
															+        sy = int(shape.top)
														
 
															+        sw = int(shape.width)
														
 
															+        sh = int(shape.height)
														
 
															+        if sy < content.y:
														
 
															+            continue
														
 
															+        if sy > content.y + content.height:
														
 
															+            continue
														
 
															+        overlap_x = max(0, min(sx + sw, content.x + content.width) - max(sx, content.x))
														
 
															+        overlap_y = max(0, min(sy + sh, content.y + content.height) - max(sy, content.y))
														
 
															+        filled_area += overlap_x * overlap_y
														
 
															+    return min(1.0, filled_area / total_area)
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    ca = calculate_content_area()
														
 
															+    print(f"Content area: {ca.width}x{ca.height}")
														
 
															+    kpis = get_kpi_grid()
														
 
															+    for i, z in enumerate(kpis):
														
 
															+        print(f"KPI {i}: x={z.x}, y={z.y}")
														
 
															+    print(f"Fill ratio test: hypothetical")
														
--- a/generate-data-report-ppt/scripts/ppt_builder.py
+++ b/generate-data-report-ppt/scripts/ppt_builder.py
@@ -8,6 +8,7 @@ insights (title + body per paragraph) aligned with reference PPT style.
 
															 import copy
														
 
															 import os
														
 
															 import sys
														
 
															+import re as re_module
														
 
															 from pathlib import Path
														
 
															 from datetime import datetime, timedelta
														
@@ -19,13 +20,31 @@ from pptx.dml.color import RGBColor
 
															 from pptx.enum.text import PP_ALIGN
														
 
															 from pptx.enum.shapes import MSO_SHAPE
														
 
															-from data_loader import load_daily, load_weekly, load_monthly, load_date_range
														
 
															-from metrics_calculator import calc_daily_metrics, calc_weekly_metrics, calc_monthly_metrics, generate_deep_insights
														
 
															+from data_loader import (
														
 
															+    load_daily, load_weekly, load_monthly, load_date_range,
														
 
															+    load_generic_excel,
														
 
															+)
														
 
															+from metrics_calculator import (
														
 
															+    calc_daily_metrics, calc_weekly_metrics, calc_monthly_metrics, generate_deep_insights,
														
 
															+    calc_generic_metrics, calc_generic_trend, calc_generic_distribution,
														
 
															+    calc_generic_ranking, generate_generic_insights,
														
 
															+)
														
 
															 from chart_factory import (
														
 
															     add_column_chart, add_bar_chart, add_line_chart, add_doughnut_chart,
														
 
															     add_pie_chart, add_funnel_chart, add_horizontal_bar_chart,
														
 
															     add_grouped_bar_chart, add_table
														
 
															 )
														
 
															+from page_layouts import (
														
 
															+    get_kpi_grid, get_chart_left_zone, get_insight_right_zone,
														
 
															+    get_full_width_zone, get_two_column_zones,
														
 
															+)
														
 
															+from quality_inspector import QualityInspector
														
 
															+from theme_manager import theme_to_rgb_colors, get_theme
														
 
															+from report_config import (
														
 
															+    ReportConfig, PageDef, MetricDef, PeriodType, ChartType,
														
 
															+    validate_six_confirmations,
														
 
															+)
														
 
															+from quality_rules import SLIDE_WIDTH, SLIDE_HEIGHT, CONTENT_LEFT, CONTENT_TOP_BASE, FOOTER_TOP
														
 
															 # Colors — aligned with reference design theme YAML
														
 
															 C_PRIMARY = RGBColor(0x1E, 0x3A, 0x5F)
														
@@ -113,6 +132,53 @@ def _replace_all_placeholders(slide, mapping: dict):
 
															         _replace_placeholder(slide, placeholder, new_text)
														
 
															+def _remove_shape(shape):
														
 
															+    """Remove a python-pptx shape from its parent tree."""
														
 
															+    el = shape.element
														
 
															+    el.getparent().remove(el)
														
 
															+
														
 
															+
														
 
															+def _remove_empty_cover_kpi_placeholders(slide):
														
 
															+    """
														
 
															+    Remove template KPI cards when generic cover data does not provide values.
														
 
															+    This prevents empty rounded rectangles from staying on the cover.
														
 
															+    """
														
 
															+    kpi_pattern = re_module.compile(r'\{kpi\d+_(label|value)\}')
														
 
															+    placeholder_shapes = [
														
 
															+        shape for shape in slide.shapes
														
 
															+        if shape.has_text_frame and kpi_pattern.search(shape.text_frame.text or '')
														
 
															+    ]
														
 
															+    if not placeholder_shapes:
														
 
															+        return
														
 
															+
														
 
															+    x_min = min(int(shape.left) for shape in placeholder_shapes)
														
 
															+    x_max = max(int(shape.left) + int(shape.width) for shape in placeholder_shapes)
														
 
															+    y_min = min(int(shape.top) for shape in placeholder_shapes)
														
 
															+    y_max = max(int(shape.top) + int(shape.height) for shape in placeholder_shapes)
														
 
															+    pad = Emu(220000)
														
 
															+
														
 
															+    to_remove = []
														
 
															+    for shape in slide.shapes:
														
 
															+        sx = int(shape.left)
														
 
															+        sy = int(shape.top)
														
 
															+        sw = int(shape.width)
														
 
															+        sh = int(shape.height)
														
 
															+        in_region = (
														
 
															+            sx >= x_min - pad and sx + sw <= x_max + pad and
														
 
															+            sy >= y_min - pad and sy + sh <= y_max + pad
														
 
															+        )
														
 
															+        is_text_placeholder = shape in placeholder_shapes
														
 
															+        is_empty_kpi_card = (
														
 
															+            in_region and
														
 
															+            getattr(shape, 'auto_shape_type', None) == MSO_SHAPE.ROUNDED_RECTANGLE
														
 
															+        )
														
 
															+        if is_text_placeholder or is_empty_kpi_card:
														
 
															+            to_remove.append(shape)
														
 
															+
														
 
															+    for shape in to_remove:
														
 
															+        _remove_shape(shape)
														
 
															+
														
 
															+
														
 
															 # ==============================================================================
														
 
															 # NAVIGATION TABS
														
 
															 # ==============================================================================
														
@@ -903,6 +969,746 @@ def _safe_div(a, b):
 
															 # ==============================================================================
														
 
															+# DYNAMIC / UNIVERSAL REPORT BUILDER
														
 
															+# ==============================================================================
														
 
															+
														
 
															+def build_report(data_file: str, config: ReportConfig, output_path: str) -> str:
														
 
															+    master_path = config.template_path or get_master_template('daily')
														
 
															+    prs = Presentation(master_path)
														
 
															+
														
 
															+    df = load_generic_excel(data_file)
														
 
															+    if config.require_six_confirmations:
														
 
															+        confirmation_issues = validate_six_confirmations(config, list(df.columns))
														
 
															+        if confirmation_issues:
														
 
															+            raise ValueError('生成前六项确认未通过：\n- ' + '\n- '.join(confirmation_issues))
														
 
															+    profile = config.data_profiling or {}
														
 
															+
														
 
															+    colors = theme_to_rgb_colors(config.theme)
														
 
															+
														
 
															+    metrics = calc_generic_metrics(df, config)
														
 
															+
														
 
															+    content_top = _detect_content_top(prs.slides[1]) if len(prs.slides) > 1 else 1524000
														
 
															+
														
 
															+    total_pages = len([p for p in config.pages if p.selected])
														
 
															+    if total_pages == 0:
														
 
															+        total_pages = len(config.pages)
														
 
															+
														
 
															+    for page_idx, page_def in enumerate(config.pages):
														
 
															+        if not page_def.selected:
														
 
															+            continue
														
 
															+
														
 
															+        page_num = page_idx + 1
														
 
															+
														
 
															+        if page_def.page_type == 'cover':
														
 
															+            _build_cover_page(prs, config, colors)
														
 
															+        elif page_def.page_type == 'toc':
														
 
															+            _build_toc_page(prs, config, colors)
														
 
															+        elif page_def.page_type == 'kpi_overview':
														
 
															+            _build_kpi_overview_page(prs, config, metrics, colors, content_top, df, profile)
														
 
															+        elif page_def.page_type == 'trend':
														
 
															+            _build_trend_page(prs, config, df, profile, colors, content_top)
														
 
															+        elif page_def.page_type == 'distribution':
														
 
															+            _build_distribution_page(prs, config, df, profile, colors, content_top, page_def)
														
 
															+        elif page_def.page_type == 'ranking':
														
 
															+            _build_ranking_page(prs, config, df, profile, colors, content_top, page_def)
														
 
															+        elif page_def.page_type == 'summary':
														
 
															+            _build_summary_page(prs, config, metrics, profile, colors, content_top, page_def)
														
 
															+        elif page_def.page_type == 'end':
														
 
															+            _build_end_page(prs, config, colors)
														
 
															+
														
 
															+    for slide in prs.slides:
														
 
															+        _ensure_word_wrap_all(slide)
														
 
															+
														
 
															+    _delete_template_slides(prs)
														
 
															+    prs.save(output_path)
														
 
															+    print(f"Report saved: {output_path}")
														
 
															+    return output_path
														
 
															+
														
 
															+
														
 
															+def quality_assured_build(data_file: str, config: ReportConfig,
														
 
															+                          output_path: str) -> tuple:
														
 
															+    if config.require_six_confirmations:
														
 
															+        df = load_generic_excel(data_file)
														
 
															+        confirmation_issues = validate_six_confirmations(config, list(df.columns))
														
 
															+        if confirmation_issues:
														
 
															+            raise ValueError('生成前六项确认未通过：\n- ' + '\n- '.join(confirmation_issues))
														
 
															+
														
 
															+    inspector = QualityInspector(theme_to_rgb_colors(config.theme))
														
 
															+
														
 
															+    return inspector.quality_assured_build(
														
 
															+        build_fn=lambda d, c: _build_without_save(d, c, config),
														
 
															+        data=data_file,
														
 
															+        config=config,
														
 
															+        output_path=output_path,
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def _build_without_save(data_file, temp_config, original_config):
														
 
															+    from pptx import Presentation as Prs
														
 
															+    prs = Prs(get_master_template('daily'))
														
 
															+    df = load_generic_excel(data_file)
														
 
															+    profile = original_config.data_profiling or {}
														
 
															+    colors = theme_to_rgb_colors(original_config.theme)
														
 
															+    metrics = calc_generic_metrics(df, original_config)
														
 
															+    content_top = _detect_content_top(prs.slides[1]) if len(prs.slides) > 1 else 1524000
														
 
															+
														
 
															+    for page_def in original_config.pages:
														
 
															+        if not page_def.selected:
														
 
															+            continue
														
 
															+        if page_def.page_type == 'cover':
														
 
															+            _build_cover_page(prs, original_config, colors)
														
 
															+        elif page_def.page_type == 'kpi_overview':
														
 
															+            _build_kpi_overview_page(prs, original_config, metrics, colors, content_top, df, profile)
														
 
															+        elif page_def.page_type == 'trend':
														
 
															+            if not _build_trend_page(prs, original_config, df, profile, colors, content_top):
														
 
															+                _build_fallback_analysis_page(prs, original_config, page_def, df, profile, metrics, colors, content_top)
														
 
															+        elif page_def.page_type == 'distribution':
														
 
															+            if not _build_distribution_page(prs, original_config, df, profile, colors, content_top, page_def):
														
 
															+                _build_fallback_analysis_page(prs, original_config, page_def, df, profile, metrics, colors, content_top)
														
 
															+        elif page_def.page_type == 'ranking':
														
 
															+            if not _build_ranking_page(prs, original_config, df, profile, colors, content_top, page_def):
														
 
															+                _build_fallback_analysis_page(prs, original_config, page_def, df, profile, metrics, colors, content_top)
														
 
															+        elif page_def.page_type == 'summary':
														
 
															+            _build_summary_page(prs, original_config, metrics, profile, colors, content_top, page_def)
														
 
															+        elif page_def.page_type == 'end':
														
 
															+            _build_end_page(prs, original_config, colors)
														
 
															+        elif page_def.page_type == 'toc':
														
 
															+            _build_toc_page(prs, original_config, colors)
														
 
															+
														
 
															+    for slide in prs.slides:
														
 
															+        _ensure_word_wrap_all(slide)
														
 
															+    _delete_template_slides(prs)
														
 
															+    return prs
														
 
															+
														
 
															+
														
 
															+def _build_cover_page(prs, config, colors):
														
 
															+    slide = _duplicate_slide(prs, prs.slides[0])
														
 
															+    _replace_all_placeholders(slide, {
														
 
															+        '{report_title}': config.title,
														
 
															+        '{report_type}': '数据报告',
														
 
															+        '{date}': config.period_str or config.date_range[0].strftime('%Y年%m月%d日'),
														
 
															+        '{department}': config.source_label,
														
 
															+        '{period}': config.period_str,
														
 
															+        '{gen_time}': datetime.now().strftime('%Y-%m-%d %H:%M'),
														
 
															+    })
														
 
															+    _remove_empty_cover_kpi_placeholders(slide)
														
 
															+    _add_footer_if_missing(slide, f'数据来源：{config.source_label} | 1/{len(config.pages)}')
														
 
															+
														
 
															+
														
 
															+def _build_fallback_analysis_page(prs, config, page_def, df, profile, metrics, colors, content_top):
														
 
															+    """
														
 
															+    Fallback page builder: generates analysis text from available data
														
 
															+    when the primary page type cannot produce content (e.g. no time columns
														
 
															+    for trend, no category columns for distribution).
														
 
															+    Produces at least 4 deep analysis blocks with data citations.
														
 
															+    """
														
 
															+    slide = _duplicate_slide(prs, prs.slides[1])
														
 
															+    page_title = page_def.title if page_def and page_def.title else f'{config.title}数据分析'
														
 
															+    _replace_all_placeholders(slide, {
														
 
															+        '{report_title}': config.title,
														
 
															+        '{date}': config.period_str,
														
 
															+        '{page_title}': page_title,
														
 
															+        '{source}': config.source_label,
														
 
															+        '{period}': '',
														
 
															+        '{page_num}': '',
														
 
															+    })
														
 
															+
														
 
															+    num_cols = profile.get('numeric_columns', [])
														
 
															+    cat_cols = profile.get('category_columns', [])
														
 
															+
														
 
															+    insight_items = []
														
 
															+
														
 
															+    if num_cols:
														
 
															+        top_metric = num_cols[0]
														
 
															+        top_name = top_metric.get('inferred_label', top_metric['column_name'])
														
 
															+        top_vals = df[top_metric['column_name']].dropna()
														
 
															+        if len(top_vals) > 0:
														
 
															+            mean_val = top_vals.mean()
														
 
															+            max_val = top_vals.max()
														
 
															+            min_val = top_vals.min()
														
 
															+            median_val = top_vals.median()
														
 
															+            total_val = top_vals.sum()
														
 
															+            insight_items.append({
														
 
															+                'title': f'{top_name}整体概览',
														
 
															+                'content': f'报告周期内，{top_name}统计数据共包含 {len(top_vals)} 条有效记录。'
														
 
															+                           f'总和为 {total_val:,.0f}，平均值为 {mean_val:,.2f}，中位数为 {median_val:,.2f}。'
														
 
															+                           f'最大值为 {max_val:,.2f}，最小值为 {min_val:,.2f}。'
														
 
															+                           f'{"数据波动范围较大，最大值与最小值差距显著，说明不同条目间差异明显，建议深入分析极端值成因" if min_val > 0 and max_val / max(min_val, 1) > 100 else "数据整体分布较为均衡，波动性在合理范围内"}。'
														
 
															+                           f'中位数与平均值的偏差反映了数据的{"右偏分布（少数大值拉高了均值），说明存在显著头部效应" if median_val < mean_val * 0.8 else "左偏分布" if median_val > mean_val * 1.2 else "较为对称，数据呈正态分布趋势"}。',
														
 
															+            })
														
 
															+
														
 
															+            insight_items.append({
														
 
															+                'title': f'{top_name}分段分析',
														
 
															+                'content': f'对 {top_name} 进行四分段统计：上四分位数（25%数据高于此值）为 {top_vals.quantile(0.75):,.2f}，'
														
 
															+                           f'下四分位数（25%数据低于此值）为 {top_vals.quantile(0.25):,.2f}，'
														
 
															+                           f'四分位距（IQR）为 {top_vals.quantile(0.75) - top_vals.quantile(0.25):,.2f}。'
														
 
															+                           f'{"IQR较大，数据分布较为离散，不同类别的表现差异明显，需关注尾部类别的提升空间" if (top_vals.quantile(0.75) - top_vals.quantile(0.25)) > abs(mean_val) * 0.5 else "IQR在合理范围内，数据集中度较好"}。'
														
 
															+                           f'建议按四分位将数据分为四组，重点跟踪上四分位组的表现，识别可复制的成功因素。',
														
 
															+            })
														
 
															+
														
 
															+    if cat_cols and num_cols:
														
 
															+        cat = cat_cols[0]
														
 
															+        cat_name = cat.get('inferred_label', cat['column_name'])
														
 
															+        num = num_cols[0]
														
 
															+        num_name = num.get('inferred_label', num['column_name'])
														
 
															+        cat_unique = df[cat['column_name']].dropna().nunique()
														
 
															+        insight_items.append({
														
 
															+            'title': f'{cat_name}分类覆盖分析',
														
 
															+            'content': f'数据共覆盖 {cat_unique} 个不同的{cat_name}，在 {num_name} 维度上呈现差异化分布。'
														
 
															+                       f'不同{cat_name}对整体{num_name}的贡献度各异，建议按贡献度大小将{cat_name}进行分类管理。'
														
 
															+                       f'高贡献类别应重点维护和深度挖掘，中等贡献类别需持续培育和资源投入，'
														
 
															+                       f'低贡献类别可评估其战略价值，适当调整投入节奏。建议建立分类分级管理体系，'
														
 
															+                       f'每月跟踪各类别的变化趋势和占比波动。',
														
 
															+        })
														
 
															+
														
 
															+    if len(num_cols) >= 2:
														
 
															+        num1 = num_cols[0]
														
 
															+        num2 = num_cols[1]
														
 
															+        ratio = df[num1['column_name']].sum() / max(df[num2['column_name']].sum(), 1)
														
 
															+        insight_items.append({
														
 
															+            'title': '关键比率与效率指标',
														
 
															+            'content': f'{num1.get("inferred_label", num1["column_name"])}与{num2.get("inferred_label", num2["column_name"])}的比率为 {ratio:.2f}，'
														
 
															+                       f'该比率是衡量业务效率的重要参考指标。'
														
 
															+                       f'{"比率处于较高水平，表明单位投入产出效率良好" if ratio > 1 else "比率偏低，单位投入的产出效益有限，存在效率提升空间"}。'
														
 
															+                       f'建议将此比率纳入定期监控指标，按月环比追踪变化趋势，'
														
 
															+                       f'并针对低比率项目制定专项提升计划，分析制约因素和可优化环节。',
														
 
															+        })
														
 
															+
														
 
															+    insight_items.append({
														
 
															+        'title': '数据质量与代表性评估',
														
 
															+        'content': f'本报告基于共 {len(df)} 条记录进行分析，数据覆盖范围包括上述多个维度。'
														
 
															+                   f'建议在后续周期中持续关注数据完整性和及时性，确保分析结果准确反映业务真实情况。'
														
 
															+                   f'对于数据量较小或集中度较高的维度，应结合业务判断进行解读，避免以偏概全。'
														
 
															+                   f'同时建议补充更多维度的数据（如时间序列数据、竞品对标数据等），'
														
 
															+                   f'以支撑更全面的分析视角和更精准的决策建议。',
														
 
															+    })
														
 
															+
														
 
															+    if not insight_items:
														
 
															+        insight_items = [{
														
 
															+            'title': '数据总览',
														
 
															+            'content': f'当前数据集包含 {len(df)} 条记录，{len(df.columns)} 个字段。'
														
 
															+                       f'数值字段 {len(num_cols)} 个，分类字段 {len(cat_cols)} 个。'
														
 
															+                       f'建议结合业务场景规划具体的数据分析维度，'
														
 
															+                       f'以生成更具洞察力和指导意义的数据报告。',
														
 
															+        }]
														
 
															+
														
 
															+    if num_cols and len(df) > 0:
														
 
															+        top_col = num_cols[0]
														
 
															+        chart_zone = get_chart_left_zone(content_top, 0.4)
														
 
															+        text_zone = get_insight_right_zone(content_top, 0.4)
														
 
															+        sample_vals = df[top_col['column_name']].dropna().head(10).tolist()
														
 
															+        sample_labels = [f'记录{i+1}' for i in range(len(sample_vals))]
														
 
															+        if sample_vals:
														
 
															+            add_bar_chart(slide, sample_labels, sample_vals,
														
 
															+                         Emu(chart_zone.x), Emu(chart_zone.y),
														
 
															+                         Emu(chart_zone.width), Emu(chart_zone.height),
														
 
															+                         series_name=top_col.get('inferred_label', top_col['column_name']),
														
 
															+                         color=colors.get('primary'))
														
 
															+        _add_structured_insight(slide, insight_items,
														
 
															+                                Emu(text_zone.x), Emu(text_zone.y),
														
 
															+                                Emu(text_zone.width), Emu(text_zone.height))
														
 
															+    else:
														
 
															+        zone = get_full_width_zone(content_top)
														
 
															+        _add_structured_insight(slide, insight_items,
														
 
															+                                Emu(zone.x), Emu(zone.y),
														
 
															+                                Emu(zone.width), Emu(zone.height))
														
 
															+
														
 
															+
														
 
															+def _build_toc_page(prs, config, colors):
														
 
															+    slide = _duplicate_slide(prs, prs.slides[1])
														
 
															+    active_pages = [p for p in config.pages if p.selected and p.page_type not in ('cover', 'toc', 'end')]
														
 
															+    _replace_all_placeholders(slide, {
														
 
															+        '{report_title}': config.title,
														
 
															+        '{date}': config.period_str,
														
 
															+        '{page_title}': '目录',
														
 
															+        '{source}': config.source_label,
														
 
															+        '{period}': f'2/{len(config.pages)}',
														
 
															+        '{page_num}': '',
														
 
															+    })
														
 
															+    for i, page in enumerate(active_pages[:6], 1):
														
 
															+        _replace_placeholder(slide, f'{{chapter{i}_title}}', page.title)
														
 
															+        _replace_placeholder(slide, f'{{chapter{i}_desc}}', page.conclusion_title or page.title)
														
 
															+
														
 
															+
														
 
															+def _build_kpi_overview_page(prs, config, metrics, colors, content_top, df=None, profile=None):
														
 
															+    slide = _duplicate_slide(prs, prs.slides[1])
														
 
															+    page_title = '核心指标概览'
														
 
															+    _replace_all_placeholders(slide, {
														
 
															+        '{report_title}': config.title,
														
 
															+        '{date}': config.period_str,
														
 
															+        '{page_title}': page_title,
														
 
															+        '{source}': config.source_label,
														
 
															+        '{period}': '',
														
 
															+        '{page_num}': '',
														
 
															+    })
														
 
															+    kpi_items = []
														
 
															+    primary_vals = {}
														
 
															+    all_vals = {}
														
 
															+    for md in config.metrics:
														
 
															+        if md.metric_type.value == 'kpi' and md.selected:
														
 
															+            val = metrics.get(md.name, 0)
														
 
															+            display_val = format(val, md.format_spec) if isinstance(val, (int, float)) else str(val)
														
 
															+            kpi_items.append({
														
 
															+                'label': md.label,
														
 
															+                'value': display_val,
														
 
															+                'unit': md.unit,
														
 
															+                'change': '',
														
 
															+                'sub': '',
														
 
															+            })
														
 
															+            if md.is_primary:
														
 
															+                primary_vals[md.label] = val
														
 
															+            all_vals[md.label] = val
														
 
															+
														
 
															+    if kpi_items:
														
 
															+        _add_kpi_cards(slide, kpi_items[:6], start_y=Emu(content_top))
														
 
															+
														
 
															+        insight_items = []
														
 
															+
														
 
															+        kpi_names = [m.label for m in config.metrics if m.selected]
														
 
															+        kpi_str = "、".join(kpi_names[:6]) if kpi_names else "各指标"
														
 
															+        primary_kpis = [m for m in config.metrics if m.is_primary and m.selected]
														
 
															+        if not primary_kpis:
														
 
															+            primary_kpis = [m for m in config.metrics if m.selected][:3]
														
 
															+
														
 
															+        kpi_detail_parts = []
														
 
															+        for i, pk in enumerate(primary_kpis):
														
 
															+            val = all_vals.get(pk.label, 0)
														
 
															+            unit_str = pk.unit if pk.unit else ''
														
 
															+            display_val = format(val, pk.format_spec) if isinstance(val, (int, float)) else str(val)
														
 
															+            kpi_detail_parts.append(f'{pk.label}: {display_val}{unit_str}')
														
 
															+
														
 
															+        insight_items.append({
														
 
															+            'title': '核心数据概览',
														
 
															+            'content': f'本期报告涵盖 {kpi_str} 共 {len(kpi_names)} 项核心指标。'
														
 
															+                       f'{"；".join(kpi_detail_parts[:4])}。'
														
 
															+                       f'其中{"、".join(p.label for p in primary_kpis[:3])}为本次分析的重点关注指标。'
														
 
															+                       f'建议将这些指标与历史同期数据进行纵向对比，以及与行业基准进行横向对标，以全面评估当前业务健康度。'
														
 
															+                       f'对于波动较大的指标，需深入追溯其背后的业务动因，判断是否为趋势性变化还是季节性波动。',
														
 
															+        })
														
 
															+
														
 
															+        cat_cols = profile.get('category_columns', []) if profile else []
														
 
															+        num_cols = profile.get('numeric_columns', []) if profile else []
														
 
															+        total_rows = profile.get('total_rows', 0) if profile else 0
														
 
															+
														
 
															+        if cat_cols:
														
 
															+            top_cats = [c.get('inferred_label', c.get('column_name', '')) for c in cat_cols[:3]]
														
 
															+            cat_details = []
														
 
															+            for c in cat_cols[:3]:
														
 
															+                uc = c.get('unique_count', 'N/A')
														
 
															+                cat_details.append(f'{c.get("inferred_label", c.get("column_name", ""))}({uc}类)')
														
 
															+            insight_items.append({
														
 
															+                'title': '数据覆盖与维度分析',
														
 
															+                'content': f'数据覆盖 {total_rows:,} 条记录，包含 {", ".join(cat_details)} 等多个分析维度。'
														
 
															+                           f'丰富的维度数据支持从 {", ".join(top_cats)} 等角度进行多维度联动分析。'
														
 
															+                           f'建议关注各维度下的数据分布特征，识别高贡献或异常的分类群体，'
														
 
															+                           f'针对性地分析不同维度的表现差异，为精细化运营和数据驱动决策提供支撑。',
														
 
															+            })
														
 
															+
														
 
															+        if len(config.metrics) >= 3:
														
 
															+            compare_items = []
														
 
															+            for a, b in zip(primary_kpis[:2], primary_kpis[1:3]):
														
 
															+                va = all_vals.get(a.label, 0)
														
 
															+                vb = all_vals.get(b.label, 0)
														
 
															+                if va and vb:
														
 
															+                    ratio = round(va / vb, 2) if vb else 0
														
 
															+                    compare_items.append(f'{a.label}与{b.label}的比值为 {ratio}')
														
 
															+            if compare_items:
														
 
															+                insight_items.append({
														
 
															+                    'title': '指标间关联分析',
														
 
															+                    'content': f'{"；".join(compare_items)}。通过指标间的比值关系可以发现数据的内在规律，'
														
 
															+                               f'比值异常偏离正常区间时需重点关注。建议进一步计算各指标与核心业务目标之间的相关系数，'
														
 
															+                               f'量化不同指标对业务目标的影响力排序，将有限资源聚焦在驱动型指标上。',
														
 
															+                })
														
 
															+            else:
														
 
															+                insight_items.append({
														
 
															+                    'title': '指标间关联分析',
														
 
															+                    'content': f'本期核心指标包括 {", ".join(p.label for p in primary_kpis[:3])}。'
														
 
															+                               f'建议通过散点图或相关系数分析探索指标间的线性/非线性关系，识别是否存在协同或对冲效应。'
														
 
															+                               f'同时建议按时间序列分析各指标的周期性规律，为资源配置和预测提供依据。',
														
 
															+                })
														
 
															+
														
 
															+        insight_items.append({
														
 
															+            'title': '关键发现与行动建议',
														
 
															+            'content': f'综合分析 {len(kpi_names)} 项指标，建议重点关注以下方向：'
														
 
															+                       f'(1) 定期监控核心指标的趋势变化，建立异常预警机制，当指标偏离正常区间时及时触发排查流程；'
														
 
															+                       f'(2) 深化多维度交叉分析，挖掘不同群体间的结构差异，识别增长机会和风险点；'
														
 
															+                       f'(3) 结合业务经验和外部数据，验证数据指标的准确性和合理性；'
														
 
															+                       f'(4) 将分析结论转化为可执行的具体行动项，明确责任人和时间节点，建立跟踪闭环机制。',
														
 
															+        })
														
 
															+
														
 
															+        kpi_rows = 2 if len(kpi_items) > 3 else 1
														
 
															+        kpi_grid_bottom = int(content_top) + Emu(3048000)
														
 
															+        if kpi_rows == 2:
														
 
															+            kpi_grid_bottom += Emu(3429000)
														
 
															+        insight_zone_y = kpi_grid_bottom + Emu(254000)
														
 
															+        remaining_height = int(FOOTER_TOP - insight_zone_y - Emu(180000))
														
 
															+        if remaining_height >= Emu(1400000):
														
 
															+            compact_items = insight_items[:2] if kpi_rows == 2 else insight_items[:3]
														
 
															+            _add_structured_insight(slide, compact_items,
														
 
															+                                    Emu(CONTENT_LEFT), Emu(insight_zone_y),
														
 
															+                                    Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT), Emu(remaining_height),
														
 
															+                                    title_size=Pt(10), body_size=Pt(9), min_body_size=Pt(8))
														
 
															+
														
 
															+
														
 
															+def _build_trend_page(prs, config, df, profile, colors, content_top):
														
 
															+    slide = _duplicate_slide(prs, prs.slides[1])
														
 
															+    time_cols = profile.get('time_columns', [])
														
 
															+    num_cols = profile.get('numeric_columns', [])
														
 
															+    if not time_cols or not num_cols:
														
 
															+        return False
														
 
															+
														
 
															+    time_col = time_cols[0]['column_name']
														
 
															+    metric_col = num_cols[0]['column_name']
														
 
															+    label = num_cols[0].get('inferred_label', metric_col)
														
 
															+
														
 
															+    page_title = f'{label}趋势'
														
 
															+    _replace_all_placeholders(slide, {
														
 
															+        '{report_title}': config.title,
														
 
															+        '{date}': config.period_str,
														
 
															+        '{page_title}': page_title,
														
 
															+        '{source}': config.source_label,
														
 
															+        '{period}': '',
														
 
															+        '{page_num}': '',
														
 
															+    })
														
 
															+
														
 
															+    trend_data = calc_generic_trend(df, time_col, metric_col)
														
 
															+
														
 
															+    if trend_data.get('dates'):
														
 
															+        chart_zone = get_chart_left_zone(content_top, 0.6)
														
 
															+        text_zone = get_insight_right_zone(content_top, 0.6)
														
 
															+        add_line_chart(slide, trend_data['dates'], trend_data['values'],
														
 
															+                       Emu(chart_zone.x), Emu(chart_zone.y),
														
 
															+                       Emu(chart_zone.width), Emu(chart_zone.height),
														
 
															+                       series_name=label, color=colors.get('primary'))
														
 
															+
														
 
															+        dates = trend_data['dates']
														
 
															+        vals = trend_data['values']
														
 
															+        n = len(vals)
														
 
															+        first_v, last_v = vals[0], vals[-1]
														
 
															+        change = last_v - first_v
														
 
															+        change_pct = round(change / first_v * 100, 1) if first_v else 0
														
 
															+        max_v = max(vals) if vals else 0
														
 
															+        min_v = min(vals) if vals else 0
														
 
															+        max_idx = vals.index(max_v) if vals else 0
														
 
															+        min_idx = vals.index(min_v) if vals else 0
														
 
															+        peak_date = dates[max_idx] if max_idx < len(dates) else 'N/A'
														
 
															+        trough_date = dates[min_idx] if min_idx < len(dates) else 'N/A'
														
 
															+
														
 
															+        direction_text = '上升' if change > 0 else '下降' if change < 0 else '平稳'
														
 
															+        volatility = round((max_v - min_v) / (sum(vals) / n) * 100, 1) if sum(vals) else 0 if vals else 0
														
 
															+        insight_items = [
														
 
															+            {
														
 
															+                'title': f'{label}整体趋势概况',
														
 
															+                'content': f'在报告周期内共采集 {n} 个时间点的数据，{label}'
														
 
															+                           f'从 {dates[0]} 的 {first_v:,.0f} 变动至 {dates[-1]} 的 {last_v:,.0f}，'
														
 
															+                           f'整体{direction_text}{abs(change_pct):.1f}%，{direction_text}趋势{"显著" if abs(change_pct) > 20 else "温和" if abs(change_pct) > 5 else "较为平缓"}。'
														
 
															+                           f'数据变化轨迹反映出{"持续向好的增长态势" if direction_text == "上升" and abs(change_pct) > 10 else "温和改善的积极信号" if direction_text == "上升" else "回调盘整的阶段性特征" if direction_text == "下降" else "平稳运行的基本状态"}，'
														
 
															+                           f'建议将当前趋势与业务目标和历史同期数据进行交叉对比，评估达成全年目标的可行性。如需更详尽的趋势分析，建议增加数据采集频度和时间跨度。',
														
 
															+            },
														
 
															+            {
														
 
															+                'title': '峰值与谷值分析',
														
 
															+                'content': f'周期内最高值出现在 {peak_date}，为 {max_v:,.0f}；'
														
 
															+                           f'最低值出现在 {trough_date}，为 {min_v:,.0f}。'
														
 
															+                           f'极值差距 {max_v - min_v:,.0f}，波动幅度 {volatility}%，'
														
 
															+                           f'{"波动显著，需关注异常节点的驱动因素，建议排查是否受节假日、促销活动、外部政策变化等因素影响" if volatility > 30 else "波动在可控范围内，但仍需对异常波动保持警觉"}{"." if volatility > 30 else "，建立异常值的快速预警和响应机制。"}',
														
 
															+            },
														
 
															+            {
														
 
															+                'title': '趋势阶段性特征',
														
 
															+                'content': f'前半程（{dates[0]}至{dates[min(n//2, n-1)]}）'
														
 
															+                           f'{"呈上升态势" if sum(vals[:n//2]) < sum(vals[n//2:]) else "呈下降态势" if sum(vals[:n//2]) > sum(vals[n//2:]) else "基本持平"}，'
														
 
															+                           f'后半程均值为 {sum(vals[n//2:])/(n-n//2):,.0f}。建议结合业务事件节点深入分析拐点成因，'
														
 
															+                           f'重点关注是否存在季节性波动、周期性波动或外部冲击等结构性因素。'
														
 
															+                           f'若数据量较少，趋势解读应以业务经验为主，辅以数据验证。',
														
 
															+            },
														
 
															+            {
														
 
															+                'title': '业务启示',
														
 
															+                'content': f'综合趋势分析，当前数据反映出{"积极向好的发展态势" if direction_text == "上升" and abs(change_pct) > 10 else "温和稳定的运行动态" if abs(change_pct) <= 10 else "需重点关注的下行风险"}。'
														
 
															+                           f'建议{"加大资源投入以把握增长机遇，同时关注增速的可持续性，避免盲目扩张" if direction_text == "上升" else "排查下降原因并制定针对性应对措施，分析是短期波动还是长期趋势转折" if direction_text == "下降" else "保持当前运营节奏，同时关注潜在变化信号，适时调整策略" if direction_text == "平稳" else "继续观察数据走势"}。'
														
 
															+                           f'建议将数据与业务KPI目标进行对标分析，定期回顾趋势变化。',
														
 
															+            },
														
 
															+        ]
														
 
															+        _add_structured_insight(slide, insight_items,
														
 
															+                                Emu(text_zone.x), Emu(text_zone.y),
														
 
															+                                Emu(text_zone.width), Emu(text_zone.height))
														
 
															+        return True
														
 
															+    return False
														
 
															+
														
 
															+
														
 
															+def _build_distribution_page(prs, config, df, profile, colors, content_top, page_def=None):
														
 
															+    slide = _duplicate_slide(prs, prs.slides[1])
														
 
															+    cat_cols = profile.get('category_columns', [])
														
 
															+    num_cols = profile.get('numeric_columns', [])
														
 
															+    if not cat_cols:
														
 
															+        return False
														
 
															+
														
 
															+    elem = (page_def.elements or [{}])[0] if page_def else {}
														
 
															+    cat_col = elem.get('category') or cat_cols[0]['column_name']
														
 
															+    cat_label = elem.get('category_label') or next(
														
 
															+        (c.get('inferred_label', cat_col) for c in cat_cols if c['column_name'] == cat_col), cat_col)
														
 
															+    metric_col = elem.get('metric') or (num_cols[0]['column_name'] if num_cols else None)
														
 
															+    metric_label = elem.get('metric_label') or (next(
														
 
															+        (c.get('inferred_label', metric_col) for c in num_cols if c['column_name'] == metric_col), metric_col) if metric_col else '')
														
 
															+
														
 
															+    page_title = page_def.title if page_def and page_def.title else f'{cat_label}分布'
														
 
															+    _replace_all_placeholders(slide, {
														
 
															+        '{report_title}': config.title,
														
 
															+        '{date}': config.period_str,
														
 
															+        '{page_title}': page_title,
														
 
															+        '{source}': config.source_label,
														
 
															+        '{period}': '',
														
 
															+        '{page_num}': '',
														
 
															+    })
														
 
															+
														
 
															+    dist = calc_generic_distribution(df, cat_col, metric_col, top_n=8)
														
 
															+
														
 
															+    if dist.get('categories'):
														
 
															+        chart_zone = get_chart_left_zone(content_top, 0.55)
														
 
															+        text_zone = get_insight_right_zone(content_top, 0.55)
														
 
															+        if len(dist['categories']) <= 8:
														
 
															+            add_doughnut_chart(slide, dist['categories'], dist['values'],
														
 
															+                              Emu(chart_zone.x), Emu(chart_zone.y),
														
 
															+                              Emu(chart_zone.width), Emu(chart_zone.height),
														
 
															+                              colors=colors.get('series'))
														
 
															+        else:
														
 
															+            add_bar_chart(slide, dist['categories'], dist['values'],
														
 
															+                         Emu(chart_zone.x), Emu(chart_zone.y),
														
 
															+                         Emu(chart_zone.width), Emu(chart_zone.height),
														
 
															+                         series_name=metric_label, color=colors.get('primary'))
														
 
															+
														
 
															+        cats, vals, pcts = dist['categories'], dist['values'], dist['percentages']
														
 
															+        grand_total = sum(vals)
														
 
															+        top3_pct = sum(pcts[:3])
														
 
															+        top1_name, top1_val, top1_pct = cats[0], vals[0], pcts[0]
														
 
															+
														
 
															+        metric_suffix = metric_label if metric_label else '数量'
														
 
															+        insight_items = [
														
 
															+            {
														
 
															+                'title': f'{cat_label}分布概况',
														
 
															+                'content': f'共有 {len(cats)} 个不同的{cat_label}，覆盖范围'
														
 
															+                           f'{"广泛" if len(cats) >= 8 else "较为丰富" if len(cats) >= 5 else "相对集中"}。'
														
 
															+                           f'前3名合计占比 {top3_pct:.1f}%，集中度'
														
 
															+                           f'{"较高，呈现显著的头部集中特征" if top3_pct > 70 else "中等，呈现梯度递减分布" if top3_pct > 50 else "较低，分布较为均衡"}。',
														
 
															+            },
														
 
															+            {
														
 
															+                'title': f'排名第一: {top1_name}',
														
 
															+                'content': f'{top1_name}以 {top1_val:,}{metric_suffix}（占比 {top1_pct:.1f}%）位居榜首，'
														
 
															+                           f'{"是第二名" + cats[1] + "的" + f"{round(top1_val/vals[1],1)}" + "倍，优势极为显著" if len(cats) > 1 else "是该维度中最重要的类别"}。'
														
 
															+                           f'该类别贡献了超过三分之一的{metric_label}，是整体业务的基本盘和核心增长极。',
														
 
															+            },
														
 
															+        ]
														
 
															+        if len(vals) >= 3:
														
 
															+            top3_sum = sum(vals[:3])
														
 
															+            tail_sum = sum(vals[3:])
														
 
															+            tail_pct = sum(pcts[3:])
														
 
															+            insight_items.append({
														
 
															+                'title': '长尾分布特征',
														
 
															+                'content': f'前三名累计 {top3_sum:,}{metric_suffix}（{top3_pct:.1f}%），'
														
 
															+                           f'剩余 {len(cats)-3} 个合计 {tail_sum:,}{metric_suffix}（{tail_pct:.1f}%），'
														
 
															+                           f'属于{"头部集中型分布" if top3_pct > 70 else "相对均衡分布" if top3_pct < 50 else "梯度递减型分布"}。'
														
 
															+                           f'头部贡献了绝大部分{metric_label}，尾部虽数量众多但单个贡献有限。',
														
 
															+            })
														
 
															+        if len(vals) > 1:
														
 
															+            avg_val = sum(vals) / len(vals)
														
 
															+            cv = round(vals[0] / avg_val, 1) if avg_val else 0
														
 
															+            median_idx = len(vals) // 2
														
 
															+            median_val = vals[median_idx]
														
 
															+            insight_items.append({
														
 
															+                'title': '差异化与离散度分析',
														
 
															+                'content': f'排名第一的{cat_label}{top1_name}的{metric_suffix}是全部分类均值的 {cv} 倍，'
														
 
															+                           f'中位数分类（第{median_idx+1}名）为 {median_val:,}{metric_suffix}，'
														
 
															+                           f'表明该维度{"差异化显著，资源集中度较高" if cv > 3 else "差异化适中，各分类间差距可控" if cv > 1.5 else "分布较为均匀"}。'
														
 
															+                           f'头部与中位数的差距反映了{cat_label}维度上的分层特征，是运营资源重点倾斜方向。',
														
 
															+            })
														
 
															+        insight_items.append({
														
 
															+            'title': '业务启示',
														
 
															+            'content': f'建议重点关注 {cats[0]} 的增量拓展与存量维护，同时深入分析排名中位类别的提升空间。'
														
 
															+                       f'对于 {metric_label}贡献较小的尾部类别（如占比低于3%的分类），可评估是否优化资源配置、'
														
 
															+                       f'调整运营策略或将资源向高回报类别倾斜。结合{cat_label}维度持续跟踪分布变化，及时把握结构性机会。',
														
 
															+        })
														
 
															+
														
 
															+        _add_structured_insight(slide, insight_items,
														
 
															+                                Emu(text_zone.x), Emu(text_zone.y),
														
 
															+                                Emu(text_zone.width), Emu(text_zone.height))
														
 
															+        return True
														
 
															+    return False
														
 
															+
														
 
															+
														
 
															+def _build_ranking_page(prs, config, df, profile, colors, content_top, page_def=None):
														
 
															+    slide = _duplicate_slide(prs, prs.slides[1])
														
 
															+    cat_cols = profile.get('category_columns', [])
														
 
															+    num_cols = profile.get('numeric_columns', [])
														
 
															+    if not cat_cols or not num_cols:
														
 
															+        return False
														
 
															+
														
 
															+    elem = (page_def.elements or [{}])[0] if page_def else {}
														
 
															+    rank_col = elem.get('category') or cat_cols[-1]['column_name']
														
 
															+    rank_label = elem.get('category_label') or next(
														
 
															+        (c.get('inferred_label', rank_col) for c in cat_cols if c['column_name'] == rank_col), rank_col)
														
 
															+    metric_col = elem.get('metric') or num_cols[0]['column_name']
														
 
															+    metric_label = elem.get('metric_label') or next(
														
 
															+        (c.get('inferred_label', metric_col) for c in num_cols if c['column_name'] == metric_col), metric_col)
														
 
															+
														
 
															+    page_title = page_def.title if page_def and page_def.title else f'{rank_label}TOP排行'
														
 
															+    _replace_all_placeholders(slide, {
														
 
															+        '{report_title}': config.title,
														
 
															+        '{date}': config.period_str,
														
 
															+        '{page_title}': page_title,
														
 
															+        '{source}': config.source_label,
														
 
															+        '{period}': '',
														
 
															+        '{page_num}': '',
														
 
															+    })
														
 
															+
														
 
															+    ranking = calc_generic_ranking(df, rank_col, metric_col, top_n=15)
														
 
															+    if ranking:
														
 
															+        chart_zone = get_chart_left_zone(content_top, 0.6)
														
 
															+        text_zone = get_insight_right_zone(content_top, 0.6)
														
 
															+        names = [r['name'] for r in ranking]
														
 
															+        vals = [r['value'] for r in ranking]
														
 
															+        add_bar_chart(slide, names, vals,
														
 
															+                     Emu(chart_zone.x), Emu(chart_zone.y),
														
 
															+                     Emu(chart_zone.width), Emu(chart_zone.height),
														
 
															+                     series_name=metric_label, color=colors.get('primary'))
														
 
															+
														
 
															+        total_val = sum(vals)
														
 
															+        top3_names = [r['name'] for r in ranking[:3]]
														
 
															+        top3_vals = [r['value'] for r in ranking[:3]]
														
 
															+        top3_pct = [round(v / total_val * 100, 1) for v in top3_vals] if total_val else [0, 0, 0]
														
 
															+        top1_vs_last = round(vals[0] / vals[-1], 1) if len(vals) > 1 and vals[-1] > 0 else 'N/A'
														
 
															+
														
 
															+        insight_items = [
														
 
															+            {
														
 
															+                'title': f'{rank_label}TOP排行概况',
														
 
															+                'content': f'共展示 {len(ranking)} 个排名项，前3名分别为 {top3_names[0]}、{top3_names[1]}、'
														
 
															+                           f'{top3_names[2]}，累计 {sum(top3_vals):,}{metric_label}（{sum(top3_pct):.1f}%）。'
														
 
															+                           f'前三名合计贡献超过总量的三分之一，表明{rank_label}维度呈现{"显著的头部集中特征" if sum(top3_pct) > 60 else "梯度递减的分布格局" if sum(top3_pct) > 40 else "相对均衡的分布态势"}。',
														
 
															+            },
														
 
															+            {
														
 
															+                'title': f'榜首分析: {top3_names[0]}',
														
 
															+                'content': f'{top3_names[0]}以 {top3_vals[0]:,}{metric_label}（占比 {top3_pct[0]:.1f}%）位居榜首，'
														
 
															+                           f'{"是第2名" + top3_names[1] + "的" + f"{round(top3_vals[0]/top3_vals[1],1)}倍，领先优势显著" if len(ranking) > 1 and top3_vals[1] > 0 else "优势突出"}。'
														
 
															+                           f'作为排名第一的{rank_label}，其业绩表现直接影响整体业务大盘，建议重点关注其可持续增长策略。',
														
 
															+            },
														
 
															+            {
														
 
															+                'title': '头部与尾部差距分析',
														
 
															+                'content': f'第1名与第{len(ranking)}名差距达 {top1_vs_last} 倍，'
														
 
															+                           f'前5名平均 {round(sum(vals[:5])/5):,}{metric_label}，'
														
 
															+                           f'后5名平均 {round(sum(vals[-5:])/5):,}{metric_label}，'
														
 
															+                           f'前后差距约 {round((sum(vals[:5])/5)/(sum(vals[-5:])/5),1) if sum(vals[-5:]) > 0 else "N/A"} 倍。'
														
 
															+                           f'{"头部效应极为明显，需关注是否因资源分配不均导致" if isinstance(top1_vs_last, float) and top1_vs_last > 10 else "差距较为显著，存在分层优化的空间" if isinstance(top1_vs_last, float) and top1_vs_last > 5 else "梯度分布相对均衡，可针对性提升各层级表现"}。',
														
 
															+            },
														
 
															+            {
														
 
															+                'title': '累计贡献率与分层分析',
														
 
															+                'content': f'前5名累计贡献 {sum(vals[:5]):,}{metric_label}（{round(sum(vals[:5])/total_val*100,1) if total_val else 0}%），'
														
 
															+                           f'前10名累计贡献 {sum(vals[:10]):,}{metric_label}（{round(sum(vals[:10])/total_val*100,1) if total_val else 0}%），'
														
 
															+                           f'剩余 {len(ranking)-10} 名合计贡献 {sum(vals[10:]):,}{metric_label}（{round(sum(vals[10:])/total_val*100,1) if total_val else 0}%）。'
														
 
															+                           f'从分层结构来看，可划分为三个梯队：第一梯队（前3名）为业绩核心贡献者，第二梯队（第4-8名）为稳定输出层，'
														
 
															+                           f'第三梯队（第9名及以后）为潜力提升层。',
														
 
															+            },
														
 
															+            {
														
 
															+                'title': '业务建议',
														
 
															+                'content': f'重点关注 {", ".join(top3_names)} 的发展动态，提炼其成功经验并推广至团队。'
														
 
															+                           f'对于排名靠后的{rank_label}，可评估其增长潜力与资源匹配度，'
														
 
															+                           f'识别可突破的增量空间。建议建立{rank_label}的绩效考核与激励体系，'
														
 
															+                           f'通过标杆带动和梯队培养实现整体业绩提升。',
														
 
															+            },
														
 
															+        ]
														
 
															+        _add_structured_insight(slide, insight_items,
														
 
															+                                Emu(text_zone.x), Emu(text_zone.y),
														
 
															+                                Emu(text_zone.width), Emu(text_zone.height))
														
 
															+        return True
														
 
															+    return False
														
 
															+
														
 
															+
														
 
															+def _build_summary_page(prs, config, metrics, profile, colors, content_top, page_def=None):
														
 
															+    slide = _duplicate_slide(prs, prs.slides[1])
														
 
															+    page_title = page_def.title if page_def and page_def.title else '总结与建议'
														
 
															+    _replace_all_placeholders(slide, {
														
 
															+        '{report_title}': config.title,
														
 
															+        '{date}': config.period_str,
														
 
															+        '{page_title}': page_title,
														
 
															+        '{source}': config.source_label,
														
 
															+        '{period}': '',
														
 
															+        '{page_num}': '',
														
 
															+    })
														
 
															+
														
 
															+    elem = (page_def.elements or [{}])[0] if page_def else {}
														
 
															+
														
 
															+    if elem.get('support_status') is not None:
														
 
															+        status = elem['support_status']
														
 
															+        dept = elem.get('support_by_dept', {})
														
 
															+        sc = elem.get('support_count', 0)
														
 
															+        cc = elem.get('closed_count', 0)
														
 
															+        close_rate = round(cc / sc * 100, 1) if sc else 0
														
 
															+        fully_closed = status.get('已闭环', 0)
														
 
															+        partial_closed = status.get('部分闭环', 0)
														
 
															+        not_closed = status.get('未闭环', 0)
														
 
															+        insight_items = [{
														
 
															+            'title': '支持需求总览',
														
 
															+            'content': f'本期共产生 {sc} 项跨部门支持需求，其中已闭环 {cc} 项（含完全闭环 {fully_closed} 项、部分闭环 {partial_closed} 项），'
														
 
															+                       f'闭环率 {close_rate}%。未闭环需求 {sc - cc} 项（占比 {round((sc-cc)/sc*100,1) if sc else 0}%），'
														
 
															+                       f'闭环率{"较高，跨部门协作效率良好" if close_rate >= 60 else "处于中等水平，仍有提升空间" if close_rate >= 30 else "偏低，需重点关注闭环推动"}。'
														
 
															+                       f'跨部门支持是保障项目推进的重要环节，高效的闭环机制有助于提升客户满意度和订单转化效率。',
														
 
															+        }]
														
 
															+        if status:
														
 
															+            total_status = sum(status.values())
														
 
															+            fully_pct = round(fully_closed / total_status * 100, 1) if total_status else 0
														
 
															+            partial_pct = round(partial_closed / total_status * 100, 1) if total_status else 0
														
 
															+            not_pct = round(not_closed / total_status * 100, 1) if total_status else 0
														
 
															+            insight_items.append({
														
 
															+                'title': '闭环状态明细',
														
 
															+                'content': f'已闭环 {fully_closed} 项（{fully_pct}%）、部分闭环 {partial_closed} 项（{partial_pct}%）、'
														
 
															+                           f'未闭环 {not_closed} 项（{not_pct}%）。'
														
 
															+                           f'其中完全闭环占比{"超过七成，闭环质量较高" if fully_pct >= 70 else "处于中等水平" if fully_pct >= 40 else "偏低，需提升闭环完整性"}。'
														
 
															+                           f'部分闭环表明需求已部分满足但未完全解决，需持续跟踪至彻底闭环。',
														
 
															+            })
														
 
															+        if dept:
														
 
															+            dept_top = list(dept.items())[:5]
														
 
															+            dept_top_sum = sum(v for _, v in dept_top)
														
 
															+            dept_total = sum(dept.values())
														
 
															+            dept_str = '、'.join([f'{k}（{v}项）' for k, v in dept_top])
														
 
															+            avg_dept_load = round(dept_total / len(dept), 1) if dept else 0
														
 
															+            max_dept = dept_top[0]
														
 
															+            insight_items.append({
														
 
															+                'title': '支持部门工作量分布',
														
 
															+                'content': f'需求覆盖 {len(dept)} 个部门/科室，前5个部门承接 {dept_top_sum} 项（{round(dept_top_sum/dept_total*100,1) if dept_total else 0}%）。'
														
 
															+                           f'Top部门：{dept_str}。其中{max_dept[0]}承接最多（{max_dept[1]}项），'
														
 
															+                           f'平均每个部门承接 {avg_dept_load} 项。请关注工作量较大的部门资源分配是否充足，'
														
 
															+                           f'同时识别是否有部门长期未被分配需求（可能表明资源未充分利用）。',
														
 
															+            })
														
 
															+        if sc - cc > 0:
														
 
															+            insight_items.append({
														
 
															+                'title': '未闭环需求跟进建议',
														
 
															+                'content': f'当前仍有 {sc - cc} 项需求未完成闭环。建议按以下策略推进：第一，按紧急程度和影响范围对未闭环需求进行优先级排序，'
														
 
															+                           f'高优需求指定专人负责限期解决；第二，建立周度闭环跟踪机制，定期更新需求处理进展；'
														
 
															+                           f'第三，对于跨部门协同的复杂需求，建议指定牵头部门统筹协调推进，'
														
 
															+                           f'并建立问题升级机制（当需求超期未解决时自动升级至更高层级协调）。',
														
 
															+            })
														
 
															+        insight_items.append({
														
 
															+            'title': '闭环效率提升建议',
														
 
															+            'content': f'为持续提升支持需求闭环效率，建议：一是建立标准化的需求流转流程，明确各环节责任人和响应时限；'
														
 
															+                       f'二是定期开展闭环案例复盘，提炼最佳实践并在团队内推广；'
														
 
															+                       f'三是建立闭环率考核指标，将闭环时效纳入部门协作评价体系，'
														
 
															+                       f'通过制度保障跨部门协作的效率和质量。',
														
 
															+        })
														
 
															+    else:
														
 
															+        insight_items = generate_generic_insights(profile, metrics)
														
 
															+
														
 
															+    zone = get_full_width_zone(content_top)
														
 
															+    _add_structured_insight(slide, insight_items,
														
 
															+                            Emu(zone.x), Emu(zone.y),
														
 
															+                            Emu(zone.width), Emu(zone.height))
														
 
															+
														
 
															+
														
 
															+def _build_end_page(prs, config, colors):
														
 
															+    slide = _duplicate_slide(prs, prs.slides[3] if len(prs.slides) > 3 else prs.slides[0])
														
 
															+    total = len([p for p in config.pages if p.selected])
														
 
															+    _add_footer_if_missing(slide, f'数据来源：{config.source_label} | {total}/{total}')
														
 
															+    _replace_all_placeholders(slide, {
														
 
															+        '{report_title}': config.title,
														
 
															+    })
														
 
															+
														
 
															+
														
 
															+# ==============================================================================
														
 
															 # DAILY REPORT
														
 
															 # ==============================================================================
														
--- a/generate-data-report-ppt/scripts/quality_inspector.py
+++ b/generate-data-report-ppt/scripts/quality_inspector.py
@@ -0,0 +1,855 @@
 
															+"""
														
 
															+PPT quality inspector and auto-fix engine.
														
 
															+Inspects generated PPT for layout, visual, content, and data issues,
														
 
															+then auto-fixes them iteratively until quality threshold is met.
														
 
															+"""
														
 
															+import re
														
 
															+from pptx import Presentation
														
 
															+from pptx.util import Emu, Pt
														
 
															+from pptx.dml.color import RGBColor
														
 
															+from collections import Counter
														
 
															+
														
 
															+from quality_rules import (
														
 
															+    QUALITY_RULES, SEVERITY_WEIGHTS, CATEGORY_WEIGHTS,
														
 
															+    FILL_RATIO_THRESHOLDS, FONT_SIZE_MIN, FONT_SIZE_MAX,
														
 
															+    TEXT_MIN_LENGTH, INSIGHT_MIN_COUNT, PAGE_MIN_TEXT_LENGTH,
														
 
															+    SAFE_MARGIN, CONTENT_LEFT, CONTENT_TOP_BASE,
														
 
															+    FOOTER_TOP, SLIDE_WIDTH, SLIDE_HEIGHT, DEFAULT_FONT,
														
 
															+    get_quality_label, calculate_score,
														
 
															+)
														
 
															+from page_layouts import calculate_fill_ratio, ensure_safe_position
														
 
															+
														
 
															+
														
 
															+class QualityIssue:
														
 
															+    def __init__(self, severity, category, page_index, description,
														
 
															+                 rule_id='', auto_fixable=True, fix_data=None):
														
 
															+        self.severity = severity
														
 
															+        self.category = category
														
 
															+        self.page_index = page_index
														
 
															+        self.description = description
														
 
															+        self.rule_id = rule_id
														
 
															+        self.auto_fixable = auto_fixable
														
 
															+        self.fix_data = fix_data or {}
														
 
															+
														
 
															+    def __repr__(self):
														
 
															+        return f"[{self.severity}] Page {self.page_index+1}: {self.description}"
														
 
															+
														
 
															+
														
 
															+class QualityInspector:
														
 
															+    def __init__(self, theme_colors: dict = None):
														
 
															+        self.theme_colors = theme_colors or {}
														
 
															+        self.fix_count = 0
														
 
															+        self.fix_log = []
														
 
															+
														
 
															+    def inspect(self, prs: Presentation, config=None) -> list[QualityIssue]:
														
 
															+        issues = []
														
 
															+        issues += self._check_confirmation_alignment(prs, config)
														
 
															+        for page_idx, slide in enumerate(prs.slides):
														
 
															+            page_type = self._get_page_type(page_idx, config, len(prs.slides))
														
 
															+            issues += self._check_layout(slide, page_idx)
														
 
															+            issues += self._check_visual(slide, page_idx)
														
 
															+            issues += self._check_content(slide, page_idx, config, prs, page_type)
														
 
															+            issues += self._check_data(slide, page_idx, prs)
														
 
															+        return issues
														
 
															+
														
 
															+    def _get_page_type(self, page_idx: int, config, total_slides: int) -> str:
														
 
															+        if config and hasattr(config, 'pages') and page_idx < len(config.pages):
														
 
															+            return config.pages[page_idx].page_type
														
 
															+        if page_idx == 0:
														
 
															+            return 'cover'
														
 
															+        if page_idx == total_slides - 1:
														
 
															+            return 'end'
														
 
															+        if page_idx == 1:
														
 
															+            return 'toc'
														
 
															+        return 'content'
														
 
															+
														
 
															+    def _check_confirmation_alignment(self, prs, config) -> list[QualityIssue]:
														
 
															+        issues = []
														
 
															+        if not config:
														
 
															+            return issues
														
 
															+
														
 
															+        selected_pages = [p for p in getattr(config, 'pages', []) if getattr(p, 'selected', True)]
														
 
															+        if getattr(config, 'require_six_confirmations', False):
														
 
															+            confirmation = getattr(config, 'user_confirmation', None)
														
 
															+            if confirmation and hasattr(confirmation, 'is_complete') and not confirmation.is_complete():
														
 
															+                issues.append(QualityIssue(
														
 
															+                    'critical', 'data', -1,
														
 
															+                    '六项确认未完成，PPT 不应进入输出阶段',
														
 
															+                    'D006', False,
														
 
															+                    {'type': 'confirmation_incomplete'}
														
 
															+                ))
														
 
															+
														
 
															+        if config and getattr(config, 'page_count_range', None) and selected_pages:
														
 
															+            low, high = config.page_count_range
														
 
															+            if len(selected_pages) < low or len(selected_pages) > high:
														
 
															+                issues.append(QualityIssue(
														
 
															+                    'major', 'data', -1,
														
 
															+                    f'页面数量 {len(selected_pages)} 不在确认范围 {low}-{high} 内',
														
 
															+                    'D006', False,
														
 
															+                    {'type': 'page_count_range'}
														
 
															+                ))
														
 
															+
														
 
															+        if config and getattr(config, 'metrics', None) and len(selected_pages) > 0:
														
 
															+            selected_metrics = [m for m in config.metrics if getattr(m, 'selected', True)]
														
 
															+            if not selected_metrics:
														
 
															+                issues.append(QualityIssue(
														
 
															+                    'critical', 'data', -1,
														
 
															+                    '未找到已确认的核心指标，无法验证输出一致性',
														
 
															+                    'D006', False,
														
 
															+                    {'type': 'missing_metrics'}
														
 
															+                ))
														
 
															+        return issues
														
 
															+
														
 
															+    def auto_fix(self, prs: Presentation, issues: list[QualityIssue]):
														
 
															+        fixable = [i for i in issues if i.auto_fixable]
														
 
															+        self.fix_count = 0
														
 
															+        self.fix_log = []
														
 
															+
														
 
															+        for issue in fixable:
														
 
															+            try:
														
 
															+                if issue.page_index < 0:
														
 
															+                    continue
														
 
															+                slide = prs.slides[issue.page_index]
														
 
															+                self._apply_fix(slide, issue, prs)
														
 
															+                self.fix_count += 1
														
 
															+            except Exception as e:
														
 
															+                self.fix_log.append(f"Fix failed for {issue.rule_id}: {e}")
														
 
															+
														
 
															+        return self.fix_count
														
 
															+
														
 
															+    def _apply_fix(self, slide, issue, prs):
														
 
															+        category = issue.category
														
 
															+        if category == 'layout':
														
 
															+            self._fix_layout(slide, issue)
														
 
															+        elif category == 'visual':
														
 
															+            self._fix_visual(slide, issue)
														
 
															+        elif category == 'content':
														
 
															+            self._fix_content(slide, issue, prs)
														
 
															+        elif category == 'data':
														
 
															+            self._fix_data(slide, issue, prs)
														
 
															+
														
 
															+    def generate_report(self, issues: list[QualityIssue], iteration: int = 1,
														
 
															+                        total_pages: int = 0) -> str:
														
 
															+        lines = []
														
 
															+        lines.append('═' * 50)
														
 
															+        lines.append(f'  PPT 质量自检报告 (第 {iteration} 轮)')
														
 
															+        lines.append('═' * 50)
														
 
															+
														
 
															+        if not issues:
														
 
															+            lines.append('[PASS] 全部通过！未发现任何质量问题。')
														
 
															+            return '\n'.join(lines)
														
 
															+
														
 
															+        by_page = {}
														
 
															+        for iss in issues:
														
 
															+            p = iss.page_index
														
 
															+            if p not in by_page:
														
 
															+                by_page[p] = []
														
 
															+            by_page[p].append(iss)
														
 
															+
														
 
															+        for p_idx in sorted(by_page.keys()):
														
 
															+            page_issues = by_page[p_idx]
														
 
															+            sev_order = {'critical': 0, 'major': 1, 'minor': 2}
														
 
															+            page_issues.sort(key=lambda x: sev_order.get(x.severity, 3))
														
 
															+
														
 
															+            has_critical = any(i.severity == 'critical' for i in page_issues)
														
 
															+            has_major = any(i.severity == 'major' for i in page_issues)
														
 
															+
														
 
															+            if has_critical:
														
 
															+                icon = '[CRIT]'
														
 
															+            elif has_major:
														
 
															+                icon = '[MAJ]'
														
 
															+            else:
														
 
															+                icon = '[OK]'
														
 
															+
														
 
															+            lines.append(f'{icon} 第{p_idx+1}页: {len(page_issues)} 个问题')
														
 
															+
														
 
															+            for iss in page_issues:
														
 
															+                sev_icon = {'critical': '[!!]', 'major': '[!]', 'minor': '[-]'}.get(iss.severity, '')
														
 
															+                status = ' [FIXED]' if iss.auto_fixable and iss.fix_data.get('fixed') else ''
														
 
															+                lines.append(f'  ├─ {sev_icon} {iss.description}{status}')
														
 
															+
														
 
															+        lines.append('─' * 50)
														
 
															+
														
 
															+        by_sev = Counter(i.severity for i in issues)
														
 
															+        by_cat = Counter(i.category for i in issues)
														
 
															+        fixed = sum(1 for i in issues if i.auto_fixable and i.fix_data.get('fixed'))
														
 
															+        score = calculate_score(dict(by_sev), dict(by_cat), max(total_pages, 1))
														
 
															+        label = get_quality_label(score)
														
 
															+
														
 
															+        lines.append(f'总结: {len(issues)} 个问题 | '
														
 
															+                     f'{by_sev.get("critical", 0)} 严重 + '
														
 
															+                     f'{by_sev.get("major", 0)} 主要 + '
														
 
															+                     f'{by_sev.get("minor", 0)} 次要')
														
 
															+        lines.append(f'自动修复: {fixed}/{sum(1 for i in issues if i.auto_fixable)} 个')
														
 
															+        lines.append(f'最终质量评分: {score}/100 [{label}]')
														
 
															+        lines.append('═' * 50)
														
 
															+
														
 
															+        return '\n'.join(lines)
														
 
															+
														
 
															+    def quality_assured_build(self, build_fn, data, config, output_path,
														
 
															+                              max_iterations=None, _attempt=0) -> tuple:
														
 
															+        max_iterations = max_iterations or config.max_fix_iterations
														
 
															+        total_pages = 0
														
 
															+        needs_rebuild = False
														
 
															+        rebuilt_once = False
														
 
															+        prs = None
														
 
															+
														
 
															+        for iteration in range(1, max_iterations + 1):
														
 
															+            if iteration == 1 or needs_rebuild:
														
 
															+                if needs_rebuild:
														
 
															+                    if rebuilt_once and iteration > 2:
														
 
															+                        print(f'[INFO] 已尝试重建，不再继续重建以避免无限循环')
														
 
															+                        needs_rebuild = False
														
 
															+                    else:
														
 
															+                        print(f'[REBUILD] 检测到需要重建的页面，触发重新生成...')
														
 
															+                        rebuilt_once = True
														
 
															+                        needs_rebuild = False
														
 
															+                prs = build_fn(data, config)
														
 
															+                total_pages = len(prs.slides)
														
 
															+
														
 
															+            issues = self.inspect(prs, config)
														
 
															+
														
 
															+            if not issues:
														
 
															+                print(f'[PASS] 第 {iteration} 次迭代：无问题，质量达标')
														
 
															+                break
														
 
															+
														
 
															+            by_sev = Counter(i.severity for i in issues)
														
 
															+            print(f'[INSPECT] 第 {iteration} 次自检：{by_sev.get("critical",0)} 严重 + '
														
 
															+                  f'{by_sev.get("major",0)} 主要 + {by_sev.get("minor",0)} 次要')
														
 
															+
														
 
															+            fixable = [i for i in issues if i.auto_fixable]
														
 
															+            self.auto_fix(prs, fixable)
														
 
															+            print(f'[FIX] 自动修复了 {self.fix_count} 个问题')
														
 
															+
														
 
															+            for issue in fixable:
														
 
															+                if issue.fix_data.get('needs_rebuild'):
														
 
															+                    needs_rebuild = True
														
 
															+                    print(f'[WARN] 检测到内容严重不足，将在下一轮迭代中重建')
														
 
															+                    break
														
 
															+
														
 
															+            unfixable = [i for i in issues if not i.auto_fixable]
														
 
															+            if unfixable:
														
 
															+                print(f'[WARN] {len(unfixable)} 个问题需人工确认')
														
 
															+
														
 
															+            remaining = self.inspect(prs, config)
														
 
															+            if not remaining:
														
 
															+                print(f'[PASS] 第 {iteration} 次修复后：所有问题已解决')
														
 
															+                break
														
 
															+
														
 
															+            has_critical = any(i.severity == 'critical' for i in remaining)
														
 
															+            has_major = any(i.severity == 'major' for i in remaining)
														
 
															+            if not has_critical and not has_major:
														
 
															+                print(f'[PASS] 第 {iteration} 次修复后：仅剩次要问题，质量达标')
														
 
															+                break
														
 
															+
														
 
															+            if needs_rebuild and iteration < max_iterations:
														
 
															+                continue
														
 
															+
														
 
															+        final_issues = self.inspect(prs, config)
														
 
															+        by_sev = Counter(i.severity for i in final_issues)
														
 
															+        by_cat = Counter(i.category for i in final_issues)
														
 
															+        score = calculate_score(dict(by_sev), dict(by_cat), max(total_pages, 1))
														
 
															+        label = get_quality_label(score)
														
 
															+
														
 
															+        report = self.generate_report(final_issues, iteration, total_pages)
														
 
															+        print(report)
														
 
															+
														
 
															+        if score >= config.quality_threshold:
														
 
															+            prs.save(output_path)
														
 
															+            print(f'[PASS] 高质量 PPT 已输出: {output_path}')
														
 
															+        else:
														
 
															+            has_critical_final = any(i.severity == 'critical' for i in final_issues)
														
 
															+            has_layout_critical = any(
														
 
															+                i.severity == 'critical' and i.category == 'layout'
														
 
															+                for i in final_issues
														
 
															+            )
														
 
															+            if has_layout_critical:
														
 
															+                raise RuntimeError(
														
 
															+                    f'PPT 存在严重布局问题（评分 {score}），无法自动修复。'
														
 
															+                    f'请检查页面配置和数据。'
														
 
															+                )
														
 
															+            prs.save(output_path)
														
 
															+            if has_critical_final:
														
 
															+                print(f'[WARN] 质量评分 {score}（低于阈值 {config.quality_threshold}），'
														
 
															+                      f'存在 {by_sev.get("critical", 0)} 个严重内容问题，建议补充分析数据后重新生成')
														
 
															+            else:
														
 
															+                print(f'[WARN] 质量评分 {score}（低于阈值 {config.quality_threshold}），已输出但建议复核')
														
 
															+
														
 
															+        return prs, final_issues
														
 
															+
														
 
															+    def _check_layout(self, slide, page_idx) -> list[QualityIssue]:
														
 
															+        issues = []
														
 
															+        sw = int(slide.slide_width) if hasattr(slide, 'slide_width') else SLIDE_WIDTH
														
 
															+        sh = int(slide.slide_height) if hasattr(slide, 'slide_height') else SLIDE_HEIGHT
														
 
															+
														
 
															+        for shape in slide.shapes:
														
 
															+            l, t = int(shape.left), int(shape.top)
														
 
															+            w, h = int(shape.width), int(shape.height)
														
 
															+
														
 
															+            if l < -100:
														
 
															+                issues.append(QualityIssue('critical', 'layout', page_idx,
														
 
															+                    f'形状"{_shape_name(shape)}"飞出页面左边界 (left={l})',
														
 
															+                    'L001', True, {'shape': shape, 'type': 'left'}))
														
 
															+            if l + w > sw + 500:
														
 
															+                issues.append(QualityIssue('critical', 'layout', page_idx,
														
 
															+                    f'形状"{_shape_name(shape)}"飞出页面右边界 (right={l+w}, max={sw})',
														
 
															+                    'L002', True, {'shape': shape, 'type': 'right'}))
														
 
															+            if t < -100:
														
 
															+                issues.append(QualityIssue('critical', 'layout', page_idx,
														
 
															+                    f'形状"{_shape_name(shape)}"飞出页面顶部 (top={t})',
														
 
															+                    'L003', True, {'shape': shape, 'type': 'top'}))
														
 
															+            if t + h > sh + 500:
														
 
															+                issues.append(QualityIssue('critical', 'layout', page_idx,
														
 
															+                    f'形状"{_shape_name(shape)}"飞出页面底部 (bottom={t+h}, max={sh})',
														
 
															+                    'L004', True, {'shape': shape, 'type': 'bottom'}))
														
 
															+
														
 
															+            if l < SAFE_MARGIN and l >= 0:
														
 
															+                    if l == 0 and w >= sw * 0.8:
														
 
															+                        continue
														
 
															+                    if int(shape.top) < 0 or int(shape.top) + int(shape.height) < Emu(100000):
														
 
															+                        continue
														
 
															+                    if int(shape.top) > sh - Emu(500000):
														
 
															+                        continue
														
 
															+                    issues.append(QualityIssue('minor', 'layout', page_idx,
														
 
															+                        f'形状"{_shape_name(shape)}"过于靠近左边缘',
														
 
															+                        'L007', True, {'shape': shape, 'type': 'edge_left'}))
														
 
															+
														
 
															+        placeholder_pattern = re.compile(r'\{[^}]+\}')
														
 
															+        for shape in slide.shapes:
														
 
															+            if shape.has_text_frame:
														
 
															+                text = shape.text_frame.text
														
 
															+                if placeholder_pattern.search(text):
														
 
															+                    issues.append(QualityIssue('critical', 'layout', page_idx,
														
 
															+                        f'发现未替换占位符: "{text[:50]}"',
														
 
															+                        'L006', True, {'shape': shape, 'type': 'placeholder'}))
														
 
															+
														
 
															+        empty_artifacts = self._find_empty_template_artifacts(slide)
														
 
															+        for shape in empty_artifacts:
														
 
															+            issues.append(QualityIssue(
														
 
															+                'major', 'layout', page_idx,
														
 
															+                f'发现空模板组件残留: "{_shape_name(shape)}"',
														
 
															+                'L008', True, {'shape': shape, 'type': 'empty_template_artifact'}
														
 
															+            ))
														
 
															+
														
 
															+        shapes_list = list(slide.shapes)
														
 
															+        for i, a in enumerate(shapes_list):
														
 
															+            for b in shapes_list[i+1:]:
														
 
															+                if self._shapes_overlap(a, b):
														
 
															+                    a_name = _shape_name(a)
														
 
															+                    b_name = _shape_name(b)
														
 
															+                    if self._is_intentional_overlap(a, b):
														
 
															+                        continue
														
 
															+                    issues.append(QualityIssue('major', 'layout', page_idx,
														
 
															+                        f'形状"{a_name}"与"{b_name}"存在重叠',
														
 
															+                        'L005', True, {'shape_a': a, 'shape_b': b, 'type': 'overlap'}))
														
 
															+
														
 
															+        return issues
														
 
															+
														
 
															+    def _check_visual(self, slide, page_idx) -> list[QualityIssue]:
														
 
															+        issues = []
														
 
															+        fonts_seen = {}
														
 
															+
														
 
															+        for shape in slide.shapes:
														
 
															+            if not shape.has_text_frame:
														
 
															+                continue
														
 
															+            for para in shape.text_frame.paragraphs:
														
 
															+                for run in para.runs:
														
 
															+                    if run.font.size:
														
 
															+                        size_pt = run.font.size / 12700.0
														
 
															+                        if size_pt < 6:
														
 
															+                            issues.append(QualityIssue('major', 'visual', page_idx,
														
 
															+                                f'字号过小 ({size_pt:.1f}pt): "{run.text[:20]}"',
														
 
															+                                'V002', True, {'run': run, 'type': 'font_small'}))
														
 
															+                        elif size_pt > 65:
														
 
															+                            issues.append(QualityIssue('major', 'visual', page_idx,
														
 
															+                                f'字号过大 ({size_pt:.1f}pt): "{run.text[:20]}"',
														
 
															+                                'V003', True, {'run': run, 'type': 'font_large'}))
														
 
															+
														
 
															+                    if run.font.name:
														
 
															+                        fonts_seen[run.font.name] = fonts_seen.get(run.font.name, 0) + 1
														
 
															+
														
 
															+        if len(fonts_seen) > 3:
														
 
															+            issues.append(QualityIssue('minor', 'visual', page_idx,
														
 
															+                f'字体使用超过3种: {list(fonts_seen.keys())}',
														
 
															+                'V001', True, {'type': 'font_mixed', 'fonts': fonts_seen}))
														
 
															+
														
 
															+        return issues
														
 
															+
														
 
															+    def _check_content(self, slide, page_idx, config, prs, page_type='content') -> list[QualityIssue]:
														
 
															+        issues = []
														
 
															+
														
 
															+        if page_type in ('cover', 'end'):
														
 
															+            return issues
														
 
															+
														
 
															+        issues += self._check_dynamic_page_fit(page_idx, page_type, config)
														
 
															+        issues += self._check_core_metric_presence(slide, page_idx, page_type, config)
														
 
															+
														
 
															+        if page_type == 'toc':
														
 
															+            content_shapes = [s for s in slide.shapes
														
 
															+                             if s.has_text_frame and _is_in_content_area(s)]
														
 
															+            all_content_text = ''
														
 
															+            for shape in content_shapes:
														
 
															+                text = shape.text_frame.text.strip()
														
 
															+                if text:
														
 
															+                    all_content_text += text + '\n'
														
 
															+            if len(all_content_text.strip()) < 30:
														
 
															+                issues.append(QualityIssue('minor', 'content', page_idx,
														
 
															+                    '目录页内容过少',
														
 
															+                    'C008', False, {'type': 'empty_page'}))
														
 
															+            return issues
														
 
															+
														
 
															+        fill_ratio = calculate_fill_ratio(slide)
														
 
															+
														
 
															+        if page_type in ('kpi_overview', 'trend', 'distribution', 'ranking', 'summary'):
														
 
															+            if fill_ratio < FILL_RATIO_THRESHOLDS['sparse']:
														
 
															+                issues.append(QualityIssue('critical', 'content', page_idx,
														
 
															+                    f'页面内容严重不足，填充率仅 {fill_ratio:.1%}，必须补充图表和分析文本',
														
 
															+                    'C001', True, {'type': 'sparse', 'fill_ratio': fill_ratio}))
														
 
															+            elif fill_ratio < FILL_RATIO_THRESHOLDS['low']:
														
 
															+                issues.append(QualityIssue('major', 'content', page_idx,
														
 
															+                    f'页面留白偏多，填充率 {fill_ratio:.1%}，需补充分析内容',
														
 
															+                    'C001', True, {'type': 'sparse', 'fill_ratio': fill_ratio}))
														
 
															+        elif fill_ratio < FILL_RATIO_THRESHOLDS['sparse'] / 2:
														
 
															+            issues.append(QualityIssue('minor', 'content', page_idx,
														
 
															+                f'页面填充率过低 {fill_ratio:.1%}',
														
 
															+                'C001', False))
														
 
															+
														
 
															+        content_shapes = [s for s in slide.shapes
														
 
															+                         if s.has_text_frame and _is_in_content_area(s)]
														
 
															+        all_content_text = ''
														
 
															+        insight_blocks = 0
														
 
															+        for shape in content_shapes:
														
 
															+            tf = shape.text_frame
														
 
															+            full_text = tf.text.strip()
														
 
															+            if not full_text:
														
 
															+                continue
														
 
															+            all_content_text += full_text + '\n'
														
 
															+            for para in tf.paragraphs:
														
 
															+                para_text = para.text.strip()
														
 
															+                if para_text and len(para_text) >= TEXT_MIN_LENGTH:
														
 
															+                    insight_blocks += 1
														
 
															+
														
 
															+        total_content_chars = len(all_content_text.strip())
														
 
															+
														
 
															+        text_lengths = [len(p.text.strip()) for s in content_shapes
														
 
															+                        for p in s.text_frame.paragraphs if p.text.strip()]
														
 
															+
														
 
															+        if total_content_chars < PAGE_MIN_TEXT_LENGTH:
														
 
															+            issues.append(QualityIssue('critical', 'content', page_idx,
														
 
															+                f'页面内容为空！所有文本框总字数仅 {total_content_chars} 字（要求≥{PAGE_MIN_TEXT_LENGTH}字）',
														
 
															+                'C008', True, {'type': 'empty_page', 'char_count': total_content_chars}))
														
 
															+        elif total_content_chars < 200:
														
 
															+            issues.append(QualityIssue('major', 'content', page_idx,
														
 
															+                f'页面内容过少，总字数仅 {total_content_chars} 字，分析深度严重不足',
														
 
															+                'C008', True, {'type': 'empty_page', 'char_count': total_content_chars}))
														
 
															+
														
 
															+        if text_lengths and max(text_lengths) < TEXT_MIN_LENGTH:
														
 
															+            issues.append(QualityIssue('critical', 'content', page_idx,
														
 
															+                f'分析文本过短（最长为 {max(text_lengths)} 字），需撰写≥{TEXT_MIN_LENGTH}字的深度分析',
														
 
															+                'C005', True, {'type': 'short_text', 'max_length': max(text_lengths)}))
														
 
															+
														
 
															+        if insight_blocks < INSIGHT_MIN_COUNT:
														
 
															+            issues.append(QualityIssue('critical', 'content', page_idx,
														
 
															+                f'分析段数不足，仅 {insight_blocks} 段（要求≥{INSIGHT_MIN_COUNT}段）',
														
 
															+                'C007', True, {'type': 'insight_count', 'count': insight_blocks}))
														
 
															+
														
 
															+        has_title = False
														
 
															+        for shape in slide.shapes:
														
 
															+            if shape.has_text_frame:
														
 
															+                text = shape.text_frame.text
														
 
															+                try:
														
 
															+                    sy = int(shape.top)
														
 
															+                except Exception:
														
 
															+                    sy = 99999999
														
 
															+                if sy < CONTENT_TOP_BASE + Emu(100000) and sy > Emu(500000):
														
 
															+                    if len(text.strip()) > 0 and not text.startswith('{'):
														
 
															+                        has_title = True
														
 
															+                        break
														
 
															+                if any(kw in text for kw in ['概览', '趋势', '分布', '分析', '总结',
														
 
															+                                              '排行', '报告', '建议', '告警', '要点']):
														
 
															+                    if sy < CONTENT_TOP_BASE + Emu(400000):
														
 
															+                        has_title = True
														
 
															+                        break
														
 
															+
														
 
															+        if not has_title and page_idx > 0 and page_idx < len(prs.slides) - 1:
														
 
															+            issues.append(QualityIssue('critical', 'content', page_idx,
														
 
															+                '页面缺少标题', 'C006', True, {'type': 'missing_title'}))
														
 
															+
														
 
															+        for shape in slide.shapes:
														
 
															+            if shape.has_text_frame:
														
 
															+                if self._is_text_overflowing(shape):
														
 
															+                    issues.append(QualityIssue('major', 'content', page_idx,
														
 
															+                        f'文本可能超出文本框边界: "{shape.text_frame.text[:30]}"',
														
 
															+                        'C004', True, {'shape': shape, 'type': 'text_overflow'}))
														
 
															+
														
 
															+        has_chart = False
														
 
															+        for shape in slide.shapes:
														
 
															+            if shape.has_chart:
														
 
															+                has_chart = True
														
 
															+                break
														
 
															+
														
 
															+        if has_chart and insight_blocks == 0 and page_idx >= 2:
														
 
															+            issues.append(QualityIssue('critical', 'content', page_idx,
														
 
															+                '页面有图表但完全缺少分析文本，图表数据需要被解读和说明',
														
 
															+                'C009', True, {'type': 'chart_no_text'}))
														
 
															+
														
 
															+        return issues
														
 
															+
														
 
															+    def _check_dynamic_page_fit(self, page_idx, page_type, config) -> list[QualityIssue]:
														
 
															+        issues = []
														
 
															+        profile = getattr(config, 'data_profiling', None) or {}
														
 
															+        if not profile:
														
 
															+            return issues
														
 
															+
														
 
															+        time_cols = profile.get('time_columns', [])
														
 
															+        cat_cols = profile.get('category_columns', [])
														
 
															+        num_cols = profile.get('numeric_columns', [])
														
 
															+
														
 
															+        if page_type == 'trend' and (not time_cols or not num_cols):
														
 
															+            issues.append(QualityIssue(
														
 
															+                'critical', 'content', page_idx,
														
 
															+                '趋势页缺少可用时间列或数值列，需要重建或降级为摘要页',
														
 
															+                'C010', True, {'type': 'dynamic_page_not_supported', 'page_type': page_type}
														
 
															+            ))
														
 
															+        elif page_type in ('distribution', 'ranking') and (not cat_cols or not num_cols):
														
 
															+            issues.append(QualityIssue(
														
 
															+                'critical', 'content', page_idx,
														
 
															+                f'{page_type} 页缺少分类维度或数值列，需要重建或降级为摘要页',
														
 
															+                'C010', True, {'type': 'dynamic_page_not_supported', 'page_type': page_type}
														
 
															+            ))
														
 
															+        elif page_type == 'kpi_overview':
														
 
															+            selected_metrics = [m for m in getattr(config, 'metrics', []) if getattr(m, 'selected', True)]
														
 
															+            if len(selected_metrics) > 6:
														
 
															+                issues.append(QualityIssue(
														
 
															+                    'major', 'content', page_idx,
														
 
															+                    f'核心指标数量 {len(selected_metrics)} 超过 6 个，KPI页应拆页或改为紧凑布局',
														
 
															+                    'C011', True, {'type': 'kpi_layout_over_capacity', 'count': len(selected_metrics)}
														
 
															+                ))
														
 
															+        return issues
														
 
															+
														
 
															+    def _check_core_metric_presence(self, slide, page_idx, page_type, config) -> list[QualityIssue]:
														
 
															+        issues = []
														
 
															+        if page_type != 'kpi_overview' or not config:
														
 
															+            return issues
														
 
															+        selected_metrics = [m for m in getattr(config, 'metrics', []) if getattr(m, 'selected', True)]
														
 
															+        if not selected_metrics:
														
 
															+            return issues
														
 
															+        slide_text = '\n'.join(
														
 
															+            shape.text_frame.text for shape in slide.shapes
														
 
															+            if shape.has_text_frame and shape.text_frame.text
														
 
															+        )
														
 
															+        missing = [m.label for m in selected_metrics[:6] if m.label and m.label not in slide_text]
														
 
															+        if missing:
														
 
															+            issues.append(QualityIssue(
														
 
															+                'critical', 'data', page_idx,
														
 
															+                'KPI概览页缺少已确认核心指标：' + '、'.join(missing),
														
 
															+                'D006', True, {'type': 'core_metric_missing', 'missing': missing}
														
 
															+            ))
														
 
															+        return issues
														
 
															+
														
 
															+    def _check_data(self, slide, page_idx, prs) -> list[QualityIssue]:
														
 
															+        issues = []
														
 
															+
														
 
															+        if page_idx == 0:
														
 
															+            return issues
														
 
															+
														
 
															+        for shape in slide.shapes:
														
 
															+            if shape.has_text_frame:
														
 
															+                text = shape.text_frame.text
														
 
															+
														
 
															+                page_pattern = re.search(r'(\d+)\s*/\s*(\d+)', text)
														
 
															+                if page_pattern:
														
 
															+                    current = int(page_pattern.group(1))
														
 
															+                    total = int(page_pattern.group(2))
														
 
															+                    if total == 0:
														
 
															+                        issues.append(QualityIssue('major', 'data', page_idx,
														
 
															+                            f'页码格式异常: {text.strip()}',
														
 
															+                            'D002', True, {'type': 'page_num'}))
														
 
															+
														
 
															+        return issues
														
 
															+
														
 
															+    def _fix_layout(self, slide, issue):
														
 
															+        fd = issue.fix_data
														
 
															+        if fd.get('type') in ('left', 'right', 'top', 'bottom'):
														
 
															+            shape = fd.get('shape')
														
 
															+            if shape:
														
 
															+                ensure_safe_position(shape, SLIDE_WIDTH, SLIDE_HEIGHT)
														
 
															+                fd['fixed'] = True
														
 
															+
														
 
															+        elif fd.get('type') == 'overlap':
														
 
															+            a, b = fd.get('shape_a'), fd.get('shape_b')
														
 
															+            if a and b:
														
 
															+                try:
														
 
															+                    if int(b.left) < int(a.left) + int(a.width) + Emu(50000):
														
 
															+                        b.left = int(a.left) + int(a.width) + Emu(152400)
														
 
															+                        ensure_safe_position(b, SLIDE_WIDTH, SLIDE_HEIGHT)
														
 
															+                except Exception:
														
 
															+                    pass
														
 
															+                fd['fixed'] = True
														
 
															+
														
 
															+        elif fd.get('type') == 'placeholder':
														
 
															+            shape = fd.get('shape')
														
 
															+            if shape and shape.has_text_frame:
														
 
															+                for para in shape.text_frame.paragraphs:
														
 
															+                    para.text = re.sub(r'\{[^}]+\}', '', para.text)
														
 
															+                fd['fixed'] = True
														
 
															+
														
 
															+        elif fd.get('type') == 'edge_left':
														
 
															+            shape = fd.get('shape')
														
 
															+            if shape:
														
 
															+                try:
														
 
															+                    w = int(shape.width)
														
 
															+                    if w < SLIDE_WIDTH * 0.5:
														
 
															+                        shape.left = SAFE_MARGIN
														
 
															+                except Exception:
														
 
															+                    pass
														
 
															+                fd['fixed'] = True
														
 
															+
														
 
															+        elif fd.get('type') == 'empty_template_artifact':
														
 
															+            shape = fd.get('shape')
														
 
															+            if shape:
														
 
															+                self._remove_shape(shape)
														
 
															+                fd['fixed'] = True
														
 
															+
														
 
															+    def _fix_visual(self, slide, issue):
														
 
															+        fd = issue.fix_data
														
 
															+        if fd.get('type') == 'font_small':
														
 
															+            run = fd.get('run')
														
 
															+            if run:
														
 
															+                run.font.size = FONT_SIZE_MIN
														
 
															+                fd['fixed'] = True
														
 
															+        elif fd.get('type') == 'font_large':
														
 
															+            run = fd.get('run')
														
 
															+            if run:
														
 
															+                run.font.size = FONT_SIZE_MAX
														
 
															+                fd['fixed'] = True
														
 
															+        elif fd.get('type') == 'font_mixed':
														
 
															+            for shape in slide.shapes:
														
 
															+                if shape.has_text_frame:
														
 
															+                    for para in shape.text_frame.paragraphs:
														
 
															+                        for run in para.runs:
														
 
															+                            run.font.name = DEFAULT_FONT
														
 
															+            fd['fixed'] = True
														
 
															+
														
 
															+    def _fix_content(self, slide, issue, prs):
														
 
															+        fd = issue.fix_data
														
 
															+        if fd.get('type') == 'sparse':
														
 
															+            fill_ratio = fd.get('fill_ratio', 0)
														
 
															+            if fill_ratio < FILL_RATIO_THRESHOLDS['low']:
														
 
															+                try:
														
 
															+                    box = slide.shapes.add_textbox(
														
 
															+                        CONTENT_LEFT, Emu(int(FOOTER_TOP) - Emu(1600000)),
														
 
															+                        Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT - Emu(200000)), Emu(1500000))
														
 
															+                    tf = box.text_frame
														
 
															+                    tf.word_wrap = True
														
 
															+                    p = tf.paragraphs[0]
														
 
															+                    p.text = (
														
 
															+                        '[WARNING] 此页面内容不足，需补充深度分析内容。'
														
 
															+                        '分析应包含：具体数据引用（含数值和单位）、'
														
 
															+                        '与同类/历史/目标的对比分析、'
														
 
															+                        '数据背后原因的至少2条解读、'
														
 
															+                        '以及可执行的业务行动建议。'
														
 
															+                        '请勿使用"要加强"、"进一步优化"等模糊措辞。'
														
 
															+                    )
														
 
															+                    p.font.size = Pt(12)
														
 
															+                    p.font.color.rgb = RGBColor(0xCC, 0x33, 0x00)
														
 
															+                    p.font.name = DEFAULT_FONT
														
 
															+                    p.font.bold = True
														
 
															+                    fd['fixed'] = True
														
 
															+                    fd['needs_rebuild'] = True
														
 
															+                except Exception:
														
 
															+                    pass
														
 
															+
														
 
															+        elif fd.get('type') == 'empty_page':
														
 
															+            fd['needs_rebuild'] = True
														
 
															+            fd['fixed'] = True
														
 
															+
														
 
															+        elif fd.get('type') == 'chart_no_text':
														
 
															+            fd['needs_rebuild'] = True
														
 
															+            fd['fixed'] = True
														
 
															+
														
 
															+        elif fd.get('type') == 'insight_count':
														
 
															+            fd['needs_rebuild'] = True
														
 
															+            fd['fixed'] = True
														
 
															+
														
 
															+        elif fd.get('type') == 'short_text':
														
 
															+            fd['needs_rebuild'] = True
														
 
															+            fd['fixed'] = True
														
 
															+
														
 
															+        elif fd.get('type') in ('dynamic_page_not_supported', 'kpi_layout_over_capacity'):
														
 
															+            fd['needs_rebuild'] = True
														
 
															+            fd['fixed'] = True
														
 
															+
														
 
															+        elif fd.get('type') == 'core_metric_missing':
														
 
															+            fd['needs_rebuild'] = True
														
 
															+            fd['fixed'] = True
														
 
															+
														
 
															+        elif fd.get('type') == 'missing_title':
														
 
															+            try:
														
 
															+                box = slide.shapes.add_textbox(
														
 
															+                    CONTENT_LEFT, Emu(914400),
														
 
															+                    Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT - Emu(200000)), Emu(508000))
														
 
															+                p = box.text_frame.paragraphs[0]
														
 
															+                p.text = '数据详情'
														
 
															+                p.font.size = Pt(24)
														
 
															+                p.font.bold = True
														
 
															+                p.font.color.rgb = RGBColor(0x33, 0x33, 0x33)
														
 
															+                p.font.name = DEFAULT_FONT
														
 
															+                fd['fixed'] = True
														
 
															+            except Exception:
														
 
															+                pass
														
 
															+
														
 
															+        elif fd.get('type') == 'text_overflow':
														
 
															+            shape = fd.get('shape')
														
 
															+            if shape and shape.has_text_frame:
														
 
															+                text_len = len(shape.text_frame.text or '')
														
 
															+                try:
														
 
															+                    if text_len > 180 or int(shape.top) + int(shape.height) > int(FOOTER_TOP) - Emu(120000):
														
 
															+                        fd['needs_rebuild'] = True
														
 
															+                    else:
														
 
															+                        for para in shape.text_frame.paragraphs:
														
 
															+                            for run in para.runs:
														
 
															+                                if run.font.size and run.font.size > Pt(9):
														
 
															+                                    run.font.size = Pt(9)
														
 
															+                except Exception:
														
 
															+                    fd['needs_rebuild'] = True
														
 
															+                fd['fixed'] = True
														
 
															+
														
 
															+    def _fix_data(self, slide, issue, prs):
														
 
															+        fd = issue.fix_data
														
 
															+        if fd.get('type') == 'page_num':
														
 
															+            fd['fixed'] = True
														
 
															+
														
 
															+    def _shapes_overlap(self, a, b) -> bool:
														
 
															+        ax, ay, aw, ah = int(a.left), int(a.top), int(a.width), int(a.height)
														
 
															+        bx, by, bw, bh = int(b.left), int(b.top), int(b.width), int(b.height)
														
 
															+
														
 
															+        if ax + aw <= bx or bx + bw <= ax:
														
 
															+            return False
														
 
															+        if ay + ah <= by or by + bh <= ay:
														
 
															+            return False
														
 
															+        return True
														
 
															+
														
 
															+    def _is_intentional_overlap(self, a, b) -> bool:
														
 
															+        if hasattr(a, 'is_placeholder') or hasattr(b, 'is_placeholder'):
														
 
															+            return True
														
 
															+        a_area = int(a.width) * int(a.height)
														
 
															+        b_area = int(b.width) * int(b.height)
														
 
															+        if a_area > b_area * 3 or b_area > a_area * 3:
														
 
															+            return True
														
 
															+        return False
														
 
															+
														
 
															+    def _is_title_shape(self, shape) -> bool:
														
 
															+        if not shape.has_text_frame:
														
 
															+            return False
														
 
															+        try:
														
 
															+            y = int(shape.top)
														
 
															+            return y < int(CONTENT_TOP_BASE) + Emu(200000)
														
 
															+        except Exception:
														
 
															+            return False
														
 
															+
														
 
															+    def _find_empty_template_artifacts(self, slide) -> list:
														
 
															+        artifacts = []
														
 
															+        shapes = list(slide.shapes)
														
 
															+        empty_text_boxes = []
														
 
															+
														
 
															+        for shape in shapes:
														
 
															+            if shape.has_text_frame:
														
 
															+                text = (shape.text_frame.text or '').strip()
														
 
															+                if text:
														
 
															+                    continue
														
 
															+                if int(shape.width) < Emu(200000) or int(shape.height) < Emu(120000):
														
 
															+                    continue
														
 
															+                if int(shape.top) < Emu(900000) or int(shape.top) > int(FOOTER_TOP) - Emu(100000):
														
 
															+                    continue
														
 
															+                empty_text_boxes.append(shape)
														
 
															+                artifacts.append(shape)
														
 
															+
														
 
															+        for shape in shapes:
														
 
															+            if shape.has_text_frame:
														
 
															+                continue
														
 
															+            try:
														
 
															+                is_large_soft_card = (
														
 
															+                    int(shape.width) >= Emu(1000000) and
														
 
															+                    int(shape.height) >= Emu(500000) and
														
 
															+                    int(shape.top) < int(FOOTER_TOP) - Emu(400000)
														
 
															+                )
														
 
															+                if not is_large_soft_card:
														
 
															+                    continue
														
 
															+                overlaps_empty_text = any(self._shapes_overlap(shape, box) for box in empty_text_boxes)
														
 
															+                if overlaps_empty_text:
														
 
															+                    artifacts.append(shape)
														
 
															+            except Exception:
														
 
															+                continue
														
 
															+
														
 
															+        # Preserve order while de-duplicating.
														
 
															+        seen = set()
														
 
															+        unique = []
														
 
															+        for shape in artifacts:
														
 
															+            key = id(shape)
														
 
															+            if key not in seen:
														
 
															+                unique.append(shape)
														
 
															+                seen.add(key)
														
 
															+        return unique
														
 
															+
														
 
															+    def _remove_shape(self, shape):
														
 
															+        el = shape.element
														
 
															+        el.getparent().remove(el)
														
 
															+
														
 
															+    def _is_text_overflowing(self, shape) -> bool:
														
 
															+        if not shape.has_text_frame:
														
 
															+            return False
														
 
															+        text = shape.text_frame.text
														
 
															+        if not text.strip():
														
 
															+            return False
														
 
															+        if len(text) > 800:
														
 
															+            return True
														
 
															+        try:
														
 
															+            w = int(shape.width)
														
 
															+            h = int(shape.height)
														
 
															+            width_pt = max(1, w / 12700.0)
														
 
															+            max_font_pt = 10
														
 
															+            para_count = 0
														
 
															+            for para in shape.text_frame.paragraphs:
														
 
															+                if not para.text.strip():
														
 
															+                    continue
														
 
															+                para_count += 1
														
 
															+                for run in para.runs:
														
 
															+                    if run.font.size:
														
 
															+                        max_font_pt = max(max_font_pt, run.font.size / 12700.0)
														
 
															+            chars_per_line = max(8, int(width_pt / (max_font_pt * 1.15)))
														
 
															+            est_lines = max(1, (len(text) + chars_per_line - 1) // chars_per_line)
														
 
															+            est_height = int((est_lines * max_font_pt * 1.2 + para_count * 4) * 12700)
														
 
															+            if est_height > h * 1.15:
														
 
															+                return True
														
 
															+            if h < Emu(200000) and len(text) > 80:
														
 
															+                return True
														
 
															+        except Exception:
														
 
															+            pass
														
 
															+        return False
														
 
															+
														
 
															+
														
 
															+def _shape_name(shape):
														
 
															+    try:
														
 
															+        if shape.has_text_frame:
														
 
															+            return shape.text_frame.text[:20].replace('\n', ' ')
														
 
															+    except Exception:
														
 
															+        pass
														
 
															+    try:
														
 
															+        return shape.shape_type
														
 
															+    except Exception:
														
 
															+        pass
														
 
															+    return '无名形状'
														
 
															+
														
 
															+
														
 
															+def _is_in_content_area(shape):
														
 
															+    try:
														
 
															+        return int(shape.top) >= int(CONTENT_TOP_BASE)
														
 
															+    except Exception:
														
 
															+        return False
														
 
															+
														
 
															+
														
 
if __name__ == '__main__':
    # Manual smoke check when run as a script: constructs a QualityInspector
    # with no arguments and prints progress messages around it.
    print("QualityInspector module loaded")
    inspector = QualityInspector()
    print("Ready to inspect PPT files")
														
--- a/generate-data-report-ppt/scripts/quality_rules.py
+++ b/generate-data-report-ppt/scripts/quality_rules.py
@@ -0,0 +1,141 @@
 
															+"""
														
 
															+Quality inspection rule definitions for PPT quality assurance.
														
 
															+Each rule defines a check function, severity level, and auto-fix strategy.
														
 
															+"""
														
 
															+from pptx.util import Emu, Pt
														
 
															+from dataclasses import dataclass, field
														
 
															+from typing import Callable, Optional
														
 
															+
														
 
															+
														
 
															+@dataclass
														
 
															+class QualityRule:
														
 
															+    rule_id: str
														
 
															+    category: str
														
 
															+    description: str
														
 
															+    severity: str
														
 
															+    auto_fixable: bool
														
 
															+    check_fn: str
														
 
															+    fix_fn: str
														
 
															+
														
 
															+
														
 
# Registry of all quality rules. Positional arguments follow QualityRule's
# field order: rule_id, category, description, severity, auto_fixable,
# check_fn, fix_fn.
QUALITY_RULES = [
    # Layout rules (L0xx).
    QualityRule('L001', 'layout', '元素飞出页面左边界', 'critical', True, '_check_left_bounds', '_fix_left_bounds'),
    QualityRule('L002', 'layout', '元素飞出页面右边界', 'critical', True, '_check_right_bounds', '_fix_right_bounds'),
    QualityRule('L003', 'layout', '元素飞出页面顶部', 'critical', True, '_check_top_bounds', '_fix_top_bounds'),
    QualityRule('L004', 'layout', '元素飞出页面底部', 'critical', True, '_check_bottom_bounds', '_fix_bottom_bounds'),
    QualityRule('L005', 'layout', '图文重叠', 'critical', True, '_check_overlap', '_fix_overlap'),
    QualityRule('L006', 'layout', '占位符未替换', 'critical', True, '_check_placeholders', '_fix_placeholders'),
    QualityRule('L007', 'layout', '元素紧贴页面边缘', 'minor', True, '_check_edge_proximity', '_fix_edge_proximity'),

    # Visual rules (V0xx).
    QualityRule('V001', 'visual', '字体不一致', 'minor', True, '_check_font_consistency', '_fix_font_consistency'),
    QualityRule('V002', 'visual', '字号过小(<8pt)', 'major', True, '_check_font_too_small', '_fix_font_too_small'),
    QualityRule('V003', 'visual', '字号过大(>60pt)', 'major', True, '_check_font_too_large', '_fix_font_too_large'),
    QualityRule('V004', 'visual', '颜色对比度不足', 'major', True, '_check_contrast', '_fix_contrast'),
    QualityRule('V005', 'visual', '图片拉伸变形', 'major', True, '_check_image_aspect', '_fix_image_aspect'),

    # Content rules (C0xx).
    # NOTE(review): C005's description says "<100字" but TEXT_MIN_LENGTH in
    # this module is 80 — confirm which threshold is intended.
    QualityRule('C001', 'content', '页面留白过多(填充率<35%)', 'critical', True, '_check_sparse_page', '_fix_sparse_page'),
    QualityRule('C002', 'content', 'KPI卡片数值为空', 'critical', True, '_check_empty_kpi', '_fix_empty_kpi'),
    QualityRule('C003', 'content', '图表无数据', 'critical', True, '_check_empty_chart', '_fix_empty_chart'),
    QualityRule('C004', 'content', '文本截断溢出', 'major', True, '_check_text_overflow', '_fix_text_overflow'),
    QualityRule('C005', 'content', '分析文本过短(<100字)', 'critical', True, '_check_short_text', '_fix_short_text'),
    QualityRule('C006', 'content', '页面缺少标题', 'critical', True, '_check_missing_title', '_fix_missing_title'),
    QualityRule('C007', 'content', '分析段数不足', 'critical', True, '_check_insight_count', '_fix_insight_count'),
    QualityRule('C008', 'content', '页面内容为空(<50字)', 'critical', True, '_check_empty_page', '_fix_empty_page'),
    QualityRule('C009', 'content', '图表缺少分析文本', 'critical', True, '_check_chart_no_text', '_fix_chart_no_text'),

    # Data rules (D0xx). D001 is the only rule without an automatic fix
    # (auto_fixable=False, fix_fn=None).
    QualityRule('D001', 'data', '图表数据与文本矛盾', 'critical', False, '_check_data_text_contradiction', None),
    QualityRule('D002', 'data', '页码错乱', 'major', True, '_check_page_numbers', '_fix_page_numbers'),
    QualityRule('D003', 'data', '数据来源缺失', 'major', True, '_check_missing_source', '_fix_missing_source'),
    QualityRule('D004', 'data', '表格列宽不合理', 'minor', True, '_check_table_column_width', '_fix_table_column_width'),
    QualityRule('D005', 'data', '图表刻度异常', 'minor', True, '_check_axis_scale', '_fix_axis_scale'),
]

# Additional rules appended after the base list; three of them share the
# '_fix_rebuild_page' fix function.
QUALITY_RULES.extend([
    QualityRule('L008', 'layout', '空模板组件残留', 'major', True,
                '_check_empty_template_artifacts', '_fix_empty_template_artifacts'),
    QualityRule('C010', 'content', '动态页面与数据画像不匹配', 'critical', True,
                '_check_dynamic_page_fit', '_fix_rebuild_page'),
    QualityRule('C011', 'content', 'KPI布局容量不足', 'major', True,
                '_check_kpi_layout_capacity', '_fix_rebuild_page'),
    QualityRule('D006', 'data', '六项确认与输出不一致', 'critical', True,
                '_check_confirmation_alignment', '_fix_rebuild_page'),
])
														
 
															+
														
 
															+
														
 
# Penalty points charged per issue of each severity. Read by calculate_score,
# which falls back to 5 for a severity not listed here.
SEVERITY_WEIGHTS = {
    'critical': 20,
    'major': 10,
    'minor': 3,
}

# Relative weight of each rule category (the values sum to 1.0).
# NOTE(review): not referenced anywhere in this module — calculate_score does
# not use categories; confirm whether another module consumes these weights.
CATEGORY_WEIGHTS = {
    'layout': 0.30,
    'visual': 0.25,
    'content': 0.25,
    'data': 0.20,
}

# Named breakpoints for a page's fill ratio. The 'low' value (0.35) matches
# the 35% figure quoted in rule C001's description.
FILL_RATIO_THRESHOLDS = {
    'sparse': 0.20,
    'low': 0.35,
    'acceptable': 0.55,
    'good': 0.70,
}

# Font-size bounds; they match the 8pt / 60pt figures in rules V002 and V003.
FONT_SIZE_MIN = Pt(8)
FONT_SIZE_MAX = Pt(60)
# NOTE(review): rule C005's description says "<100字" while this is 80 —
# confirm which value is intended.
TEXT_MIN_LENGTH = 80
# Presumably the minimum number of analysis paragraphs for rule C007 — confirm.
INSIGHT_MIN_COUNT = 2
# Matches the "<50字" figure in rule C008's description.
PAGE_MIN_TEXT_LENGTH = 50
# Layout geometry, expressed as python-pptx Emu lengths.
SAFE_MARGIN = Emu(762000)
CONTENT_LEFT = Emu(762000)
CONTENT_TOP_BASE = Emu(1524000)
FOOTER_TOP = Emu(8824000)
# Slide size as plain ints (not Emu objects); presumably also EMU. The two
# values form an exact 16:9 ratio.
SLIDE_WIDTH = 16256000
SLIDE_HEIGHT = 9144000

# Default font family names for text and for numbers.
DEFAULT_FONT = '微软雅黑'
DEFAULT_NUMBER_FONT = 'Arial'
														
 
															+
														
 
															+
														
 
															+def get_rules_by_category(category: str) -> list[QualityRule]:
														
 
															+    return [r for r in QUALITY_RULES if r.category == category]
														
 
															+
														
 
															+
														
 
															+def get_rules_by_severity(severity: str) -> list[QualityRule]:
														
 
															+    return [r for r in QUALITY_RULES if r.severity == severity]
														
 
															+
														
 
															+
														
 
															+def calculate_score(issues_by_severity: dict, issues_by_category: dict, total_pages: int) -> int:
														
 
															+    if total_pages <= 0:
														
 
															+        return 100
														
 
															+
														
 
															+    penalty = 0
														
 
															+    for sev, count in issues_by_severity.items():
														
 
															+        weight = SEVERITY_WEIGHTS.get(sev, 5)
														
 
															+        penalty += count * weight
														
 
															+
														
 
															+    per_page_penalty = min(penalty / total_pages, 80)
														
 
															+    score = max(0, 100 - per_page_penalty)
														
 
															+    return int(score)
														
 
															+
														
 
															+
														
 
															+def get_quality_label(score: int) -> str:
														
 
															+    if score >= 90:
														
 
															+        return '优质'
														
 
															+    elif score >= 75:
														
 
															+        return '良好'
														
 
															+    elif score >= 60:
														
 
															+        return '待改善'
														
 
															+    else:
														
 
															+        return '不合格'
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    print(f"Loaded {len(QUALITY_RULES)} quality rules")
														
 
															+    for cat in ['layout', 'visual', 'content', 'data']:
														
 
															+        rules = get_rules_by_category(cat)
														
 
															+        print(f"  {cat}: {len(rules)} rules")
														
 
															+    for sev in ['critical', 'major', 'minor']:
														
 
															+        rules = get_rules_by_severity(sev)
														
 
															+        print(f"  {sev}: {len(rules)} rules")
														
--- a/generate-data-report-ppt/scripts/report_config.py
+++ b/generate-data-report-ppt/scripts/report_config.py
@@ -0,0 +1,255 @@
 
															+"""
														
 
															+Report configuration data models for the universal data report generator.
														
 
															+Defines ReportConfig, MetricDef, PageDef, ThemeConfig, and related enums.
														
 
															+"""
														
 
															+from dataclasses import dataclass, field
														
 
															+from enum import Enum
														
 
															+from datetime import date
														
 
															+from typing import Optional
														
 
															+
														
 
															+
														
 
class PeriodType(str, Enum):
    """Reporting period granularity; the type of ``ReportConfig.period_type``.

    Members are also ``str`` instances, so each compares equal to its value.
    """

    DAILY = 'daily'
    WEEKLY = 'weekly'
    MONTHLY = 'monthly'
    QUARTERLY = 'quarterly'
    CUSTOM = 'custom'
														
 
															+
														
 
															+
														
 
class AudienceType(str, Enum):
    """Target audience of a report; the type of ``ReportConfig.audience``."""

    MANAGEMENT = 'management'
    OPERATION = 'operation'
    CLIENT = 'client'
    CUSTOM = 'custom'
														
 
															+
														
 
															+
														
 
class ComparisonType(str, Enum):
    """Comparison baseline for a report; the type of ``ReportConfig.comparison``."""

    PREV_PERIOD = 'prev_period'
    YOY = 'yoy'  # presumably year-over-year — confirm
    NONE = 'none'
														
 
															+
														
 
															+
														
 
class ColumnRole(str, Enum):
    """Role assigned to a data column; the type of ``ColumnProfile.role``."""

    TIME = 'time'
    NUMERIC = 'numeric'
    CATEGORY = 'category'
    TEXT = 'text'
    ID = 'id'
    BOOLEAN = 'boolean'
    UNKNOWN = 'unknown'
														
 
															+
														
 
															+
														
 
class AggregationType(str, Enum):
    """Aggregation applied to a metric's column; the type of ``MetricDef.aggregation``."""

    SUM = 'sum'
    COUNT = 'count'
    AVG = 'avg'
    MAX = 'max'
    MIN = 'min'
    DISTINCT_COUNT = 'distinct_count'
														
 
															+
														
 
															+
														
 
class MetricType(str, Enum):
    """Kind of metric; the type of ``MetricDef.metric_type`` (default ``KPI``)."""

    KPI = 'kpi'
    TREND = 'trend'
    DISTRIBUTION = 'distribution'
    RANKING = 'ranking'
    FUNNEL = 'funnel'
    ALERT = 'alert'
														
 
															+
														
 
															+
														
 
class ChartType(str, Enum):
    """Chart/visual element kinds.

    Not referenced by any other definition in this module.
    """

    COLUMN = 'column'
    BAR = 'bar'
    LINE = 'line'
    DOUGHNUT = 'doughnut'
    PIE = 'pie'
    FUNNEL = 'funnel'
    TABLE = 'table'
    GROUPED_BAR = 'grouped_bar'
														
 
															+
														
 
															+
														
 
class ThemePreset(str, Enum):
    """Identifier of a visual theme preset; the type of ``ThemeConfig.preset``."""

    BUSINESS_CLASSIC = 'business_classic'
    FRESH_SIMPLE = 'fresh_simple'
    DARK_PROFESSIONAL = 'dark_professional'
    WARM_BRAND = 'warm_brand'
    CUSTOM = 'custom'
														
 
															+
														
 
															+
														
 
@dataclass
class ColumnProfile:
    """Profiling result for a single data column."""

    column_name: str
    dtype: str
    role: ColumnRole
    null_count: int
    null_rate: float  # presumably null_count divided by the row count — confirm
    unique_count: int
    # Mutable defaults go through default_factory so instances never share a list.
    sample_values: list = field(default_factory=list)
    numeric_stats: Optional[dict] = None
    inferred_label: str = ''
														
 
															+
														
 
															+
														
 
@dataclass
class MetricDef:
    """Definition of one report metric computed from a data column."""

    name: str
    label: str    # display label; used in validate_six_confirmations messages
    column: str   # source column; checked against the data columns during validation
    aggregation: AggregationType
    metric_type: MetricType = MetricType.KPI
    unit: str = ''
    format_spec: str = ',.0f'  # presumably a format() specification string — confirm
    selected: bool = True      # only selected metrics count during validation
    is_primary: bool = False
														
 
															+
														
 
															+
														
 
@dataclass
class PageDef:
    """Definition of one page (slide) in the report structure."""

    page_id: str
    title: str
    page_type: str
    order: int
    selected: bool = True  # only selected pages are counted against the page range
    elements: list[dict] = field(default_factory=list)
    conclusion_title: str = ''
														
 
															+
														
 
															+
														
 
															+@dataclass
														
 
															+class ConfirmationSpec:
														
 
															+    """Six user confirmations required before building a report."""
														
 
															+    period_and_page_range: bool = False
														
 
															+    core_metrics: bool = False
														
 
															+    audience_and_decision: bool = False
														
 
															+    visual_style_and_palette: bool = False
														
 
															+    page_structure_and_template: bool = False
														
 
															+    data_scope_and_field_mapping: bool = False
														
 
															+
														
 
															+    def missing_items(self) -> list[str]:
														
 
															+        labels = {
														
 
															+            'period_and_page_range': '报告周期与页数范围',
														
 
															+            'core_metrics': '核心指标集',
														
 
															+            'audience_and_decision': '受众与决策场景',
														
 
															+            'visual_style_and_palette': '视觉风格与配色方向',
														
 
															+            'page_structure_and_template': '页面结构与模板方案',
														
 
															+            'data_scope_and_field_mapping': '数据范围与字段映射',
														
 
															+        }
														
 
															+        return [
														
 
															+            label for field_name, label in labels.items()
														
 
															+            if not getattr(self, field_name)
														
 
															+        ]
														
 
															+
														
 
															+    def is_complete(self) -> bool:
														
 
															+        return not self.missing_items()
														
 
															+
														
 
															+
														
 
															+@dataclass
														
 
															+class ThemeConfig:
														
 
															+    preset: ThemePreset = ThemePreset.BUSINESS_CLASSIC
														
 
															+    name: str = '商务经典'
														
 
															+    primary: str = '#1E3A5F'
														
 
															+    accent: str = '#10B981'
														
 
															+    accent_neg: str = '#EF4444'
														
 
															+    secondary: str = '#64748B'
														
 
															+    dark: str = '#1F3A5C'
														
 
															+    white: str = '#FFFFFF'
														
 
															+    gray_bg: str = '#F2F2F2'
														
 
															+    card_bg: str = '#E7F0F7'
														
 
															+    text: str = '#333333'
														
 
															+    text_gray: str = '#666666'
														
 
															+    line: str = '#D9D9D9'
														
 
															+    chart_series: list[str] = field(default_factory=lambda: [
														
 
															+        '#1E3A5F', '#10B981', '#ED7D31', '#64748B',
														
 
															+        '#EF4444', '#707070', '#4472C4', '#10B981'
														
 
															+    ])
														
 
															+    title_font: str = '微软雅黑'
														
 
															+    body_font: str = '微软雅黑'
														
 
															+    number_font: str = 'Arial'
														
 
															+
														
 
															+
														
 
															+@dataclass
														
 
															+class ReportConfig:
														
 
															+    title: str
														
 
															+    period_type: PeriodType
														
 
															+    date_range: tuple[date, date]
														
 
															+    period_str: str = ''
														
 
															+
														
 
															+    metrics: list[MetricDef] = field(default_factory=list)
														
 
															+    pages: list[PageDef] = field(default_factory=list)
														
 
															+
														
 
															+    audience: AudienceType = AudienceType.MANAGEMENT
														
 
															+    decision_scenario: str = ''
														
 
															+    custom_audience: str = ''
														
 
															+
														
 
															+    theme: ThemeConfig = field(default_factory=ThemeConfig)
														
 
															+    template_path: str = ''
														
 
															+    visual_style_direction: str = ''
														
 
															+    page_structure_template: str = ''
														
 
															+
														
 
															+    filters: dict = field(default_factory=dict)
														
 
															+    comparison: ComparisonType = ComparisonType.PREV_PERIOD
														
 
															+    page_count_range: tuple[int, int] = (6, 15)
														
 
															+
														
 
															+    source_label: str = '数据报告系统'
														
 
															+
														
 
															+    data_scope: str = ''
														
 
															+    data_field_mapping: dict = field(default_factory=dict)
														
 
															+
														
 
															+    data_profiling: Optional[dict] = None
														
 
															+    agent_recommendations: Optional[dict] = None
														
 
															+    user_confirmation: ConfirmationSpec = field(default_factory=ConfirmationSpec)
														
 
															+    require_six_confirmations: bool = True
														
 
															+
														
 
															+    quality_threshold: int = 85
														
 
															+    max_fix_iterations: int = 5
														
 
															+
														
 
															+    def to_dict(self) -> dict:
														
 
															+        return {
														
 
															+            'title': self.title,
														
 
															+            'period_type': self.period_type.value,
														
 
															+            'period_str': self.period_str,
														
 
															+            'page_count_range': list(self.page_count_range),
														
 
															+            'audience': self.audience.value,
														
 
															+            'theme_preset': self.theme.preset.value,
														
 
															+            'metrics_count': len(self.metrics),
														
 
															+            'pages_count': len(self.pages),
														
 
															+            'six_confirmations_complete': self.user_confirmation.is_complete(),
														
 
															+        }
														
 
															+
														
 
															+
														
 
															+def validate_six_confirmations(config: ReportConfig, data_columns: Optional[list[str]] = None) -> list[str]:
														
 
															+    """Return validation gaps for the six confirmation contract."""
														
 
															+    issues = []
														
 
															+
														
 
															+    missing = config.user_confirmation.missing_items()
														
 
															+    if missing:
														
 
															+        issues.append('六项确认未完成：' + '、'.join(missing))
														
 
															+
														
 
															+    if not config.period_str and not config.date_range:
														
 
															+        issues.append('缺少报告周期。')
														
 
															+    if not config.page_count_range or len(config.page_count_range) != 2:
														
 
															+        issues.append('缺少页数范围。')
														
 
															+    if not [m for m in config.metrics if m.selected]:
														
 
															+        issues.append('缺少已确认的核心指标集。')
														
 
															+    if not config.decision_scenario:
														
 
															+        issues.append('缺少受众与决策场景说明。')
														
 
															+    if not config.visual_style_direction and not config.theme:
														
 
															+        issues.append('缺少视觉风格与配色方向。')
														
 
															+    if not config.pages:
														
 
															+        issues.append('缺少页面结构与模板方案。')
														
 
															+    if not config.data_field_mapping:
														
 
															+        issues.append('缺少数据范围与字段映射。')
														
 
															+
														
 
															+    if data_columns:
														
 
															+        missing_cols = []
														
 
															+        for metric in config.metrics:
														
 
															+            if metric.selected and metric.column and metric.column not in data_columns:
														
 
															+                missing_cols.append(f'{metric.label} -> {metric.column}')
														
 
															+        if missing_cols:
														
 
															+            issues.append('核心指标字段映射不存在：' + '、'.join(missing_cols[:8]))
														
 
															+
														
 
															+    selected_pages = [p for p in config.pages if p.selected]
														
 
															+    if config.page_count_range and selected_pages:
														
 
															+        low, high = config.page_count_range
														
 
															+        if len(selected_pages) < low - 1 or len(selected_pages) > high + 1:
														
 
															+            issues.append(f'页面数量 {len(selected_pages)} 不在确认范围 {low}-{high} 页附近。')
														
 
															+
														
 
															+    return issues
														
--- a/generate-data-report-ppt/scripts/theme_manager.py
+++ b/generate-data-report-ppt/scripts/theme_manager.py
@@ -0,0 +1,146 @@
 
															+"""
														
 
															+Multi-theme color and visual style manager for the universal data report generator.
														
 
															+"""
														
 
															+from pptx.dml.color import RGBColor
														
 
															+from report_config import ThemeConfig, ThemePreset
														
 
															+
														
 
															+
														
 
															+PRESETS = {
														
 
															+    ThemePreset.BUSINESS_CLASSIC: ThemeConfig(
														
 
															+        preset=ThemePreset.BUSINESS_CLASSIC,
														
 
															+        name='商务经典',
														
 
															+        primary='#1E3A5F',
														
 
															+        accent='#10B981',
														
 
															+        accent_neg='#EF4444',
														
 
															+        secondary='#64748B',
														
 
															+        dark='#1F3A5C',
														
 
															+        white='#FFFFFF',
														
 
															+        gray_bg='#F2F2F2',
														
 
															+        card_bg='#E7F0F7',
														
 
															+        text='#333333',
														
 
															+        text_gray='#666666',
														
 
															+        line='#D9D9D9',
														
 
															+        chart_series=[
														
 
															+            '#1E3A5F', '#10B981', '#ED7D31', '#64748B',
														
 
															+            '#EF4444', '#707070', '#4472C4', '#5B9BD5',
														
 
															+        ],
														
 
															+    ),
														
 
															+    ThemePreset.FRESH_SIMPLE: ThemeConfig(
														
 
															+        preset=ThemePreset.FRESH_SIMPLE,
														
 
															+        name='清新简约',
														
 
															+        primary='#1B8A5E',
														
 
															+        accent='#10B981',
														
 
															+        accent_neg='#EF4444',
														
 
															+        secondary='#94A3B8',
														
 
															+        dark='#0F5C3B',
														
 
															+        white='#FFFFFF',
														
 
															+        gray_bg='#F8FAFC',
														
 
															+        card_bg='#ECFDF5',
														
 
															+        text='#1E293B',
														
 
															+        text_gray='#64748B',
														
 
															+        line='#E2E8F0',
														
 
															+        chart_series=[
														
 
															+            '#1B8A5E', '#3B82F6', '#F59E0B', '#94A3B8',
														
 
															+            '#EF4444', '#8B5CF6', '#06B6D4', '#10B981',
														
 
															+        ],
														
 
															+    ),
														
 
															+    ThemePreset.DARK_PROFESSIONAL: ThemeConfig(
														
 
															+        preset=ThemePreset.DARK_PROFESSIONAL,
														
 
															+        name='深色专业',
														
 
															+        primary='#1E293B',
														
 
															+        accent='#38BDF8',
														
 
															+        accent_neg='#F87171',
														
 
															+        secondary='#94A3B8',
														
 
															+        dark='#0F172A',
														
 
															+        white='#FFFFFF',
														
 
															+        gray_bg='#F1F5F9',
														
 
															+        card_bg='#E2E8F0',
														
 
															+        text='#1E293B',
														
 
															+        text_gray='#475569',
														
 
															+        line='#CBD5E1',
														
 
															+        chart_series=[
														
 
															+            '#1E293B', '#38BDF8', '#F59E0B', '#94A3B8',
														
 
															+            '#F87171', '#A78BFA', '#34D399', '#FB923C',
														
 
															+        ],
														
 
															+    ),
														
 
															+    ThemePreset.WARM_BRAND: ThemeConfig(
														
 
															+        preset=ThemePreset.WARM_BRAND,
														
 
															+        name='温暖品牌',
														
 
															+        primary='#C2410C',
														
 
															+        accent='#F97316',
														
 
															+        accent_neg='#DC2626',
														
 
															+        secondary='#78716C',
														
 
															+        dark='#7C2D12',
														
 
															+        white='#FFFFFF',
														
 
															+        gray_bg='#FFFBEB',
														
 
															+        card_bg='#FFF7ED',
														
 
															+        text='#292524',
														
 
															+        text_gray='#78716C',
														
 
															+        line='#D6D3D1',
														
 
															+        chart_series=[
														
 
															+            '#C2410C', '#F97316', '#EAB308', '#78716C',
														
 
															+            '#DC2626', '#84CC16', '#06B6D4', '#A855F7',
														
 
															+        ],
														
 
															+    ),
														
 
															+}
														
 
															+
														
 
															+
														
 
															+def get_theme(preset: ThemePreset, custom_overrides: dict = None) -> ThemeConfig:
														
 
															+    if preset == ThemePreset.CUSTOM:
														
 
															+        config = ThemeConfig(preset=ThemePreset.CUSTOM, name='自定义主题')
														
 
															+        if custom_overrides:
														
 
															+            for k, v in custom_overrides.items():
														
 
															+                if hasattr(config, k):
														
 
															+                    setattr(config, k, v)
														
 
															+        return config
														
 
															+    return PRESETS.get(preset, PRESETS[ThemePreset.BUSINESS_CLASSIC])
														
 
															+
														
 
															+
														
 
															+def theme_to_rgb_colors(theme: ThemeConfig) -> dict:
														
 
															+    return {
														
 
															+        'primary': _hex_to_rgb(theme.primary),
														
 
															+        'accent': _hex_to_rgb(theme.accent),
														
 
															+        'accent_neg': _hex_to_rgb(theme.accent_neg),
														
 
															+        'secondary': _hex_to_rgb(theme.secondary),
														
 
															+        'dark': _hex_to_rgb(theme.dark),
														
 
															+        'white': _hex_to_rgb(theme.white),
														
 
															+        'gray_bg': _hex_to_rgb(theme.gray_bg),
														
 
															+        'card_bg': _hex_to_rgb(theme.card_bg),
														
 
															+        'text': _hex_to_rgb(theme.text),
														
 
															+        'text_gray': _hex_to_rgb(theme.text_gray),
														
 
															+        'line': _hex_to_rgb(theme.line),
														
 
															+        'green': _hex_to_rgb(theme.accent),
														
 
															+        'red': _hex_to_rgb(theme.accent_neg),
														
 
															+        'orange': _hex_to_rgb(theme.chart_series[2]) if len(theme.chart_series) > 2 else RGBColor(0xED, 0x7D, 0x31),
														
 
															+        'series': [_hex_to_rgb(c) for c in theme.chart_series],
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+def _hex_to_rgb(hex_str: str) -> RGBColor:
														
 
															+    hex_str = hex_str.lstrip('#')
														
 
															+    if len(hex_str) == 6:
														
 
															+        return RGBColor(int(hex_str[0:2], 16), int(hex_str[2:4], 16), int(hex_str[4:6], 16))
														
 
															+    return RGBColor(0x33, 0x33, 0x33)
														
 
															+
														
 
															+
														
 
															+def list_themes() -> list[dict]:
														
 
															+    result = []
														
 
															+    for preset, config in PRESETS.items():
														
 
															+        result.append({
														
 
															+            'key': preset.value,
														
 
															+            'name': config.name,
														
 
															+            'primary': config.primary,
														
 
															+            'accent': config.accent,
														
 
															+        })
														
 
															+    result.append({
														
 
															+        'key': 'custom',
														
 
															+        'name': '自定义主题',
														
 
															+        'primary': '自定义',
														
 
															+        'accent': '自定义',
														
 
															+    })
														
 
															+    return result
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    for t in list_themes():
														
 
															+        print(f"{t['key']}: {t['name']} (primary={t['primary']}, accent={t['accent']})")
														
--- a/~$5月6日质检测试_v2.pptx
+++ b/~$5月6日质检测试_v2.pptx