2 tháng trước cách đây · cd6afa90f9
--- a/5月6日数据.xlsx
+++ b/5月6日数据.xlsx
--- a/5月6日质检测试_v2.pptx
+++ b/5月6日质检测试_v2.pptx
--- a/generate-data-report-ppt/SKILL.md
+++ b/generate-data-report-ppt/SKILL.md
@@ -1,25 +1,30 @@
 
				 ---
			
 
				 name: generate-data-report-ppt
			
 
				 description: >
			
 
				-  基于 Excel 业务明细数据，自动生成数据日报、周报、月报 PPT。
			
 
				+  通用数据报告 PPT 生成器。输入任意 Excel 数据文件，自动探查数据结构、计算指标、通过六项用户确认后生成高质量 PPT 报告。
			
 
				   图表使用原生 python-pptx 可编辑 Chart 对象（非 matplotlib PNG 插入）。
			
 
				-  当用户请求"生成日报/周报/月报"、"创建数据报告 PPT"、"输出业务报告 PPT"
			
 
				-  或任何涉及周期性数据报告的 PowerPoint 格式需求时触发此技能。
			
 
				+  内置质量自检引擎，自动检测布局/视觉/内容/数据问题并迭代修复至理想效果。
			
 
				 ---
			
 
				 
			
 
				-# 生成数据报告 PPT
			
 
				+# 通用数据报告 PPT 生成器
			
 
				 
			
 
				-自动生成周期性业务数据报告（日报 / 周报 / 月报）为原生可编辑 PowerPoint 文件。
			
 
				+输入任意 Excel 文件，自动生成高质量可编辑数据报告 PPT。
			
 
				 
			
 
				 ## 工作流程
			
 
				 
			
 
				-1. **接收输入**：数据文件路径、报告类型、日期/周期参数、可选部门/来源名称。
			
 
				-2. **加载数据**：`scripts/data_loader.py` 按日期范围读取 Excel 工作表，清洗并校验数据。
			
 
				-3. **计算指标**：`scripts/metrics_calculator.py` 根据报告类型计算 KPI、分布、趋势、告警、深度洞察。
			
 
				-4. **生成洞察**：`scripts/deep_insights.py` 生成结构化深度分析文本（标题 + 正文段落）。
			
 
				-5. **构建 PPT**：`scripts/ppt_builder.py` 从模板复制幻灯片，替换占位符，动态绘制导航标签，
			
 
				-   通过 `scripts/chart_factory.py` 插入原生图表、KPI 卡片、告警卡片、洞察文本块。
			
 
				-6. **输出**：保存 `.pptx` 文件。所有图表和表格均可在 PowerPoint 中编辑（右键 → 编辑数据）。
			
 
				+1.  **接收 Excel 文件**：用户提供任意格式的 Excel 数据文件。
			
 
				+2.  **数据探查**：`scripts/data_profiler.py` 自动检测 Schema、统计特征、数据质量及字段语义。
			
 
				+3.  **智能分析与推荐**：`scripts/agent_analyzer.py` 基于探查结果生成指标推荐、页面结构建议及可视化方案。
			
 
				+4.  **用户确认（六项）**：生成 PPT 前必须与用户确认以下内容：
			
 
				+    - 报告周期与页数范围
			
 
				+    - 核心指标集
			
 
				+    - 受众与决策场景
			
 
				+    - 视觉风格与配色方向
			
 
				+    - 页面结构与模板方案
			
 
				+    - 数据范围与字段映射
			
 
				+5.  **生成 PPT**：`scripts/ppt_builder.py` 按用户确认的配置，动态选择布局模板、应用主题配色、插入原生可编辑图表。
			
 
				+6.  **质量自检与修复**：`scripts/quality_inspector.py` 检查四类 25 项质量标准，发现问题自动修复，迭代至阈值达标。
			
 
				+7.  **输出**：保存 `.pptx` 文件。所有图表和表格均可在 PowerPoint 中编辑。
			
 
				 
			
 
				 ## 目录结构
			
 
				 
			
@@ -27,153 +32,138 @@ description: >
 
				 generate-data-report-ppt/
			
 
				 ├── SKILL.md
			
 
				 ├── scripts/
			
 
				-│   ├── data_loader.py          # Excel 加载与数据清洗
			
 
				-│   ├── metrics_calculator.py   # KPI 计算引擎（含日报/周报/月报指标）
			
 
				-│   ├── deep_insights.py        # 结构化深度洞察生成（周报/月报各页面）
			
 
				-│   ├── chart_factory.py        # 原生可编辑图表创建
			
 
				-│   └── ppt_builder.py          # PPT 组装编排器
			
 
				+│   ├── data_loader.py              # Excel 加载与数据清洗（保留原有兼容）
			
 
				+│   ├── data_profiler.py            # 通用数据探查引擎（新增）
			
 
				+│   ├── report_config.py            # ReportConfig 数据模型定义（新增）
			
 
				+│   ├── theme_manager.py            # 多主题配色与模板管理（新增）
			
 
				+│   ├── agent_analyzer.py           # LLM 智能分析与推荐（新增）
			
 
				+│   ├── metrics_calculator.py       # KPI 计算引擎（新增通用计算函数）
			
 
				+│   ├── chart_factory.py            # 原生可编辑图表创建
			
 
				+│   ├── page_layouts.py             # 动态页面布局引擎（新增）
			
 
				+│   ├── quality_rules.py            # 质量检查规则定义（新增）
			
 
				+│   ├── quality_inspector.py        # 质量自检与自动修复引擎（新增）
			
 
				+│   ├── deep_insights.py            # 结构化深度洞察生成（周报/月报）
			
 
				+│   └── ppt_builder.py              # PPT 组装编排器（新增 build_report / quality_assured_build）
			
 
				 ├── references/
			
 
				-│   ├── data-schema.md          # Excel 字段映射与校验规则
			
 
				-│   ├── report-structures.md    # 日报/周报/月报页面结构
			
 
				-│   ├── chart-specs.md          # 原生图表类型、配色、数据绑定
			
 
				-│   └── visual-style-guide.md   # 布局、字体、配色方案
			
 
				+│   ├── data-schema.md              # Excel 字段映射与校验规则
			
 
				+│   ├── report-structures.md        # 日报/周报/月报页面结构
			
 
				+│   ├── chart-specs.md              # 原生图表类型、配色、数据绑定
			
 
				+│   └── visual-style-guide.md       # 布局、字体、配色方案
			
 
				 └── assets/
			
 
				-    ├── report-master.pptx      # 日报模板（封面、内容页、目录、尾页）
			
 
				-    ├── weekly-master.pptx      # 周报模板
			
 
				-    └── monthly-master.pptx     # 月报模板
			
 
				+    ├── report-master.pptx          # 日报模板（封面、内容页、目录、尾页）
			
 
				+    ├── weekly-master.pptx          # 周报模板
			
 
				+    └── monthly-master.pptx         # 月报模板
			
 
				 ```
			
 
				 
			
 
				-## 报告类型
			
 
				+## 新增核心模块
			
 
				+
			
 
				+### ReportConfig（report_config.py）
			
 
				+通用数据模型，定义报告配置的所有要素：
			
 
				+- `ReportConfig`：报告标题、周期、数据来源、主题、页数范围、质量阈值、最大修复迭代次数
			
 
				+- `MetricDef`：指标名称、列映射、聚合方式、数值格式、单位、指标类型
			
 
				+- `PageDef`：页面类型、标题、结论标题、布局模板、可选图表类型、选中状态
			
 
				+- `PeriodType` 枚举：DAILY / WEEKLY / MONTHLY / CUSTOM
			
 
				+- `ChartType` 枚举：BAR / LINE / PIE / DOUGHNUT / FUNNEL / TABLE / AUTO
			
 
				+
			
 
				+### 数据探查（data_profiler.py）
			
 
				+自动分析任意 Excel 数据结构：
			
 
				+- Schema 检测：列名、类型推断、缺失率、唯一值统计
			
 
				+- 统计特征提取：数值列的 min/max/mean/std/分位数，分类列的分布/基数
			
 
				+- 语义推断：自动识别时间列、分类列、数值列、ID 列
			
 
				+- 数据质量评分：完整性、唯一性、合理性三维度评分
			
 
				+
			
 
				+### 多主题管理（theme_manager.py）
			
 
				+- 5 套预设主题：商务经典（默认）、清新简约、科技蓝调、暖橙活力、暗夜深邃
			
 
				+- 每套主题包含：主色、辅色、强调色、背景色、文字色、系列色盘
			
 
				+- 支持自定义配色覆盖
			
 
				+- `theme_to_rgb_colors()` 一键转换为 pptx RGBColor 对象
			
 
				+
			
 
				+### 智能分析（agent_analyzer.py）
			
 
				+- 自动识别可量化的数值指标
			
 
				+- 推荐可视化方案（图表类型、配色、数据准备方式）
			
 
				+- 生成页面结构建议（含结论标题和洞察文案模板）
			
 
				+- 所有推荐需经用户确认后注入 ReportConfig
			
 
				+
			
 
				+### 页面布局引擎（page_layouts.py）
			
 
				+- 预定义布局模板：KPI 网格、图表左+洞察右、两栏、两行、卡片网格、全宽
			
 
				+- `calculate_content_area()` 计算可用内容区域
			
 
				+- `calculate_fill_ratio()` 计算页面内容填充率
			
 
				+- `ensure_safe_position()` 确保元素在页面安全区域内
			
 
				+
			
 
				+### 质量自检（quality_rules.py + quality_inspector.py）
			
 
				+
			
 
				+**四类 25 项检查规则**：
			
 
				+
			
 
				+| 类别 | 检查项 | 自动修复 |
			
 
				+|------|--------|---------|
			
 
				+| layout（布局） | 元素飞出边界（4方向）、图文重叠、占位符未替换、元素紧贴边缘 | ✅ |
			
 
				+| visual（视觉） | 字体不一致、字号过小/过大、颜色对比度不足、图片拉伸变形 | ✅ |
			
 
				+| content（内容） | 页面留白过多、KPI数值为空、图表无数据、文本截断、分析文本过短、缺少标题 | ✅ |
			
 
				+| data（数据） | 图表与文本矛盾、页码错乱、数据来源缺失、表格列宽不合理、图表刻度异常 | 部分 |
			
 
				+
			
 
				+**迭代修复机制**：
			
 
				+1. 生成 PPT → 执行全量检查 → 报告问题
			
 
				+2. 对可自动修复的问题执行修复 → 重新检查
			
 
				+3. 重复至无严重/主要问题或达最大迭代次数
			
 
				+4. 计算质量评分（0-100），评分 ≥ 阈值（默认 85）输出
			
 
				+
			
 
				+**质量评分计算**：
			
 
				+- 严重问题：-20 分/页
			
 
				+- 主要问题：-10 分/页
			
 
				+- 次要问题：-3 分/页
			
 
				+- 加权归一化到 100 分制
			
 
				+
			
 
				+## 报告类型（原有，保持兼容）
			
 
				 
			
 
				 ### 日报
			
 
				-- **结构**：封面 → 核心指标概览 → 近10天趋势 → 订单状态分布 → 负责人分布 → 目的国家 TOP8 → 异常告警 → 今日要点
			
 
				-- **分析维度**：与昨日对比
			
 
				-- **页数**：8
			
 
				-
			
 
				+- 结构：封面 → 核心指标概览 → 近10天趋势 → 订单状态分布 → 负责人分布 → 目的国家 TOP8 → 异常告警 → 今日要点
			
 
				+- 页数：8
			
 
				 ### 周报
			
 
				-- **结构**：封面 → 周汇总 → 7日趋势 → 环比分析 → 区域分布 → 国家排行 → 团队追踪 → 问题与建议 → 下周计划
			
 
				-- **分析维度**：周环比（WoW）、7日移动平均
			
 
				-- **页数**：9
			
 
				-- **导航标签**：周汇总 / 趋势图 / 环比分析 / 区域排行 / 问题建议 / 下周计划
			
 
				-
			
 
				+- 结构：封面 → 周汇总 → 7日趋势 → 环比分析 → 区域分布 → 国家排行 → 团队追踪 → 问题与建议 → 下周计划
			
 
				+- 页数：9
			
 
				 ### 月报
			
 
				-- **结构**：封面 → 目录 → 月度总览 → 订单状态漏斗 → 区域分布 → TOP10 目的国 → 30日追踪趋势 → 团队绩效 → 支持需求分析 → 下月规划 → 尾页
			
 
				-- **分析维度**：环比（MoM）、同比（YoY）、日均值、结构占比
			
 
				-- **页数**：11
			
 
				-- **导航标签**：月度总览 / 订单状态 / 区域趋势 / 团队展望
			
 
				-
			
 
				-## 模板使用
			
 
				-
			
 
				-`assets/` 下包含三种报告模板：
			
 
				-
			
 
				-| 报告类型 | 模板文件 | 包含幻灯片 |
			
 
				-|---------|---------|-----------|
			
 
				-| 日报 | `report-master.pptx` | 封面 / 内容页 / 目录页 / 尾页 |
			
 
				-| 周报 | `weekly-master.pptx` | 封面 / 内容页 / 目录页 / 尾页 |
			
 
				-| 月报 | `monthly-master.pptx` | 封面 / 内容页 / 目录页 / 尾页 |
			
 
				-
			
 
				-**复制机制**：`ppt_builder._duplicate_slide(prs, source_slide)` 深度复制模板幻灯片到输出文稿。
			
 
				-
			
 
				-**导航标签**：由 `ppt_builder._add_nav_tabs()` 在内容页上动态绘制，不内嵌在模板中。
			
 
				-
			
 
				-## 占位符替换
			
 
				-
			
 
				-所有模板形状使用 `{placeholder}` 语法。脚本遍历 `slide.shapes` 匹配段落文本进行替换。
			
 
				-
			
 
				-| 占位符 | 出现位置 | 替换内容 |
			
 
				-|--------|---------|---------|
			
 
				-| `{report_title}` | 封面、内容页眉、尾页 | 如"海外订单数据日报" |
			
 
				-| `{report_type}` | 封面副标题 | 如"数据日报" |
			
 
				-| `{date}` | 封面、页眉、尾页 | 报告日期或周期 |
			
 
				-| `{department}` | 封面、尾页 | 如"海外事业部" |
			
 
				-| `{period}` | 封面、底部来源条 | 数据周期描述 |
			
 
				-| `{gen_time}` | 封面 | 报告生成时间 |
			
 
				-| `{page_title}` | 内容页 | 当前页面标题 |
			
 
				-| `{page_num}` | 右下角 | 如"3 / 8" |
			
 
				-| `{source}` | 底部来源条 | 数据来源 |
			
 
				-| `{kpiN_label}` / `{kpiN_value}` | 封面/尾页卡片 | 第N个指标的标签和数值 |
			
 
				-| `{chapterN_title}` / `{chapterN_desc}` | 目录网格 | 第N章标题和描述 |
			
 
				-
			
 
				-## 图表插入规则
			
 
				-
			
 
				-**严格使用原生图表**，禁止生成 matplotlib PNG 图片。
			
 
				-
			
 
				-| 图表类型 | XL_CHART_TYPE | 使用场景 |
			
 
				-|---------|---------------|---------|
			
 
				-| 簇状柱形图 | COLUMN_CLUSTERED | 区域分布、团队追踪、支持需求分布 |
			
 
				-| 簇状条形图（横向） | BAR_CLUSTERED | 国家排名、负责人排名、状态漏斗 |
			
 
				-| 折线图（带标记） | LINE_MARKERS | 多日趋势（10天/7天/30天） |
			
 
				-| 环形图 | DOUGHNUT | 状态占比、区域占比 |
			
 
				-| 饼图 | PIE | 状态占比、区域占比（替代场景） |
			
 
				-| 表格 | TABLE | 明细列表、TOP列表、状态变化、超期订单 |
			
 
				-
			
 
				-调用 `chart_factory.add_*_chart()` 传入数据数组。图表数据嵌入 PPT 内部 Excel 工作簿，用户可直接编辑。
			
 
				-
			
 
				-## 数据输入要求
			
 
				-
			
 
				-Excel 文件按自然日分 Sheet，工作表命名：`YYYY年MM月DD日`（如 `2026年04月10日`）。
			
 
				-
			
 
				-**必填字段**：`目的国家`、`合同号`、`订单总数量`、`负责人`、`当前状态`、`拟定合同时间`
			
 
				-
			
 
				-**推荐字段**：`今日进度更新`、`是否更新`、`支持需求`、`4月交付`、`5月预测`
			
 
				-
			
 
				-完整字段映射、状态枚举（A-F）及校验规则见 `references/data-schema.md`。
			
 
				-
			
 
				-## 配色方案
			
 
				-
			
 
				-| 角色 | 色值 | 用途 |
			
 
				-|------|------|------|
			
 
				-| 主色 | `#1E3A5F` | 页眉标题、导航标签、强调色、顶部蓝线 |
			
 
				-| 辅色 | `#5B9BD5` | 图表主系列、CONTENTS 标签 |
			
 
				-| 深色背景 | `#1F3A5C` | 封面左侧块 |
			
 
				-| 增长色 | `#10B981` | 上涨指标、正面变化 |
			
 
				-| 下跌色 | `#EF4444` | 下跌指标、负面变化、严重告警 |
			
 
				-| 警告色 | `#ED7D31` | 中度告警、关注提示 |
			
 
				-| 卡片背景 | `#E7F0F7` | KPI 卡片背景 |
			
 
				-| 深灰文字 | `#333333` | 正文、主标题 |
			
 
				-| 中灰文字 | `#666666` | 副标题、次要信息 |
			
 
				-
			
 
				-## 核心功能模块
			
 
				-
			
 
				-### 指标计算（metrics_calculator.py）
			
 
				-- **日报指标**：在跟订单数、订单总量、今日更新、已发运、支持需求、下月预测、单均台数、状态分布、负责人分布、国家 TOP8、超期订单（A阶段>30天）、告警列表
			
 
				-- **周报指标**：周订单量、周车辆数、日均订单、单均台数、7日趋势、状态环比（WoW）、区域分布（含各国 TOP3）、国家排行（含环比变化）、团队绩效（人均产出）、支持需求分类、问题识别、下周目标（G1-G4自动生成）
			
 
				-- **月报指标**：月度合同数、月度车辆数、新签合同、已发运、覆盖国家数、支持需求占比、日均订单、状态漏斗（含阶段分析：前期/中期/后期）、区域分布（含各国 TOP3）、TOP10 国家（含环比变化）、30日趋势（含上中下旬均值、峰值日期）、团队绩效（人均订单/人均台数）、超期订单、下月目标（5项自动生成）、风险列表
			
 
				-
			
 
				-### 深度洞察（deep_insights.py）
			
 
				-为周报和月报各页面生成结构化洞察文本，每条洞察包含标题和正文：
			
 
				-- **周报**：周内节奏分析、周环比趋势偏移、月度进度推演、关键驱动因素、区域引擎识别、结构健康度、转化效率、瓶颈诊断、库存资金占用、发运端效率、漏斗健康度、区域战略矩阵、国家组合健康度、团队人均产出、问题根因分类、目标拆解、风险对冲等
			
 
				-- **月报**：月度节奏、目标达成率、季节性同比、年度进度、漏斗结构诊断、区域投入 ROI、国家增速梯队、团队均衡性、支持需求趋势、下月里程碑等
			
 
				-
			
 
				-### PPT 组件（ppt_builder.py）
			
 
				-- **KPI 卡片**：3×2 网格，支持数值、单位、变化徽章、情感标签（自动着色）
			
 
				-- **告警卡片**：1-3 个横向排列，支持严重/警告/关注三级颜色
			
 
				-- **问题卡片**：纵向堆叠，含严重度、标题、详情、建议措施
			
 
				-- **目标卡片**：2×2 网格，含图标、目标编号、标题、详情
			
 
				-- **结构化洞察文本块**：多段落洞察，自适应字号压缩以适配高度，带 emoji 前缀
			
 
				-- **页脚**：自动添加数据来源条和页码
			
 
				+- 结构：封面 → 目录 → 月度总览 → 订单状态漏斗 → 区域分布 → TOP10 目的国 → 30日追踪趋势 → 团队绩效 → 支持需求分析 → 下月规划 → 尾页
			
 
				+- 页数：11
			
 
				 
			
 
				 ## 执行示例
			
 
				 
			
 
				 ```python
			
 
				-from scripts.ppt_builder import build_daily_report, build_weekly_report, build_monthly_report
			
 
				-from datetime import datetime
			
 
				+from scripts.ppt_builder import build_daily_report, build_report, quality_assured_build
			
 
				+from scripts.report_config import ReportConfig, PageDef, MetricDef
			
 
				+from datetime import datetime
			
 
				+# === 原有方式（保持兼容）===
			
 
				+build_daily_report('data.xlsx', datetime(2026, 4, 10), 'daily.pptx')
			
 
				+
			
 
				+# === 新通用方式 ===
			
 
				+config = ReportConfig(
			
 
				+    title='销售数据报告',
			
 
				+    period_type='monthly',
			
 
				+    source_label='销售部',
			
 
				+    theme='business_classic',
			
 
				+    quality_threshold=85,
			
 
				+    max_fix_iterations=5,
			
 
				+)
			
 
				+# config 需经用户确认后填充 metrics 和 pages
			
 
				+
			
 
				+build_report('any_data.xlsx', config, 'output.pptx')
			
 
				+
			
 
				+# === 带质量保证的方式（推荐）===
			
 
				+prs, issues = quality_assured_build('any_data.xlsx', config, 'output_qa.pptx')
			
 
				+```
			
 
				 
			
 
				-# 日报
			
 
				-build_daily_report('data.xlsx', datetime(2026, 4, 10), 'daily_20260410.pptx')
			
 
				+## V2 generation contract
			
 
				 
			
 
				-# 周报（2026年第14周）
			
 
				-build_weekly_report('data.xlsx', 2026, 14, 'weekly_w14.pptx')
			
 
				+Before calling `build_report` or `quality_assured_build`, fill and validate the six user confirmations:
			
 
				 
			
 
				-# 月报（2026年4月）
			
 
				-build_monthly_report('data.xlsx', 2026, 4, 'monthly_202604.pptx')
			
 
				-```
			
 
				+1. 报告周期与页数范围
			
 
				+2. 核心指标集
			
 
				+3. 受众与决策场景
			
 
				+4. 视觉风格与配色方向
			
 
				+5. 页面结构与模板方案
			
 
				+6. 数据范围与字段映射
			
 
				+
			
 
				+Use `ConfirmationSpec` on `ReportConfig.user_confirmation` to record completion. Generic builds default to `require_six_confirmations=True`; missing confirmations or invalid metric-to-column mappings must stop generation before any PPT is written.
			
 
				 
			
 
				-## 扩展技能
			
 
				+Data profiling serves the confirmed business intent. It should map the confirmed metrics and dimensions to actual Excel columns, then select feasible pages and layouts. It must not invent a different business focus when the user has already confirmed the core metrics.
			
 
				 
			
 
				-添加新报告类型（如季报）：
			
 
				-1. 在 `references/report-structures.md` 中添加页面结构定义
			
 
				-2. 在 `scripts/metrics_calculator.py` 中添加指标计算函数
			
 
				-3. 在 `scripts/deep_insights.py` 中添加洞察生成函数
			
 
				-4. 在 `scripts/ppt_builder.py` 中添加构建函数
			
 
				-5. 若内容页布局通用，无需修改模板文件
			
 
				+For visual quality, treat master PPTX files as style assets, not rigid page contracts. If a template placeholder cannot be populated, remove the whole placeholder component. If a KPI grid consumes the available vertical space, do not add bottom insight text; use a later analysis page or a different layout instead.
			
--- a/generate-data-report-ppt/references/chart-specs.md
+++ b/generate-data-report-ppt/references/chart-specs.md
@@ -76,3 +76,28 @@ series = chart.series[0]
 
				 series.marker.size = 7
			
 
				 series.marker.style = XL_MARKER_STYLE.CIRCLE
			
 
				 ```
			
 
				+
			
 
				+
			
 
				+## 通用图表自动选择（新增）
			
 
				+
			
 
				+通用构建器支持自动选择图表类型，根据数据特征和页面场景智能推荐。
			
 
				+
			
 
				+### 智能推荐规则
			
 
				+
			
 
				+| 数据特征 | 推荐图表 | 原因 |
			
 
				+|---------|---------|------|
			
 
				+| 时间序列 + 数值 | LINE_MARKERS | 展示趋势变化 |
			
 
				+| 1个分类列 + 数值列 | BAR_CLUSTERED | 横向对比排名 |
			
 
				+| 1个分类列（含百分比） | DOUGHNUT | 展示占比结构 |
			
 
				+| 多个数值维度 | COLUMN_CLUSTERED | 多维度对比 |
			
 
				+| 需要展示精确数值 | TABLE | 数据明细 |
			
 
				+| 发现数据分层 | FUNNEL | 漏斗转化 |
			
 
				+| 经纬度数据 | 散点图/气泡图 | 地理分布 |
			
 
				+
			
 
				+### 图表质量自检
			
 
				+
			
 
				+| 检查项 | 问题 | 修复 |
			
 
				+|--------|------|------|
			
 
				+| 图表刻度异常 | Y轴从非零开始导致误导 | 将Y轴最小值重置为 0 |
			
 
				+| 图表无数据 | 数据系列为空 | 跳过该图表插入 |
			
 
				+| 表格列宽不合理 | 列宽与内容不匹配 | 按内容自适应列宽 |
			
--- a/generate-data-report-ppt/references/data-schema.md
+++ b/generate-data-report-ppt/references/data-schema.md
@@ -1,61 +1,238 @@
 
				 # 数据源 Schema
			
 
				 
			
 
				-数据输入为 Excel 文件，每个工作表（Sheet）代表一个自然日的订单明细数据。
			
 
				-
			
 
				-## 工作表命名规则
			
 
				-
			
 
				-- 日报数据源：`YYYY年MM月DD日`（如 `2026年04月10日`）
			
 
				-- 脚本通过日期字符串匹配对应工作表
			
 
				-
			
 
				-## 字段映射
			
 
				-
			
 
				-| Excel 列名 | 内部字段名 | 数据类型 | 说明 |
			
 
				-|-----------|-----------|---------|------|
			
 
				-| 序号 | `seq` | int | 行序号 |
			
 
				-| 目的国家 | `country` | str | 订单目的国家/地区 |
			
 
				-| 合同号 | `contract_no` | str | 唯一合同编号 |
			
 
				-| 用户名称/公司 | `customer` | str | 客户名称 |
			
 
				-| 意向车型及数量 | `product_info` | str | 车型及台数描述 |
			
 
				-| 订单总数量 | `order_qty` | int | 该合同的车辆总台数 |
			
 
				-| 负责人 | `owner` | str | 跟单负责人姓名 |
			
 
				-| 当前状态 | `status` | str | 订单阶段，见下方状态枚举 |
			
 
				-| 拟定合同时间 | `contract_date` | datetime | 合同拟定日期 |
			
 
				-| 跟单天数 | `tracking_days` | int | 从合同拟定到当前日期的天数 |
			
 
				-| 定金支付时间 | `deposit_date` | datetime | 定金支付日期 |
			
 
				-| 订金认领时间 | `deposit_claim_date` | datetime | 订金认领日期 |
			
 
				-| 订单生成时间 | `order_gen_date` | datetime | 订单在系统生成日期 |
			
 
				-| 价格评审时间 | `price_review_date` | datetime | 价格评审完成日期 |
			
 
				-| 合同评审时间 | `contract_review_date` | datetime | 合同评审完成日期 |
			
 
				-| 合同提交盖章申请时间 | `seal_apply_date` | datetime | 盖章申请日期 |
			
 
				-| 合同盖章时间 | `seal_date` | datetime | 合同盖章完成日期 |
			
 
				-| 车辆下线入库状态 | `inventory_status` | str | 车辆生产/入库状态描述 |
			
 
				-| 尾款支付时间 | `final_pay_date` | datetime | 尾款支付日期 |
			
 
				-| 尾款认领时间 | `final_claim_date` | datetime | 尾款认领日期 |
			
 
				-| 智慧关务信息维护 | `customs_date` | datetime | 关务信息维护日期 |
			
 
				-| 许可证办理时间 | `license_date` | datetime | 进口许可证办理日期 |
			
 
				-| 车辆发运时间 | `ship_date` | datetime | 实际发运日期 |
			
 
				-| 预计开票时间 | `invoice_date` | datetime | 预计开票日期 |
			
 
				-| 今日进度更新 | `progress_update` | str | 当日最新进度描述 |
			
 
				-| 是否更新 | `is_updated` | str (是/否) | 当日是否有进度更新 |
			
 
				-| 支持需求 | `support_request` | str | 需要跨部门支持的需求描述 |
			
 
				-| 4月交付 | `deliver_apr` | int | 标记为4月交付的台数 |
			
 
				-| 5月预测 | `forecast_may` | int | 标记为5月预测交付的台数 |
			
 
				-
			
 
				-## 订单状态枚举
			
 
				-
			
 
				-| 状态代码 | 状态名称 | 说明 |
			
 
				-|---------|---------|------|
			
 
				-| A | 合同拟定中 | 合同尚未盖章确认 |
			
 
				-| B | 已锁定合同待付订金 | 合同已盖章，等待客户支付订金 |
			
 
				-| C | 已付订金待生产 | 订金已到账，等待排产 |
			
 
				-| D | 已生产待付尾款 | 车辆已生产/入库，等待尾款 |
			
 
				-| E | 已付尾款待发运 | 尾款已到账，等待发运安排 |
			
 
				-| F | 已发运 | 车辆已发运 |
			
 
				-
			
 
				-## 数据校验规则
			
 
				-
			
 
				-1. **必填字段**：`country`, `contract_no`, `order_qty`, `owner`, `status`, `contract_date`
			
 
				-2. `order_qty` 必须为正整数
			
 
				-3. `status` 必须为 A-F 中的一个
			
 
				-4. `is_updated` 只能为 "是" 或 "否"
			
 
				-5. 日期字段若为字符串，尝试按 `YYYY-MM-DD` 解析
			
 
				+本报告生成器为 **通用型数据报告引擎**，不依赖固定的 Schema，可接受任意结构的 Excel 文件作为输入。
			
 
				+
			
 
				+## 数据要求
			
 
				+
			
 
				+### 基本要求
			
 
				+
			
 
				+- **格式**：Excel 文件（.xlsx / .xls）或 CSV 文件（.csv）
			
 
				+- **编码**：UTF-8（推荐），CSV 文件自动编码检测（支持 utf-8 / gbk / latin-1 等）
			
 
				+- **文件大小**：建议单文件不超过 50MB
			
 
				+- **行数**：支持数百～数十万行
			
 
				+
			
 
				+### 列类型建议
			
 
				+
			
 
				+引擎通过 `data_profiler.py` 自动检测每列的数据角色，支持以下类型：
			
 
				+
			
 
				+| 角色 | 检测方式 | 适用场景 |
			
 
				+|------|---------|---------|
			
 
				+| **数值列** | dtype 判断 + 列名关键词 + 值分布分析 | 销售额、台数、金额、数量、评分等可计算指标 |
			
 
				+| **分类列** | 唯一值基数 + 列名关键词 + 值内容分析 | 国家、状态、类型、部门、负责人等分组维度 |
			
 
				+| **时间列** | dtype 判断 + 列名关键词 + 日期格式解析 | 日期、时间戳、月份等时间序列维度 |
			
 
				+| **ID 列** | 列名关键词 + 值模式（邮箱/电话/长数字串） | 客户编号、订单号、合同号、邮箱等标识字段 |
			
 
				+| **文本列** | 高基数文本 + 列名关键词 | 备注、描述、地址、摘要等非结构化内容 |
			
 
				+| **布尔列** | 值内容（是/否、true/false、0/1） | 开关状态、是否标记等二值字段 |
			
 
				+
			
 
				+> 引擎支持**值内容驱动的自动识别**：当列名无法明确判断时，通过采样分析列中实际数据内容（如检测到 >50% 的邮箱格式则自动归类为 ID 列，检测到是/否值则归类为布尔列）来提高识别准确率。
			
 
				+
			
 
				+## 自动推断能力
			
 
				+
			
 
				+`data_profiler.py` 是数据探查核心引擎，提供以下自动推断能力：
			
 
				+
			
 
				+### 1. 列类型与角色推断
			
 
				+
			
 
				+| 分析维度 | 检测内容 |
			
 
				+|---------|---------|
			
 
				+| 列类型推断 | 数值列（int/float）、分类列（低基数）、时间列、文本列、ID 列、布尔列 |
			
 
				+| 值模式分析 | 百分比值、二值（是/否）、序数值（高/中/低）、电话格式、邮箱格式、URL、纯数字ID |
			
 
				+| 语义关键词匹配 | 通过列名关键词推断业务含义（支持中英文，覆盖 100+ 关键词） |
			
 
				+
			
 
				+### 2. 统计特征提取
			
 
				+
			
 
				+| 分析维度 | 检测内容 |
			
 
				+|---------|---------|
			
 
				+| 基础统计 | count、sum、mean、median、min、max、std |
			
 
				+| 分位数 | p25（下四分位）、p75（上四分位） |
			
 
				+| 分布形态 | 偏度（skewness）、峰度（kurtosis）、偏态方向、变差系数（CV） |
			
 
				+| 集中度分析 | 高度集中（CV<0.3）/ 中度集中 / 适度分散 / 高度分散 |
			
 
				+
			
 
				+### 3. 分类维度分析
			
 
				+
			
 
				+| 分析维度 | 检测内容 |
			
 
				+|---------|---------|
			
 
				+| 基数统计 | 唯一值数量、占比 |
			
 
				+| 集中度 | Herfindahl-Hirschman 指数（HHI），识别头部集中度 |
			
 
				+| 分布描述 | Top-N 项及其占比 |
			
 
				+
			
 
				+### 4. 数据质量评估
			
 
				+
			
 
				+| 评分维度 | 权重 | 检测内容 |
			
 
				+|---------|------|---------|
			
 
				+| 完整性 | 30% | 缺失率综合评分，高缺失列标识（>30%） |
			
 
				+| 数值健康度 | 25% | 异常值比例、负值检查、零值过多检查 |
			
 
				+| 唯一性 | 20% | ID 列的识别和覆盖度 |
			
 
				+| 时间一致性 | 15% | 时间范围的合法性和有序性 |
			
 
				+| 分类完整性 | 10% | 分类列缺失比例 |
			
 
				+
			
 
				+### 5. 衍生关系检测
			
 
				+
			
 
				+引擎自动检测数值列之间的潜在算术关系：
			
 
				+
			
 
				+| 关系类型 | 示例 | 用途 |
			
 
				+|---------|------|------|
			
 
				+| 减法关系 | `A - B ≈ C` | 发现派生指标（如：总需求 - 已下单 = 未下单） |
			
 
				+| 加法关系 | `A + B ≈ C` | 发现总和关系 |
			
 
				+| 比例关系 | `A / B ≈ 常数` | 发现固定比率（如：转化率、占比） |
			
 
				+
			
 
				+### 6. 数据问题检测
			
 
				+
			
 
				+`detect_data_issues()` 自动扫描：
			
 
				+
			
 
				+- **高缺失率列**（>50%）— 建议排除或补全
			
 
				+- **中度缺失列**（>10%）— 提示关注
			
 
				+- **异常值**（IQR 3倍以外）— 标识离群点
			
 
				+- **负值** — 对非负指标列进行标记
			
 
				+- **常量列** — 仅 1 个唯一值，对分析无贡献
			
 
				+
			
 
				+## 数据加载
			
 
				+
			
 
				+`data_loader.py` 提供自动格式检测、编码识别、智能清洗等通用加载能力。
			
 
				+
			
 
				+### 自动格式检测
			
 
				+
			
 
				+`auto_detect_file_format()` 根据扩展名自动识别：
			
 
				+
			
 
				+| 格式 | 扩展名 | 支持说明 |
			
 
				+|------|--------|---------|
			
 
				+| Excel (.xlsx) | `.xlsx` | 标准 Excel 格式，主力支持 |
			
 
				+| Excel 97 (.xls) | `.xls` | 兼容模式，如遇读取错误建议另存为 .xlsx |
			
 
				+| CSV | `.csv` | 自动编码检测（utf-8 → gbk → latin-1 等逐级尝试） |
			
 
				+
			
 
				+### 核心加载函数
			
 
				+
			
 
				+```python
			
 
				+from scripts.data_loader import (
			
 
				+    load_generic_excel,
			
 
				+    load_generic_all_sheets,
			
 
				+    load_generic_csv,
			
 
				+    auto_detect_date_column,
			
 
				+    load_generic_file_info,
			
 
				+    normalize_column_names,
			
 
				+)
			
 
				+
			
 
				+# 加载主表（自动识别 xlsx/xls/csv，自动清洗）
			
 
				+df = load_generic_excel('任意数据文件.xlsx')
			
 
				+df = load_generic_excel('任意数据文件.csv', encoding='gbk')  # CSV 可指定编码
			
 
				+df = load_generic_excel('data.xlsx', sheet_name='Sheet1')    # 指定 sheet
			
 
				+
			
 
				+# 合并所有 Sheet（Excel 文件）
			
 
				+df_all = load_generic_all_sheets('多sheet文件.xlsx')
			
 
				+
			
 
				+# 直接加载 CSV（带自动编码检测）
			
 
				+df_csv = load_generic_csv('data.csv')
			
 
				+
			
 
				+# 轻量文件信息（不加载全量数据）
			
 
				+info = load_generic_file_info('data.xlsx')
			
 
				+# 返回: {format, sheet_names, sheet_count, file_size_mb}
			
 
				+```
			
 
				+
			
 
				+### 智能清洗特性
			
 
				+
			
 
				+`load_generic_excel()` 与 `_clean_generic_dataframe()` 自动执行：
			
 
				+
			
 
				+| 清洗步骤 | 说明 |
			
 
				+|---------|------|
			
 
				+| 去除全空行列 | `dropna(how='all')` 清除完全为空的行和列 |
			
 
				+| 去除 Unnamed 列 | 自动过滤 pandas 自动生成的 Unnamed 列 |
			
 
				+| **列名规范化** | 全角括号→半角、去除首尾空格、统一空白字符 |
			
 
				+| **汇总行自动去除** | 自动检测底部"合计/总计/小计/total/sum"等汇总行并移除 |
			
 
				+| 空首尾行清理 | 检测并裁剪前导和尾随的完全空行 |
			
 
				+| 日期智能解析 | object 类型列尝试 `pd.to_datetime()`，成功 >70% 则转换 |
			
 
				+| 数值智能解析 | object 类型列尝试 `pd.to_numeric()`，成功 >70% 则转换 |
			
 
				+
			
 
				+> 注意：`load_generic_all_sheets()` 会给每行添加 `_source_sheet` 列标记来源 sheet。
			
 
				+
			
 
				+### 列名规范化示例
			
 
				+
			
 
				+```python
			
 
				+from scripts.data_loader import normalize_column_names
			
 
				+
			
 
				+# 全角括号 → 半角：   描述（国家+车型+台数+交期） → 描述(国家+车型+台数+交期)
			
 
				+# 首尾空格去除：         "  客户姓名  "           → "客户姓名"
			
 
				+# 换行符替换：           "客户\n姓名"             → "客户 姓名"
			
 
				+```
			
 
				+
			
 
				+## 数据探查
			
 
				+
			
 
				+```python
			
 
				+from scripts.data_profiler import profile_dataframe, detect_data_issues
			
 
				+
			
 
				+# 自动探查数据结构
			
 
				+profile = profile_dataframe(df)
			
 
				+
			
 
				+# 检测数据问题
			
 
				+issues = detect_data_issues(df)
			
 
				+
			
 
				+# 生成探索摘要
			
 
				+from scripts.data_profiler import generate_summary_text
			
 
				+print(generate_summary_text(profile))
			
 
				+
			
 
				+# 分类分布细化分析
			
 
				+from scripts.data_profiler import profile_category_distribution, profile_numeric_series
			
 
				+dist = profile_category_distribution(df, '客户类型')       # 分类分布（含 HHI 集中度）
			
 
				+stats = profile_numeric_series(df, '总需求台数')           # 数值详细统计（含分布形态）
			
 
				+```
			
 
				+
			
 
				+## 配置驱动的指标计算
			
 
				+
			
 
				+```python
			
 
				+from scripts.metrics_calculator import (
			
 
				+    calc_generic_metrics,
			
 
				+    calc_generic_trend,
			
 
				+    calc_generic_distribution,
			
 
				+    calc_generic_ranking,
			
 
				+    generate_generic_insights,
			
 
				+)
			
 
				+from scripts.report_config import ReportConfig, MetricDef
			
 
				+
			
 
				+# 按配置计算指标
			
 
				+metrics = calc_generic_metrics(df, config)
			
 
				+
			
 
				+# 按指定列计算趋势
			
 
				+trend = calc_generic_trend(df, '日期列', '数值列')
			
 
				+
			
 
				+# 分布分析
			
 
				+dist = calc_generic_distribution(df, '分类列', '数值列')
			
 
				+
			
 
				+# 排行分析
			
 
				+ranking = calc_generic_ranking(df, '排行维度列', '数值列')
			
 
				+
			
 
				+# 智能洞察生成
			
 
				+insights = generate_generic_insights(profile, metrics)
			
 
				+```
			
 
				+
			
 
				+## 推荐配置生成
			
 
				+
			
 
				+```python
			
 
				+from scripts.agent_analyzer import analyze_and_recommend
			
 
				+
			
 
				+# 根据数据探查结果自动推荐指标集和页面结构
			
 
				+recommendations = analyze_and_recommend(profile, period_type)
			
 
				+```
			
 
				+
			
 
				+## 完整工作流示例
			
 
				+
			
 
				+```python
			
 
				+# 1. 加载数据
			
 
				+df = load_generic_excel('data.xlsx')
			
 
				+
			
 
				+# 2. 数据探查
			
 
				+profile = profile_dataframe(df)
			
 
				+
			
 
				+# 3. 自动推荐
			
 
				+recs = analyze_and_recommend(profile)
			
 
				+
			
 
				+# 4. 构建配置（可人工确认调整）
			
 
				+config = ReportConfig(
			
 
				+    title='数据分析报告',
			
 
				+    metrics=[...],  # 从 recs['suggested_metrics'] 选取
			
 
				+    pages=[...],    # 从 recs['suggested_pages'] 选取
			
 
				+)
			
 
				+
			
 
				+# 5. 计算指标
			
 
				+metrics = calc_generic_metrics(df, config)
			
 
				+
			
 
				+# 6. 生成 PPT
			
 
				+output_path, issues = quality_assured_build('data.xlsx', config, 'output.pptx')
			
 
				+```
			
--- a/generate-data-report-ppt/references/quality-standards.md
+++ b/generate-data-report-ppt/references/quality-standards.md
@@ -0,0 +1,244 @@
 
				+# PPT 生成质量强制规范
			
 
				+
			
 
				+> **核心原则**：生成的每一页 PPT 都必须包含**数据可视化图表 + 深度分析文本**，
			
 
				+> 严禁出现空页、纯图表页、纯文字页。质量检查不合格的页面必须自动修复或回退重建。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 一、页面内容完整性强制标准
			
 
				+
			
 
				+### 1.1 每页必须包含的元素（MINIMUM_REQUIRED）
			
 
				+
			
 
				+| 页面类型 | 至少图表数 | 至少分析段数 | 每段最少字数 |
			
 
				+|---------|-----------|------------|------------|
			
 
				+| KPI概览页 | 6个KPI卡片 | **4段** | 150字/段 |
			
 
				+| 趋势分析页 | 1个趋势图 | **4段** | 150字/段 |
			
 
				+| 分布分析页 | 1个图表（环形/柱状） | **3段** | 150字/段 |
			
 
				+| 排行分析页 | 1个条形图 | **4段** | 150字/段 |
			
 
				+| 总结/建议页 | 无硬性要求 | **4段** | 150字/段 |
			
 
				+| 封面/目录/尾页 | 无硬性要求 | 1段简介 | 30字/段 |
			
 
				+
			
 
				+### 1.2 分析文本深度要求
			
 
				+
			
 
				+分析文本必须包含**具体数值引用**和**业务洞察建议**，不得是泛泛概括：
			
 
				+
			
 
				+| 分析维度 | 必须包含的内容 |
			
 
				+|---------|-------------|
			
 
				+| 数据引用 | 引用具体数值（含单位），如"XXX 台"、"占比 XX%"、"增长 XX%" |
			
 
				+| 对比分析 | 与同类/历史/目标进行对比，说明高低/好坏 |
			
 
				+| 原因解读 | 对数据背后的原因进行分析（至少 2 条可能原因） |
			
 
				+| 业务建议 | 给出可执行的业务行动建议（不空泛说"加强"、"优化"） |
			
 
				+
			
 
				+### 1.3 页面为空判定（CRITICAL）
			
 
				+
			
 
				+以下任一种情况判定为**页面为空**，严重级别 **critical**：
			
 
				+
			
 
				+- 页面内所有文本框总字数 < 50 字
			
 
				+- 趋势/分布/排行分析页没有任何图表（shape_type 为 chart 的元素数为 0；封面/目录/总结/尾页及 KPI 概览页不适用此条）
			
 
				+- 页面有图表但无分析文本（图表下方/右侧无 insight 文本块）
			
 
				+- 页面所有文本均为占位符替换后的默认文本（如标题"数据详情"无实质内容）
			
 
				+- 页面仅有一行文字（如仅有标题没有正文）
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 二、图表完整性强制标准
			
 
				+
			
 
				+### 2.1 图表必须可读
			
 
				+
			
 
				+- 图表标题不可为空
			
 
				+- 坐标轴标签必须显示完整（不被截断）
			
 
				+- 数据系列颜色必须与主题配色一致
			
 
				+- 图表尺寸必须占据合理区域（宽度 ≥ 页面宽度的 35%）
			
 
				+
			
 
				+### 2.2 图表与分析文本的关联
			
 
				+
			
 
				+- 图表中的关键数据必须在右侧/下方分析文本中被引用
			
 
				+- 分析文本中的数值必须与图表数据一致
			
 
				+- 图表和分析文本必须在视觉上属于同一页（不能有割裂感）
			
 
				+
			
 
				+### 2.3 图表尺寸下限
			
 
				+
			
 
				+| 图表类型 | 最小宽度 | 最小高度 |
			
 
				+|---------|---------|---------|
			
 
				+| 趋势折线图 | SLIDE_WIDTH × 0.40 | SLIDE_HEIGHT × 0.35 |
			
 
				+| 分布环形图/饼图 | SLIDE_WIDTH × 0.30 | SLIDE_HEIGHT × 0.30 |
			
 
				+| 排行条形图 | SLIDE_WIDTH × 0.45 | SLIDE_HEIGHT × 0.45 |
			
 
				+| KPI 卡片 | SLIDE_WIDTH × 0.28（单张） | SLIDE_HEIGHT × 0.15（单张） |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 三、布局合理性强制标准
			
 
				+
			
 
				+### 3.1 安全区域
			
 
				+
			
 
				+所有元素必须位于以下安全区域内：
			
 
				+
			
 
				+- 左边界 ≥ CONTENT_LEFT (Emu(762000) ≈ 2cm)
			
 
				+- 右边界 ≤ SLIDE_WIDTH - Emu(762000)
			
 
				+- 上边界 ≥ Emu(1524000)（避开页眉导航栏）
			
 
				+- 下边界 ≤ SLIDE_HEIGHT - Emu(700000)（避开页脚区域）
			
 
				+
			
 
				+### 3.2 图文重叠检测
			
 
				+
			
 
				+- 任意两个形状的重叠面积 > 任一形状面积的 10% 则判定为重叠
			
 
				+- 标题文字与导航标签的重叠豁免
			
 
				+- 必须检测 chart 与 textbox 的重叠
			
 
				+
			
 
				+### 3.3 填充率
			
 
				+
			
 
				+- 内容填充率 < 20% → **critical**（页面基本为空）
			
 
				+- 内容填充率 < 35% → **major**（留白严重）
			
 
				+- 内容填充率 < 65% → **minor**（留白偏多）
			
 
				+- 内容填充率 ≥ 65% → 合格
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 四、视觉一致性强制标准
			
 
				+
			
 
				+### 4.1 字体规范
			
 
				+
			
 
				+- 中文字体：微软雅黑
			
 
				+- 数字/英文字体：Arial
			
 
				+- 标题字号：24pt - 32pt
			
 
				+- 正文字号：11pt - 14pt
			
 
				+- KPI 数值字号：28pt - 36pt
			
 
				+- 同一页面字体种类 ≤ 2 种
			
 
				+
			
 
				+### 4.2 颜色规范
			
 
				+
			
 
				+- 主色：主题 primary 色
			
 
				+- 强调色：主题 accent 色
			
 
				+- 图表系列色：主题 series 色板
			
 
				+- 正文色：#333333
			
 
				+- 次要文字色：#666666
			
 
				+- 背景色：#FFFFFF
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 五、六项用户确认的强制校验
			
 
				+
			
 
				+### 5.1 报告周期与页数范围
			
 
				+
			
 
				+| 校验项 | 说明 |
			
 
				+|--------|------|
			
 
				+| 封面日期 | 必须与用户确认的周期一致 |
			
 
				+| 总页数 | 必须在用户确认的范围内（允许 ±1 页） |
			
 
				+| 目录页 | 若页数 ≥ 5 则必须包含目录页 |
			
 
				+
			
 
				+### 5.2 核心指标集
			
 
				+
			
 
				+- 每个 KPI 指标必须在 KPI 概览页出现
			
 
				+- 每个 KPI 的值必须来自数据计算而非硬编码
			
 
				+- KPI 卡片不得超过 6 个（超出则合并或精简）
			
 
				+
			
 
				+### 5.3 受众与决策场景
			
 
				+
			
 
				+- 分析文本的语言风格必须匹配受众（管理层 → 结论优先，执行层 → 细节优先）
			
 
				+- 建议内容必须对应决策场景
			
 
				+
			
 
				+### 5.4 视觉风格与配色方向
			
 
				+
			
 
				+- 每页遵循相同的主题配色
			
 
				+- 不允许出现硬编码的颜色值（必须从 theme 获取）
			
 
				+
			
 
				+### 5.5 页面结构与模板方案
			
 
				+
			
 
				+- 实际生成的页面类型和顺序必须与用户确认的 pages 列表一致
			
 
				+- 不允许跳过任何用户选定的页面（除非数据不支持且已告警）
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 六、自检修复迭代标准
			
 
				+
			
 
				+### 6.1 迭代退出条件
			
 
				+
			
 
				+| 条件 | 说明 |
			
 
				+|------|------|
			
 
				+| 无任何 critical 级别问题 | 必须满足 |
			
 
				+| 无任何 major 级别问题 | 必须满足 |
			
 
				+| minor 级别问题 ≤ 2 个 | 强烈推荐 |
			
 
				+| 质量评分 ≥ 85 | 必须满足 |
			
 
				+
			
 
				+### 6.2 修复策略
			
 
				+
			
 
				+| 问题类型 | 修复方式 |
			
 
				+|---------|---------|
			
 
				+| 页面为空 | **不允许简单添加提示文字"建议补充"**，必须回退重建页面，调用 build 函数重新生成完整分析内容 |
			
 
				+| 图文重叠 | 重新计算位置，向下/右偏移；若空间不足则缩小图表 |
			
 
				+| 飞出页面 | 裁剪到安全区域内，必要时缩放 |
			
 
				+| 分析文本过短 | 扩写分析文本，补充数据引用和业务洞察 |
			
 
				+| 图表无数据 | 检查数据来源，使用备用数据列或降级为表格 |
			
 
				+| 占位符未替换 | 清空未替换的占位符文本 |
			
 
				+
			
 
				+### 6.3 最大迭代次数
			
 
				+
			
 
				+- 默认最大迭代 5 次
			
 
				+- 若第 5 次仍有 critical 问题，**必须报错**，不允许输出不合格 PPT
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 七、禁止事项（DO NOT）
			
 
				+
			
 
				+1. **禁止** 生成只有图表没有分析文本的页面
			
 
				+2. **禁止** 生成只有分析文本没有图表的分析页（KPI/趋势/分布/排行页）
			
 
				+3. **禁止** 分析文本中出现"暂无数据"、"数据不足"作为唯一内容（必须挖掘现有数据维度）
			
 
				+4. **禁止** 在页面空白处简单添加"建议补充图表"的提示文字代替实际内容
			
 
				+5. **禁止** 使用 placeholder 文本（如 `{page_title}` 未替换）
			
 
				+6. **禁止** 任何元素飞出或紧贴页面边缘（安全边距≥2cm）
			
 
				+7. **禁止** 在总结页仅列出不足 3 条建议
			
 
				+8. **禁止** 分析文本使用模糊措辞如"要加强"、"进一步优化"（必须具体可执行）
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 八、理想 PPT 页面范例
			
 
				+
			
 
				+### KPI 概览页 范例
			
 
				+
			
 
				+```
			
 
				+┌──────────────────────────────────────────────────────┐
			
 
				+│  KPI1: 总需求 1,250 台   KPI2: 已下单 780 台         │
			
 
				+│  KPI3: 转化率 62.4%      KPI4: 覆盖客户 93 个          │
			
 
				+│  KPI5: 支持闭环率 78%    KPI6: 覆盖国家 96 个          │
			
 
				+├──────────────────────────────────────────────────────┤
			
 
				+│  需求总量与转化结构                                    │
			
 
				+│  本期客户意向项目总需求台数为 1,250 台，其中累计已下单    │
			
 
				+│  780 台（占比 62.4%），未下单 470 台（占比 37.6%），    │
			
 
				+│  下单转化率 62.4%...（≥150字深度分析）                  │
			
 
				+├──────────────────────────────────────────────────────┤
			
 
				+│  客户覆盖与服务广度                                    │
			
 
				+│  本期覆盖客户 93 个，涉及 96 个意向国家...              │
			
 
				+├──────────────────────────────────────────────────────┤
			
 
				+│  跨部门支持闭环效率                                    │
			
 
				+│  支持需求闭环率 78%，...                               │
			
 
				+├──────────────────────────────────────────────────────┤
			
 
				+│  未下单需求跟进策略                                    │
			
 
				+│  当前未下单 470 台，...                                │
			
 
				+└──────────────────────────────────────────────────────┘
			
 
				+```
			
 
				+
			
 
				+### 分布分析页 范例
			
 
				+
			
 
				+```
			
 
				+┌───────────────────────┬──────────────────────────────┐
			
 
				+│                       │  意向级别分布概况              │
			
 
				+│    环形图              │  共有 6 个不同的意向级别...    │
			
 
				+│    (左 55%)           │                              │
			
 
				+│                       │  排名第一: A级                │
			
 
				+│                       │  A级以 450 台（占比 36%）...   │
			
 
				+│                       │                              │
			
 
				+│                       │  长尾分布特征                  │
			
 
				+│                       │  前三名累计占比 72%...         │
			
 
				+└───────────────────────┴──────────────────────────────┘
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 九、质检流程集成
			
 
				+
			
 
				+### Agent 在生成 PPT 前必须读取本文档
			
 
				+
			
 
				+Agent 在调用 `quality_assured_build` 之前必须：
			
 
				+1. 读取本文档了解质量标准
			
 
				+2. 将本文档的质量要求映射到 `QualityRules` 参数中
			
 
				+3. 确保 `ReportConfig` 的 `quality_threshold` ≥ 85
			
 
				+4. 确保 `ReportConfig` 的 `max_fix_iterations` ≥ 5
			
--- a/generate-data-report-ppt/references/report-structures.md
+++ b/generate-data-report-ppt/references/report-structures.md
@@ -82,3 +82,29 @@
 
				 | `{page_num}` | 内容页底部 | 页码 |
			
 
				 | `{kpiN_label}` / `{kpiN_value}` | 封面/尾页KPI卡片 | 第N个指标的标签和数值 |
			
 
				 | `{chapterN_title}` / `{chapterN_desc}` | 目录页 | 第N章标题和描述 |
			
 
				+
			
 
				+
			
 
				+## 通用报告页面结构（新增）
			
 
				+
			
 
				+通用构建器支持动态页面结构，通过 `ReportConfig.pages` 配置，无需硬编码。
			
 
				+
			
 
				+### 支持的页面类型
			
 
				+
			
 
				+| page_type | 用途 | 布局模板 |
			
 
				+|-----------|------|---------|
			
 
				+| `cover` | 封面页 | 固定封面布局 |
			
 
				+| `toc` | 目录页 | 章节目录网格 |
			
 
				+| `kpi_overview` | 核心指标概览 | KPI 卡片网格（3×2 / 自定义行列） |
			
 
				+| `trend` | 趋势分析 | 左侧趋势图 + 右侧洞察文本 |
			
 
				+| `distribution` | 分布分析 | 左侧图表 + 右侧洞察文本 |
			
 
				+| `ranking` | 排行分析 | 左侧条形图 + 右侧排行说明 |
			
 
				+| `summary` | 总结与建议 | 全宽洞察文本块 |
			
 
				+| `end` | 结束页 | 固定尾页布局 |
			
 
				+
			
 
				+### 页面确认项
			
 
				+
			
 
				+用户需确认每页的：
			
 
				+1. 页面标题（如"月度销售额趋势"）
			
 
				+2. 结论标题（用于导航标签和洞察总结）
			
 
				+3. 图表类型（BAR / LINE / PIE / DOUGHNUT / TABLE / AUTO）
			
 
				+4. 布局模板（chart_left / two_column / full_width / kpi_grid）
			
--- a/generate-data-report-ppt/references/visual-style-guide.md
+++ b/generate-data-report-ppt/references/visual-style-guide.md
@@ -53,3 +53,38 @@
 
				 - **KPI卡片**：圆角矩形（ROUNDED_RECTANGLE），填充 `#E7F0F7`，无边框
			
 
				 - **告警卡片**：矩形，左侧带 50800 EMU 宽度的色条（严重=红色/警告=橙色/关注=蓝色）
			
 
				 - **分隔线**：高度 0-50800 EMU 的矩形，填充 `#D9D9D9` 或 `#2E5B8B`
			
 
				+
			
 
				+
			
 
				+## 多主题配色方案（新增）
			
 
				+
			
 
				+通用报告支持 5 套预设主题，可通过 `ReportConfig.theme` 选择。
			
 
				+
			
 
				+| 主题ID | 名称 | 主色 | 辅色 | 强调色 | 风格描述 |
			
 
				+|--------|------|------|------|--------|---------|
			
 
				+| `business_classic` | 商务经典 | `#1E3A5F` | `#10B981` | `#5B9BD5` | 深海蓝主调，稳重大气，适合正式汇报 |
			
 
				+| `fresh_minimal` | 清新简约 | `#059669` | `#34D399` | `#F59E0B` | 翠绿主调，清爽现代，适合创新团队 |
			
 
				+| `tech_blue` | 科技蓝调 | `#2563EB` | `#06B6D4` | `#8B5CF6` | 科技蓝主调，年轻动感，适合数字化报告 |
			
 
				+| `warm_orange` | 暖橙活力 | `#EA580C` | `#F97316` | `#EAB308` | 暖橙主调，温暖亲和，适合运营报告 |
			
 
				+| `dark_pro` | 暗夜深邃 | `#0F172A` | `#38BDF8` | `#818CF8` | 深色主调，高端神秘，适合战略报告 |
			
 
				+
			
 
				+### 自定义配色
			
 
				+
			
 
				+```python
			
 
				+from scripts.report_config import ReportConfig
			
 
				+
			
 
				+config = ReportConfig(
			
 
				+    custom_colors={
			
 
				+        'primary': '#4F46E5',
			
 
				+        'secondary': '#EC4899',
			
 
				+        'accent': '#F59E0B',
			
 
				+    }
			
 
				+)
			
 
				+```
			
 
				+
			
 
				+### 质量自检视觉效果
			
 
				+
			
 
				+- 检测字体种类是否超过 3 种，超过时自动统一为微软雅黑
			
 
				+- 字号 < 8pt 时自动提升至 8pt，> 60pt 时自动降至 60pt
			
 
				+- 元素飞出页面边界时自动推回安全区域
			
 
				+- 页面填充率 < 25% 时追加补充建议文本框
			
 
				+- 图文重叠时自动错开位置
			
--- a/generate-data-report-ppt/scripts/agent_analyzer.py
+++ b/generate-data-report-ppt/scripts/agent_analyzer.py
@@ -0,0 +1,360 @@
 
				+"""
			
 
				+Agent analyzer: intelligent analysis of data profile to generate
			
 
				+recommendations for metrics, pages, charts, and overall report structure.
			
 
				+Uses rule-based heuristics for analysis and generates structured recommendations.
			
 
				+"""
			
 
				+from report_config import (
			
 
				+    MetricDef, PageDef, MetricType, AggregationType, ChartType,
			
 
				+    PeriodType, ColumnRole
			
 
				+)
			
 
				+
			
 
				+
			
 
				+def analyze_and_recommend(profile: dict, period_type: PeriodType = PeriodType.MONTHLY) -> dict:
			
 
				+    recommendations = {
			
 
				+        'suggested_metrics': _recommend_metrics(profile),
			
 
				+        'suggested_pages': _recommend_pages(profile, period_type),
			
 
				+        'suggested_period': period_type.value,
			
 
				+        'suggested_page_range': (6, 15),
			
 
				+        'data_summary': _build_summary(profile),
			
 
				+        'chart_mapping': _build_chart_mapping(profile),
			
 
				+        'analysis_notes': _build_analysis_notes(profile),
			
 
				+    }
			
 
				+    recommendations.update(_suggest_period_and_range(profile))
			
 
				+    return recommendations
			
 
				+
			
 
				+
			
 
				+def _recommend_metrics(profile: dict) -> list[dict]:
			
 
				+    metrics = []
			
 
				+    num_cols = profile.get('numeric_columns', [])
			
 
				+    cat_cols = profile.get('category_columns', [])
			
 
				+
			
 
				+    for i, col in enumerate(num_cols):
			
 
				+        ns = col.get('numeric_stats', {}) or {}
			
 
				+        label = col.get('inferred_label', col['column_name'])
			
 
				+        unit = _infer_unit(col['column_name'])
			
 
				+        is_primary = i < 4
			
 
				+
			
 
				+        metrics.append({
			
 
				+            'name': f"{label}_{col['column_name']}",
			
 
				+            'label': label,
			
 
				+            'column': col['column_name'],
			
 
				+            'aggregation': 'sum',
			
 
				+            'metric_type': 'kpi',
			
 
				+            'unit': unit,
			
 
				+            'selected': is_primary,
			
 
				+            'is_primary': is_primary,
			
 
				+            'sample_value': ns.get('sum', 0),
			
 
				+        })
			
 
				+
			
 
				+        if len(num_cols) <= 4 and ns.get('sum', 0) > 100:
			
 
				+            metrics.append({
			
 
				+                'name': f"日均{label}",
			
 
				+                'label': f"日均{label}",
			
 
				+                'column': col['column_name'],
			
 
				+                'aggregation': 'avg',
			
 
				+                'metric_type': 'kpi',
			
 
				+                'unit': unit,
			
 
				+                'selected': False,
			
 
				+                'is_primary': False,
			
 
				+                'sample_value': ns.get('mean', 0),
			
 
				+            })
			
 
				+
			
 
				+    if cat_cols:
			
 
				+        top_cat = cat_cols[0]
			
 
				+        metrics.append({
			
 
				+            'name': f"覆盖{top_cat['inferred_label']}数",
			
 
				+            'label': f"覆盖{top_cat['inferred_label']}数",
			
 
				+            'column': top_cat['column_name'],
			
 
				+            'aggregation': 'distinct_count',
			
 
				+            'metric_type': 'kpi',
			
 
				+            'unit': '个',
			
 
				+            'selected': True,
			
 
				+            'is_primary': False,
			
 
				+            'sample_value': top_cat.get('unique_count', 0),
			
 
				+        })
			
 
				+
			
 
				+    return metrics
			
 
				+
			
 
				+
			
 
				+def _recommend_pages(profile: dict, period_type: PeriodType) -> list[dict]:
			
 
				+    pages = []
			
 
				+    order = 0
			
 
				+
			
 
				+    pages.append({
			
 
				+        'page_id': 'cover',
			
 
				+        'title': '封面',
			
 
				+        'page_type': 'cover',
			
 
				+        'order': order,
			
 
				+        'selected': True,
			
 
				+        'elements': [],
			
 
				+        'conclusion_title': '',
			
 
				+    })
			
 
				+    order += 1
			
 
				+
			
 
				+    num_cols = profile.get('numeric_columns', [])
			
 
				+    if period_type in (PeriodType.MONTHLY, PeriodType.QUARTERLY):
			
 
				+        pages.append({
			
 
				+            'page_id': 'toc',
			
 
				+            'title': '目录',
			
 
				+            'page_type': 'toc',
			
 
				+            'order': order,
			
 
				+            'selected': True,
			
 
				+            'elements': [],
			
 
				+        })
			
 
				+        order += 1
			
 
				+
			
 
				+    pages.append({
			
 
				+        'page_id': 'kpi_overview',
			
 
				+        'title': '核心指标概览',
			
 
				+        'page_type': 'kpi_overview',
			
 
				+        'order': order,
			
 
				+        'selected': True,
			
 
				+        'elements': [{'type': 'kpi_cards', 'count': min(6, len(num_cols))}],
			
 
				+        'conclusion_title': '核心指标概览',
			
 
				+    })
			
 
				+    order += 1
			
 
				+
			
 
				+    time_cols = profile.get('time_columns', [])
			
 
				+    if time_cols and num_cols:
			
 
				+        top_num = num_cols[0]
			
 
				+        pages.append({
			
 
				+            'page_id': 'trend',
			
 
				+            'title': f'{top_num["inferred_label"]}趋势',
			
 
				+            'page_type': 'trend',
			
 
				+            'order': order,
			
 
				+            'selected': True,
			
 
				+            'elements': [
			
 
				+                {'type': 'line_chart', 'metric': top_num['column_name'],
			
 
				+                 'dimension': time_cols[0]['column_name'], 'title': f'{top_num["inferred_label"]}趋势'}
			
 
				+            ],
			
 
				+            'conclusion_title': f'{top_num["inferred_label"]}趋势',
			
 
				+        })
			
 
				+        order += 1
			
 
				+
			
 
				+    cat_cols = profile.get('category_columns', [])
			
 
				+    if cat_cols and num_cols:
			
 
				+        top_cat = cat_cols[0]
			
 
				+        top_num = num_cols[0]
			
 
				+        pages.append({
			
 
				+            'page_id': 'distribution',
			
 
				+            'title': f'{top_cat["inferred_label"]}分布',
			
 
				+            'page_type': 'distribution',
			
 
				+            'order': order,
			
 
				+            'selected': True,
			
 
				+            'elements': [
			
 
				+                {'type': 'doughnut_chart', 'metric': top_num['column_name'],
			
 
				+                 'dimension': top_cat['column_name'], 'title': f'{top_cat["inferred_label"]}占比'}
			
 
				+            ],
			
 
				+            'conclusion_title': f'{top_cat["inferred_label"]}分布',
			
 
				+        })
			
 
				+        order += 1
			
 
				+
			
 
				+        if len(cat_cols) >= 2:
			
 
				+            cat2 = cat_cols[1] if len(cat_cols) > 1 else cat_cols[0]
			
 
				+            pages.append({
			
 
				+                'page_id': 'ranking',
			
 
				+                'title': f'{cat2["inferred_label"]}排行',
			
 
				+                'page_type': 'ranking',
			
 
				+                'order': order,
			
 
				+                'selected': True,
			
 
				+                'elements': [
			
 
				+                    {'type': 'bar_chart', 'metric': num_cols[0]['column_name'],
			
 
				+                     'dimension': cat2['column_name'], 'title': f'{cat2["inferred_label"]}TOP排行'}
			
 
				+                ],
			
 
				+                'conclusion_title': f'{cat2["inferred_label"]}TOP排行',
			
 
				+            })
			
 
				+            order += 1
			
 
				+
			
 
				+    pages.append({
			
 
				+        'page_id': 'summary',
			
 
				+        'title': '总结与建议',
			
 
				+        'page_type': 'summary',
			
 
				+        'order': order,
			
 
				+        'selected': True,
			
 
				+        'elements': [{'type': 'insight_block', 'title': '总结与建议'}],
			
 
				+        'conclusion_title': '总结与建议',
			
 
				+    })
			
 
				+    order += 1
			
 
				+
			
 
				+    pages.append({
			
 
				+        'page_id': 'end',
			
 
				+        'title': '尾页',
			
 
				+        'page_type': 'end',
			
 
				+        'order': order,
			
 
				+        'selected': True,
			
 
				+        'elements': [],
			
 
				+    })
			
 
				+
			
 
				+    return pages
			
 
				+
			
 
				+
			
 
				+def _suggest_period_and_range(profile: dict) -> dict:
			
 
				+    granularity = profile.get('time_granularity', 'monthly')
			
 
				+    dr = profile.get('date_range', (None, None))
			
 
				+
			
 
				+    period_map = {
			
 
				+        'daily': PeriodType.DAILY,
			
 
				+        'weekly': PeriodType.WEEKLY,
			
 
				+        'monthly': PeriodType.MONTHLY,
			
 
				+        'quarterly': PeriodType.QUARTERLY,
			
 
				+        'yearly': PeriodType.MONTHLY,
			
 
				+    }
			
 
				+    suggested = period_map.get(granularity, PeriodType.MONTHLY)
			
 
				+
			
 
				+    page_range_map = {
			
 
				+        'daily': (6, 9),
			
 
				+        'weekly': (7, 11),
			
 
				+        'monthly': (8, 14),
			
 
				+        'quarterly': (10, 18),
			
 
				+        'yearly': (12, 20),
			
 
				+    }
			
 
				+    page_range = page_range_map.get(granularity, (8, 14))
			
 
				+
			
 
				+    return {
			
 
				+        'suggested_period': suggested.value,
			
 
				+        'suggested_page_range': page_range,
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def _build_chart_mapping(profile: dict) -> list[dict]:
			
 
				+    mapping = []
			
 
				+    num_cols = profile.get('numeric_columns', [])
			
 
				+    time_cols = profile.get('time_columns', [])
			
 
				+    cat_cols = profile.get('category_columns', [])
			
 
				+
			
 
				+    if time_cols and num_cols:
			
 
				+        for nc in num_cols[:3]:
			
 
				+            mapping.append({
			
 
				+                'metric': nc['inferred_label'],
			
 
				+                'metric_col': nc['column_name'],
			
 
				+                'dimension': time_cols[0]['column_name'],
			
 
				+                'dimension_label': '时间',
			
 
				+                'chart_type': ChartType.LINE.value,
			
 
				+                'rationale': f'{nc["inferred_label"]}随时间变化趋势',
			
 
				+            })
			
 
				+
			
 
				+    if cat_cols and num_cols:
			
 
				+        top_num = num_cols[0]
			
 
				+        for cc in cat_cols[:3]:
			
 
				+            chart_type = ChartType.DOUGHNUT.value if cc['unique_count'] <= 8 else ChartType.BAR.value
			
 
				+            mapping.append({
			
 
				+                'metric': top_num['inferred_label'],
			
 
				+                'metric_col': top_num['column_name'],
			
 
				+                'dimension': cc['column_name'],
			
 
				+                'dimension_label': cc['inferred_label'],
			
 
				+                'chart_type': chart_type,
			
 
				+                'rationale': f'{top_num["inferred_label"]}按{cc["inferred_label"]}的分布',
			
 
				+            })
			
 
				+
			
 
				+    return mapping
			
 
				+
			
 
				+
			
 
				+def _build_summary(profile: dict) -> str:
			
 
				+    lines = []
			
 
				+    lines.append(f"数据量: {profile['total_rows']:,} 行 × {profile['total_columns']} 列")
			
 
				+
			
 
				+    num_cols = profile.get('numeric_columns', [])
			
 
				+    cat_cols = profile.get('category_columns', [])
			
 
				+    time_cols = profile.get('time_columns', [])
			
 
				+
			
 
				+    lines.append(f"可计算指标: {len(num_cols)} 个数值列")
			
 
				+    lines.append(f"可分析维度: {len(cat_cols)} 个分类列")
			
 
				+    if time_cols:
			
 
				+        lines.append(f"时间列: {time_cols[0]['column_name']}")
			
 
				+    lines.append(f"数据粒度: {profile.get('time_granularity', 'unknown')}")
			
 
				+
			
 
				+    dr = profile.get('date_range', (None, None))
			
 
				+    if dr[0]:
			
 
				+        lines.append(f"时间范围: {dr[0]} ~ {dr[1]}")
			
 
				+    q = profile.get('data_quality', {})
			
 
				+    lines.append(f"质量评分: {q.get('score', 0)}/100")
			
 
				+
			
 
				+    return '\n'.join(lines)
			
 
				+
			
 
				+
			
 
				+def _build_analysis_notes(profile: dict) -> list[str]:
			
 
				+    notes = []
			
 
				+    num_cols = profile.get('numeric_columns', [])
			
 
				+    cat_cols = profile.get('category_columns', [])
			
 
				+
			
 
				+    if not cat_cols:
			
 
				+        notes.append('数据中缺少分类维度列，报告将以数值汇总为主，建议补充分类字段以增强分析深度。')
			
 
				+
			
 
				+    if len(num_cols) >= 4:
			
 
				+        names = [c['inferred_label'] for c in num_cols[:4]]
			
 
				+        notes.append(f'核心数值指标: {", ".join(names)}')
			
 
				+
			
 
				+    if len(cat_cols) == 1:
			
 
				+        notes.append(f'仅有一个分类维度列 ({cat_cols[0]["inferred_label"]})，报告分析维度较窄。')
			
 
				+    elif len(cat_cols) >= 3:
			
 
				+        names = [c['inferred_label'] for c in cat_cols[:3]]
			
 
				+        notes.append(f'分类维度丰富 ({", ".join(names)})，可支撑多维交叉分析。')
			
 
				+
			
 
				+    q = profile.get('data_quality', {})
			
 
				+    if q.get('score', 100) < 85:
			
 
				+        notes.append(f'数据质量评分偏低 ({q["score"]}/100)，建议在生成前检查缺失值与异常值。')
			
 
				+
			
 
				+    return notes
			
 
				+
			
 
				+
			
 
				+def _infer_unit(col_name: str) -> str:
			
 
				+    col_lower = col_name.lower().strip()
			
 
				+    unit_map = {
			
 
				+        '金额': '元', '销售额': '元', '收入': '元', '利润': '元',
			
 
				+        '成本': '元', '费用': '元', '台数': '台', '件数': '件',
			
 
				+        '数量': '', '人数': '人', '天数': '天', '占比': '%',
			
 
				+        '比率': '%', '比例': '%', '率': '%',
			
 
				+    }
			
 
				+    for kw, unit in unit_map.items():
			
 
				+        if kw in col_lower:
			
 
				+            return unit
			
 
				+    return ''
			
 
				+
			
 
				+
			
 
				+def generate_interaction_prompts(recommendations: dict, profile: dict) -> dict:
			
 
				+    return {
			
 
				+        'period': {
			
 
				+            'question': '报告周期与页数范围',
			
 
				+            'detail': f"建议周期: {recommendations['suggested_period']}报\n建议页数: {recommendations['suggested_page_range'][0]}-{recommendations['suggested_page_range'][1]} 页\n请确认或调整",
			
 
				+        },
			
 
				+        'metrics': {
			
 
				+            'question': '核心指标集',
			
 
				+            'detail': f"检测到 {len(recommendations['suggested_metrics'])} 个可计算指标\n已自动推荐主要的 {min(6, len(recommendations['suggested_metrics']))} 个\n请确认或增删",
			
 
				+        },
			
 
				+        'audience': {
			
 
				+            'question': '受众与决策场景',
			
 
				+            'detail': '请选择: 管理层汇报 | 运营分析会 | 对外客户报告 | 自定义描述',
			
 
				+        },
			
 
				+        'style': {
			
 
				+            'question': '视觉风格与配色方向',
			
 
				+            'detail': '推荐方案: 商务经典(深蓝) | 清新简约(绿色) | 深色专业 | 温暖品牌\n请选择配色方案',
			
 
				+        },
			
 
				+        'pages': {
			
 
				+            'question': '页面结构与模板方案',
			
 
				+            'detail': f'推荐 {len(recommendations["suggested_pages"])} 个页面\n可增删调整页面顺序',
			
 
				+        },
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    profile = {
			
 
				+        'total_rows': 3240,
			
 
				+        'total_columns': 15,
			
 
				+        'numeric_columns': [
			
 
				+            {'column_name': '销售额', 'inferred_label': '销售额', 'numeric_stats': {'sum': 500000, 'mean': 154}},
			
 
				+            {'column_name': '订单量', 'inferred_label': '订单量', 'numeric_stats': {'sum': 3240, 'mean': 1.0}},
			
 
				+            {'column_name': '利润', 'inferred_label': '利润', 'numeric_stats': {'sum': 80000, 'mean': 25}},
			
 
				+        ],
			
 
				+        'category_columns': [
			
 
				+            {'column_name': '区域', 'inferred_label': '区域', 'unique_count': 5},
			
 
				+            {'column_name': '产品', 'inferred_label': '产品', 'unique_count': 12},
			
 
				+        ],
			
 
				+        'time_columns': [{'column_name': '日期', 'inferred_label': '日期'}],
			
 
				+        'time_granularity': 'monthly',
			
 
				+        'date_range': ('2026-01-01', '2026-04-30'),
			
 
				+        'data_quality': {'score': 92},
			
 
				+    }
			
 
				+    recs = analyze_and_recommend(profile, PeriodType.MONTHLY)
			
 
				+    prompts = generate_interaction_prompts(recs, profile)
			
 
				+    for k, v in prompts.items():
			
 
				+        print(f"\n{k}: {v['question']}\n{v['detail']}")
			
--- a/generate-data-report-ppt/scripts/data_loader.py
+++ b/generate-data-report-ppt/scripts/data_loader.py
@@ -1,12 +1,19 @@
 
				 """
			
 
				 Excel data loader for daily/weekly/monthly report generation.
			
 
				+Contains both legacy order-specific loaders and enhanced generic loaders.
			
 
				 """
			
 
				 import pandas as pd
			
 
				 from datetime import datetime, timedelta
			
 
				 import re
			
 
				 import warnings
			
 
				+import os
			
 
				+import io
			
 
				+import csv
			
 
				+
			
 
				+# =====================================================================
			
 
				+# LEGACY SECTION — Order-specific loaders (kept for backward compat)
			
 
				+# =====================================================================
			
 
				 
			
 
				-# Field mapping: Excel column name -> internal field name
			
 
				 FIELD_MAP = {
			
 
				     '序号': 'seq',
			
 
				     '目的国家': 'country',
			
@@ -55,7 +62,6 @@ def _normalize_status(val):
 
				     if pd.isna(val):
			
 
				         return None
			
 
				     s = str(val).strip()
			
 
				-    # Match pattern like "A（合同拟定中）" or "A"
			
 
				     m = re.match(r'^([A-F])', s)
			
 
				     if m:
			
 
				         return m.group(1)
			
@@ -131,28 +137,21 @@ def load_weekly(filepath: str, year: int, week_num: int, week_start_day=0) -> tu
 
				     Returns (current_week_df, prev_week_df).
			
 
				     week_start_day: 0=Monday, 6=Sunday
			
 
				     """
			
 
				-    # Find the first day of the given week
			
 
				-    # Simplified: assume data starts from a known reference
			
 
				     meta = load_workbook_metadata(filepath)
			
 
				     first_date, last_date = meta['date_range']
			
 
				     if first_date is None:
			
 
				         raise ValueError("No valid date sheets found")
			
 
				 
			
 
				-    # Find the Monday of the target week (using ISO week definition)
			
 
				-    # Jan 4 is always in week 1
			
 
				     jan4 = datetime(year, 1, 4)
			
 
				-    # Adjust to Monday
			
 
				     jan4_monday = jan4 - timedelta(days=jan4.weekday())
			
 
				     target_monday = jan4_monday + timedelta(weeks=week_num - 1)
			
 
				     target_sunday = target_monday + timedelta(days=6)
			
 
				 
			
 
				-    # Clamp to available data range
			
 
				     start = max(target_monday, first_date)
			
 
				     end = min(target_sunday, last_date)
			
 
				 
			
 
				     current = load_date_range(filepath, start, end)
			
 
				 
			
 
				-    # Previous week
			
 
				     prev_start = start - timedelta(days=7)
			
 
				     prev_end = end - timedelta(days=7)
			
 
				     if prev_start >= first_date:
			
@@ -169,7 +168,6 @@ def load_monthly(filepath: str, year: int, month: int) -> tuple:
 
				     Returns (current_month_df, prev_month_df, yoy_month_df).
			
 
				     """
			
 
				     start = datetime(year, month, 1)
			
 
				-    # Last day of month
			
 
				     if month == 12:
			
 
				         end = datetime(year + 1, 1, 1) - timedelta(days=1)
			
 
				     else:
			
@@ -177,7 +175,6 @@ def load_monthly(filepath: str, year: int, month: int) -> tuple:
 
				 
			
 
				     current = load_date_range(filepath, start, end)
			
 
				 
			
 
				-    # Previous month
			
 
				     if month == 1:
			
 
				         prev_start = datetime(year - 1, 12, 1)
			
 
				         prev_end = datetime(year, 1, 1) - timedelta(days=1)
			
@@ -190,7 +187,6 @@ def load_monthly(filepath: str, year: int, month: int) -> tuple:
 
				     except ValueError:
			
 
				         previous = pd.DataFrame(columns=current.columns)
			
 
				 
			
 
				-    # YoY (same month last year)
			
 
				     yoy_start = datetime(year - 1, month, 1)
			
 
				     if month == 12:
			
 
				         yoy_end = datetime(year, 1, 1) - timedelta(days=1)
			
@@ -206,20 +202,16 @@ def load_monthly(filepath: str, year: int, month: int) -> tuple:
 
				 
			
 
				 
			
 
				 def _clean_dataframe(df: pd.DataFrame) -> pd.DataFrame:
			
 
				-    """Rename columns, parse dates, clean statuses."""
			
 
				-    # Rename known columns
			
 
				+    """Rename columns, parse dates, clean statuses (legacy)."""
			
 
				     rename_map = {k: v for k, v in FIELD_MAP.items() if k in df.columns}
			
 
				     df = df.rename(columns=rename_map)
			
 
				 
			
 
				-    # Normalize status
			
 
				     if 'status' in df.columns:
			
 
				         df['status_code'] = df['status'].apply(_normalize_status)
			
 
				 
			
 
				-    # Parse numeric fields
			
 
				     if 'order_qty' in df.columns:
			
 
				         df['order_qty'] = pd.to_numeric(df['order_qty'], errors='coerce')
			
 
				 
			
 
				-    # Parse date fields
			
 
				     date_fields = ['contract_date', 'deposit_date', 'order_gen_date',
			
 
				                    'price_review_date', 'contract_review_date', 'seal_apply_date',
			
 
				                    'seal_date', 'final_pay_date', 'customs_date', 'license_date',
			
@@ -228,22 +220,293 @@ def _clean_dataframe(df: pd.DataFrame) -> pd.DataFrame:
 
				         if field in df.columns:
			
 
				             df[field] = df[field].apply(_parse_date)
			
 
				 
			
 
				-    # Tracking days
			
 
				     if 'tracking_days' in df.columns:
			
 
				         df['tracking_days'] = pd.to_numeric(df['tracking_days'], errors='coerce')
			
 
				 
			
 
				-    # Boolean updated
			
 
				     if 'is_updated' in df.columns:
			
 
				         df['is_updated_flag'] = df['is_updated'].astype(str).str.strip() == '是'
			
 
				 
			
 
				     return df
			
 
				 
			
 
				 
			
 
				+# =====================================================================
			
 
				+# GENERIC LOADING SECTION — Universal loaders for any Excel data
			
 
				+# =====================================================================
			
 
				+
			
 
# Summary-row keywords (Chinese and English). A trailing row whose text cells
# contain one of these is treated as a total/aggregate footer and skipped.
# All entries are lowercase; cell text is lowercased before comparison.
SUMMARY_KEYWORDS = [
    '合计', '总计', '小计', '汇总', '累计', '总和',
    'total', 'sum', 'subtotal', 'grand total', '合计：', '总计：',
    '平均', 'avg', 'average',
]

# Encodings tried, in order, when a CSV is loaded without an explicit encoding.
# NOTE(review): latin-1 can decode any byte sequence, so the cp1252 entry
# after it looks unreachable -- confirm the intended order.
CSV_ENCODINGS = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'gb18030', 'latin-1', 'cp1252']
			
 
				+
			
 
				+
			
 
				+def auto_detect_file_format(filepath: str) -> str:
			
 
				+    """Auto-detect file format: xlsx, xls, csv, or unknown."""
			
 
				+    ext = os.path.splitext(filepath)[1].lower()
			
 
				+    if ext in ('.xlsx', '.xls'):
			
 
				+        return ext[1:]
			
 
				+    if ext == '.csv':
			
 
				+        return 'csv'
			
 
				+    if ext in ('.xlsm', '.xlsb'):
			
 
				+        return ext[1:]
			
 
				+    return 'unknown'
			
 
				+
			
 
				+
			
 
				+def load_generic_csv(filepath: str, encoding=None, **kwargs) -> pd.DataFrame:
			
 
				+    """
			
 
				+    Load a CSV file with auto-encoding detection.
			
 
				+    Tries common encodings until one succeeds.
			
 
				+    """
			
 
				+    if encoding:
			
 
				+        try:
			
 
				+            return pd.read_csv(filepath, encoding=encoding, **kwargs)
			
 
				+        except (UnicodeDecodeError, UnicodeError):
			
 
				+            raise ValueError(f"Failed to decode {filepath} with encoding {encoding}")
			
 
				+
			
 
				+    last_error = None
			
 
				+    for enc in CSV_ENCODINGS:
			
 
				+        try:
			
 
				+            df = pd.read_csv(filepath, encoding=enc, **kwargs)
			
 
				+            if len(df.columns) > 0:
			
 
				+                return df
			
 
				+        except (UnicodeDecodeError, UnicodeError, pd.errors.ParserError) as e:
			
 
				+            last_error = e
			
 
				+            continue
			
 
				+
			
 
				+    raise ValueError(
			
 
				+        f"Unable to decode CSV file {filepath}. Tried encodings: "
			
 
				+        f"{CSV_ENCODINGS}. Last error: {last_error}"
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def _detect_and_skip_footer_rows(df_raw: pd.DataFrame) -> pd.DataFrame:
			
 
				+    """Detect and remove summary/aggregation rows at the end of the data."""
			
 
				+    if df_raw.empty:
			
 
				+        return df_raw
			
 
				+
			
 
				+    rows_to_drop = []
			
 
				+    text_cols = [c for c in df_raw.columns if df_raw[c].dtype == 'object']
			
 
				+
			
 
				+    for idx in range(len(df_raw) - 1, -1, -1):
			
 
				+        row = df_raw.iloc[idx]
			
 
				+        is_summary = False
			
 
				+        for col in text_cols:
			
 
				+            val = str(row.get(col, '')).strip().lower()
			
 
				+            if any(kw in val for kw in SUMMARY_KEYWORDS):
			
 
				+                is_summary = True
			
 
				+                break
			
 
				+        if is_summary:
			
 
				+            rows_to_drop.append(idx)
			
 
				+        else:
			
 
				+            break
			
 
				+
			
 
				+        if len(rows_to_drop) > 20:
			
 
				+            break
			
 
				+
			
 
				+    if rows_to_drop:
			
 
				+        df_raw = df_raw.drop(index=rows_to_drop)
			
 
				+        df_raw = df_raw.reset_index(drop=True)
			
 
				+
			
 
				+    return df_raw
			
 
				+
			
 
				+
			
 
				+def _detect_empty_or_notes_rows(df_raw: pd.DataFrame) -> pd.DataFrame:
			
 
				+    """Remove leading empty rows and trailing fully-empty rows."""
			
 
				+    if df_raw.empty:
			
 
				+        return df_raw
			
 
				+
			
 
				+    non_empty_rows = df_raw.notna().any(axis=1)
			
 
				+    first_valid = non_empty_rows.idxmax() if non_empty_rows.any() else 0
			
 
				+    last_valid = non_empty_rows[non_empty_rows].index[-1] if non_empty_rows.any() else len(df_raw)
			
 
				+
			
 
				+    df_raw = df_raw.iloc[first_valid:last_valid + 1].reset_index(drop=True)
			
 
				+    return df_raw
			
 
				+
			
 
				+
			
 
				+def normalize_column_names(col_name: str) -> str:
			
 
				+    """
			
 
				+    Normalize a single column name: strip whitespace, unify brackets, remove special chars.
			
 
				+    """
			
 
				+    if not isinstance(col_name, str):
			
 
				+        return col_name
			
 
				+    name = col_name.strip()
			
 
				+    name = name.replace('（', '(').replace('）', ')')
			
 
				+    name = name.replace('【', '[').replace('】', ']')
			
 
				+    name = name.replace('\n', ' ').replace('\r', ' ')
			
 
				+    name = re.sub(r'\s+', ' ', name)
			
 
				+    return name
			
 
				+
			
 
				+
			
 
				+def _clean_generic_dataframe(df: pd.DataFrame, skip_summary_rows=True) -> pd.DataFrame:
			
 
				+    """
			
 
				+    Universal DataFrame cleaning:
			
 
				+    - Remove fully empty rows/columns
			
 
				+    - Drop 'Unnamed' columns
			
 
				+    - Normalize column names (strip whitespace, unify brackets)
			
 
				+    - Auto-detect and remove summary/total rows
			
 
				+    - Try to parse date columns
			
 
				+    - Try to parse numeric columns
			
 
				+    """
			
 
				+    if df.empty:
			
 
				+        return df
			
 
				+
			
 
				+    df = df.dropna(how='all').reset_index(drop=True)
			
 
				+    df = df.dropna(axis=1, how='all')
			
 
				+
			
 
				+    df = df.loc[:, ~df.columns.astype(str).str.contains('^Unnamed', na=False)]
			
 
				+
			
 
				+    df = df.rename(columns=normalize_column_names)
			
 
				+
			
 
				+    if skip_summary_rows:
			
 
				+        df = _detect_and_skip_footer_rows(df)
			
 
				+        df = _detect_empty_or_notes_rows(df)
			
 
				+
			
 
				+    for col in df.columns:
			
 
				+        if df[col].dtype == 'object':
			
 
				+            try:
			
 
				+                parsed = pd.to_datetime(df[col], errors='coerce', infer_datetime_format=True)
			
 
				+                if parsed.notna().sum() > len(df) * 0.7:
			
 
				+                    df[col] = parsed
			
 
				+                    continue
			
 
				+            except Exception:
			
 
				+                pass
			
 
				+            try:
			
 
				+                numeric = pd.to_numeric(df[col], errors='coerce')
			
 
				+                if numeric.notna().sum() > len(df) * 0.7:
			
 
				+                    df[col] = numeric
			
 
				+            except Exception:
			
 
				+                pass
			
 
				+
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+def load_generic_excel(filepath: str, sheet_name=0, skip_summary_rows=True,
			
 
				+                       encoding=None, dtype_backend=None) -> pd.DataFrame:
			
 
				+    """
			
 
				+    Load any Excel/CSV file into a cleaned DataFrame.
			
 
				+
			
 
				+    Args:
			
 
				+        filepath: Path to the data file (.xlsx, .xls, or .csv)
			
 
				+        sheet_name: Sheet name or index (for Excel). Ignored for CSV.
			
 
				+        skip_summary_rows: Auto-detect and remove summary/total footer rows
			
 
				+        encoding: File encoding (auto-detected for CSV if None)
			
 
				+        dtype_backend: Optional pandas dtype backend ('numpy_nullable', 'pyarrow')
			
 
				+    """
			
 
				+    fmt = auto_detect_file_format(filepath)
			
 
				+
			
 
				+    kwargs = {}
			
 
				+    if dtype_backend:
			
 
				+        kwargs['dtype_backend'] = dtype_backend
			
 
				+
			
 
				+    if fmt == 'csv':
			
 
				+        df = load_generic_csv(filepath, encoding=encoding, **kwargs)
			
 
				+    else:
			
 
				+        try:
			
 
				+            df = pd.read_excel(filepath, sheet_name=sheet_name, **kwargs)
			
 
				+        except Exception as e:
			
 
				+            if fmt == 'xls':
			
 
				+                raise ValueError(
			
 
				+                    f"Failed to read .xls file. Try converting to .xlsx format. "
			
 
				+                    f"Error: {e}"
			
 
				+                )
			
 
				+            raise
			
 
				+
			
 
				+    return _clean_generic_dataframe(df, skip_summary_rows=skip_summary_rows)
			
 
				+
			
 
				+
			
 
				+def load_generic_all_sheets(filepath: str, skip_summary_rows=True) -> pd.DataFrame:
			
 
				+    """
			
 
				+    Load all sheets from an Excel file, merge into a single DataFrame.
			
 
				+    Adds '_source_sheet' column to track the source sheet.
			
 
				+    """
			
 
				+    fmt = auto_detect_file_format(filepath)
			
 
				+    if fmt == 'csv':
			
 
				+        return load_generic_excel(filepath, skip_summary_rows=skip_summary_rows)
			
 
				+
			
 
				+    xl = pd.ExcelFile(filepath)
			
 
				+    if len(xl.sheet_names) == 1:
			
 
				+        df = pd.read_excel(filepath, sheet_name=xl.sheet_names[0])
			
 
				+        return _clean_generic_dataframe(df, skip_summary_rows=skip_summary_rows)
			
 
				+
			
 
				+    frames = []
			
 
				+    for sheet in xl.sheet_names:
			
 
				+        try:
			
 
				+            df = pd.read_excel(filepath, sheet_name=sheet)
			
 
				+            df['_source_sheet'] = sheet
			
 
				+            frames.append(df)
			
 
				+        except Exception:
			
 
				+            continue
			
 
				+    if not frames:
			
 
				+        raise ValueError(f"No valid sheets found in {filepath}")
			
 
				+    combined = pd.concat(frames, ignore_index=True)
			
 
				+    return _clean_generic_dataframe(combined, skip_summary_rows=skip_summary_rows)
			
 
				+
			
 
				+
			
 
				+def auto_detect_date_column(df: pd.DataFrame) -> str:
			
 
				+    """Auto-detect the primary date/time column in a DataFrame."""
			
 
				+    date_keywords = ['日期', '时间', 'date', 'time', '年', '月', '日']
			
 
				+    for col in df.columns:
			
 
				+        col_str = str(col).lower().strip()
			
 
				+        if any(kw in col_str for kw in date_keywords):
			
 
				+            parsed = pd.to_datetime(df[col], errors='coerce')
			
 
				+            if parsed.notna().sum() > len(df) * 0.5:
			
 
				+                return col
			
 
				+    return ''
			
 
				+
			
 
				+
			
 
				+def auto_parse_single_sheet(filepath: str, sheet_name=0) -> pd.DataFrame:
			
 
				+    """Load and clean a single sheet (shortcut for load_generic_excel)."""
			
 
				+    return load_generic_excel(filepath, sheet_name=sheet_name)
			
 
				+
			
 
				+
			
 
				+def load_generic_file_info(filepath: str) -> dict:
			
 
				+    """
			
 
				+    Return file metadata without full data loading.
			
 
				+    Useful for quick inspection before deciding how to load.
			
 
				+    """
			
 
				+    info = {'filepath': filepath, 'format': auto_detect_file_format(filepath)}
			
 
				+
			
 
				+    if info['format'] == 'csv':
			
 
				+        try:
			
 
				+            with open(filepath, 'r', encoding='utf-8-sig') as f:
			
 
				+                sample = f.read(8192)
			
 
				+            dialect = csv.Sniffer().sniff(sample[:4096])
			
 
				+            info['delimiter'] = dialect.delimiter
			
 
				+            info['has_header'] = csv.Sniffer().has_header(sample)
			
 
				+            info['approx_rows'] = sample.count('\n')
			
 
				+        except Exception:
			
 
				+            info['delimiter'] = ','
			
 
				+    else:
			
 
				+        try:
			
 
				+            xl = pd.ExcelFile(filepath)
			
 
				+            info['sheet_names'] = xl.sheet_names
			
 
				+            info['sheet_count'] = len(xl.sheet_names)
			
 
				+        except Exception as e:
			
 
				+            info['error'] = str(e)
			
 
				+
			
 
				+    info['file_size_mb'] = round(os.path.getsize(filepath) / (1024 * 1024), 2)
			
 
				+    return info
			
 
				+
			
 
				+
			
 
				 if __name__ == '__main__':
			
 
				     import sys
			
 
				     if len(sys.argv) > 1:
			
 
				         fp = sys.argv[1]
			
 
				-        meta = load_workbook_metadata(fp)
			
 
				-        print(f"Sheets: {meta['sheets'][:5]}...")
			
 
				-        print(f"Date range: {meta['date_range'][0]} ~ {meta['date_range'][1]}")
			
 
				-        print(f"Total days: {meta['total_days']}")
			
 
				+        fmt = auto_detect_file_format(fp)
			
 
				+        print(f"File: {fp}")
			
 
				+        print(f"Format: {fmt}")
			
 
				+
			
 
				+        file_info = load_generic_file_info(fp)
			
 
				+        print(f"Size: {file_info.get('file_size_mb', '?')} MB")
			
 
				+        if 'sheet_names' in file_info:
			
 
				+            print(f"Sheets ({file_info['sheet_count']}): {file_info['sheet_names'][:5]}...")
			
 
				+
			
 
				+        df = load_generic_excel(fp)
			
 
				+        date_col = auto_detect_date_column(df)
			
 
				+        print(f"Generic load: {len(df)} rows x {len(df.columns)} cols, "
			
 
				+              f"date column: {date_col}")
			
 
				+        print(f"Columns: {list(df.columns)}")
			
--- a/generate-data-report-ppt/scripts/data_profiler.py
+++ b/generate-data-report-ppt/scripts/data_profiler.py
@@ -0,0 +1,838 @@
 
				+"""
			
 
				+Universal data profiling engine: auto-detect schema, statistical features,
			
 
				+and semantic inference from arbitrary Excel data.
			
 
				+Enhanced with content-based value analysis, distribution shape analysis,
			
 
				+derived metric detection, and multi-dimensional quality scoring.
			
 
				+"""
			
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+from datetime import datetime, date
			
 
				+from collections import Counter
			
 
				+import re
			
 
				+import math
			
 
				+
			
 
				+from report_config import ColumnProfile, ColumnRole
			
 
				+
			
 
				+
			
 
				+# =====================================================================
			
 
				+# DATE PATTERNS — expanded for broader format coverage
			
 
				+# =====================================================================
			
 
				+DATE_PATTERNS = [
			
 
				+    re.compile(r'^\d{4}年\d{1,2}月\d{1,2}日$'),
			
 
				+    re.compile(r'^\d{4}-\d{2}-\d{2}$'),
			
 
				+    re.compile(r'^\d{4}/\d{1,2}/\d{1,2}$'),
			
 
				+    re.compile(r'^\d{4}\.\d{1,2}\.\d{1,2}$'),
			
 
				+    re.compile(r'^\d{4}年\d{1,2}月$'),
			
 
				+    re.compile(r'^\d{4}-\d{2}$'),
			
 
				+    re.compile(r'^\d{2}-\d{2}$'),
			
 
				+    re.compile(r'^\d{2}/\d{2}$'),
			
 
				+    re.compile(r'^\d{8}$'),  # YYYYMMDD
			
 
				+]
			
 
				+
			
 
				+TIME_KEYWORDS = [
			
 
				+    '日期', '时间', 'date', 'time', '年', '月', '日', '周', '期', '季度',
			
 
				+    'period', 'month', 'year', 'quarter', 'week', 'day', 'timestamp',
			
 
				+    '月份', '年份', '周期', '时段', 'time', 'datetime',
			
 
				+    'date', 'created', 'updated', 'modified', '发生', '录入', '创建',
			
 
				+]
			
 
				+
			
 
				+NUMERIC_KEYWORDS = [
			
 
				+    '金额', '数量', '台数', '件数', '元', '价格', '收入', '支出',
			
 
				+    '利润', '成本', '费用', '销量', '销售额', '总数', '合计',
			
 
				+    'amount', 'price', 'qty', 'quantity', 'revenue', 'cost',
			
 
				+    'sales', 'volume', 'value', 'total', 'sum', 'count',
			
 
				+    '单数', '笔数', '人数', '天数', '比率', '占比', '比例', '率',
			
 
				+    '预算', 'budget', '花费', 'spend', 'fee', '金额', '单价',
			
 
				+    'unit', '得分', 'score', 'rating', '评分',
			
 
				+]
			
 
				+
			
 
				+CATEGORY_KEYWORDS = [
			
 
				+    '国家', '区域', '地区', '城市', '省份', '状态', '类型', '类别',
			
 
				+    '分类', '部门', '组', '等级', '级别', '品牌', '渠道',
			
 
				+    'country', 'region', 'city', 'status', 'type', 'category',
			
 
				+    'department', 'group', 'level', 'brand', 'channel',
			
 
				+    '负责人', 'owner', 'manager', '产品', 'product', '阶段',
			
 
				+    '供应商', 'supplier', '客户', 'customer', '行业', 'industry',
			
 
				+    '性别', 'gender', '职位', 'title', '角色', 'role', '标签', 'tag',
			
 
				+    '科目', 'account', '方向', 'direction', '方式', 'method',
			
 
				+    '意向', 'intent', 'intention', 'priority', '优先级',
			
 
				+]
			
 
				+
			
 
				+ID_KEYWORDS = [
			
 
				+    'id', '编号', '序号', '代码', 'code', 'no', '编码', '合同号',
			
 
				+    '订单号', '工单号', '流水号', '单号', '标识', 'key',
			
 
				+    'uuid', 'guid', 'sn', '序列号', '身份证', 'phone', '手机',
			
 
				+    '邮箱', 'email', '电话', 'tel', 'mobile',
			
 
				+]
			
 
				+
			
 
				+TEXT_KEYWORDS = [
			
 
				+    '备注', '描述', '说明', '详情', '内容', '意见', '建议', '进度更新',
			
 
				+    'note', 'description', 'detail', 'remark', 'comment', 'memo',
			
 
				+    '地址', 'address', '介绍', '摘要', 'summary', '附注',
			
 
				+    '反馈', 'feedback', '理由', 'reason', '原因', 'cause',
			
 
				+]
			
 
				+
			
 
				+RATE_KEYWORDS = [
			
 
				+    '率', 'ratio', 'rate', '占比', '比例', 'percentage', 'pct',
			
 
				+    'percent', 'conversion', '转化率', '完成率', '增长率',
			
 
				+]
			
 
				+
			
 
				+# Patterns for value-based content detection
			
 
				+PHONE_PATTERN = re.compile(r'^[\+]?[\d\-\(\)\s]{6,20}$')
			
 
				+EMAIL_PATTERN = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')
			
 
				+URL_PATTERN = re.compile(r'^https?://', re.IGNORECASE)
			
 
				+YEAR_PATTERN = re.compile(r'^\d{4}$')
			
 
				+
			
 
				+
			
 
				+def _parse_date(val):
			
 
				+    if pd.isna(val):
			
 
				+        return None
			
 
				+    if isinstance(val, (datetime, date)):
			
 
				+        return val
			
 
				+    if isinstance(val, (int, float)) and not math.isnan(val):
			
 
				+        try:
			
 
				+            return pd.Timestamp(val).to_pydatetime()
			
 
				+        except (ValueError, OverflowError):
			
 
				+            pass
			
 
				+    s = str(val).strip()
			
 
				+    for pattern in DATE_PATTERNS:
			
 
				+        if pattern.match(s):
			
 
				+            for fmt in ('%Y年%m月%d日', '%Y-%m-%d', '%Y/%m/%d',
			
 
				+                        '%Y.%m.%d', '%Y年%m月', '%Y-%m',
			
 
				+                        '%m-%d', '%m/%d', '%Y%m%d'):
			
 
				+                try:
			
 
				+                    return datetime.strptime(s, fmt)
			
 
				+                except ValueError:
			
 
				+                    continue
			
 
				+    return None
			
 
				+
			
 
				+
			
 
				+# =====================================================================
			
 
				+# VALUE-BASED CONTENT ANALYSIS
			
 
				+# =====================================================================
			
 
				+
			
 
				+def _analyze_value_patterns(series: pd.Series, sample_count: int = 100) -> dict:
			
 
				+    """Analyze actual data values to detect patterns and content types."""
			
 
				+    non_null = series.dropna().astype(str).head(sample_count)
			
 
				+    if len(non_null) == 0:
			
 
				+        return {}
			
 
				+
			
 
				+    patterns = {}
			
 
				+
			
 
				+    # Check if values look like percentages
			
 
				+    # Only flag as percentage if: ends with % OR is a decimal fraction (0.0-1.0)
			
 
				+    pct_like = sum(1 for v in non_null if v.endswith('%') or
			
 
				+                   (v.replace('.', '', 1).lstrip('-').isdigit() and
			
 
				+                    0 < float(v) <= 1 and not v.isdigit()))
			
 
				+    patterns['pct_ratio'] = pct_like / len(non_null)
			
 
				+
			
 
				+    # Check for yes/no or true/false patterns
			
 
				+    yn_vals = {'是', '否', 'yes', 'no', 'true', 'false', 'y', 'n', 't', 'f',
			
 
				+               '有', '无', '0', '1'}
			
 
				+    yn_like = sum(1 for v in non_null if v.lower() in yn_vals)
			
 
				+    patterns['binary_ratio'] = yn_like / len(non_null)
			
 
				+
			
 
				+    # Check for ordinal/categorical short text
			
 
				+    short_text = sum(1 for v in non_null if len(v) <= 20)
			
 
				+    patterns['short_text_ratio'] = short_text / len(non_null)
			
 
				+
			
 
				+    # Check for phone-like patterns
			
 
				+    phone_like = sum(1 for v in non_null if PHONE_PATTERN.match(v))
			
 
				+    patterns['phone_ratio'] = phone_like / len(non_null)
			
 
				+
			
 
				+    # Check for email-like patterns
			
 
				+    email_like = sum(1 for v in non_null if EMAIL_PATTERN.match(v))
			
 
				+    patterns['email_ratio'] = email_like / len(non_null)
			
 
				+
			
 
				+    # Check for URL-like patterns
			
 
				+    url_like = sum(1 for v in non_null if URL_PATTERN.match(v))
			
 
				+    patterns['url_ratio'] = url_like / len(non_null)
			
 
				+
			
 
				+    # Check for pure digit strings (possible IDs)
			
 
				+    digit_only = sum(1 for v in non_null if v.isdigit() and len(v) >= 6)
			
 
				+    patterns['digit_id_ratio'] = digit_only / len(non_null)
			
 
				+
			
 
				+    # Check for year-like values
			
 
				+    year_like = sum(1 for v in non_null if YEAR_PATTERN.match(v))
			
 
				+    patterns['year_ratio'] = year_like / len(non_null)
			
 
				+
			
 
				+    # Detect ordinal levels
			
 
				+    ordinal_sets = [
			
 
				+        {'高', '中', '低', 'A', 'B', 'C', '甲', '乙', '丙'},
			
 
				+        {'一级', '二级', '三级', '四级', 'level 1', 'level 2', 'level 3'},
			
 
				+        {'critical', 'major', 'minor', 'high', 'medium', 'low'},
			
 
				+    ]
			
 
				+    for oset in ordinal_sets:
			
 
				+        ord_like = sum(1 for v in non_null if v in oset)
			
 
				+        if ord_like / len(non_null) > 0.3:
			
 
				+            patterns['ordinal'] = True
			
 
				+            break
			
 
				+    else:
			
 
				+        patterns['ordinal'] = False
			
 
				+
			
 
				+    # Average text length
			
 
				+    patterns['avg_text_len'] = round(non_null.str.len().mean(), 1)
			
 
				+
			
 
				+    # Unique ratio
			
 
				+    unique_ratio = series.nunique() / max(len(non_null), 1)
			
 
				+    patterns['unique_ratio'] = round(unique_ratio, 4)
			
 
				+
			
 
				+    return patterns
			
 
				+
			
 
				+
			
 
				+def _infer_role_from_values(value_patterns: dict, col_name: str,
			
 
				+                            dtype_str: str, unique_count: int, total_rows: int) -> str:
			
 
				+    """Infer column role based on value content analysis results."""
			
 
				+    up = value_patterns
			
 
				+
			
 
				+    # High ratio of email patterns
			
 
				+    if up.get('email_ratio', 0) > 0.5:
			
 
				+        return 'id'
			
 
				+
			
 
				+    # High ratio of phone patterns
			
 
				+    if up.get('phone_ratio', 0) > 0.5:
			
 
				+        return 'id'
			
 
				+
			
 
				+    # High ratio of URL patterns
			
 
				+    if up.get('url_ratio', 0) > 0.5:
			
 
				+        return 'text'
			
 
				+
			
 
				+    # Mostly binary values (yes/no)
			
 
				+    if up.get('binary_ratio', 0) > 0.6:
			
 
				+        return 'category'
			
 
				+
			
 
				+    # Mostly percentage values
			
 
				+    if up.get('pct_ratio', 0) > 0.5:
			
 
				+        return 'numeric'
			
 
				+
			
 
				+    # High ratio of digit-only long strings (likely IDs)
			
 
				+    if up.get('digit_id_ratio', 0) > 0.5 and unique_count > total_rows * 0.5:
			
 
				+        return 'id'
			
 
				+
			
 
				+    # Ordinal level detected
			
 
				+    if up.get('ordinal', False):
			
 
				+        return 'category'
			
 
				+
			
 
				+    return None  # No clear signal from values
			
 
				+
			
 
				+
			
 
				+# =====================================================================
			
 
				+# SEMANTIC KEYWORD-BASED ROLE INFERENCE
			
 
				+# =====================================================================
			
 
				+
			
 
				+def _infer_column_role(col_name: str, dtype_str: str, sample_values: list,
			
 
				+                       null_rate: float, unique_count: int, total_rows: int,
			
 
				+                       value_patterns: dict = None) -> ColumnRole:
			
 
				+    col_lower = col_name.lower().strip()
			
 
				+
			
 
				+    # 1) Value-based inference first (stronger signal)
			
 
				+    if value_patterns:
			
 
				+        value_role = _infer_role_from_values(value_patterns, col_name,
			
 
				+                                              dtype_str, unique_count, total_rows)
			
 
				+        if value_role:
			
 
				+            return ColumnRole(value_role)
			
 
				+
			
 
				+    # 2) Keyword-based inference (expanded)
			
 
				+    if any(kw in col_lower for kw in ID_KEYWORDS):
			
 
				+        return ColumnRole.ID
			
 
				+
			
 
				+    if any(kw in col_lower for kw in TIME_KEYWORDS):
			
 
				+        return ColumnRole.TIME
			
 
				+
			
 
				+    if any(kw in col_lower for kw in NUMERIC_KEYWORDS):
			
 
				+        return ColumnRole.NUMERIC
			
 
				+
			
 
				+    if any(kw in col_lower for kw in CATEGORY_KEYWORDS):
			
 
				+        return ColumnRole.CATEGORY
			
 
				+
			
 
				+    if any(kw in col_lower for kw in TEXT_KEYWORDS):
			
 
				+        return ColumnRole.TEXT
			
 
				+
			
 
				+    # 3) dtype-based fallback
			
 
				+    if 'int' in dtype_str or 'float' in dtype_str:
			
 
				+        if unique_count <= 15 and total_rows > 20:
			
 
				+            return ColumnRole.CATEGORY
			
 
				+        return ColumnRole.NUMERIC
			
 
				+
			
 
				+    if 'bool' in dtype_str:
			
 
				+        return ColumnRole.CATEGORY
			
 
				+
			
 
				+    if 'datetime' in dtype_str:
			
 
				+        return ColumnRole.TIME
			
 
				+
			
 
				+    # 4) Cardinality-based inference
			
 
				+    if total_rows > 0:
			
 
				+        cardinality_ratio = unique_count / total_rows
			
 
				+        if cardinality_ratio > 0.8 and unique_count > 20:
			
 
				+            return ColumnRole.TEXT
			
 
				+        if cardinality_ratio < 0.3 and unique_count <= 30:
			
 
				+            return ColumnRole.CATEGORY
			
 
				+
			
 
				+    return ColumnRole.TEXT
			
 
				+
			
 
				+
			
 
				+def _infer_metric_label(col_name: str, role: ColumnRole, value_patterns: dict = None) -> str:
			
 
				+    col_lower = col_name.lower().strip()
			
 
				+
			
 
				+    # If values are percentage-like, mark as '比率'
			
 
				+    if value_patterns and value_patterns.get('pct_ratio', 0) > 0.5:
			
 
				+        for kw in ['率', '转化', '占比', '比例']:
			
 
				+            if kw in col_lower:
			
 
				+                return col_name
			
 
				+        return col_name + '(占比)'
			
 
				+
			
 
				+    label_map = {
			
 
				+        '金额': '金额', '销售额': '销售额', '收入': '收入', '利润': '利润',
			
 
				+        '数量': '数量', '台数': '台数', '件数': '件数', '订单数': '订单数',
			
 
				+        '成本': '成本', '费用': '费用', '销量': '销量', '占比': '占比',
			
 
				+        '天数': '天数', '人数': '人数', '比率': '比率', '转化率': '转化率',
			
 
				+        '增长率': '增长率', '完成率': '完成率', '单价': '单价',
			
 
				+        '价格': '价格', '得分': '得分', '评分': '评分',
			
 
				+    }
			
 
				+    for kw, label in label_map.items():
			
 
				+        if kw in col_lower:
			
 
				+            return label
			
 
				+
			
 
				+    # Check for rate-related keywords
			
 
				+    if any(kw in col_lower for kw in RATE_KEYWORDS):
			
 
				+        return '比率'
			
 
				+
			
 
				+    if role == ColumnRole.NUMERIC:
			
 
				+        return col_name
			
 
				+    elif role == ColumnRole.TIME:
			
 
				+        return '日期'
			
 
				+    elif role == ColumnRole.CATEGORY:
			
 
				+        return col_name
			
 
				+    return col_name
			
 
				+
			
 
				+
			
 
				+def _infer_unit(col_name: str, value_patterns: dict = None) -> str:
			
 
				+    col_lower = col_name.lower().strip()
			
 
				+
			
 
				+    # If values are percentage-like
			
 
				+    if value_patterns and value_patterns.get('pct_ratio', 0) > 0.5:
			
 
				+        return '%'
			
 
				+
			
 
				+    unit_map = {
			
 
				+        '金额': '元', '销售额': '元', '收入': '元', '利润': '元',
			
 
				+        '成本': '元', '费用': '元', '台数': '台', '件数': '件',
			
 
				+        '数量': '', '人数': '人', '天数': '天', '占比': '%',
			
 
				+        '比率': '%', '比例': '%', '率': '%', '转化率': '%',
			
 
				+        '增长率': '%', '完成率': '%', '单价': '元', '价格': '元',
			
 
				+        '得分': '分', '评分': '分',
			
 
				+    }
			
 
				+    for kw, unit in unit_map.items():
			
 
				+        if kw in col_lower:
			
 
				+            return unit
			
 
				+    return ''
			
 
				+
			
 
				+
			
 
				+# =====================================================================
			
 
				+# DISTRIBUTION SHAPE ANALYSIS
			
 
				+# =====================================================================
			
 
				+
			
 
				+def _calc_distribution_shape(series: pd.Series) -> dict:
			
 
				+    """Compute skewness, kurtosis and distribution type for numeric series."""
			
 
				+    try:
			
 
				+        s = series.dropna()
			
 
				+        if len(s) < 4:
			
 
				+            return {}
			
 
				+        skew = round(float(s.skew()), 3)
			
 
				+        kurt = round(float(s.kurtosis()), 3)
			
 
				+
			
 
				+        # Determine distribution type
			
 
				+        abs_skew = abs(skew)
			
 
				+        if abs_skew < 0.5:
			
 
				+            skew_type = '近似对称'
			
 
				+        elif abs_skew < 1.0:
			
 
				+            skew_type = '轻度偏态'
			
 
				+        else:
			
 
				+            skew_type = '显著偏态'
			
 
				+
			
 
				+        if skew > 0.5:
			
 
				+            skew_dir = '右偏（长尾在右侧，大部分值偏小）'
			
 
				+        elif skew < -0.5:
			
 
				+            skew_dir = '左偏（长尾在左侧，大部分值偏大）'
			
 
				+        else:
			
 
				+            skew_dir = '基本对称'
			
 
				+
			
 
				+        # Concentration analysis
			
 
				+        cv = float(s.std()) / float(s.mean()) if float(s.mean()) != 0 else 0
			
 
				+        if cv < 0.3:
			
 
				+            concentration = '高度集中'
			
 
				+        elif cv < 0.7:
			
 
				+            concentration = '中度集中'
			
 
				+        elif cv < 1.2:
			
 
				+            concentration = '适度分散'
			
 
				+        else:
			
 
				+            concentration = '高度分散'
			
 
				+
			
 
				+        return {
			
 
				+            'skewness': skew,
			
 
				+            'kurtosis': kurt,
			
 
				+            'skew_type': skew_type,
			
 
				+            'skew_direction': skew_dir,
			
 
				+            'cv': round(cv, 3),
			
 
				+            'concentration': concentration,
			
 
				+        }
			
 
				+    except Exception:
			
 
				+        return {}
			
 
				+
			
 
				+
			
 
				+# =====================================================================
			
 
				+# DERIVED METRIC DETECTION
			
 
				+# =====================================================================
			
 
				+
			
 
				+def _detect_derived_relations(df: pd.DataFrame, numeric_cols: list) -> list[dict]:
			
 
				+    """
			
 
				+    Detect potential derived relationships among numeric columns.
			
 
				+    E.g., A - B = C, A + B = C, A / B = C (approx.)
			
 
				+    """
			
 
				+    relations = []
			
 
				+    num_names = [c['column_name'] for c in numeric_cols]
			
 
				+    if len(num_names) < 3:
			
 
				+        return relations
			
 
				+
			
 
				+    sample = df[num_names].dropna().head(500)
			
 
				+
			
 
				+    for i, a_name in enumerate(num_names):
			
 
				+        for j, b_name in enumerate(num_names):
			
 
				+            if j <= i:
			
 
				+                continue
			
 
				+            a = sample[a_name]
			
 
				+            b = sample[b_name]
			
 
				+
			
 
				+            # Check subtraction: a - b ≈ c or b - a ≈ c
			
 
				+            for diff_name in num_names:
			
 
				+                if diff_name in (a_name, b_name):
			
 
				+                    continue
			
 
				+                d = sample[diff_name]
			
 
				+                diff_ab = (a - b - d).abs().mean()
			
 
				+                diff_ba = (b - a - d).abs().mean()
			
 
				+                threshold = max(d.mean(), 1) * 0.1
			
 
				+                if diff_ab < threshold:
			
 
				+                    relations.append({
			
 
				+                        'type': 'subtraction',
			
 
				+                        'expression': f'{a_name} - {b_name} ≈ {diff_name}',
			
 
				+                        'accuracy': round(float(1 - diff_ab / max(float(d.mean()), 1)), 3),
			
 
				+                        'formula': f'{diff_name} = {a_name} - {b_name}',
			
 
				+                    })
			
 
				+                    break
			
 
				+                elif diff_ba < threshold:
			
 
				+                    relations.append({
			
 
				+                        'type': 'subtraction',
			
 
				+                        'expression': f'{b_name} - {a_name} ≈ {diff_name}',
			
 
				+                        'accuracy': round(float(1 - diff_ba / max(float(d.mean()), 1)), 3),
			
 
				+                        'formula': f'{diff_name} = {b_name} - {a_name}',
			
 
				+                    })
			
 
				+                    break
			
 
				+
			
 
				+            # Check addition: a + b ≈ c
			
 
				+            for sum_name in num_names:
			
 
				+                if sum_name in (a_name, b_name):
			
 
				+                    continue
			
 
				+                s = sample[sum_name]
			
 
				+                sum_ab = (a + b - s).abs().mean()
			
 
				+                threshold = max(s.mean(), 1) * 0.1
			
 
				+                if sum_ab < threshold:
			
 
				+                    relations.append({
			
 
				+                        'type': 'addition',
			
 
				+                        'expression': f'{a_name} + {b_name} ≈ {sum_name}',
			
 
				+                        'accuracy': round(float(1 - sum_ab / max(float(s.mean()), 1)), 3),
			
 
				+                        'formula': f'{sum_name} = {a_name} + {b_name}',
			
 
				+                    })
			
 
				+                    break
			
 
				+
			
 
				+    # Also check for ratio relations
			
 
				+    if len(num_names) >= 2:
			
 
				+        for i, a_name in enumerate(num_names):
			
 
				+            for j, b_name in enumerate(num_names):
			
 
				+                if j <= i:
			
 
				+                    continue
			
 
				+                a = sample[a_name]
			
 
				+                b = sample[b_name]
			
 
				+                ratio = (a / b.replace(0, np.nan)).dropna()
			
 
				+                if len(ratio) > 0:
			
 
				+                    ratio_std = float(ratio.std())
			
 
				+                    ratio_mean = float(ratio.mean())
			
 
				+                    if ratio_mean > 0 and ratio_std / ratio_mean < 0.1:
			
 
				+                        # Consistent ratio found
			
 
				+                        relations.append({
			
 
				+                            'type': 'ratio',
			
 
				+                            'expression': f'{a_name} / {b_name} ≈ {ratio_mean:.3f}',
			
 
				+                            'accuracy': round(float(1 - ratio_std / ratio_mean), 3),
			
 
				+                            'formula': f'{a_name} = {b_name} × {ratio_mean:.2f}',
			
 
				+                        })
			
 
				+
			
 
				+    return relations
			
 
				+
			
 
				+
			
 
				+# =====================================================================
			
 
				+# MAIN PROFILING FUNCTION
			
 
				+# =====================================================================
			
 
				+
			
 
				+def profile_dataframe(df: pd.DataFrame) -> dict:
			
 
				+    total_rows = len(df)
			
 
				+    columns = []
			
 
				+
			
 
				+    for col in df.columns:
			
 
				+        series = df[col]
			
 
				+        dtype_str = str(series.dtype)
			
 
				+        null_count = int(series.isna().sum())
			
 
				+        null_rate = round(null_count / total_rows, 4) if total_rows else 0.0
			
 
				+
			
 
				+        non_null = series.dropna()
			
 
				+        unique_count = int(non_null.nunique())
			
 
				+
			
 
				+        sample_values = non_null.head(5).tolist()
			
 
				+        sample_values = [str(v) for v in sample_values]
			
 
				+
			
 
				+        # Enhanced: Value pattern analysis
			
 
				+        value_patterns = _analyze_value_patterns(series)
			
 
				+
			
 
				+        # Enhanced: Distribution shape analysis for numeric columns
			
 
				+        distribution_shape = None
			
 
				+        numeric_stats = None
			
 
				+        if pd.api.types.is_numeric_dtype(series) and not pd.api.types.is_bool_dtype(series):
			
 
				+            try:
			
 
				+                numeric_stats = {
			
 
				+                    'mean': round(float(series.mean()), 2) if not pd.isna(series.mean()) else 0,
			
 
				+                    'median': round(float(series.median()), 2) if not pd.isna(series.median()) else 0,
			
 
				+                    'min': round(float(series.min()), 2) if not pd.isna(series.min()) else 0,
			
 
				+                    'max': round(float(series.max()), 2) if not pd.isna(series.max()) else 0,
			
 
				+                    'std': round(float(series.std()), 2) if not pd.isna(series.std()) else 0,
			
 
				+                    'sum': round(float(series.sum()), 2) if not pd.isna(series.sum()) else 0,
			
 
				+                    'p25': round(float(series.quantile(0.25)), 2) if not pd.isna(series.quantile(0.25)) else 0,
			
 
				+                    'p75': round(float(series.quantile(0.75)), 2) if not pd.isna(series.quantile(0.75)) else 0,
			
 
				+                }
			
 
				+                distribution_shape = _calc_distribution_shape(series)
			
 
				+            except Exception:
			
 
				+                numeric_stats = None
			
 
				+
			
 
				+        # Enhanced role inference with value patterns
			
 
				+        role = _infer_column_role(col, dtype_str, sample_values, null_rate,
			
 
				+                                  unique_count, total_rows, value_patterns)
			
 
				+        label = _infer_metric_label(col, role, value_patterns)
			
 
				+        unit = _infer_unit(col, value_patterns)
			
 
				+
			
 
				+        # Enhanced: detect if column is a high-cardinality ID
			
 
				+        is_high_cardinality_id = (role == ColumnRole.TEXT and
			
 
				+                                  unique_count / max(total_rows, 1) > 0.8 and
			
 
				+                                  unique_count > 20)
			
 
				+        if is_high_cardinality_id:
			
 
				+            role = ColumnRole.ID
			
 
				+
			
 
				+        columns.append(ColumnProfile(
			
 
				+            column_name=col,
			
 
				+            dtype=dtype_str,
			
 
				+            role=role,
			
 
				+            null_count=null_count,
			
 
				+            null_rate=null_rate,
			
 
				+            unique_count=unique_count,
			
 
				+            sample_values=sample_values,
			
 
				+            numeric_stats=numeric_stats,
			
 
				+            inferred_label=label,
			
 
				+        ))
			
 
				+
			
 
				+        # Append extra metadata not in ColumnProfile
			
 
				+        columns[-1]._unit = unit
			
 
				+        columns[-1]._distribution_shape = distribution_shape
			
 
				+        columns[-1]._value_patterns = value_patterns
			
 
				+
			
 
				+    time_cols = [c for c in columns if c.role == ColumnRole.TIME]
			
 
				+    numeric_cols = [c for c in columns if c.role == ColumnRole.NUMERIC]
			
 
				+    category_cols = [c for c in columns if c.role == ColumnRole.CATEGORY]
			
 
				+    text_cols = [c for c in columns if c.role == ColumnRole.TEXT]
			
 
				+    id_cols = [c for c in columns if c.role == ColumnRole.ID]
			
 
				+
			
 
				+    # Date range inference
			
 
				+    date_range = (None, None)
			
 
				+    time_granularity = 'unknown'
			
 
				+    if time_cols:
			
 
				+        series = df[time_cols[0].column_name].dropna()
			
 
				+        parsed = series.apply(_parse_date).dropna()
			
 
				+        if len(parsed) > 0:
			
 
				+            date_range = (parsed.min(), parsed.max())
			
 
				+            if len(parsed) >= 2:
			
 
				+                diff = (parsed.max() - parsed.min()).days
			
 
				+                if diff <= 1:
			
 
				+                    time_granularity = 'daily'
			
 
				+                elif diff <= 7:
			
 
				+                    time_granularity = 'weekly'
			
 
				+                elif diff <= 31:
			
 
				+                    time_granularity = 'monthly'
			
 
				+                elif diff <= 92:
			
 
				+                    time_granularity = 'quarterly'
			
 
				+                else:
			
 
				+                    time_granularity = 'yearly'
			
 
				+
			
 
				+    # Enhanced: Detect derived relations among numeric columns
			
 
				+    derived_relations = _detect_derived_relations(df, [c.__dict__ for c in numeric_cols])
			
 
				+
			
 
				+    # Enhanced: Multi-dimensional quality scoring
			
 
				+    quality_score, quality_details = _calc_quality_score(
			
 
				+        df, columns, numeric_cols, date_range
			
 
				+    )
			
 
				+
			
 
				+    # Outlier detection (Enhanced: with CV-based filtering)
			
 
				+    outlier_columns = []
			
 
				+    for c in numeric_cols:
			
 
				+        ns = c.numeric_stats
			
 
				+        if ns and ns.get('std', 0) > 0 and ns.get('mean', 0) > 0:
			
 
				+            cv = ns['std'] / ns['mean']
			
 
				+            if cv > 3:
			
 
				+                outlier_columns.append(c.column_name)
			
 
				+
			
 
				+    return {
			
 
				+        'total_rows': total_rows,
			
 
				+        'total_columns': len(columns),
			
 
				+        'columns': [c.__dict__ for c in columns],
			
 
				+        'time_columns': [c.__dict__ for c in time_cols],
			
 
				+        'numeric_columns': [c.__dict__ for c in numeric_cols],
			
 
				+        'category_columns': [c.__dict__ for c in category_cols],
			
 
				+        'text_columns': [c.__dict__ for c in text_cols],
			
 
				+        'id_columns': [c.__dict__ for c in id_cols],
			
 
				+        'date_range': (
			
 
				+            date_range[0].strftime('%Y-%m-%d') if date_range[0] else None,
			
 
				+            date_range[1].strftime('%Y-%m-%d') if date_range[1] else None,
			
 
				+        ),
			
 
				+        'time_granularity': time_granularity,
			
 
				+        'data_quality': {
			
 
				+            'score': quality_score,
			
 
				+            'details': quality_details,
			
 
				+            'high_null_columns': [c.column_name for c in columns if c.null_rate > 0.3],
			
 
				+            'outlier_columns': outlier_columns,
			
 
				+        },
			
 
				+        'derived_relations': derived_relations,
			
 
				+        'column_stats': [{
			
 
				+            'column_name': col.column_name,
			
 
				+            'role': col.role.value,
			
 
				+            'dtype': col.dtype,
			
 
				+            'null_rate': col.null_rate,
			
 
				+            'unique_count': col.unique_count,
			
 
				+            'distribution_shape': getattr(col, '_distribution_shape', None),
			
 
				+            'inferred_label': col.inferred_label,
			
 
				+            'unit': getattr(col, '_unit', ''),
			
 
				+            'numeric_stats': col.numeric_stats,
			
 
				+        } for col in columns],
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+# =====================================================================
			
 
				+# ENHANCED QUALITY SCORING
			
 
				+# =====================================================================
			
 
				+
			
 
				+def _calc_quality_score(df: pd.DataFrame, columns: list,
			
 
				+                        numeric_cols: list, date_range: tuple) -> tuple:
			
 
				+    """Multi-dimensional quality scoring (0-100)."""
			
 
				+    score = 100
			
 
				+    details = {}
			
 
				+
			
 
				+    # 1) Completeness (30%) — null rates
			
 
				+    avg_null_rate = np.mean([c.null_rate for c in columns]) if columns else 0
			
 
				+    completeness_penalty = min(30, avg_null_rate * 100 * 2)
			
 
				+    completeness = max(0, 30 - completeness_penalty)
			
 
				+    details['completeness'] = round(completeness, 1)
			
 
				+
			
 
				+    # 2) Uniqueness (20%) — presence of ID columns or unique identifiers
			
 
				+    id_ratio = len([c for c in columns if c.role == ColumnRole.ID]) / max(len(columns), 1)
			
 
				+    uniqueness = min(20, 10 + id_ratio * 10)
			
 
				+    details['uniqueness'] = round(uniqueness, 1)
			
 
				+
			
 
				+    # 3) Numeric health (25%) — outliers, zeros, negative values
			
 
				+    numeric_health = 25
			
 
				+    for c in numeric_cols:
			
 
				+        series = df[c.column_name].dropna()
			
 
				+        if len(series) == 0:
			
 
				+            continue
			
 
				+        # Check for negative values in non-negative expected columns
			
 
				+        if c.inferred_label in ('台数', '数量', '金额', '人数'):
			
 
				+            neg_ratio = (series < 0).sum() / len(series)
			
 
				+            if neg_ratio > 0.05:
			
 
				+                numeric_health -= 5
			
 
				+        # Check for excessive zeros
			
 
				+        zero_ratio = (series == 0).sum() / len(series)
			
 
				+        if zero_ratio > 0.5:
			
 
				+            numeric_health -= 3
			
 
				+    details['numeric_health'] = max(0, numeric_health)
			
 
				+
			
 
				+    # 4) Temporal consistency (15%) — if time columns exist, check date ordering
			
 
				+    temporal = 15
			
 
				+    if date_range[0] and date_range[1]:
			
 
				+        if date_range[0] <= date_range[1]:
			
 
				+            temporal = 15
			
 
				+        else:
			
 
				+            temporal = 5
			
 
				+    details['temporal_consistency'] = temporal
			
 
				+
			
 
				+    # 5) Completeness of categorical data (10%)
			
 
				+    cat_health = 10
			
 
				+    for c in columns:
			
 
				+        if c.role == ColumnRole.CATEGORY and c.null_rate > 0.2:
			
 
				+            cat_health -= 2
			
 
				+    details['categorical_health'] = max(0, cat_health)
			
 
				+
			
 
				+    score = completeness + uniqueness + numeric_health + temporal + cat_health
			
 
				+    score = max(0, min(100, round(score)))
			
 
				+
			
 
				+    return score, details
			
 
				+
			
 
				+
			
 
				+# =====================================================================
			
 
				+# HELPER FUNCTIONS (enhanced)
			
 
				+# =====================================================================
			
 
				+
			
 
				+def profile_category_distribution(df: pd.DataFrame, col_name: str, top_n: int = 15) -> dict:
			
 
				+    if col_name not in df.columns:
			
 
				+        return {}
			
 
				+    counts = df[col_name].value_counts().head(top_n).to_dict()
			
 
				+    total = df[col_name].notna().sum()
			
 
				+    # Calculate concentration (Herfindahl index)
			
 
				+    pcts = [v / total for v in counts.values()] if total else []
			
 
				+    hhi = sum(p * p for p in pcts) if pcts else 0
			
 
				+
			
 
				+    return {
			
 
				+        'total_categories': df[col_name].nunique(),
			
 
				+        'top_items': {str(k): {'count': int(v), 'pct': round(v / total * 100, 1) if total else 0}
			
 
				+                      for k, v in counts.items()},
			
 
				+        'concentration_hhi': round(hhi, 4),
			
 
				+        'concentration_label': '高度集中' if hhi > 0.5 else '中度集中' if hhi > 0.2 else '分散',
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def profile_numeric_series(df: pd.DataFrame, col_name: str) -> dict:
			
 
				+    if col_name not in df.columns:
			
 
				+        return {}
			
 
				+    series = df[col_name].dropna()
			
 
				+    if len(series) == 0:
			
 
				+        return {}
			
 
				+
			
 
				+    shape = _calc_distribution_shape(series)
			
 
				+    result = {
			
 
				+        'count': len(series),
			
 
				+        'sum': round(float(series.sum()), 2),
			
 
				+        'mean': round(float(series.mean()), 2),
			
 
				+        'median': round(float(series.median()), 2),
			
 
				+        'min': round(float(series.min()), 2),
			
 
				+        'max': round(float(series.max()), 2),
			
 
				+        'std': round(float(series.std()), 2),
			
 
				+    }
			
 
				+    if shape:
			
 
				+        result.update(shape)
			
 
				+    return result
			
 
				+
			
 
				+
			
 
				+def detect_data_issues(df: pd.DataFrame) -> list[dict]:
			
 
				+    issues = []
			
 
				+    for col in df.columns:
			
 
				+        null_rate = df[col].isna().mean()
			
 
				+        if null_rate > 0.5:
			
 
				+            issues.append({
			
 
				+                'column': col,
			
 
				+                'type': 'high_missing',
			
 
				+                'severity': 'major',
			
 
				+                'message': f'列"{col}"缺失率{null_rate:.1%}，建议排除或补全',
			
 
				+            })
			
 
				+        elif null_rate > 0.1:
			
 
				+            issues.append({
			
 
				+                'column': col,
			
 
				+                'type': 'moderate_missing',
			
 
				+                'severity': 'minor',
			
 
				+                'message': f'列"{col}"缺失率{null_rate:.1%}',
			
 
				+            })
			
 
				+
			
 
				+        if pd.api.types.is_numeric_dtype(df[col]):
			
 
				+            series = df[col].dropna()
			
 
				+            if len(series) > 0:
			
 
				+                q1, q3 = series.quantile(0.25), series.quantile(0.75)
			
 
				+                iqr = q3 - q1
			
 
				+                lower, upper = q1 - 3 * iqr, q3 + 3 * iqr
			
 
				+                outlier_count = ((series < lower) | (series > upper)).sum()
			
 
				+                if outlier_count > len(series) * 0.1:
			
 
				+                    issues.append({
			
 
				+                        'column': col,
			
 
				+                        'type': 'outliers',
			
 
				+                        'severity': 'major',
			
 
				+                        'message': f'列"{col}"存在{outlier_count}个异常值（{outlier_count/len(series):.1%}）',
			
 
				+                    })
			
 
				+
			
 
				+                # Check for negative values
			
 
				+                neg_count = (series < 0).sum()
			
 
				+                if neg_count > 0:
			
 
				+                    issues.append({
			
 
				+                        'column': col,
			
 
				+                        'type': 'negative_values',
			
 
				+                        'severity': 'minor',
			
 
				+                        'message': f'列"{col}"存在{neg_count}个负值',
			
 
				+                    })
			
 
				+
			
 
				+        # Check for constant columns
			
 
				+        if df[col].nunique() <= 1 and null_rate < 1.0:
			
 
				+            issues.append({
			
 
				+                'column': col,
			
 
				+                'type': 'constant_column',
			
 
				+                'severity': 'minor',
			
 
				+                'message': f'列"{col}"为常量列（仅1个唯一值），对分析无贡献',
			
 
				+            })
			
 
				+
			
 
				+    return issues
			
 
				+
			
 
				+
			
 
				+def generate_summary_text(profile: dict) -> str:
			
 
				+    lines = []
			
 
				+    lines.append(f"共 {profile['total_rows']:,} 行 × {profile['total_columns']} 列")
			
 
				+    num_cols = profile.get('numeric_columns', [])
			
 
				+    cat_cols = profile.get('category_columns', [])
			
 
				+    time_cols = profile.get('time_columns', [])
			
 
				+
			
 
				+    lines.append(f"数值列: {len(num_cols)} 个 | 分类列: {len(cat_cols)} 个 | 时间列: {len(time_cols)} 个")
			
 
				+
			
 
				+    dr = profile.get('date_range', (None, None))
			
 
				+    if dr[0]:
			
 
				+        lines.append(f"时间范围: {dr[0]} ~ {dr[1]}")
			
 
				+    lines.append(f"时间粒度: {profile.get('time_granularity', 'unknown')}")
			
 
				+
			
 
				+    q = profile.get('data_quality', {})
			
 
				+    lines.append(f"数据质量评分: {q.get('score', 0)}/100")
			
 
				+    if q.get('details'):
			
 
				+        det = q['details']
			
 
				+        lines.append(f"  完整性: {det.get('completeness', 0)}/30 | "
			
 
				+                     f"数值健康: {det.get('numeric_health', 0)}/25 | "
			
 
				+                     f"时间一致性: {det.get('temporal_consistency', 0)}/15")
			
 
				+    if q.get('high_null_columns'):
			
 
				+        lines.append(f"高缺失列: {', '.join(q['high_null_columns'])}")
			
 
				+
			
 
				+    # Enhanced: derived relations
			
 
				+    derived = profile.get('derived_relations', [])
			
 
				+    if derived:
			
 
				+        lines.append(f"检测到 {len(derived)} 个数值关系:")
			
 
				+        for rel in derived[:5]:
			
 
				+            lines.append(f"  {rel['formula']} (置信度: {rel['accuracy']:.0%})")
			
 
				+
			
 
				+    # Distribution shape summary for numeric columns
			
 
				+    shape_cols = []
			
 
				+    for nc in num_cols[:3]:
			
 
				+        shape = nc.get('distribution_shape')
			
 
				+        if shape:
			
 
				+            shape_cols.append(f"{nc.get('inferred_label', nc['column_name'])}[{shape.get('concentration', 'N/A')}]")
			
 
				+    if shape_cols:
			
 
				+        lines.append(f"分布特征: {' | '.join(shape_cols)}")
			
 
				+
			
 
				+    return '\n'.join(lines)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    import sys
			
 
				+    if len(sys.argv) > 1:
			
 
				+        fp = sys.argv[1]
			
 
				+        try:
			
 
				+            df = pd.read_excel(fp)
			
 
				+        except Exception:
			
 
				+            df = pd.read_excel(fp, sheet_name=0)
			
 
				+        profile = profile_dataframe(df)
			
 
				+        print(generate_summary_text(profile))
			
 
				+        issues = detect_data_issues(df)
			
 
				+        if issues:
			
 
				+            print(f"\n数据问题 ({len(issues)}):")
			
 
				+            for iss in issues:
			
 
				+                print(f"  [{iss['severity']}] {iss['message']}")
			
--- a/generate-data-report-ppt/scripts/metrics_calculator.py
+++ b/generate-data-report-ppt/scripts/metrics_calculator.py
@@ -1136,6 +1136,198 @@ def avg(lst):
 
				     return sum(lst) / len(lst) if lst else 0
			
 
				 
			
 
				 
			
 
				+def calc_generic_metrics(df: pd.DataFrame, config) -> dict:
			
 
				+    metrics = {}
			
 
				+
			
 
				+    for metric_def in config.metrics:
			
 
				+        col = metric_def.column
			
 
				+        if col not in df.columns:
			
 
				+            metrics[metric_def.name] = 0
			
 
				+            continue
			
 
				+
			
 
				+        series = df[col].dropna()
			
 
				+        agg = metric_def.aggregation
			
 
				+
			
 
				+        if agg == 'sum':
			
 
				+            val = int(series.sum()) if pd.api.types.is_numeric_dtype(series) else len(series)
			
 
				+        elif agg == 'count':
			
 
				+            val = int(series.count())
			
 
				+        elif agg == 'avg':
			
 
				+            val = round(float(series.mean()), 1) if pd.api.types.is_numeric_dtype(series) else 0
			
 
				+        elif agg == 'max':
			
 
				+            val = round(float(series.max()), 1) if pd.api.types.is_numeric_dtype(series) else 0
			
 
				+        elif agg == 'min':
			
 
				+            val = round(float(series.min()), 1) if pd.api.types.is_numeric_dtype(series) else 0
			
 
				+        elif agg == 'distinct_count':
			
 
				+            val = int(series.nunique())
			
 
				+        else:
			
 
				+            val = len(series)
			
 
				+
			
 
				+        metrics[metric_def.name] = val
			
 
				+        metrics[f'{metric_def.name}_label'] = metric_def.label
			
 
				+        metrics[f'{metric_def.name}_unit'] = metric_def.unit
			
 
				+
			
 
				+    if hasattr(config, 'comparison') and config.comparison:
			
 
				+        pass
			
 
				+
			
 
				+    return metrics
			
 
				+
			
 
				+
			
 
				+def calc_generic_trend(df: pd.DataFrame, time_col: str, metric_col: str,
			
 
				+                       aggregation: str = 'sum') -> dict:
			
 
				+    if time_col not in df.columns or metric_col not in df.columns:
			
 
				+        return {}
			
 
				+
			
 
				+    if aggregation == 'sum':
			
 
				+        trend = df.groupby(time_col)[metric_col].sum().sort_index()
			
 
				+    elif aggregation == 'count':
			
 
				+        trend = df.groupby(time_col)[metric_col].count().sort_index()
			
 
				+    else:
			
 
				+        trend = df.groupby(time_col)[metric_col].mean().sort_index()
			
 
				+
			
 
				+    dates = []
			
 
				+    for d in trend.index:
			
 
				+        try:
			
 
				+            dates.append(pd.Timestamp(d).strftime('%m/%d'))
			
 
				+        except Exception:
			
 
				+            dates.append(str(d))
			
 
				+
			
 
				+    return {
			
 
				+        'dates': dates,
			
 
				+        'values': [int(v) if aggregation != 'avg' else round(float(v), 1) for v in trend.values],
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def calc_generic_distribution(df: pd.DataFrame, cat_col: str, metric_col: str = None,
			
 
				+                              aggregation: str = 'sum', top_n: int = 10) -> dict:
			
 
				+    if cat_col not in df.columns:
			
 
				+        return {}
			
 
				+
			
 
				+    if metric_col and metric_col in df.columns:
			
 
				+        if aggregation == 'sum':
			
 
				+            dist = df.groupby(cat_col)[metric_col].sum().sort_values(ascending=False).head(top_n)
			
 
				+        elif aggregation == 'count':
			
 
				+            dist = df.groupby(cat_col)[metric_col].count().sort_values(ascending=False).head(top_n)
			
 
				+        else:
			
 
				+            dist = df.groupby(cat_col)[metric_col].mean().sort_values(ascending=False).head(top_n)
			
 
				+    else:
			
 
				+        dist = df[cat_col].value_counts().head(top_n)
			
 
				+
			
 
				+    total = sum(dist.values)
			
 
				+    return {
			
 
				+        'categories': [str(k) for k in dist.index],
			
 
				+        'values': [int(v) for v in dist.values],
			
 
				+        'percentages': [round(v / total * 100, 1) if total else 0 for v in dist.values],
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def calc_generic_ranking(df: pd.DataFrame, rank_col: str, metric_col: str,
			
 
				+                         aggregation: str = 'sum', top_n: int = 15) -> list[dict]:
			
 
				+    if rank_col not in df.columns or metric_col not in df.columns:
			
 
				+        return []
			
 
				+
			
 
				+    if aggregation == 'sum':
			
 
				+        ranked = df.groupby(rank_col)[metric_col].sum().sort_values(ascending=False).head(top_n)
			
 
				+    elif aggregation == 'count':
			
 
				+        ranked = df.groupby(rank_col)[metric_col].count().sort_values(ascending=False).head(top_n)
			
 
				+    else:
			
 
				+        ranked = df.groupby(rank_col)[metric_col].mean().sort_values(ascending=False).head(top_n)
			
 
				+
			
 
				+    return [{'name': str(k), 'value': int(v), 'rank': i + 1}
			
 
				+            for i, (k, v) in enumerate(ranked.items())]
			
 
				+
			
 
				+
			
 
				+def generate_generic_insights(data_profile: dict, metrics: dict) -> list[dict]:
			
 
				+    items = []
			
 
				+
			
 
				+    num_cols = data_profile.get('numeric_columns', [])
			
 
				+    cat_cols = data_profile.get('category_columns', [])
			
 
				+    time_cols = data_profile.get('time_columns', [])
			
 
				+    q = data_profile.get('data_quality', {})
			
 
				+    score = q.get('score', 100)
			
 
				+
			
 
				+    if metrics:
			
 
				+        metric_details = []
			
 
				+        for k, v in metrics.items():
			
 
				+            if isinstance(v, (int, float)):
			
 
				+                metric_details.append(f'{k}: {v:,.0f}')
			
 
				+        if metric_details:
			
 
				+            items.append({
			
 
				+                'title': '核心指标总览',
			
 
				+                'content': f'本期关键指标：{"；".join(metric_details[:6])}。'
			
 
				+                           f'综合来看，业务运行态势可通过这些核心数据进行量化评估，'
			
 
				+                           f'建议结合业务目标与实际值的差距进行针对性分析。',
			
 
				+            })
			
 
				+
			
 
				+    if num_cols:
			
 
				+        for nc in num_cols[:2]:
			
 
				+            ns = nc.get('numeric_stats', {}) or {}
			
 
				+            col_name = nc.get('inferred_label', nc['column_name'])
			
 
				+            stats_parts = []
			
 
				+            if 'sum' in ns and ns['sum']:
			
 
				+                stats_parts.append(f'总量 {ns["sum"]:,.0f}')
			
 
				+            if 'mean' in ns and ns['mean']:
			
 
				+                stats_parts.append(f'均值 {ns["mean"]:,.1f}')
			
 
				+            if 'max' in ns and ns['max']:
			
 
				+                stats_parts.append(f'峰值 {ns["max"]:,.0f}')
			
 
				+            if 'min' in ns and ns['min']:
			
 
				+                stats_parts.append(f'最低 {ns["min"]:,.0f}')
			
 
				+            if stats_parts:
			
 
				+                items.append({
			
 
				+                    'title': f'{col_name}数据特征',
			
 
				+                    'content': f'指标"{col_name}"的统计特征：{"，".join(stats_parts)}。'
			
 
				+                               f'标准差 {ns.get("std", "N/A")}，数据波动幅度'
			
 
				+                               f'{"较大" if isinstance(ns.get("std"), (int,float)) and ns["std"] > ns.get("mean", 1) * 0.5 else "适中"}。',
			
 
				+                })
			
 
				+
			
 
				+    if cat_cols:
			
 
				+        for cc in cat_cols[:2]:
			
 
				+            uc = cc.get('unique_count', 0)
			
 
				+            items.append({
			
 
				+                'title': f'{cc.get("inferred_label", cc["column_name"])}维度分析',
			
 
				+                'content': f'数据覆盖 {uc} 个不同的{cc.get("inferred_label", cc["column_name"])}类别，'
			
 
				+                           f'丰富的分类维度支持多角度交叉分析。'
			
 
				+                           f'建议重点关注主要类别的集中度与分布均衡性，'
			
 
				+                           f'识别高价值类别与低效类别之间的差异特征。',
			
 
				+            })
			
 
				+
			
 
				+    if time_cols:
			
 
				+        tc = time_cols[0]
			
 
				+        items.append({
			
 
				+            'title': '时间维度覆盖',
			
 
				+            'content': f'数据包含时间列"{tc.get("inferred_label", tc["column_name"])}"，'
			
 
				+                       f'支持按时间维度进行趋势分析。通过对时间序列数据的分解，'
			
 
				+                       f'可识别周期性波动、趋势变化及异常时间节点，为预测与规划提供依据。',
			
 
				+        })
			
 
				+
			
 
				+    items.append({
			
 
				+        'title': '数据质量评估',
			
 
				+        'content': f'数据质量评分 {score}/100，'
			
 
				+                   f'{"数据完整可靠，" if score >= 90 else "数据质量良好，建议关注缺失值" if score >= 80 else "数据需重点关注质量控制"}'
			
 
				+                   f'缺失率 {q.get("null_rate", 0)*100:.1f}%。'
			
 
				+                   f'本报告中的分析与图表均基于现有数据进行自动化生成，确保数据准确性。',
			
 
				+    })
			
 
				+
			
 
				+    high_null = q.get('high_null_columns', [])
			
 
				+    if high_null:
			
 
				+        items.append({
			
 
				+            'title': '数据完整性说明',
			
 
				+            'content': f'以下列缺失值比例较高：{", ".join(high_null[:5])}。'
			
 
				+                       f'在分析涉及这些列时已进行空值排除处理，'
			
 
				+                       f'建议后续数据录入环节关注这些字段的完整填写，以提升分析精度。',
			
 
				+        })
			
 
				+
			
 
				+    total_rows = data_profile.get('total_rows', 0)
			
 
				+    if total_rows:
			
 
				+        items.append({
			
 
				+            'title': '数据规模概述',
			
 
				+            'content': f'本期报告基于 {total_rows} 条数据记录进行分析，'
			
 
				+                       f'样本量{"充足，统计结果具有较好的代表性" if total_rows >= 100 else "适中，统计结果可作为参考" if total_rows >= 30 else "有限，分析结果仅供参考"}。',
			
 
				+        })
			
 
				+
			
 
				+    return items
			
 
				+
			
 
				+
			
 
				 if __name__ == '__main__':
			
 
				     import sys
			
 
				     if len(sys.argv) > 1:
			
--- a/generate-data-report-ppt/scripts/page_layouts.py
+++ b/generate-data-report-ppt/scripts/page_layouts.py
@@ -0,0 +1,223 @@
 
				+"""
			
 
				+Dynamic page layout engine for the universal data report generator.
			
 
				+Provides pre-defined layout templates and layout calculation utilities.
			
 
				+"""
			
 
				+from pptx.util import Emu, Pt
			
 
				+from pptx.dml.color import RGBColor
			
 
				+from dataclasses import dataclass
			
 
				+from typing import Optional
			
 
				+
			
 
				+
			
 
				+SLIDE_WIDTH = 16256000
			
 
				+SLIDE_HEIGHT = 9144000
			
 
				+MARGIN_LEFT = Emu(762000)
			
 
				+MARGIN_RIGHT = Emu(762000)
			
 
				+MARGIN_TOP = Emu(254000)
			
 
				+CONTENT_TOP_BASE = Emu(1600200)
			
 
				+FOOTER_TOP = Emu(8824000)
			
 
				+FOOTER_HEIGHT = Emu(320000)
			
 
				+CONTENT_WIDTH = SLIDE_WIDTH - MARGIN_LEFT - MARGIN_RIGHT
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class LayoutZone:
			
 
				+    x: int
			
 
				+    y: int
			
 
				+    width: int
			
 
				+    height: int
			
 
				+    zone_type: str
			
 
				+
			
 
				+
			
 
				+def calculate_content_area(content_top_emu: int = None) -> LayoutZone:
			
 
				+    top = content_top_emu or int(CONTENT_TOP_BASE)
			
 
				+    height = FOOTER_TOP - top - Emu(100000)
			
 
				+    return LayoutZone(
			
 
				+        x=int(MARGIN_LEFT),
			
 
				+        y=top,
			
 
				+        width=int(CONTENT_WIDTH),
			
 
				+        height=int(height),
			
 
				+        zone_type='content_area',
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def get_kpi_grid(content_top_emu: int = None, cols: int = 3, rows: int = 2,
			
 
				+                 card_width_emu: int = 4699000, card_height_emu: int = 3048000,
			
 
				+                 gap_x_emu: int = 444500, gap_y_emu: int = 381000) -> list[LayoutZone]:
			
 
				+    start_y = max(int(CONTENT_TOP_BASE), content_top_emu or int(CONTENT_TOP_BASE))
			
 
				+    zones = []
			
 
				+    for row in range(rows):
			
 
				+        for col in range(cols):
			
 
				+            x = int(MARGIN_LEFT) + col * (card_width_emu + gap_x_emu)
			
 
				+            y = start_y + row * (card_height_emu + gap_y_emu)
			
 
				+            zones.append(LayoutZone(x=x, y=y, width=card_width_emu, height=card_height_emu, zone_type='kpi_card'))
			
 
				+    return zones
			
 
				+
			
 
				+
			
 
				+def get_chart_left_zone(content_top_emu: int = None, chart_ratio: float = 0.6) -> LayoutZone:
			
 
				+    content = calculate_content_area(content_top_emu)
			
 
				+    chart_w = int(content.width * chart_ratio) - Emu(200000)
			
 
				+    return LayoutZone(
			
 
				+        x=content.x,
			
 
				+        y=content.y,
			
 
				+        width=chart_w,
			
 
				+        height=content.height,
			
 
				+        zone_type='chart_left',
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def get_insight_right_zone(content_top_emu: int = None, chart_ratio: float = 0.6) -> LayoutZone:
			
 
				+    content = calculate_content_area(content_top_emu)
			
 
				+    chart_w = int(content.width * chart_ratio)
			
 
				+    text_left = content.x + chart_w + Emu(200000)
			
 
				+    text_w = content.x + content.width - text_left
			
 
				+    return LayoutZone(
			
 
				+        x=text_left,
			
 
				+        y=content.y,
			
 
				+        width=text_w,
			
 
				+        height=content.height,
			
 
				+        zone_type='insight_right',
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def get_full_width_zone(content_top_emu: int = None) -> LayoutZone:
			
 
				+    return calculate_content_area(content_top_emu)
			
 
				+
			
 
				+
			
 
				+def get_two_column_zones(content_top_emu: int = None, gap_emu: int = 381000) -> tuple[LayoutZone, LayoutZone]:
			
 
				+    content = calculate_content_area(content_top_emu)
			
 
				+    half_w = (content.width - gap_emu) // 2
			
 
				+    left = LayoutZone(x=content.x, y=content.y, width=half_w, height=content.height, zone_type='column_left')
			
 
				+    right = LayoutZone(x=content.x + half_w + gap_emu, y=content.y, width=half_w, height=content.height, zone_type='column_right')
			
 
				+    return left, right
			
 
				+
			
 
				+
			
 
				+def get_two_row_zones(content_top_emu: int = None, gap_emu: int = 381000,
			
 
				+                      top_ratio: float = 0.55) -> tuple[LayoutZone, LayoutZone]:
			
 
				+    content = calculate_content_area(content_top_emu)
			
 
				+    top_h = int(content.height * top_ratio)
			
 
				+    top = LayoutZone(x=content.x, y=content.y, width=content.width, height=top_h, zone_type='row_top')
			
 
				+    bottom = LayoutZone(
			
 
				+        x=content.x,
			
 
				+        y=content.y + top_h + gap_emu,
			
 
				+        width=content.width,
			
 
				+        height=content.height - top_h - gap_emu,
			
 
				+        zone_type='row_bottom',
			
 
				+    )
			
 
				+    return top, bottom
			
 
				+
			
 
				+
			
 
				+def get_card_grid(n: int, content_top_emu: int = None, max_cols: int = 3) -> list[LayoutZone]:
			
 
				+    content = calculate_content_area(content_top_emu)
			
 
				+    cols = min(max_cols, n)
			
 
				+    rows = (n + cols - 1) // cols
			
 
				+    card_w = (content.width - (cols - 1) * Emu(254000)) // cols
			
 
				+    card_h = (content.height - (rows - 1) * Emu(254000)) // rows
			
 
				+
			
 
				+    zones = []
			
 
				+    for i in range(n):
			
 
				+        col = i % cols
			
 
				+        row = i // cols
			
 
				+        x = content.x + col * (card_w + Emu(254000))
			
 
				+        y = content.y + row * (card_h + Emu(254000))
			
 
				+        zones.append(LayoutZone(x=x, y=y, width=card_w, height=card_h, zone_type=f'card_{i}'))
			
 
				+    return zones
			
 
				+
			
 
				+
			
 
				+def get_alert_card_zones(n: int, content_top_emu: int = None) -> list[LayoutZone]:
			
 
				+    content = calculate_content_area(content_top_emu)
			
 
				+    card_h = Emu(2286000)
			
 
				+    gap = Emu(254000)
			
 
				+    return get_card_grid(n, content_top_emu, max_cols=3)
			
 
				+
			
 
				+
			
 
				+def get_issue_card_zones(n: int, content_top_emu: int = None) -> list[LayoutZone]:
			
 
				+    content = calculate_content_area(content_top_emu)
			
 
				+    card_h = Emu(2032000)
			
 
				+    gap = Emu(254000)
			
 
				+    start_y = content.y
			
 
				+    zones = []
			
 
				+    for i in range(min(n, 3)):
			
 
				+        y = start_y + i * (card_h + gap)
			
 
				+        zones.append(LayoutZone(x=content.x, y=y, width=content.width, height=card_h, zone_type=f'issue_{i}'))
			
 
				+    return zones
			
 
				+
			
 
				+
			
 
				+def get_table_zone(content_top_emu: int = None, ratio: float = 0.5) -> LayoutZone:
			
 
				+    content = calculate_content_area(content_top_emu)
			
 
				+    return LayoutZone(
			
 
				+        x=content.x,
			
 
				+        y=content.y + int(content.height * ratio) + Emu(200000),
			
 
				+        width=content.width,
			
 
				+        height=int(content.height * (1 - ratio)),
			
 
				+        zone_type='table_bottom',
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def detect_layout_slots(slide) -> dict:
			
 
				+    slots = {
			
 
				+        'has_header': False,
			
 
				+        'has_footer': False,
			
 
				+        'has_page_title': False,
			
 
				+        'content_top': int(CONTENT_TOP_BASE),
			
 
				+        'content_width': int(CONTENT_WIDTH),
			
 
				+        'content_height': FOOTER_TOP - int(CONTENT_TOP_BASE) - Emu(100000),
			
 
				+    }
			
 
				+    for shape in slide.shapes:
			
 
				+        if shape.has_text_frame:
			
 
				+            text = shape.text_frame.text
			
 
				+            if 'page_title' in text or '报告' in text:
			
 
				+                slots['has_page_title'] = True
			
 
				+            if '数据来源' in text:
			
 
				+                slots['has_footer'] = True
			
 
				+                slots['footer_top'] = int(shape.top)
			
 
				+        if shape.top + shape.height < Emu(1300000):
			
 
				+            slots['has_header'] = True
			
 
				+    return slots
			
 
				+
			
 
				+
			
 
				+def ensure_safe_position(shape, slide_width: int, slide_height: int) -> bool:
			
 
				+    margin = Emu(254000)
			
 
				+    adjusted = False
			
 
				+    if shape.left < 0:
			
 
				+        shape.left = margin
			
 
				+        adjusted = True
			
 
				+    if shape.top < 0:
			
 
				+        shape.top = margin
			
 
				+        adjusted = True
			
 
				+    if shape.left + shape.width > slide_width:
			
 
				+        shape.left = slide_width - shape.width - margin
			
 
				+        adjusted = True
			
 
				+    if shape.top + shape.height > slide_height:
			
 
				+        shape.top = slide_height - shape.height - margin
			
 
				+        adjusted = True
			
 
				+    return adjusted
			
 
				+
			
 
				+
			
 
				+def calculate_fill_ratio(slide, content_top_emu: int = None) -> float:
			
 
				+    content = calculate_content_area(content_top_emu)
			
 
				+    total_area = content.width * content.height
			
 
				+    if total_area <= 0:
			
 
				+        return 0.0
			
 
				+    filled_area = 0
			
 
				+    for shape in slide.shapes:
			
 
				+        sx = int(shape.left)
			
 
				+        sy = int(shape.top)
			
 
				+        sw = int(shape.width)
			
 
				+        sh = int(shape.height)
			
 
				+        if sy < content.y:
			
 
				+            continue
			
 
				+        if sy > content.y + content.height:
			
 
				+            continue
			
 
				+        overlap_x = max(0, min(sx + sw, content.x + content.width) - max(sx, content.x))
			
 
				+        overlap_y = max(0, min(sy + sh, content.y + content.height) - max(sy, content.y))
			
 
				+        filled_area += overlap_x * overlap_y
			
 
				+    return min(1.0, filled_area / total_area)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    ca = calculate_content_area()
			
 
				+    print(f"Content area: {ca.width}x{ca.height}")
			
 
				+    kpis = get_kpi_grid()
			
 
				+    for i, z in enumerate(kpis):
			
 
				+        print(f"KPI {i}: x={z.x}, y={z.y}")
			
 
				+    print(f"Fill ratio test: hypothetical")
			
--- a/generate-data-report-ppt/scripts/ppt_builder.py
+++ b/generate-data-report-ppt/scripts/ppt_builder.py
@@ -8,6 +8,7 @@ insights (title + body per paragraph) aligned with reference PPT style.
 
				 import copy
			
 
				 import os
			
 
				 import sys
			
 
				+import re as re_module
			
 
				 from pathlib import Path
			
 
				 from datetime import datetime, timedelta
			
 
				 
			
@@ -19,13 +20,31 @@ from pptx.dml.color import RGBColor
 
				 from pptx.enum.text import PP_ALIGN
			
 
				 from pptx.enum.shapes import MSO_SHAPE
			
 
				 
			
 
				-from data_loader import load_daily, load_weekly, load_monthly, load_date_range
			
 
				-from metrics_calculator import calc_daily_metrics, calc_weekly_metrics, calc_monthly_metrics, generate_deep_insights
			
 
				+from data_loader import (
			
 
				+    load_daily, load_weekly, load_monthly, load_date_range,
			
 
				+    load_generic_excel,
			
 
				+)
			
 
				+from metrics_calculator import (
			
 
				+    calc_daily_metrics, calc_weekly_metrics, calc_monthly_metrics, generate_deep_insights,
			
 
				+    calc_generic_metrics, calc_generic_trend, calc_generic_distribution,
			
 
				+    calc_generic_ranking, generate_generic_insights,
			
 
				+)
			
 
				 from chart_factory import (
			
 
				     add_column_chart, add_bar_chart, add_line_chart, add_doughnut_chart,
			
 
				     add_pie_chart, add_funnel_chart, add_horizontal_bar_chart,
			
 
				     add_grouped_bar_chart, add_table
			
 
				 )
			
 
				+from page_layouts import (
			
 
				+    get_kpi_grid, get_chart_left_zone, get_insight_right_zone,
			
 
				+    get_full_width_zone, get_two_column_zones,
			
 
				+)
			
 
				+from quality_inspector import QualityInspector
			
 
				+from theme_manager import theme_to_rgb_colors, get_theme
			
 
				+from report_config import (
			
 
				+    ReportConfig, PageDef, MetricDef, PeriodType, ChartType,
			
 
				+    validate_six_confirmations,
			
 
				+)
			
 
				+from quality_rules import SLIDE_WIDTH, SLIDE_HEIGHT, CONTENT_LEFT, CONTENT_TOP_BASE, FOOTER_TOP
			
 
				 
			
 
				 # Colors — aligned with reference design theme YAML
			
 
				 C_PRIMARY = RGBColor(0x1E, 0x3A, 0x5F)
			
@@ -113,6 +132,53 @@ def _replace_all_placeholders(slide, mapping: dict):
 
				         _replace_placeholder(slide, placeholder, new_text)
			
 
				 
			
 
				 
			
 
				+def _remove_shape(shape):
			
 
				+    """Remove a python-pptx shape from its parent tree."""
			
 
				+    el = shape.element
			
 
				+    el.getparent().remove(el)
			
 
				+
			
 
				+
			
 
				+def _remove_empty_cover_kpi_placeholders(slide):
			
 
				+    """
			
 
				+    Remove template KPI cards when generic cover data does not provide values.
			
 
				+    This prevents empty rounded rectangles from staying on the cover.
			
 
				+    """
			
 
				+    kpi_pattern = re_module.compile(r'\{kpi\d+_(label|value)\}')
			
 
				+    placeholder_shapes = [
			
 
				+        shape for shape in slide.shapes
			
 
				+        if shape.has_text_frame and kpi_pattern.search(shape.text_frame.text or '')
			
 
				+    ]
			
 
				+    if not placeholder_shapes:
			
 
				+        return
			
 
				+
			
 
				+    x_min = min(int(shape.left) for shape in placeholder_shapes)
			
 
				+    x_max = max(int(shape.left) + int(shape.width) for shape in placeholder_shapes)
			
 
				+    y_min = min(int(shape.top) for shape in placeholder_shapes)
			
 
				+    y_max = max(int(shape.top) + int(shape.height) for shape in placeholder_shapes)
			
 
				+    pad = Emu(220000)
			
 
				+
			
 
				+    to_remove = []
			
 
				+    for shape in slide.shapes:
			
 
				+        sx = int(shape.left)
			
 
				+        sy = int(shape.top)
			
 
				+        sw = int(shape.width)
			
 
				+        sh = int(shape.height)
			
 
				+        in_region = (
			
 
				+            sx >= x_min - pad and sx + sw <= x_max + pad and
			
 
				+            sy >= y_min - pad and sy + sh <= y_max + pad
			
 
				+        )
			
 
				+        is_text_placeholder = shape in placeholder_shapes
			
 
				+        is_empty_kpi_card = (
			
 
				+            in_region and
			
 
				+            getattr(shape, 'auto_shape_type', None) == MSO_SHAPE.ROUNDED_RECTANGLE
			
 
				+        )
			
 
				+        if is_text_placeholder or is_empty_kpi_card:
			
 
				+            to_remove.append(shape)
			
 
				+
			
 
				+    for shape in to_remove:
			
 
				+        _remove_shape(shape)
			
 
				+
			
 
				+
			
 
				 # ==============================================================================
			
 
				 # NAVIGATION TABS
			
 
				 # ==============================================================================
			
@@ -903,6 +969,746 @@ def _safe_div(a, b):
 
				 
			
 
				 
			
 
				 # ==============================================================================
			
 
				+# DYNAMIC / UNIVERSAL REPORT BUILDER
			
 
				+# ==============================================================================
			
 
				+
			
 
				+def build_report(data_file: str, config: ReportConfig, output_path: str) -> str:
			
 
				+    master_path = config.template_path or get_master_template('daily')
			
 
				+    prs = Presentation(master_path)
			
 
				+
			
 
				+    df = load_generic_excel(data_file)
			
 
				+    if config.require_six_confirmations:
			
 
				+        confirmation_issues = validate_six_confirmations(config, list(df.columns))
			
 
				+        if confirmation_issues:
			
 
				+            raise ValueError('生成前六项确认未通过：\n- ' + '\n- '.join(confirmation_issues))
			
 
				+    profile = config.data_profiling or {}
			
 
				+
			
 
				+    colors = theme_to_rgb_colors(config.theme)
			
 
				+
			
 
				+    metrics = calc_generic_metrics(df, config)
			
 
				+
			
 
				+    content_top = _detect_content_top(prs.slides[1]) if len(prs.slides) > 1 else 1524000
			
 
				+
			
 
				+    total_pages = len([p for p in config.pages if p.selected])
			
 
				+    if total_pages == 0:
			
 
				+        total_pages = len(config.pages)
			
 
				+
			
 
				+    for page_idx, page_def in enumerate(config.pages):
			
 
				+        if not page_def.selected:
			
 
				+            continue
			
 
				+
			
 
				+        page_num = page_idx + 1
			
 
				+
			
 
				+        if page_def.page_type == 'cover':
			
 
				+            _build_cover_page(prs, config, colors)
			
 
				+        elif page_def.page_type == 'toc':
			
 
				+            _build_toc_page(prs, config, colors)
			
 
				+        elif page_def.page_type == 'kpi_overview':
			
 
				+            _build_kpi_overview_page(prs, config, metrics, colors, content_top, df, profile)
			
 
				+        elif page_def.page_type == 'trend':
			
 
				+            _build_trend_page(prs, config, df, profile, colors, content_top)
			
 
				+        elif page_def.page_type == 'distribution':
			
 
				+            _build_distribution_page(prs, config, df, profile, colors, content_top, page_def)
			
 
				+        elif page_def.page_type == 'ranking':
			
 
				+            _build_ranking_page(prs, config, df, profile, colors, content_top, page_def)
			
 
				+        elif page_def.page_type == 'summary':
			
 
				+            _build_summary_page(prs, config, metrics, profile, colors, content_top, page_def)
			
 
				+        elif page_def.page_type == 'end':
			
 
				+            _build_end_page(prs, config, colors)
			
 
				+
			
 
				+    for slide in prs.slides:
			
 
				+        _ensure_word_wrap_all(slide)
			
 
				+
			
 
				+    _delete_template_slides(prs)
			
 
				+    prs.save(output_path)
			
 
				+    print(f"Report saved: {output_path}")
			
 
				+    return output_path
			
 
				+
			
 
				+
			
 
				+def quality_assured_build(data_file: str, config: ReportConfig,
			
 
				+                          output_path: str) -> tuple:
			
 
				+    if config.require_six_confirmations:
			
 
				+        df = load_generic_excel(data_file)
			
 
				+        confirmation_issues = validate_six_confirmations(config, list(df.columns))
			
 
				+        if confirmation_issues:
			
 
				+            raise ValueError('生成前六项确认未通过：\n- ' + '\n- '.join(confirmation_issues))
			
 
				+
			
 
				+    inspector = QualityInspector(theme_to_rgb_colors(config.theme))
			
 
				+
			
 
				+    return inspector.quality_assured_build(
			
 
				+        build_fn=lambda d, c: _build_without_save(d, c, config),
			
 
				+        data=data_file,
			
 
				+        config=config,
			
 
				+        output_path=output_path,
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def _build_without_save(data_file, temp_config, original_config):
			
 
				+    from pptx import Presentation as Prs
			
 
				+    prs = Prs(get_master_template('daily'))
			
 
				+    df = load_generic_excel(data_file)
			
 
				+    profile = original_config.data_profiling or {}
			
 
				+    colors = theme_to_rgb_colors(original_config.theme)
			
 
				+    metrics = calc_generic_metrics(df, original_config)
			
 
				+    content_top = _detect_content_top(prs.slides[1]) if len(prs.slides) > 1 else 1524000
			
 
				+
			
 
				+    for page_def in original_config.pages:
			
 
				+        if not page_def.selected:
			
 
				+            continue
			
 
				+        if page_def.page_type == 'cover':
			
 
				+            _build_cover_page(prs, original_config, colors)
			
 
				+        elif page_def.page_type == 'kpi_overview':
			
 
				+            _build_kpi_overview_page(prs, original_config, metrics, colors, content_top, df, profile)
			
 
				+        elif page_def.page_type == 'trend':
			
 
				+            if not _build_trend_page(prs, original_config, df, profile, colors, content_top):
			
 
				+                _build_fallback_analysis_page(prs, original_config, page_def, df, profile, metrics, colors, content_top)
			
 
				+        elif page_def.page_type == 'distribution':
			
 
				+            if not _build_distribution_page(prs, original_config, df, profile, colors, content_top, page_def):
			
 
				+                _build_fallback_analysis_page(prs, original_config, page_def, df, profile, metrics, colors, content_top)
			
 
				+        elif page_def.page_type == 'ranking':
			
 
				+            if not _build_ranking_page(prs, original_config, df, profile, colors, content_top, page_def):
			
 
				+                _build_fallback_analysis_page(prs, original_config, page_def, df, profile, metrics, colors, content_top)
			
 
				+        elif page_def.page_type == 'summary':
			
 
				+            _build_summary_page(prs, original_config, metrics, profile, colors, content_top, page_def)
			
 
				+        elif page_def.page_type == 'end':
			
 
				+            _build_end_page(prs, original_config, colors)
			
 
				+        elif page_def.page_type == 'toc':
			
 
				+            _build_toc_page(prs, original_config, colors)
			
 
				+
			
 
				+    for slide in prs.slides:
			
 
				+        _ensure_word_wrap_all(slide)
			
 
				+    _delete_template_slides(prs)
			
 
				+    return prs
			
 
				+
			
 
				+
			
 
				+def _build_cover_page(prs, config, colors):
			
 
				+    slide = _duplicate_slide(prs, prs.slides[0])
			
 
				+    _replace_all_placeholders(slide, {
			
 
				+        '{report_title}': config.title,
			
 
				+        '{report_type}': '数据报告',
			
 
				+        '{date}': config.period_str or config.date_range[0].strftime('%Y年%m月%d日'),
			
 
				+        '{department}': config.source_label,
			
 
				+        '{period}': config.period_str,
			
 
				+        '{gen_time}': datetime.now().strftime('%Y-%m-%d %H:%M'),
			
 
				+    })
			
 
				+    _remove_empty_cover_kpi_placeholders(slide)
			
 
				+    _add_footer_if_missing(slide, f'数据来源：{config.source_label} | 1/{len(config.pages)}')
			
 
				+
			
 
				+
			
 
				+def _build_fallback_analysis_page(prs, config, page_def, df, profile, metrics, colors, content_top):
			
 
				+    """
			
 
				+    Fallback page builder: generates analysis text from available data
			
 
				+    when the primary page type cannot produce content (e.g. no time columns
			
 
				+    for trend, no category columns for distribution).
			
 
				+    Produces at least 4 deep analysis blocks with data citations.
			
 
				+    """
			
 
				+    slide = _duplicate_slide(prs, prs.slides[1])
			
 
				+    page_title = page_def.title if page_def and page_def.title else f'{config.title}数据分析'
			
 
				+    _replace_all_placeholders(slide, {
			
 
				+        '{report_title}': config.title,
			
 
				+        '{date}': config.period_str,
			
 
				+        '{page_title}': page_title,
			
 
				+        '{source}': config.source_label,
			
 
				+        '{period}': '',
			
 
				+        '{page_num}': '',
			
 
				+    })
			
 
				+
			
 
				+    num_cols = profile.get('numeric_columns', [])
			
 
				+    cat_cols = profile.get('category_columns', [])
			
 
				+
			
 
				+    insight_items = []
			
 
				+
			
 
				+    if num_cols:
			
 
				+        top_metric = num_cols[0]
			
 
				+        top_name = top_metric.get('inferred_label', top_metric['column_name'])
			
 
				+        top_vals = df[top_metric['column_name']].dropna()
			
 
				+        if len(top_vals) > 0:
			
 
				+            mean_val = top_vals.mean()
			
 
				+            max_val = top_vals.max()
			
 
				+            min_val = top_vals.min()
			
 
				+            median_val = top_vals.median()
			
 
				+            total_val = top_vals.sum()
			
 
				+            insight_items.append({
			
 
				+                'title': f'{top_name}整体概览',
			
 
				+                'content': f'报告周期内，{top_name}统计数据共包含 {len(top_vals)} 条有效记录。'
			
 
				+                           f'总和为 {total_val:,.0f}，平均值为 {mean_val:,.2f}，中位数为 {median_val:,.2f}。'
			
 
				+                           f'最大值为 {max_val:,.2f}，最小值为 {min_val:,.2f}。'
			
 
				+                           f'{"数据波动范围较大，最大值与最小值差距显著，说明不同条目间差异明显，建议深入分析极端值成因" if min_val > 0 and max_val / max(min_val, 1) > 100 else "数据整体分布较为均衡，波动性在合理范围内"}。'
			
 
				+                           f'中位数与平均值的偏差反映了数据的{"右偏分布（少数大值拉高了均值），说明存在显著头部效应" if median_val < mean_val * 0.8 else "左偏分布" if median_val > mean_val * 1.2 else "较为对称，数据呈正态分布趋势"}。',
			
 
				+            })
			
 
				+
			
 
				+            insight_items.append({
			
 
				+                'title': f'{top_name}分段分析',
			
 
				+                'content': f'对 {top_name} 进行四分段统计：上四分位数（25%数据高于此值）为 {top_vals.quantile(0.75):,.2f}，'
			
 
				+                           f'下四分位数（25%数据低于此值）为 {top_vals.quantile(0.25):,.2f}，'
			
 
				+                           f'四分位距（IQR）为 {top_vals.quantile(0.75) - top_vals.quantile(0.25):,.2f}。'
			
 
				+                           f'{"IQR较大，数据分布较为离散，不同类别的表现差异明显，需关注尾部类别的提升空间" if (top_vals.quantile(0.75) - top_vals.quantile(0.25)) > abs(mean_val) * 0.5 else "IQR在合理范围内，数据集中度较好"}。'
			
 
				+                           f'建议按四分位将数据分为四组，重点跟踪上四分位组的表现，识别可复制的成功因素。',
			
 
				+            })
			
 
				+
			
 
				+    if cat_cols and num_cols:
			
 
				+        cat = cat_cols[0]
			
 
				+        cat_name = cat.get('inferred_label', cat['column_name'])
			
 
				+        num = num_cols[0]
			
 
				+        num_name = num.get('inferred_label', num['column_name'])
			
 
				+        cat_unique = df[cat['column_name']].dropna().nunique()
			
 
				+        insight_items.append({
			
 
				+            'title': f'{cat_name}分类覆盖分析',
			
 
				+            'content': f'数据共覆盖 {cat_unique} 个不同的{cat_name}，在 {num_name} 维度上呈现差异化分布。'
			
 
				+                       f'不同{cat_name}对整体{num_name}的贡献度各异，建议按贡献度大小将{cat_name}进行分类管理。'
			
 
				+                       f'高贡献类别应重点维护和深度挖掘，中等贡献类别需持续培育和资源投入，'
			
 
				+                       f'低贡献类别可评估其战略价值，适当调整投入节奏。建议建立分类分级管理体系，'
			
 
				+                       f'每月跟踪各类别的变化趋势和占比波动。',
			
 
				+        })
			
 
				+
			
 
				+    if len(num_cols) >= 2:
			
 
				+        num1 = num_cols[0]
			
 
				+        num2 = num_cols[1]
			
 
				+        ratio = df[num1['column_name']].sum() / max(df[num2['column_name']].sum(), 1)
			
 
				+        insight_items.append({
			
 
				+            'title': '关键比率与效率指标',
			
 
				+            'content': f'{num1.get("inferred_label", num1["column_name"])}与{num2.get("inferred_label", num2["column_name"])}的比率为 {ratio:.2f}，'
			
 
				+                       f'该比率是衡量业务效率的重要参考指标。'
			
 
				+                       f'{"比率处于较高水平，表明单位投入产出效率良好" if ratio > 1 else "比率偏低，单位投入的产出效益有限，存在效率提升空间"}。'
			
 
				+                       f'建议将此比率纳入定期监控指标，按月环比追踪变化趋势，'
			
 
				+                       f'并针对低比率项目制定专项提升计划，分析制约因素和可优化环节。',
			
 
				+        })
			
 
				+
			
 
				+    insight_items.append({
			
 
				+        'title': '数据质量与代表性评估',
			
 
				+        'content': f'本报告基于共 {len(df)} 条记录进行分析，数据覆盖范围包括上述多个维度。'
			
 
				+                   f'建议在后续周期中持续关注数据完整性和及时性，确保分析结果准确反映业务真实情况。'
			
 
				+                   f'对于数据量较小或集中度较高的维度，应结合业务判断进行解读，避免以偏概全。'
			
 
				+                   f'同时建议补充更多维度的数据（如时间序列数据、竞品对标数据等），'
			
 
				+                   f'以支撑更全面的分析视角和更精准的决策建议。',
			
 
				+    })
			
 
				+
			
 
				+    if not insight_items:
			
 
				+        insight_items = [{
			
 
				+            'title': '数据总览',
			
 
				+            'content': f'当前数据集包含 {len(df)} 条记录，{len(df.columns)} 个字段。'
			
 
				+                       f'数值字段 {len(num_cols)} 个，分类字段 {len(cat_cols)} 个。'
			
 
				+                       f'建议结合业务场景规划具体的数据分析维度，'
			
 
				+                       f'以生成更具洞察力和指导意义的数据报告。',
			
 
				+        }]
			
 
				+
			
 
				+    if num_cols and len(df) > 0:
			
 
				+        top_col = num_cols[0]
			
 
				+        chart_zone = get_chart_left_zone(content_top, 0.4)
			
 
				+        text_zone = get_insight_right_zone(content_top, 0.4)
			
 
				+        sample_vals = df[top_col['column_name']].dropna().head(10).tolist()
			
 
				+        sample_labels = [f'记录{i+1}' for i in range(len(sample_vals))]
			
 
				+        if sample_vals:
			
 
				+            add_bar_chart(slide, sample_labels, sample_vals,
			
 
				+                         Emu(chart_zone.x), Emu(chart_zone.y),
			
 
				+                         Emu(chart_zone.width), Emu(chart_zone.height),
			
 
				+                         series_name=top_col.get('inferred_label', top_col['column_name']),
			
 
				+                         color=colors.get('primary'))
			
 
				+        _add_structured_insight(slide, insight_items,
			
 
				+                                Emu(text_zone.x), Emu(text_zone.y),
			
 
				+                                Emu(text_zone.width), Emu(text_zone.height))
			
 
				+    else:
			
 
				+        zone = get_full_width_zone(content_top)
			
 
				+        _add_structured_insight(slide, insight_items,
			
 
				+                                Emu(zone.x), Emu(zone.y),
			
 
				+                                Emu(zone.width), Emu(zone.height))
			
 
				+
			
 
				+
			
 
				+def _build_toc_page(prs, config, colors):
			
 
				+    slide = _duplicate_slide(prs, prs.slides[1])
			
 
				+    active_pages = [p for p in config.pages if p.selected and p.page_type not in ('cover', 'toc', 'end')]
			
 
				+    _replace_all_placeholders(slide, {
			
 
				+        '{report_title}': config.title,
			
 
				+        '{date}': config.period_str,
			
 
				+        '{page_title}': '目录',
			
 
				+        '{source}': config.source_label,
			
 
				+        '{period}': f'2/{len(config.pages)}',
			
 
				+        '{page_num}': '',
			
 
				+    })
			
 
				+    for i, page in enumerate(active_pages[:6], 1):
			
 
				+        _replace_placeholder(slide, f'{{chapter{i}_title}}', page.title)
			
 
				+        _replace_placeholder(slide, f'{{chapter{i}_desc}}', page.conclusion_title or page.title)
			
 
				+
			
 
				+
			
 
				+def _build_kpi_overview_page(prs, config, metrics, colors, content_top, df=None, profile=None):
			
 
				+    slide = _duplicate_slide(prs, prs.slides[1])
			
 
				+    page_title = '核心指标概览'
			
 
				+    _replace_all_placeholders(slide, {
			
 
				+        '{report_title}': config.title,
			
 
				+        '{date}': config.period_str,
			
 
				+        '{page_title}': page_title,
			
 
				+        '{source}': config.source_label,
			
 
				+        '{period}': '',
			
 
				+        '{page_num}': '',
			
 
				+    })
			
 
				+    kpi_items = []
			
 
				+    primary_vals = {}
			
 
				+    all_vals = {}
			
 
				+    for md in config.metrics:
			
 
				+        if md.metric_type.value == 'kpi' and md.selected:
			
 
				+            val = metrics.get(md.name, 0)
			
 
				+            display_val = format(val, md.format_spec) if isinstance(val, (int, float)) else str(val)
			
 
				+            kpi_items.append({
			
 
				+                'label': md.label,
			
 
				+                'value': display_val,
			
 
				+                'unit': md.unit,
			
 
				+                'change': '',
			
 
				+                'sub': '',
			
 
				+            })
			
 
				+            if md.is_primary:
			
 
				+                primary_vals[md.label] = val
			
 
				+            all_vals[md.label] = val
			
 
				+
			
 
				+    if kpi_items:
			
 
				+        _add_kpi_cards(slide, kpi_items[:6], start_y=Emu(content_top))
			
 
				+
			
 
				+        insight_items = []
			
 
				+
			
 
				+        kpi_names = [m.label for m in config.metrics if m.selected]
			
 
				+        kpi_str = "、".join(kpi_names[:6]) if kpi_names else "各指标"
			
 
				+        primary_kpis = [m for m in config.metrics if m.is_primary and m.selected]
			
 
				+        if not primary_kpis:
			
 
				+            primary_kpis = [m for m in config.metrics if m.selected][:3]
			
 
				+
			
 
				+        kpi_detail_parts = []
			
 
				+        for i, pk in enumerate(primary_kpis):
			
 
				+            val = all_vals.get(pk.label, 0)
			
 
				+            unit_str = pk.unit if pk.unit else ''
			
 
				+            display_val = format(val, pk.format_spec) if isinstance(val, (int, float)) else str(val)
			
 
				+            kpi_detail_parts.append(f'{pk.label}: {display_val}{unit_str}')
			
 
				+
			
 
				+        insight_items.append({
			
 
				+            'title': '核心数据概览',
			
 
				+            'content': f'本期报告涵盖 {kpi_str} 共 {len(kpi_names)} 项核心指标。'
			
 
				+                       f'{"；".join(kpi_detail_parts[:4])}。'
			
 
				+                       f'其中{"、".join(p.label for p in primary_kpis[:3])}为本次分析的重点关注指标。'
			
 
				+                       f'建议将这些指标与历史同期数据进行纵向对比，以及与行业基准进行横向对标，以全面评估当前业务健康度。'
			
 
				+                       f'对于波动较大的指标，需深入追溯其背后的业务动因，判断是否为趋势性变化还是季节性波动。',
			
 
				+        })
			
 
				+
			
 
				+        cat_cols = profile.get('category_columns', []) if profile else []
			
 
				+        num_cols = profile.get('numeric_columns', []) if profile else []
			
 
				+        total_rows = profile.get('total_rows', 0) if profile else 0
			
 
				+
			
 
				+        if cat_cols:
			
 
				+            top_cats = [c.get('inferred_label', c.get('column_name', '')) for c in cat_cols[:3]]
			
 
				+            cat_details = []
			
 
				+            for c in cat_cols[:3]:
			
 
				+                uc = c.get('unique_count', 'N/A')
			
 
				+                cat_details.append(f'{c.get("inferred_label", c.get("column_name", ""))}({uc}类)')
			
 
				+            insight_items.append({
			
 
				+                'title': '数据覆盖与维度分析',
			
 
				+                'content': f'数据覆盖 {total_rows:,} 条记录，包含 {", ".join(cat_details)} 等多个分析维度。'
			
 
				+                           f'丰富的维度数据支持从 {", ".join(top_cats)} 等角度进行多维度联动分析。'
			
 
				+                           f'建议关注各维度下的数据分布特征，识别高贡献或异常的分类群体，'
			
 
				+                           f'针对性地分析不同维度的表现差异，为精细化运营和数据驱动决策提供支撑。',
			
 
				+            })
			
 
				+
			
 
				+        if len(config.metrics) >= 3:
			
 
				+            compare_items = []
			
 
				+            for a, b in zip(primary_kpis[:2], primary_kpis[1:3]):
			
 
				+                va = all_vals.get(a.label, 0)
			
 
				+                vb = all_vals.get(b.label, 0)
			
 
				+                if va and vb:
			
 
				+                    ratio = round(va / vb, 2) if vb else 0
			
 
				+                    compare_items.append(f'{a.label}与{b.label}的比值为 {ratio}')
			
 
				+            if compare_items:
			
 
				+                insight_items.append({
			
 
				+                    'title': '指标间关联分析',
			
 
				+                    'content': f'{"；".join(compare_items)}。通过指标间的比值关系可以发现数据的内在规律，'
			
 
				+                               f'比值异常偏离正常区间时需重点关注。建议进一步计算各指标与核心业务目标之间的相关系数，'
			
 
				+                               f'量化不同指标对业务目标的影响力排序，将有限资源聚焦在驱动型指标上。',
			
 
				+                })
			
 
				+            else:
			
 
				+                insight_items.append({
			
 
				+                    'title': '指标间关联分析',
			
 
				+                    'content': f'本期核心指标包括 {", ".join(p.label for p in primary_kpis[:3])}。'
			
 
				+                               f'建议通过散点图或相关系数分析探索指标间的线性/非线性关系，识别是否存在协同或对冲效应。'
			
 
				+                               f'同时建议按时间序列分析各指标的周期性规律，为资源配置和预测提供依据。',
			
 
				+                })
			
 
				+
			
 
				+        insight_items.append({
			
 
				+            'title': '关键发现与行动建议',
			
 
				+            'content': f'综合分析 {len(kpi_names)} 项指标，建议重点关注以下方向：'
			
 
				+                       f'(1) 定期监控核心指标的趋势变化，建立异常预警机制，当指标偏离正常区间时及时触发排查流程；'
			
 
				+                       f'(2) 深化多维度交叉分析，挖掘不同群体间的结构差异，识别增长机会和风险点；'
			
 
				+                       f'(3) 结合业务经验和外部数据，验证数据指标的准确性和合理性；'
			
 
				+                       f'(4) 将分析结论转化为可执行的具体行动项，明确责任人和时间节点，建立跟踪闭环机制。',
			
 
				+        })
			
 
				+
			
 
				+        kpi_rows = 2 if len(kpi_items) > 3 else 1
			
 
				+        kpi_grid_bottom = int(content_top) + Emu(3048000)
			
 
				+        if kpi_rows == 2:
			
 
				+            kpi_grid_bottom += Emu(3429000)
			
 
				+        insight_zone_y = kpi_grid_bottom + Emu(254000)
			
 
				+        remaining_height = int(FOOTER_TOP - insight_zone_y - Emu(180000))
			
 
				+        if remaining_height >= Emu(1400000):
			
 
				+            compact_items = insight_items[:2] if kpi_rows == 2 else insight_items[:3]
			
 
				+            _add_structured_insight(slide, compact_items,
			
 
				+                                    Emu(CONTENT_LEFT), Emu(insight_zone_y),
			
 
				+                                    Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT), Emu(remaining_height),
			
 
				+                                    title_size=Pt(10), body_size=Pt(9), min_body_size=Pt(8))
			
 
				+
			
 
				+
			
 
				+def _build_trend_page(prs, config, df, profile, colors, content_top):
			
 
				+    slide = _duplicate_slide(prs, prs.slides[1])
			
 
				+    time_cols = profile.get('time_columns', [])
			
 
				+    num_cols = profile.get('numeric_columns', [])
			
 
				+    if not time_cols or not num_cols:
			
 
				+        return False
			
 
				+
			
 
				+    time_col = time_cols[0]['column_name']
			
 
				+    metric_col = num_cols[0]['column_name']
			
 
				+    label = num_cols[0].get('inferred_label', metric_col)
			
 
				+
			
 
				+    page_title = f'{label}趋势'
			
 
				+    _replace_all_placeholders(slide, {
			
 
				+        '{report_title}': config.title,
			
 
				+        '{date}': config.period_str,
			
 
				+        '{page_title}': page_title,
			
 
				+        '{source}': config.source_label,
			
 
				+        '{period}': '',
			
 
				+        '{page_num}': '',
			
 
				+    })
			
 
				+
			
 
				+    trend_data = calc_generic_trend(df, time_col, metric_col)
			
 
				+
			
 
				+    if trend_data.get('dates'):
			
 
				+        chart_zone = get_chart_left_zone(content_top, 0.6)
			
 
				+        text_zone = get_insight_right_zone(content_top, 0.6)
			
 
				+        add_line_chart(slide, trend_data['dates'], trend_data['values'],
			
 
				+                       Emu(chart_zone.x), Emu(chart_zone.y),
			
 
				+                       Emu(chart_zone.width), Emu(chart_zone.height),
			
 
				+                       series_name=label, color=colors.get('primary'))
			
 
				+
			
 
				+        dates = trend_data['dates']
			
 
				+        vals = trend_data['values']
			
 
				+        n = len(vals)
			
 
				+        first_v, last_v = vals[0], vals[-1]
			
 
				+        change = last_v - first_v
			
 
				+        change_pct = round(change / first_v * 100, 1) if first_v else 0
			
 
				+        max_v = max(vals) if vals else 0
			
 
				+        min_v = min(vals) if vals else 0
			
 
				+        max_idx = vals.index(max_v) if vals else 0
			
 
				+        min_idx = vals.index(min_v) if vals else 0
			
 
				+        peak_date = dates[max_idx] if max_idx < len(dates) else 'N/A'
			
 
				+        trough_date = dates[min_idx] if min_idx < len(dates) else 'N/A'
			
 
				+
			
 
				+        direction_text = '上升' if change > 0 else '下降' if change < 0 else '平稳'
			
 
				+        volatility = round((max_v - min_v) / (sum(vals) / n) * 100, 1) if sum(vals) else 0 if vals else 0
			
 
				+        insight_items = [
			
 
				+            {
			
 
				+                'title': f'{label}整体趋势概况',
			
 
				+                'content': f'在报告周期内共采集 {n} 个时间点的数据，{label}'
			
 
				+                           f'从 {dates[0]} 的 {first_v:,.0f} 变动至 {dates[-1]} 的 {last_v:,.0f}，'
			
 
				+                           f'整体{direction_text}{abs(change_pct):.1f}%，{direction_text}趋势{"显著" if abs(change_pct) > 20 else "温和" if abs(change_pct) > 5 else "较为平缓"}。'
			
 
				+                           f'数据变化轨迹反映出{"持续向好的增长态势" if direction_text == "上升" and abs(change_pct) > 10 else "温和改善的积极信号" if direction_text == "上升" else "回调盘整的阶段性特征" if direction_text == "下降" else "平稳运行的基本状态"}，'
			
 
				+                           f'建议将当前趋势与业务目标和历史同期数据进行交叉对比，评估达成全年目标的可行性。如需更详尽的趋势分析，建议增加数据采集频度和时间跨度。',
			
 
				+            },
			
 
				+            {
			
 
				+                'title': '峰值与谷值分析',
			
 
				+                'content': f'周期内最高值出现在 {peak_date}，为 {max_v:,.0f}；'
			
 
				+                           f'最低值出现在 {trough_date}，为 {min_v:,.0f}。'
			
 
				+                           f'极值差距 {max_v - min_v:,.0f}，波动幅度 {volatility}%，'
			
 
				+                           f'{"波动显著，需关注异常节点的驱动因素，建议排查是否受节假日、促销活动、外部政策变化等因素影响" if volatility > 30 else "波动在可控范围内，但仍需对异常波动保持警觉"}{"." if volatility > 30 else "，建立异常值的快速预警和响应机制。"}',
			
 
				+            },
			
 
				+            {
			
 
				+                'title': '趋势阶段性特征',
			
 
				+                'content': f'前半程（{dates[0]}至{dates[min(n//2, n-1)]}）'
			
 
				+                           f'{"呈上升态势" if sum(vals[:n//2]) < sum(vals[n//2:]) else "呈下降态势" if sum(vals[:n//2]) > sum(vals[n//2:]) else "基本持平"}，'
			
 
				+                           f'后半程均值为 {sum(vals[n//2:])/(n-n//2):,.0f}。建议结合业务事件节点深入分析拐点成因，'
			
 
				+                           f'重点关注是否存在季节性波动、周期性波动或外部冲击等结构性因素。'
			
 
				+                           f'若数据量较少，趋势解读应以业务经验为主，辅以数据验证。',
			
 
				+            },
			
 
				+            {
			
 
				+                'title': '业务启示',
			
 
				+                'content': f'综合趋势分析，当前数据反映出{"积极向好的发展态势" if direction_text == "上升" and abs(change_pct) > 10 else "温和稳定的运行动态" if abs(change_pct) <= 10 else "需重点关注的下行风险"}。'
			
 
				+                           f'建议{"加大资源投入以把握增长机遇，同时关注增速的可持续性，避免盲目扩张" if direction_text == "上升" else "排查下降原因并制定针对性应对措施，分析是短期波动还是长期趋势转折" if direction_text == "下降" else "保持当前运营节奏，同时关注潜在变化信号，适时调整策略" if direction_text == "平稳" else "继续观察数据走势"}。'
			
 
				+                           f'建议将数据与业务KPI目标进行对标分析，定期回顾趋势变化。',
			
 
				+            },
			
 
				+        ]
			
 
				+        _add_structured_insight(slide, insight_items,
			
 
				+                                Emu(text_zone.x), Emu(text_zone.y),
			
 
				+                                Emu(text_zone.width), Emu(text_zone.height))
			
 
				+        return True
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def _build_distribution_page(prs, config, df, profile, colors, content_top, page_def=None):
			
 
				+    slide = _duplicate_slide(prs, prs.slides[1])
			
 
				+    cat_cols = profile.get('category_columns', [])
			
 
				+    num_cols = profile.get('numeric_columns', [])
			
 
				+    if not cat_cols:
			
 
				+        return False
			
 
				+
			
 
				+    elem = (page_def.elements or [{}])[0] if page_def else {}
			
 
				+    cat_col = elem.get('category') or cat_cols[0]['column_name']
			
 
				+    cat_label = elem.get('category_label') or next(
			
 
				+        (c.get('inferred_label', cat_col) for c in cat_cols if c['column_name'] == cat_col), cat_col)
			
 
				+    metric_col = elem.get('metric') or (num_cols[0]['column_name'] if num_cols else None)
			
 
				+    metric_label = elem.get('metric_label') or (next(
			
 
				+        (c.get('inferred_label', metric_col) for c in num_cols if c['column_name'] == metric_col), metric_col) if metric_col else '')
			
 
				+
			
 
				+    page_title = page_def.title if page_def and page_def.title else f'{cat_label}分布'
			
 
				+    _replace_all_placeholders(slide, {
			
 
				+        '{report_title}': config.title,
			
 
				+        '{date}': config.period_str,
			
 
				+        '{page_title}': page_title,
			
 
				+        '{source}': config.source_label,
			
 
				+        '{period}': '',
			
 
				+        '{page_num}': '',
			
 
				+    })
			
 
				+
			
 
				+    dist = calc_generic_distribution(df, cat_col, metric_col, top_n=8)
			
 
				+
			
 
				+    if dist.get('categories'):
			
 
				+        chart_zone = get_chart_left_zone(content_top, 0.55)
			
 
				+        text_zone = get_insight_right_zone(content_top, 0.55)
			
 
				+        if len(dist['categories']) <= 8:
			
 
				+            add_doughnut_chart(slide, dist['categories'], dist['values'],
			
 
				+                              Emu(chart_zone.x), Emu(chart_zone.y),
			
 
				+                              Emu(chart_zone.width), Emu(chart_zone.height),
			
 
				+                              colors=colors.get('series'))
			
 
				+        else:
			
 
				+            add_bar_chart(slide, dist['categories'], dist['values'],
			
 
				+                         Emu(chart_zone.x), Emu(chart_zone.y),
			
 
				+                         Emu(chart_zone.width), Emu(chart_zone.height),
			
 
				+                         series_name=metric_label, color=colors.get('primary'))
			
 
				+
			
 
				+        cats, vals, pcts = dist['categories'], dist['values'], dist['percentages']
			
 
				+        grand_total = sum(vals)
			
 
				+        top3_pct = sum(pcts[:3])
			
 
				+        top1_name, top1_val, top1_pct = cats[0], vals[0], pcts[0]
			
 
				+
			
 
				+        metric_suffix = metric_label if metric_label else '数量'
			
 
				+        insight_items = [
			
 
				+            {
			
 
				+                'title': f'{cat_label}分布概况',
			
 
				+                'content': f'共有 {len(cats)} 个不同的{cat_label}，覆盖范围'
			
 
				+                           f'{"广泛" if len(cats) >= 8 else "较为丰富" if len(cats) >= 5 else "相对集中"}。'
			
 
				+                           f'前3名合计占比 {top3_pct:.1f}%，集中度'
			
 
				+                           f'{"较高，呈现显著的头部集中特征" if top3_pct > 70 else "中等，呈现梯度递减分布" if top3_pct > 50 else "较低，分布较为均衡"}。',
			
 
				+            },
			
 
				+            {
			
 
				+                'title': f'排名第一: {top1_name}',
			
 
				+                'content': f'{top1_name}以 {top1_val:,}{metric_suffix}（占比 {top1_pct:.1f}%）位居榜首，'
			
 
				+                           f'{"是第二名" + cats[1] + "的" + f"{round(top1_val/vals[1],1)}" + "倍，优势极为显著" if len(cats) > 1 else "是该维度中最重要的类别"}。'
			
 
				+                           f'该类别贡献了超过三分之一的{metric_label}，是整体业务的基本盘和核心增长极。',
			
 
				+            },
			
 
				+        ]
			
 
				+        if len(vals) >= 3:
			
 
				+            top3_sum = sum(vals[:3])
			
 
				+            tail_sum = sum(vals[3:])
			
 
				+            tail_pct = sum(pcts[3:])
			
 
				+            insight_items.append({
			
 
				+                'title': '长尾分布特征',
			
 
				+                'content': f'前三名累计 {top3_sum:,}{metric_suffix}（{top3_pct:.1f}%），'
			
 
				+                           f'剩余 {len(cats)-3} 个合计 {tail_sum:,}{metric_suffix}（{tail_pct:.1f}%），'
			
 
				+                           f'属于{"头部集中型分布" if top3_pct > 70 else "相对均衡分布" if top3_pct < 50 else "梯度递减型分布"}。'
			
 
				+                           f'头部贡献了绝大部分{metric_label}，尾部虽数量众多但单个贡献有限。',
			
 
				+            })
			
 
				+        if len(vals) > 1:
			
 
				+            avg_val = sum(vals) / len(vals)
			
 
				+            cv = round(vals[0] / avg_val, 1) if avg_val else 0
			
 
				+            median_idx = len(vals) // 2
			
 
				+            median_val = vals[median_idx]
			
 
				+            insight_items.append({
			
 
				+                'title': '差异化与离散度分析',
			
 
				+                'content': f'排名第一的{cat_label}{top1_name}的{metric_suffix}是全部分类均值的 {cv} 倍，'
			
 
				+                           f'中位数分类（第{median_idx+1}名）为 {median_val:,}{metric_suffix}，'
			
 
				+                           f'表明该维度{"差异化显著，资源集中度较高" if cv > 3 else "差异化适中，各分类间差距可控" if cv > 1.5 else "分布较为均匀"}。'
			
 
				+                           f'头部与中位数的差距反映了{cat_label}维度上的分层特征，是运营资源重点倾斜方向。',
			
 
				+            })
			
 
				+        insight_items.append({
			
 
				+            'title': '业务启示',
			
 
				+            'content': f'建议重点关注 {cats[0]} 的增量拓展与存量维护，同时深入分析排名中位类别的提升空间。'
			
 
				+                       f'对于 {metric_label}贡献较小的尾部类别（如占比低于3%的分类），可评估是否优化资源配置、'
			
 
				+                       f'调整运营策略或将资源向高回报类别倾斜。结合{cat_label}维度持续跟踪分布变化，及时把握结构性机会。',
			
 
				+        })
			
 
				+
			
 
				+        _add_structured_insight(slide, insight_items,
			
 
				+                                Emu(text_zone.x), Emu(text_zone.y),
			
 
				+                                Emu(text_zone.width), Emu(text_zone.height))
			
 
				+        return True
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def _build_ranking_page(prs, config, df, profile, colors, content_top, page_def=None):
			
 
				+    slide = _duplicate_slide(prs, prs.slides[1])
			
 
				+    cat_cols = profile.get('category_columns', [])
			
 
				+    num_cols = profile.get('numeric_columns', [])
			
 
				+    if not cat_cols or not num_cols:
			
 
				+        return False
			
 
				+
			
 
				+    elem = (page_def.elements or [{}])[0] if page_def else {}
			
 
				+    rank_col = elem.get('category') or cat_cols[-1]['column_name']
			
 
				+    rank_label = elem.get('category_label') or next(
			
 
				+        (c.get('inferred_label', rank_col) for c in cat_cols if c['column_name'] == rank_col), rank_col)
			
 
				+    metric_col = elem.get('metric') or num_cols[0]['column_name']
			
 
				+    metric_label = elem.get('metric_label') or next(
			
 
				+        (c.get('inferred_label', metric_col) for c in num_cols if c['column_name'] == metric_col), metric_col)
			
 
				+
			
 
				+    page_title = page_def.title if page_def and page_def.title else f'{rank_label}TOP排行'
			
 
				+    _replace_all_placeholders(slide, {
			
 
				+        '{report_title}': config.title,
			
 
				+        '{date}': config.period_str,
			
 
				+        '{page_title}': page_title,
			
 
				+        '{source}': config.source_label,
			
 
				+        '{period}': '',
			
 
				+        '{page_num}': '',
			
 
				+    })
			
 
				+
			
 
				+    ranking = calc_generic_ranking(df, rank_col, metric_col, top_n=15)
			
 
				+    if ranking:
			
 
				+        chart_zone = get_chart_left_zone(content_top, 0.6)
			
 
				+        text_zone = get_insight_right_zone(content_top, 0.6)
			
 
				+        names = [r['name'] for r in ranking]
			
 
				+        vals = [r['value'] for r in ranking]
			
 
				+        add_bar_chart(slide, names, vals,
			
 
				+                     Emu(chart_zone.x), Emu(chart_zone.y),
			
 
				+                     Emu(chart_zone.width), Emu(chart_zone.height),
			
 
				+                     series_name=metric_label, color=colors.get('primary'))
			
 
				+
			
 
				+        total_val = sum(vals)
			
 
				+        top3_names = [r['name'] for r in ranking[:3]]
			
 
				+        top3_vals = [r['value'] for r in ranking[:3]]
			
 
				+        top3_pct = [round(v / total_val * 100, 1) for v in top3_vals] if total_val else [0, 0, 0]
			
 
				+        top1_vs_last = round(vals[0] / vals[-1], 1) if len(vals) > 1 and vals[-1] > 0 else 'N/A'
			
 
				+
			
 
				+        insight_items = [
			
 
				+            {
			
 
				+                'title': f'{rank_label}TOP排行概况',
			
 
				+                'content': f'共展示 {len(ranking)} 个排名项，前3名分别为 {top3_names[0]}、{top3_names[1]}、'
			
 
				+                           f'{top3_names[2]}，累计 {sum(top3_vals):,}{metric_label}（{sum(top3_pct):.1f}%）。'
			
 
				+                           f'前三名合计贡献超过总量的三分之一，表明{rank_label}维度呈现{"显著的头部集中特征" if sum(top3_pct) > 60 else "梯度递减的分布格局" if sum(top3_pct) > 40 else "相对均衡的分布态势"}。',
			
 
				+            },
			
 
				+            {
			
 
				+                'title': f'榜首分析: {top3_names[0]}',
			
 
				+                'content': f'{top3_names[0]}以 {top3_vals[0]:,}{metric_label}（占比 {top3_pct[0]:.1f}%）位居榜首，'
			
 
				+                           f'{"是第2名" + top3_names[1] + "的" + f"{round(top3_vals[0]/top3_vals[1],1)}倍，领先优势显著" if len(ranking) > 1 and top3_vals[1] > 0 else "优势突出"}。'
			
 
				+                           f'作为排名第一的{rank_label}，其业绩表现直接影响整体业务大盘，建议重点关注其可持续增长策略。',
			
 
				+            },
			
 
				+            {
			
 
				+                'title': '头部与尾部差距分析',
			
 
				+                'content': f'第1名与第{len(ranking)}名差距达 {top1_vs_last} 倍，'
			
 
				+                           f'前5名平均 {round(sum(vals[:5])/5):,}{metric_label}，'
			
 
				+                           f'后5名平均 {round(sum(vals[-5:])/5):,}{metric_label}，'
			
 
				+                           f'前后差距约 {round((sum(vals[:5])/5)/(sum(vals[-5:])/5),1) if sum(vals[-5:]) > 0 else "N/A"} 倍。'
			
 
				+                           f'{"头部效应极为明显，需关注是否因资源分配不均导致" if isinstance(top1_vs_last, float) and top1_vs_last > 10 else "差距较为显著，存在分层优化的空间" if isinstance(top1_vs_last, float) and top1_vs_last > 5 else "梯度分布相对均衡，可针对性提升各层级表现"}。',
			
 
				+            },
			
 
				+            {
			
 
				+                'title': '累计贡献率与分层分析',
			
 
				+                'content': f'前5名累计贡献 {sum(vals[:5]):,}{metric_label}（{round(sum(vals[:5])/total_val*100,1) if total_val else 0}%），'
			
 
				+                           f'前10名累计贡献 {sum(vals[:10]):,}{metric_label}（{round(sum(vals[:10])/total_val*100,1) if total_val else 0}%），'
			
 
				+                           f'剩余 {len(ranking)-10} 名合计贡献 {sum(vals[10:]):,}{metric_label}（{round(sum(vals[10:])/total_val*100,1) if total_val else 0}%）。'
			
 
				+                           f'从分层结构来看，可划分为三个梯队：第一梯队（前3名）为业绩核心贡献者，第二梯队（第4-8名）为稳定输出层，'
			
 
				+                           f'第三梯队（第9名及以后）为潜力提升层。',
			
 
				+            },
			
 
				+            {
			
 
				+                'title': '业务建议',
			
 
				+                'content': f'重点关注 {", ".join(top3_names)} 的发展动态，提炼其成功经验并推广至团队。'
			
 
				+                           f'对于排名靠后的{rank_label}，可评估其增长潜力与资源匹配度，'
			
 
				+                           f'识别可突破的增量空间。建议建立{rank_label}的绩效考核与激励体系，'
			
 
				+                           f'通过标杆带动和梯队培养实现整体业绩提升。',
			
 
				+            },
			
 
				+        ]
			
 
				+        _add_structured_insight(slide, insight_items,
			
 
				+                                Emu(text_zone.x), Emu(text_zone.y),
			
 
				+                                Emu(text_zone.width), Emu(text_zone.height))
			
 
				+        return True
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def _build_summary_page(prs, config, metrics, profile, colors, content_top, page_def=None):
			
 
				+    slide = _duplicate_slide(prs, prs.slides[1])
			
 
				+    page_title = page_def.title if page_def and page_def.title else '总结与建议'
			
 
				+    _replace_all_placeholders(slide, {
			
 
				+        '{report_title}': config.title,
			
 
				+        '{date}': config.period_str,
			
 
				+        '{page_title}': page_title,
			
 
				+        '{source}': config.source_label,
			
 
				+        '{period}': '',
			
 
				+        '{page_num}': '',
			
 
				+    })
			
 
				+
			
 
				+    elem = (page_def.elements or [{}])[0] if page_def else {}
			
 
				+
			
 
				+    if elem.get('support_status') is not None:
			
 
				+        status = elem['support_status']
			
 
				+        dept = elem.get('support_by_dept', {})
			
 
				+        sc = elem.get('support_count', 0)
			
 
				+        cc = elem.get('closed_count', 0)
			
 
				+        close_rate = round(cc / sc * 100, 1) if sc else 0
			
 
				+        fully_closed = status.get('已闭环', 0)
			
 
				+        partial_closed = status.get('部分闭环', 0)
			
 
				+        not_closed = status.get('未闭环', 0)
			
 
				+        insight_items = [{
			
 
				+            'title': '支持需求总览',
			
 
				+            'content': f'本期共产生 {sc} 项跨部门支持需求，其中已闭环 {cc} 项（含完全闭环 {fully_closed} 项、部分闭环 {partial_closed} 项），'
			
 
				+                       f'闭环率 {close_rate}%。未闭环需求 {sc - cc} 项（占比 {round((sc-cc)/sc*100,1) if sc else 0}%），'
			
 
				+                       f'闭环率{"较高，跨部门协作效率良好" if close_rate >= 60 else "处于中等水平，仍有提升空间" if close_rate >= 30 else "偏低，需重点关注闭环推动"}。'
			
 
				+                       f'跨部门支持是保障项目推进的重要环节，高效的闭环机制有助于提升客户满意度和订单转化效率。',
			
 
				+        }]
			
 
				+        if status:
			
 
				+            total_status = sum(status.values())
			
 
				+            fully_pct = round(fully_closed / total_status * 100, 1) if total_status else 0
			
 
				+            partial_pct = round(partial_closed / total_status * 100, 1) if total_status else 0
			
 
				+            not_pct = round(not_closed / total_status * 100, 1) if total_status else 0
			
 
				+            insight_items.append({
			
 
				+                'title': '闭环状态明细',
			
 
				+                'content': f'已闭环 {fully_closed} 项（{fully_pct}%）、部分闭环 {partial_closed} 项（{partial_pct}%）、'
			
 
				+                           f'未闭环 {not_closed} 项（{not_pct}%）。'
			
 
				+                           f'其中完全闭环占比{"超过七成，闭环质量较高" if fully_pct >= 70 else "处于中等水平" if fully_pct >= 40 else "偏低，需提升闭环完整性"}。'
			
 
				+                           f'部分闭环表明需求已部分满足但未完全解决，需持续跟踪至彻底闭环。',
			
 
				+            })
			
 
				+        if dept:
			
 
				+            dept_top = list(dept.items())[:5]
			
 
				+            dept_top_sum = sum(v for _, v in dept_top)
			
 
				+            dept_total = sum(dept.values())
			
 
				+            dept_str = '、'.join([f'{k}（{v}项）' for k, v in dept_top])
			
 
				+            avg_dept_load = round(dept_total / len(dept), 1) if dept else 0
			
 
				+            max_dept = dept_top[0]
			
 
				+            insight_items.append({
			
 
				+                'title': '支持部门工作量分布',
			
 
				+                'content': f'需求覆盖 {len(dept)} 个部门/科室，前5个部门承接 {dept_top_sum} 项（{round(dept_top_sum/dept_total*100,1) if dept_total else 0}%）。'
			
 
				+                           f'Top部门：{dept_str}。其中{max_dept[0]}承接最多（{max_dept[1]}项），'
			
 
				+                           f'平均每个部门承接 {avg_dept_load} 项。请关注工作量较大的部门资源分配是否充足，'
			
 
				+                           f'同时识别是否有部门长期未被分配需求（可能表明资源未充分利用）。',
			
 
				+            })
			
 
				+        if sc - cc > 0:
			
 
				+            insight_items.append({
			
 
				+                'title': '未闭环需求跟进建议',
			
 
				+                'content': f'当前仍有 {sc - cc} 项需求未完成闭环。建议按以下策略推进：第一，按紧急程度和影响范围对未闭环需求进行优先级排序，'
			
 
				+                           f'高优需求指定专人负责限期解决；第二，建立周度闭环跟踪机制，定期更新需求处理进展；'
			
 
				+                           f'第三，对于跨部门协同的复杂需求，建议指定牵头部门统筹协调推进，'
			
 
				+                           f'并建立问题升级机制（当需求超期未解决时自动升级至更高层级协调）。',
			
 
				+            })
			
 
				+        insight_items.append({
			
 
				+            'title': '闭环效率提升建议',
			
 
				+            'content': f'为持续提升支持需求闭环效率，建议：一是建立标准化的需求流转流程，明确各环节责任人和响应时限；'
			
 
				+                       f'二是定期开展闭环案例复盘，提炼最佳实践并在团队内推广；'
			
 
				+                       f'三是建立闭环率考核指标，将闭环时效纳入部门协作评价体系，'
			
 
				+                       f'通过制度保障跨部门协作的效率和质量。',
			
 
				+        })
			
 
				+    else:
			
 
				+        insight_items = generate_generic_insights(profile, metrics)
			
 
				+
			
 
				+    zone = get_full_width_zone(content_top)
			
 
				+    _add_structured_insight(slide, insight_items,
			
 
				+                            Emu(zone.x), Emu(zone.y),
			
 
				+                            Emu(zone.width), Emu(zone.height))
			
 
				+
			
 
				+
			
 
				+def _build_end_page(prs, config, colors):
			
 
				+    slide = _duplicate_slide(prs, prs.slides[3] if len(prs.slides) > 3 else prs.slides[0])
			
 
				+    total = len([p for p in config.pages if p.selected])
			
 
				+    _add_footer_if_missing(slide, f'数据来源：{config.source_label} | {total}/{total}')
			
 
				+    _replace_all_placeholders(slide, {
			
 
				+        '{report_title}': config.title,
			
 
				+    })
			
 
				+
			
 
				+
			
 
				+# ==============================================================================
			
 
				 # DAILY REPORT
			
 
				 # ==============================================================================
			
 
				 
			
--- a/generate-data-report-ppt/scripts/quality_inspector.py
+++ b/generate-data-report-ppt/scripts/quality_inspector.py
@@ -0,0 +1,855 @@
 
				+"""
			
 
				+PPT quality inspector and auto-fix engine.
			
 
				+Inspects generated PPT for layout, visual, content, and data issues,
			
 
				+then auto-fixes them iteratively until quality threshold is met.
			
 
				+"""
			
 
				+import re
			
 
				+from pptx import Presentation
			
 
				+from pptx.util import Emu, Pt
			
 
				+from pptx.dml.color import RGBColor
			
 
				+from collections import Counter
			
 
				+
			
 
				+from quality_rules import (
			
 
				+    QUALITY_RULES, SEVERITY_WEIGHTS, CATEGORY_WEIGHTS,
			
 
				+    FILL_RATIO_THRESHOLDS, FONT_SIZE_MIN, FONT_SIZE_MAX,
			
 
				+    TEXT_MIN_LENGTH, INSIGHT_MIN_COUNT, PAGE_MIN_TEXT_LENGTH,
			
 
				+    SAFE_MARGIN, CONTENT_LEFT, CONTENT_TOP_BASE,
			
 
				+    FOOTER_TOP, SLIDE_WIDTH, SLIDE_HEIGHT, DEFAULT_FONT,
			
 
				+    get_quality_label, calculate_score,
			
 
				+)
			
 
				+from page_layouts import calculate_fill_ratio, ensure_safe_position
			
 
				+
			
 
				+
			
 
				+class QualityIssue:
			
 
				+    def __init__(self, severity, category, page_index, description,
			
 
				+                 rule_id='', auto_fixable=True, fix_data=None):
			
 
				+        self.severity = severity
			
 
				+        self.category = category
			
 
				+        self.page_index = page_index
			
 
				+        self.description = description
			
 
				+        self.rule_id = rule_id
			
 
				+        self.auto_fixable = auto_fixable
			
 
				+        self.fix_data = fix_data or {}
			
 
				+
			
 
				+    def __repr__(self):
			
 
				+        return f"[{self.severity}] Page {self.page_index+1}: {self.description}"
			
 
				+
			
 
				+
			
 
				+class QualityInspector:
			
 
				+    def __init__(self, theme_colors: dict = None):
			
 
				+        self.theme_colors = theme_colors or {}
			
 
				+        self.fix_count = 0
			
 
				+        self.fix_log = []
			
 
				+
			
 
    def inspect(self, prs: Presentation, config=None) -> list[QualityIssue]:
        """Run every check and return all findings in one list.

        Confirmation-alignment findings come first, followed by the layout,
        visual, content and data findings of each slide in slide order.
        """
        found = []
        found.extend(self._check_confirmation_alignment(prs, config))

        for slide_no, slide in enumerate(prs.slides):
            # Only the content check receives the page type.
            kind = self._get_page_type(slide_no, config, len(prs.slides))
            found.extend(self._check_layout(slide, slide_no))
            found.extend(self._check_visual(slide, slide_no))
            found.extend(self._check_content(slide, slide_no, config, prs, kind))
            found.extend(self._check_data(slide, slide_no, prs))

        return found
			
 
				+
			
 
				+    def _get_page_type(self, page_idx: int, config, total_slides: int) -> str:
			
 
				+        if config and hasattr(config, 'pages') and page_idx < len(config.pages):
			
 
				+            return config.pages[page_idx].page_type
			
 
				+        if page_idx == 0:
			
 
				+            return 'cover'
			
 
				+        if page_idx == total_slides - 1:
			
 
				+            return 'end'
			
 
				+        if page_idx == 1:
			
 
				+            return 'toc'
			
 
				+        return 'content'
			
 
				+
			
 
				+    def _check_confirmation_alignment(self, prs, config) -> list[QualityIssue]:
			
 
				+        issues = []
			
 
				+        if not config:
			
 
				+            return issues
			
 
				+
			
 
				+        selected_pages = [p for p in getattr(config, 'pages', []) if getattr(p, 'selected', True)]
			
 
				+        if getattr(config, 'require_six_confirmations', False):
			
 
				+            confirmation = getattr(config, 'user_confirmation', None)
			
 
				+            if confirmation and hasattr(confirmation, 'is_complete') and not confirmation.is_complete():
			
 
				+                issues.append(QualityIssue(
			
 
				+                    'critical', 'data', -1,
			
 
				+                    '六项确认未完成，PPT 不应进入输出阶段',
			
 
				+                    'D006', False,
			
 
				+                    {'type': 'confirmation_incomplete'}
			
 
				+                ))
			
 
				+
			
 
				+        if config and getattr(config, 'page_count_range', None) and selected_pages:
			
 
				+            low, high = config.page_count_range
			
 
				+            if len(selected_pages) < low or len(selected_pages) > high:
			
 
				+                issues.append(QualityIssue(
			
 
				+                    'major', 'data', -1,
			
 
				+                    f'页面数量 {len(selected_pages)} 不在确认范围 {low}-{high} 内',
			
 
				+                    'D006', False,
			
 
				+                    {'type': 'page_count_range'}
			
 
				+                ))
			
 
				+
			
 
				+        if config and getattr(config, 'metrics', None) and len(selected_pages) > 0:
			
 
				+            selected_metrics = [m for m in config.metrics if getattr(m, 'selected', True)]
			
 
				+            if not selected_metrics:
			
 
				+                issues.append(QualityIssue(
			
 
				+                    'critical', 'data', -1,
			
 
				+                    '未找到已确认的核心指标，无法验证输出一致性',
			
 
				+                    'D006', False,
			
 
				+                    {'type': 'missing_metrics'}
			
 
				+                ))
			
 
				+        return issues
			
 
				+
			
 
				+    def auto_fix(self, prs: Presentation, issues: list[QualityIssue]):
			
 
				+        fixable = [i for i in issues if i.auto_fixable]
			
 
				+        self.fix_count = 0
			
 
				+        self.fix_log = []
			
 
				+
			
 
				+        for issue in fixable:
			
 
				+            try:
			
 
				+                if issue.page_index < 0:
			
 
				+                    continue
			
 
				+                slide = prs.slides[issue.page_index]
			
 
				+                self._apply_fix(slide, issue, prs)
			
 
				+                self.fix_count += 1
			
 
				+            except Exception as e:
			
 
				+                self.fix_log.append(f"Fix failed for {issue.rule_id}: {e}")
			
 
				+
			
 
				+        return self.fix_count
			
 
				+
			
 
				+    def _apply_fix(self, slide, issue, prs):
			
 
				+        category = issue.category
			
 
				+        if category == 'layout':
			
 
				+            self._fix_layout(slide, issue)
			
 
				+        elif category == 'visual':
			
 
				+            self._fix_visual(slide, issue)
			
 
				+        elif category == 'content':
			
 
				+            self._fix_content(slide, issue, prs)
			
 
				+        elif category == 'data':
			
 
				+            self._fix_data(slide, issue, prs)
			
 
				+
			
 
				+    def generate_report(self, issues: list[QualityIssue], iteration: int = 1,
			
 
				+                        total_pages: int = 0) -> str:
			
 
				+        lines = []
			
 
				+        lines.append('═' * 50)
			
 
				+        lines.append(f'  PPT 质量自检报告 (第 {iteration} 轮)')
			
 
				+        lines.append('═' * 50)
			
 
				+
			
 
				+        if not issues:
			
 
				+            lines.append('[PASS] 全部通过！未发现任何质量问题。')
			
 
				+            return '\n'.join(lines)
			
 
				+
			
 
				+        by_page = {}
			
 
				+        for iss in issues:
			
 
				+            p = iss.page_index
			
 
				+            if p not in by_page:
			
 
				+                by_page[p] = []
			
 
				+            by_page[p].append(iss)
			
 
				+
			
 
				+        for p_idx in sorted(by_page.keys()):
			
 
				+            page_issues = by_page[p_idx]
			
 
				+            sev_order = {'critical': 0, 'major': 1, 'minor': 2}
			
 
				+            page_issues.sort(key=lambda x: sev_order.get(x.severity, 3))
			
 
				+
			
 
				+            has_critical = any(i.severity == 'critical' for i in page_issues)
			
 
				+            has_major = any(i.severity == 'major' for i in page_issues)
			
 
				+
			
 
				+            if has_critical:
			
 
				+                icon = '[CRIT]'
			
 
				+            elif has_major:
			
 
				+                icon = '[MAJ]'
			
 
				+            else:
			
 
				+                icon = '[OK]'
			
 
				+
			
 
				+            lines.append(f'{icon} 第{p_idx+1}页: {len(page_issues)} 个问题')
			
 
				+
			
 
				+            for iss in page_issues:
			
 
				+                sev_icon = {'critical': '[!!]', 'major': '[!]', 'minor': '[-]'}.get(iss.severity, '')
			
 
				+                status = ' [FIXED]' if iss.auto_fixable and iss.fix_data.get('fixed') else ''
			
 
				+                lines.append(f'  ├─ {sev_icon} {iss.description}{status}')
			
 
				+
			
 
				+        lines.append('─' * 50)
			
 
				+
			
 
				+        by_sev = Counter(i.severity for i in issues)
			
 
				+        by_cat = Counter(i.category for i in issues)
			
 
				+        fixed = sum(1 for i in issues if i.auto_fixable and i.fix_data.get('fixed'))
			
 
				+        score = calculate_score(dict(by_sev), dict(by_cat), max(total_pages, 1))
			
 
				+        label = get_quality_label(score)
			
 
				+
			
 
				+        lines.append(f'总结: {len(issues)} 个问题 | '
			
 
				+                     f'{by_sev.get("critical", 0)} 严重 + '
			
 
				+                     f'{by_sev.get("major", 0)} 主要 + '
			
 
				+                     f'{by_sev.get("minor", 0)} 次要')
			
 
				+        lines.append(f'自动修复: {fixed}/{sum(1 for i in issues if i.auto_fixable)} 个')
			
 
				+        lines.append(f'最终质量评分: {score}/100 [{label}]')
			
 
				+        lines.append('═' * 50)
			
 
				+
			
 
				+        return '\n'.join(lines)
			
 
				+
			
 
    def quality_assured_build(self, build_fn, data, config, output_path,
                              max_iterations=None, _attempt=0) -> tuple:
        """Build the presentation, then inspect and auto-fix it in rounds.

        The deck is built once with ``build_fn(data, config)``. Each round
        inspects it, applies the auto-fixes and re-inspects; the loop stops
        when nothing, or only minor issues, remain. If a fix marks its issue
        with ``fix_data['needs_rebuild']``, the deck is rebuilt from scratch
        at the start of the next round — at most once per call.

        Args:
            build_fn: callable ``(data, config) -> Presentation``.
            data: passed to ``build_fn`` unchanged.
            config: provides ``max_fix_iterations`` and ``quality_threshold``.
            output_path: where the final presentation is saved.
            max_iterations: round limit; defaults to ``config.max_fix_iterations``.
                At least one round always runs.
            _attempt: unused; kept for signature compatibility.

        Returns:
            ``(prs, final_issues)`` — the presentation and the issues from the
            final inspection.

        Raises:
            RuntimeError: the score is below the threshold and a critical
                layout issue remains; nothing is saved in that case.
        """
        # The code after the loop needs a built deck and an iteration number,
        # so a limit of 0/None is raised to a single round.
        max_iterations = max(max_iterations or config.max_fix_iterations or 1, 1)
        total_pages = 0
        needs_rebuild = False
        rebuilt_once = False
        prs = None

        for iteration in range(1, max_iterations + 1):
            build_now = iteration == 1
            if needs_rebuild:
                needs_rebuild = False
                if rebuilt_once:
                    # Keep the current (already fixed) deck: rebuilding again
                    # would discard the fixes and could loop forever.
                    print('[INFO] 已尝试重建，不再继续重建以避免无限循环')
                else:
                    print('[REBUILD] 检测到需要重建的页面，触发重新生成...')
                    rebuilt_once = True
                    build_now = True
            if build_now:
                prs = build_fn(data, config)
                total_pages = len(prs.slides)

            issues = self.inspect(prs, config)

            if not issues:
                print(f'[PASS] 第 {iteration} 次迭代：无问题，质量达标')
                break

            by_sev = Counter(i.severity for i in issues)
            print(f'[INSPECT] 第 {iteration} 次自检：{by_sev.get("critical",0)} 严重 + '
                  f'{by_sev.get("major",0)} 主要 + {by_sev.get("minor",0)} 次要')

            fixable = [i for i in issues if i.auto_fixable]
            self.auto_fix(prs, fixable)
            print(f'[FIX] 自动修复了 {self.fix_count} 个问题')

            # A fixer may flag its issue as needing a full rebuild.
            if any(issue.fix_data.get('needs_rebuild') for issue in fixable):
                needs_rebuild = True
                print('[WARN] 检测到内容严重不足，将在下一轮迭代中重建')

            unfixable = [i for i in issues if not i.auto_fixable]
            if unfixable:
                print(f'[WARN] {len(unfixable)} 个问题需人工确认')

            remaining = self.inspect(prs, config)
            if not remaining:
                print(f'[PASS] 第 {iteration} 次修复后：所有问题已解决')
                break

            has_critical = any(i.severity == 'critical' for i in remaining)
            has_major = any(i.severity == 'major' for i in remaining)
            if not has_critical and not has_major:
                print(f'[PASS] 第 {iteration} 次修复后：仅剩次要问题，质量达标')
                break

        final_issues = self.inspect(prs, config)
        by_sev = Counter(i.severity for i in final_issues)
        by_cat = Counter(i.category for i in final_issues)
        score = calculate_score(dict(by_sev), dict(by_cat), max(total_pages, 1))
        label = get_quality_label(score)

        report = self.generate_report(final_issues, iteration, total_pages)
        print(report)

        if score >= config.quality_threshold:
            prs.save(output_path)
            print(f'[PASS] 高质量 PPT 已输出: {output_path}')
        else:
            has_critical_final = any(i.severity == 'critical' for i in final_issues)
            has_layout_critical = any(
                i.severity == 'critical' and i.category == 'layout'
                for i in final_issues
            )
            # Critical layout problems block output; other shortfalls are
            # saved with a warning.
            if has_layout_critical:
                raise RuntimeError(
                    f'PPT 存在严重布局问题（评分 {score}），无法自动修复。'
                    f'请检查页面配置和数据。'
                )
            prs.save(output_path)
            if has_critical_final:
                print(f'[WARN] 质量评分 {score}（低于阈值 {config.quality_threshold}），'
                      f'存在 {by_sev.get("critical", 0)} 个严重内容问题，建议补充分析数据后重新生成')
            else:
                print(f'[WARN] 质量评分 {score}（低于阈值 {config.quality_threshold}），已输出但建议复核')

        return prs, final_issues
			
 
				+
			
 
    def _check_layout(self, slide, page_idx) -> list[QualityIssue]:
        """Collect layout problems found on a single slide.

        Checks run in this order: per-shape bounds (L001-L004) and left-edge
        proximity (L007), leftover ``{...}`` placeholder text (L006), empty
        template artifacts (L008) and finally pairwise shape overlap (L005).

        Args:
            slide: Slide whose ``shapes`` are inspected.
            page_idx: Page index stored on every reported issue.

        Returns:
            The issues in detection order.
        """
        found = []

        def flag(severity, rule_id, message, fix_data):
            # Every layout issue shares the category, page index and the
            # ``True`` flag passed as the sixth QualityIssue argument.
            found.append(QualityIssue(severity, 'layout', page_idx,
                                      message, rule_id, True, fix_data))

        # Slides that expose no size of their own fall back to module defaults.
        page_w = int(slide.slide_width) if hasattr(slide, 'slide_width') else SLIDE_WIDTH
        page_h = int(slide.slide_height) if hasattr(slide, 'slide_height') else SLIDE_HEIGHT

        for shp in slide.shapes:
            left, top = int(shp.left), int(shp.top)
            width, height = int(shp.width), int(shp.height)
            right, bottom = left + width, top + height

            # Out-of-bounds checks tolerate a small overshoot on each side.
            if left < -100:
                flag('critical', 'L001',
                     f'形状"{_shape_name(shp)}"飞出页面左边界 (left={left})',
                     {'shape': shp, 'type': 'left'})
            if right > page_w + 500:
                flag('critical', 'L002',
                     f'形状"{_shape_name(shp)}"飞出页面右边界 (right={right}, max={page_w})',
                     {'shape': shp, 'type': 'right'})
            if top < -100:
                flag('critical', 'L003',
                     f'形状"{_shape_name(shp)}"飞出页面顶部 (top={top})',
                     {'shape': shp, 'type': 'top'})
            if bottom > page_h + 500:
                flag('critical', 'L004',
                     f'形状"{_shape_name(shp)}"飞出页面底部 (bottom={bottom}, max={page_h})',
                     {'shape': shp, 'type': 'bottom'})

            # Left-edge proximity only applies to shapes inside the margin.
            if not (0 <= left < SAFE_MARGIN):
                continue
            # Shapes flush left that span at least 80% of the page are exempt.
            if left == 0 and width >= page_w * 0.8:
                continue
            # Shapes sitting at the very top of the page are exempt.
            if top < 0 or bottom < Emu(100000):
                continue
            # Shapes starting near the bottom of the page are exempt.
            if top > page_h - Emu(500000):
                continue
            flag('minor', 'L007',
                 f'形状"{_shape_name(shp)}"过于靠近左边缘',
                 {'shape': shp, 'type': 'edge_left'})

        # Any remaining "{...}" token is treated as an unreplaced placeholder.
        unreplaced = re.compile(r'\{[^}]+\}')
        for shp in slide.shapes:
            if not shp.has_text_frame:
                continue
            content = shp.text_frame.text
            if unreplaced.search(content):
                flag('critical', 'L006',
                     f'发现未替换占位符: "{content[:50]}"',
                     {'shape': shp, 'type': 'placeholder'})

        for leftover in self._find_empty_template_artifacts(slide):
            flag('major', 'L008',
                 f'发现空模板组件残留: "{_shape_name(leftover)}"',
                 {'shape': leftover, 'type': 'empty_template_artifact'})

        # Compare each unordered pair of shapes exactly once.
        everything = list(slide.shapes)
        for position, first in enumerate(everything):
            for second in everything[position + 1:]:
                if not self._shapes_overlap(first, second):
                    continue
                first_name = _shape_name(first)
                second_name = _shape_name(second)
                if self._is_intentional_overlap(first, second):
                    continue
                flag('major', 'L005',
                     f'形状"{first_name}"与"{second_name}"存在重叠',
                     {'shape_a': first, 'shape_b': second, 'type': 'overlap'})

        return found
			
 
				+
			
 
				+    def _check_visual(self, slide, page_idx) -> list[QualityIssue]:
			
 
				+        issues = []
			
 
				+        fonts_seen = {}
			
 
				+
			
 
				+        for shape in slide.shapes:
			
 
				+            if not shape.has_text_frame:
			
 
				+                continue
			
 
				+            for para in shape.text_frame.paragraphs:
			
 
				+                for run in para.runs:
			
 
				+                    if run.font.size:
			
 
				+                        size_pt = run.font.size / 12700.0
			
 
				+                        if size_pt < 6:
			
 
				+                            issues.append(QualityIssue('major', 'visual', page_idx,
			
 
				+                                f'字号过小 ({size_pt:.1f}pt): "{run.text[:20]}"',
			
 
				+                                'V002', True, {'run': run, 'type': 'font_small'}))
			
 
				+                        elif size_pt > 65:
			
 
				+                            issues.append(QualityIssue('major', 'visual', page_idx,
			
 
				+                                f'字号过大 ({size_pt:.1f}pt): "{run.text[:20]}"',
			
 
				+                                'V003', True, {'run': run, 'type': 'font_large'}))
			
 
				+
			
 
				+                    if run.font.name:
			
 
				+                        fonts_seen[run.font.name] = fonts_seen.get(run.font.name, 0) + 1
			
 
				+
			
 
				+        if len(fonts_seen) > 3:
			
 
				+            issues.append(QualityIssue('minor', 'visual', page_idx,
			
 
				+                f'字体使用超过3种: {list(fonts_seen.keys())}',
			
 
				+                'V001', True, {'type': 'font_mixed', 'fonts': fonts_seen}))
			
 
				+
			
 
				+        return issues
			
 
				+
			
 
				+    def _check_content(self, slide, page_idx, config, prs, page_type='content') -> list[QualityIssue]:
			
 
				+        issues = []
			
 
				+
			
 
				+        if page_type in ('cover', 'end'):
			
 
				+            return issues
			
 
				+
			
 
				+        issues += self._check_dynamic_page_fit(page_idx, page_type, config)
			
 
				+        issues += self._check_core_metric_presence(slide, page_idx, page_type, config)
			
 
				+
			
 
				+        if page_type == 'toc':
			
 
				+            content_shapes = [s for s in slide.shapes
			
 
				+                             if s.has_text_frame and _is_in_content_area(s)]
			
 
				+            all_content_text = ''
			
 
				+            for shape in content_shapes:
			
 
				+                text = shape.text_frame.text.strip()
			
 
				+                if text:
			
 
				+                    all_content_text += text + '\n'
			
 
				+            if len(all_content_text.strip()) < 30:
			
 
				+                issues.append(QualityIssue('minor', 'content', page_idx,
			
 
				+                    '目录页内容过少',
			
 
				+                    'C008', False, {'type': 'empty_page'}))
			
 
				+            return issues
			
 
				+
			
 
				+        fill_ratio = calculate_fill_ratio(slide)
			
 
				+
			
 
				+        if page_type in ('kpi_overview', 'trend', 'distribution', 'ranking', 'summary'):
			
 
				+            if fill_ratio < FILL_RATIO_THRESHOLDS['sparse']:
			
 
				+                issues.append(QualityIssue('critical', 'content', page_idx,
			
 
				+                    f'页面内容严重不足，填充率仅 {fill_ratio:.1%}，必须补充图表和分析文本',
			
 
				+                    'C001', True, {'type': 'sparse', 'fill_ratio': fill_ratio}))
			
 
				+            elif fill_ratio < FILL_RATIO_THRESHOLDS['low']:
			
 
				+                issues.append(QualityIssue('major', 'content', page_idx,
			
 
				+                    f'页面留白偏多，填充率 {fill_ratio:.1%}，需补充分析内容',
			
 
				+                    'C001', True, {'type': 'sparse', 'fill_ratio': fill_ratio}))
			
 
				+        elif fill_ratio < FILL_RATIO_THRESHOLDS['sparse'] / 2:
			
 
				+            issues.append(QualityIssue('minor', 'content', page_idx,
			
 
				+                f'页面填充率过低 {fill_ratio:.1%}',
			
 
				+                'C001', False))
			
 
				+
			
 
				+        content_shapes = [s for s in slide.shapes
			
 
				+                         if s.has_text_frame and _is_in_content_area(s)]
			
 
				+        all_content_text = ''
			
 
				+        insight_blocks = 0
			
 
				+        for shape in content_shapes:
			
 
				+            tf = shape.text_frame
			
 
				+            full_text = tf.text.strip()
			
 
				+            if not full_text:
			
 
				+                continue
			
 
				+            all_content_text += full_text + '\n'
			
 
				+            for para in tf.paragraphs:
			
 
				+                para_text = para.text.strip()
			
 
				+                if para_text and len(para_text) >= TEXT_MIN_LENGTH:
			
 
				+                    insight_blocks += 1
			
 
				+
			
 
				+        total_content_chars = len(all_content_text.strip())
			
 
				+
			
 
				+        text_lengths = [len(p.text.strip()) for s in content_shapes
			
 
				+                        for p in s.text_frame.paragraphs if p.text.strip()]
			
 
				+
			
 
				+        if total_content_chars < PAGE_MIN_TEXT_LENGTH:
			
 
				+            issues.append(QualityIssue('critical', 'content', page_idx,
			
 
				+                f'页面内容为空！所有文本框总字数仅 {total_content_chars} 字（要求≥{PAGE_MIN_TEXT_LENGTH}字）',
			
 
				+                'C008', True, {'type': 'empty_page', 'char_count': total_content_chars}))
			
 
				+        elif total_content_chars < 200:
			
 
				+            issues.append(QualityIssue('major', 'content', page_idx,
			
 
				+                f'页面内容过少，总字数仅 {total_content_chars} 字，分析深度严重不足',
			
 
				+                'C008', True, {'type': 'empty_page', 'char_count': total_content_chars}))
			
 
				+
			
 
				+        if text_lengths and max(text_lengths) < TEXT_MIN_LENGTH:
			
 
				+            issues.append(QualityIssue('critical', 'content', page_idx,
			
 
				+                f'分析文本过短（最长为 {max(text_lengths)} 字），需撰写≥{TEXT_MIN_LENGTH}字的深度分析',
			
 
				+                'C005', True, {'type': 'short_text', 'max_length': max(text_lengths)}))
			
 
				+
			
 
				+        if insight_blocks < INSIGHT_MIN_COUNT:
			
 
				+            issues.append(QualityIssue('critical', 'content', page_idx,
			
 
				+                f'分析段数不足，仅 {insight_blocks} 段（要求≥{INSIGHT_MIN_COUNT}段）',
			
 
				+                'C007', True, {'type': 'insight_count', 'count': insight_blocks}))
			
 
				+
			
 
				+        has_title = False
			
 
				+        for shape in slide.shapes:
			
 
				+            if shape.has_text_frame:
			
 
				+                text = shape.text_frame.text
			
 
				+                try:
			
 
				+                    sy = int(shape.top)
			
 
				+                except Exception:
			
 
				+                    sy = 99999999
			
 
				+                if sy < CONTENT_TOP_BASE + Emu(100000) and sy > Emu(500000):
			
 
				+                    if len(text.strip()) > 0 and not text.startswith('{'):
			
 
				+                        has_title = True
			
 
				+                        break
			
 
				+                if any(kw in text for kw in ['概览', '趋势', '分布', '分析', '总结',
			
 
				+                                              '排行', '报告', '建议', '告警', '要点']):
			
 
				+                    if sy < CONTENT_TOP_BASE + Emu(400000):
			
 
				+                        has_title = True
			
 
				+                        break
			
 
				+
			
 
				+        if not has_title and page_idx > 0 and page_idx < len(prs.slides) - 1:
			
 
				+            issues.append(QualityIssue('critical', 'content', page_idx,
			
 
				+                '页面缺少标题', 'C006', True, {'type': 'missing_title'}))
			
 
				+
			
 
				+        for shape in slide.shapes:
			
 
				+            if shape.has_text_frame:
			
 
				+                if self._is_text_overflowing(shape):
			
 
				+                    issues.append(QualityIssue('major', 'content', page_idx,
			
 
				+                        f'文本可能超出文本框边界: "{shape.text_frame.text[:30]}"',
			
 
				+                        'C004', True, {'shape': shape, 'type': 'text_overflow'}))
			
 
				+
			
 
				+        has_chart = False
			
 
				+        for shape in slide.shapes:
			
 
				+            if shape.has_chart:
			
 
				+                has_chart = True
			
 
				+                break
			
 
				+
			
 
				+        if has_chart and insight_blocks == 0 and page_idx >= 2:
			
 
				+            issues.append(QualityIssue('critical', 'content', page_idx,
			
 
				+                '页面有图表但完全缺少分析文本，图表数据需要被解读和说明',
			
 
				+                'C009', True, {'type': 'chart_no_text'}))
			
 
				+
			
 
				+        return issues
			
 
				+
			
 
				+    def _check_dynamic_page_fit(self, page_idx, page_type, config) -> list[QualityIssue]:
			
 
				+        issues = []
			
 
				+        profile = getattr(config, 'data_profiling', None) or {}
			
 
				+        if not profile:
			
 
				+            return issues
			
 
				+
			
 
				+        time_cols = profile.get('time_columns', [])
			
 
				+        cat_cols = profile.get('category_columns', [])
			
 
				+        num_cols = profile.get('numeric_columns', [])
			
 
				+
			
 
				+        if page_type == 'trend' and (not time_cols or not num_cols):
			
 
				+            issues.append(QualityIssue(
			
 
				+                'critical', 'content', page_idx,
			
 
				+                '趋势页缺少可用时间列或数值列，需要重建或降级为摘要页',
			
 
				+                'C010', True, {'type': 'dynamic_page_not_supported', 'page_type': page_type}
			
 
				+            ))
			
 
				+        elif page_type in ('distribution', 'ranking') and (not cat_cols or not num_cols):
			
 
				+            issues.append(QualityIssue(
			
 
				+                'critical', 'content', page_idx,
			
 
				+                f'{page_type} 页缺少分类维度或数值列，需要重建或降级为摘要页',
			
 
				+                'C010', True, {'type': 'dynamic_page_not_supported', 'page_type': page_type}
			
 
				+            ))
			
 
				+        elif page_type == 'kpi_overview':
			
 
				+            selected_metrics = [m for m in getattr(config, 'metrics', []) if getattr(m, 'selected', True)]
			
 
				+            if len(selected_metrics) > 6:
			
 
				+                issues.append(QualityIssue(
			
 
				+                    'major', 'content', page_idx,
			
 
				+                    f'核心指标数量 {len(selected_metrics)} 超过 6 个，KPI页应拆页或改为紧凑布局',
			
 
				+                    'C011', True, {'type': 'kpi_layout_over_capacity', 'count': len(selected_metrics)}
			
 
				+                ))
			
 
				+        return issues
			
 
				+
			
 
				+    def _check_core_metric_presence(self, slide, page_idx, page_type, config) -> list[QualityIssue]:
			
 
				+        issues = []
			
 
				+        if page_type != 'kpi_overview' or not config:
			
 
				+            return issues
			
 
				+        selected_metrics = [m for m in getattr(config, 'metrics', []) if getattr(m, 'selected', True)]
			
 
				+        if not selected_metrics:
			
 
				+            return issues
			
 
				+        slide_text = '\n'.join(
			
 
				+            shape.text_frame.text for shape in slide.shapes
			
 
				+            if shape.has_text_frame and shape.text_frame.text
			
 
				+        )
			
 
				+        missing = [m.label for m in selected_metrics[:6] if m.label and m.label not in slide_text]
			
 
				+        if missing:
			
 
				+            issues.append(QualityIssue(
			
 
				+                'critical', 'data', page_idx,
			
 
				+                'KPI概览页缺少已确认核心指标：' + '、'.join(missing),
			
 
				+                'D006', True, {'type': 'core_metric_missing', 'missing': missing}
			
 
				+            ))
			
 
				+        return issues
			
 
				+
			
 
				+    def _check_data(self, slide, page_idx, prs) -> list[QualityIssue]:
			
 
				+        issues = []
			
 
				+
			
 
				+        if page_idx == 0:
			
 
				+            return issues
			
 
				+
			
 
				+        for shape in slide.shapes:
			
 
				+            if shape.has_text_frame:
			
 
				+                text = shape.text_frame.text
			
 
				+
			
 
				+                page_pattern = re.search(r'(\d+)\s*/\s*(\d+)', text)
			
 
				+                if page_pattern:
			
 
				+                    current = int(page_pattern.group(1))
			
 
				+                    total = int(page_pattern.group(2))
			
 
				+                    if total == 0:
			
 
				+                        issues.append(QualityIssue('major', 'data', page_idx,
			
 
				+                            f'页码格式异常: {text.strip()}',
			
 
				+                            'D002', True, {'type': 'page_num'}))
			
 
				+
			
 
				+        return issues
			
 
				+
			
 
				+    def _fix_layout(self, slide, issue):
			
 
				+        fd = issue.fix_data
			
 
				+        if fd.get('type') in ('left', 'right', 'top', 'bottom'):
			
 
				+            shape = fd.get('shape')
			
 
				+            if shape:
			
 
				+                ensure_safe_position(shape, SLIDE_WIDTH, SLIDE_HEIGHT)
			
 
				+                fd['fixed'] = True
			
 
				+
			
 
				+        elif fd.get('type') == 'overlap':
			
 
				+            a, b = fd.get('shape_a'), fd.get('shape_b')
			
 
				+            if a and b:
			
 
				+                try:
			
 
				+                    if int(b.left) < int(a.left) + int(a.width) + Emu(50000):
			
 
				+                        b.left = int(a.left) + int(a.width) + Emu(152400)
			
 
				+                        ensure_safe_position(b, SLIDE_WIDTH, SLIDE_HEIGHT)
			
 
				+                except Exception:
			
 
				+                    pass
			
 
				+                fd['fixed'] = True
			
 
				+
			
 
				+        elif fd.get('type') == 'placeholder':
			
 
				+            shape = fd.get('shape')
			
 
				+            if shape and shape.has_text_frame:
			
 
				+                for para in shape.text_frame.paragraphs:
			
 
				+                    para.text = re.sub(r'\{[^}]+\}', '', para.text)
			
 
				+                fd['fixed'] = True
			
 
				+
			
 
				+        elif fd.get('type') == 'edge_left':
			
 
				+            shape = fd.get('shape')
			
 
				+            if shape:
			
 
				+                try:
			
 
				+                    w = int(shape.width)
			
 
				+                    if w < SLIDE_WIDTH * 0.5:
			
 
				+                        shape.left = SAFE_MARGIN
			
 
				+                except Exception:
			
 
				+                    pass
			
 
				+                fd['fixed'] = True
			
 
				+
			
 
				+        elif fd.get('type') == 'empty_template_artifact':
			
 
				+            shape = fd.get('shape')
			
 
				+            if shape:
			
 
				+                self._remove_shape(shape)
			
 
				+                fd['fixed'] = True
			
 
				+
			
 
				+    def _fix_visual(self, slide, issue):
			
 
				+        fd = issue.fix_data
			
 
				+        if fd.get('type') == 'font_small':
			
 
				+            run = fd.get('run')
			
 
				+            if run:
			
 
				+                run.font.size = FONT_SIZE_MIN
			
 
				+                fd['fixed'] = True
			
 
				+        elif fd.get('type') == 'font_large':
			
 
				+            run = fd.get('run')
			
 
				+            if run:
			
 
				+                run.font.size = FONT_SIZE_MAX
			
 
				+                fd['fixed'] = True
			
 
				+        elif fd.get('type') == 'font_mixed':
			
 
				+            for shape in slide.shapes:
			
 
				+                if shape.has_text_frame:
			
 
				+                    for para in shape.text_frame.paragraphs:
			
 
				+                        for run in para.runs:
			
 
				+                            run.font.name = DEFAULT_FONT
			
 
				+            fd['fixed'] = True
			
 
				+
			
 
				+    def _fix_content(self, slide, issue, prs):
			
 
				+        fd = issue.fix_data
			
 
				+        if fd.get('type') == 'sparse':
			
 
				+            fill_ratio = fd.get('fill_ratio', 0)
			
 
				+            if fill_ratio < FILL_RATIO_THRESHOLDS['low']:
			
 
				+                try:
			
 
				+                    box = slide.shapes.add_textbox(
			
 
				+                        CONTENT_LEFT, Emu(int(FOOTER_TOP) - Emu(1600000)),
			
 
				+                        Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT - Emu(200000)), Emu(1500000))
			
 
				+                    tf = box.text_frame
			
 
				+                    tf.word_wrap = True
			
 
				+                    p = tf.paragraphs[0]
			
 
				+                    p.text = (
			
 
				+                        '[WARNING] 此页面内容不足，需补充深度分析内容。'
			
 
				+                        '分析应包含：具体数据引用（含数值和单位）、'
			
 
				+                        '与同类/历史/目标的对比分析、'
			
 
				+                        '数据背后原因的至少2条解读、'
			
 
				+                        '以及可执行的业务行动建议。'
			
 
				+                        '请勿使用"要加强"、"进一步优化"等模糊措辞。'
			
 
				+                    )
			
 
				+                    p.font.size = Pt(12)
			
 
				+                    p.font.color.rgb = RGBColor(0xCC, 0x33, 0x00)
			
 
				+                    p.font.name = DEFAULT_FONT
			
 
				+                    p.font.bold = True
			
 
				+                    fd['fixed'] = True
			
 
				+                    fd['needs_rebuild'] = True
			
 
				+                except Exception:
			
 
				+                    pass
			
 
				+
			
 
				+        elif fd.get('type') == 'empty_page':
			
 
				+            fd['needs_rebuild'] = True
			
 
				+            fd['fixed'] = True
			
 
				+
			
 
				+        elif fd.get('type') == 'chart_no_text':
			
 
				+            fd['needs_rebuild'] = True
			
 
				+            fd['fixed'] = True
			
 
				+
			
 
				+        elif fd.get('type') == 'insight_count':
			
 
				+            fd['needs_rebuild'] = True
			
 
				+            fd['fixed'] = True
			
 
				+
			
 
				+        elif fd.get('type') == 'short_text':
			
 
				+            fd['needs_rebuild'] = True
			
 
				+            fd['fixed'] = True
			
 
				+
			
 
				+        elif fd.get('type') in ('dynamic_page_not_supported', 'kpi_layout_over_capacity'):
			
 
				+            fd['needs_rebuild'] = True
			
 
				+            fd['fixed'] = True
			
 
				+
			
 
				+        elif fd.get('type') == 'core_metric_missing':
			
 
				+            fd['needs_rebuild'] = True
			
 
				+            fd['fixed'] = True
			
 
				+
			
 
				+        elif fd.get('type') == 'missing_title':
			
 
				+            try:
			
 
				+                box = slide.shapes.add_textbox(
			
 
				+                    CONTENT_LEFT, Emu(914400),
			
 
				+                    Emu(SLIDE_WIDTH - 2 * CONTENT_LEFT - Emu(200000)), Emu(508000))
			
 
				+                p = box.text_frame.paragraphs[0]
			
 
				+                p.text = '数据详情'
			
 
				+                p.font.size = Pt(24)
			
 
				+                p.font.bold = True
			
 
				+                p.font.color.rgb = RGBColor(0x33, 0x33, 0x33)
			
 
				+                p.font.name = DEFAULT_FONT
			
 
				+                fd['fixed'] = True
			
 
				+            except Exception:
			
 
				+                pass
			
 
				+
			
 
				+        elif fd.get('type') == 'text_overflow':
			
 
				+            shape = fd.get('shape')
			
 
				+            if shape and shape.has_text_frame:
			
 
				+                text_len = len(shape.text_frame.text or '')
			
 
				+                try:
			
 
				+                    if text_len > 180 or int(shape.top) + int(shape.height) > int(FOOTER_TOP) - Emu(120000):
			
 
				+                        fd['needs_rebuild'] = True
			
 
				+                    else:
			
 
				+                        for para in shape.text_frame.paragraphs:
			
 
				+                            for run in para.runs:
			
 
				+                                if run.font.size and run.font.size > Pt(9):
			
 
				+                                    run.font.size = Pt(9)
			
 
				+                except Exception:
			
 
				+                    fd['needs_rebuild'] = True
			
 
				+                fd['fixed'] = True
			
 
				+
			
 
				+    def _fix_data(self, slide, issue, prs):
			
 
				+        fd = issue.fix_data
			
 
				+        if fd.get('type') == 'page_num':
			
 
				+            fd['fixed'] = True
			
 
				+
			
 
				+    def _shapes_overlap(self, a, b) -> bool:
			
 
				+        ax, ay, aw, ah = int(a.left), int(a.top), int(a.width), int(a.height)
			
 
				+        bx, by, bw, bh = int(b.left), int(b.top), int(b.width), int(b.height)
			
 
				+
			
 
				+        if ax + aw <= bx or bx + bw <= ax:
			
 
				+            return False
			
 
				+        if ay + ah <= by or by + bh <= ay:
			
 
				+            return False
			
 
				+        return True
			
 
				+
			
 
				+    def _is_intentional_overlap(self, a, b) -> bool:
			
 
				+        if hasattr(a, 'is_placeholder') or hasattr(b, 'is_placeholder'):
			
 
				+            return True
			
 
				+        a_area = int(a.width) * int(a.height)
			
 
				+        b_area = int(b.width) * int(b.height)
			
 
				+        if a_area > b_area * 3 or b_area > a_area * 3:
			
 
				+            return True
			
 
				+        return False
			
 
				+
			
 
				+    def _is_title_shape(self, shape) -> bool:
			
 
				+        if not shape.has_text_frame:
			
 
				+            return False
			
 
				+        try:
			
 
				+            y = int(shape.top)
			
 
				+            return y < int(CONTENT_TOP_BASE) + Emu(200000)
			
 
				+        except Exception:
			
 
				+            return False
			
 
				+
			
 
				+    def _find_empty_template_artifacts(self, slide) -> list:
			
 
				+        artifacts = []
			
 
				+        shapes = list(slide.shapes)
			
 
				+        empty_text_boxes = []
			
 
				+
			
 
				+        for shape in shapes:
			
 
				+            if shape.has_text_frame:
			
 
				+                text = (shape.text_frame.text or '').strip()
			
 
				+                if text:
			
 
				+                    continue
			
 
				+                if int(shape.width) < Emu(200000) or int(shape.height) < Emu(120000):
			
 
				+                    continue
			
 
				+                if int(shape.top) < Emu(900000) or int(shape.top) > int(FOOTER_TOP) - Emu(100000):
			
 
				+                    continue
			
 
				+                empty_text_boxes.append(shape)
			
 
				+                artifacts.append(shape)
			
 
				+
			
 
				+        for shape in shapes:
			
 
				+            if shape.has_text_frame:
			
 
				+                continue
			
 
				+            try:
			
 
				+                is_large_soft_card = (
			
 
				+                    int(shape.width) >= Emu(1000000) and
			
 
				+                    int(shape.height) >= Emu(500000) and
			
 
				+                    int(shape.top) < int(FOOTER_TOP) - Emu(400000)
			
 
				+                )
			
 
				+                if not is_large_soft_card:
			
 
				+                    continue
			
 
				+                overlaps_empty_text = any(self._shapes_overlap(shape, box) for box in empty_text_boxes)
			
 
				+                if overlaps_empty_text:
			
 
				+                    artifacts.append(shape)
			
 
				+            except Exception:
			
 
				+                continue
			
 
				+
			
 
				+        # Preserve order while de-duplicating.
			
 
				+        seen = set()
			
 
				+        unique = []
			
 
				+        for shape in artifacts:
			
 
				+            key = id(shape)
			
 
				+            if key not in seen:
			
 
				+                unique.append(shape)
			
 
				+                seen.add(key)
			
 
				+        return unique
			
 
				+
			
 
				+    def _remove_shape(self, shape):
			
 
				+        el = shape.element
			
 
				+        el.getparent().remove(el)
			
 
				+
			
 
				+    def _is_text_overflowing(self, shape) -> bool:
			
 
				+        if not shape.has_text_frame:
			
 
				+            return False
			
 
				+        text = shape.text_frame.text
			
 
				+        if not text.strip():
			
 
				+            return False
			
 
				+        if len(text) > 800:
			
 
				+            return True
			
 
				+        try:
			
 
				+            w = int(shape.width)
			
 
				+            h = int(shape.height)
			
 
				+            width_pt = max(1, w / 12700.0)
			
 
				+            max_font_pt = 10
			
 
				+            para_count = 0
			
 
				+            for para in shape.text_frame.paragraphs:
			
 
				+                if not para.text.strip():
			
 
				+                    continue
			
 
				+                para_count += 1
			
 
				+                for run in para.runs:
			
 
				+                    if run.font.size:
			
 
				+                        max_font_pt = max(max_font_pt, run.font.size / 12700.0)
			
 
				+            chars_per_line = max(8, int(width_pt / (max_font_pt * 1.15)))
			
 
				+            est_lines = max(1, (len(text) + chars_per_line - 1) // chars_per_line)
			
 
				+            est_height = int((est_lines * max_font_pt * 1.2 + para_count * 4) * 12700)
			
 
				+            if est_height > h * 1.15:
			
 
				+                return True
			
 
				+            if h < Emu(200000) and len(text) > 80:
			
 
				+                return True
			
 
				+        except Exception:
			
 
				+            pass
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def _shape_name(shape):
			
 
				+    try:
			
 
				+        if shape.has_text_frame:
			
 
				+            return shape.text_frame.text[:20].replace('\n', ' ')
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+    try:
			
 
				+        return shape.shape_type
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+    return '无名形状'
			
 
				+
			
 
				+
			
 
				+def _is_in_content_area(shape):
			
 
				+    try:
			
 
				+        return int(shape.top) >= int(CONTENT_TOP_BASE)
			
 
				+    except Exception:
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    print("QualityInspector module loaded")
			
 
				+    inspector = QualityInspector()
			
 
				+    print("Ready to inspect PPT files")
			
--- a/generate-data-report-ppt/scripts/quality_rules.py
+++ b/generate-data-report-ppt/scripts/quality_rules.py
@@ -0,0 +1,141 @@
 
				+"""
			
 
				+Quality inspection rule definitions for PPT quality assurance.
			
 
				+Each rule defines a check function, severity level, and auto-fix strategy.
			
 
				+"""
			
 
				+from pptx.util import Emu, Pt
			
 
				+from dataclasses import dataclass, field
			
 
				+from typing import Callable, Optional
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class QualityRule:
			
 
				+    rule_id: str
			
 
				+    category: str
			
 
				+    description: str
			
 
				+    severity: str
			
 
				+    auto_fixable: bool
			
 
				+    check_fn: str
			
 
				+    fix_fn: str
			
 
				+
			
 
				+
			
 
# Master rule table. Positional arguments are, in order: rule_id, category,
# description, severity, auto_fixable, check_fn, fix_fn. The two function
# fields are name strings, not callables.
QUALITY_RULES = [
    # --- layout: element position and leftover template content ---
    QualityRule('L001', 'layout', '元素飞出页面左边界', 'critical', True, '_check_left_bounds', '_fix_left_bounds'),
    QualityRule('L002', 'layout', '元素飞出页面右边界', 'critical', True, '_check_right_bounds', '_fix_right_bounds'),
    QualityRule('L003', 'layout', '元素飞出页面顶部', 'critical', True, '_check_top_bounds', '_fix_top_bounds'),
    QualityRule('L004', 'layout', '元素飞出页面底部', 'critical', True, '_check_bottom_bounds', '_fix_bottom_bounds'),
    QualityRule('L005', 'layout', '图文重叠', 'critical', True, '_check_overlap', '_fix_overlap'),
    QualityRule('L006', 'layout', '占位符未替换', 'critical', True, '_check_placeholders', '_fix_placeholders'),
    QualityRule('L007', 'layout', '元素紧贴页面边缘', 'minor', True, '_check_edge_proximity', '_fix_edge_proximity'),

    # --- visual: fonts, contrast, image proportions ---
    QualityRule('V001', 'visual', '字体不一致', 'minor', True, '_check_font_consistency', '_fix_font_consistency'),
    QualityRule('V002', 'visual', '字号过小(<8pt)', 'major', True, '_check_font_too_small', '_fix_font_too_small'),
    QualityRule('V003', 'visual', '字号过大(>60pt)', 'major', True, '_check_font_too_large', '_fix_font_too_large'),
    QualityRule('V004', 'visual', '颜色对比度不足', 'major', True, '_check_contrast', '_fix_contrast'),
    QualityRule('V005', 'visual', '图片拉伸变形', 'major', True, '_check_image_aspect', '_fix_image_aspect'),

    # --- content: page density, KPI/chart presence, text length ---
    QualityRule('C001', 'content', '页面留白过多(填充率<35%)', 'critical', True, '_check_sparse_page', '_fix_sparse_page'),
    QualityRule('C002', 'content', 'KPI卡片数值为空', 'critical', True, '_check_empty_kpi', '_fix_empty_kpi'),
    QualityRule('C003', 'content', '图表无数据', 'critical', True, '_check_empty_chart', '_fix_empty_chart'),
    QualityRule('C004', 'content', '文本截断溢出', 'major', True, '_check_text_overflow', '_fix_text_overflow'),
    # NOTE(review): this description says "<100字" while TEXT_MIN_LENGTH in
    # this module is 80 -- confirm which limit the check actually applies.
    QualityRule('C005', 'content', '分析文本过短(<100字)', 'critical', True, '_check_short_text', '_fix_short_text'),
    QualityRule('C006', 'content', '页面缺少标题', 'critical', True, '_check_missing_title', '_fix_missing_title'),
    QualityRule('C007', 'content', '分析段数不足', 'critical', True, '_check_insight_count', '_fix_insight_count'),
    QualityRule('C008', 'content', '页面内容为空(<50字)', 'critical', True, '_check_empty_page', '_fix_empty_page'),
    QualityRule('C009', 'content', '图表缺少分析文本', 'critical', True, '_check_chart_no_text', '_fix_chart_no_text'),

    # --- data: consistency of numbers, numbering, sources, tables, axes ---
    # D001 is the only rule declared with auto_fixable=False and no fix function.
    QualityRule('D001', 'data', '图表数据与文本矛盾', 'critical', False, '_check_data_text_contradiction', None),
    QualityRule('D002', 'data', '页码错乱', 'major', True, '_check_page_numbers', '_fix_page_numbers'),
    QualityRule('D003', 'data', '数据来源缺失', 'major', True, '_check_missing_source', '_fix_missing_source'),
    QualityRule('D004', 'data', '表格列宽不合理', 'minor', True, '_check_table_column_width', '_fix_table_column_width'),
    QualityRule('D005', 'data', '图表刻度异常', 'minor', True, '_check_axis_scale', '_fix_axis_scale'),
]

# Additional rules appended after the base table. C010, C011 and D006 all
# name the same repair function, '_fix_rebuild_page'.
QUALITY_RULES.extend([
    QualityRule('L008', 'layout', '空模板组件残留', 'major', True,
                '_check_empty_template_artifacts', '_fix_empty_template_artifacts'),
    QualityRule('C010', 'content', '动态页面与数据画像不匹配', 'critical', True,
                '_check_dynamic_page_fit', '_fix_rebuild_page'),
    QualityRule('C011', 'content', 'KPI布局容量不足', 'major', True,
                '_check_kpi_layout_capacity', '_fix_rebuild_page'),
    QualityRule('D006', 'data', '六项确认与输出不一致', 'critical', True,
                '_check_confirmation_alignment', '_fix_rebuild_page'),
])
			
 
				+
			
 
				+
			
 
# Penalty points charged per issue, keyed by severity. calculate_score()
# reads this table and uses 5 for any severity not listed here.
SEVERITY_WEIGHTS = {
    'critical': 20,
    'major': 10,
    'minor': 3,
}

# Relative weight per rule category; the values sum to 1.0.
# NOTE(review): calculate_score() in this module does not read this table.
CATEGORY_WEIGHTS = {
    'layout': 0.30,
    'visual': 0.25,
    'content': 0.25,
    'data': 0.20,
}

# Named fill-ratio cut-offs expressed as fractions.
# Presumably the share of the slide area covered by content -- confirm
# against the inspector that consumes them.
FILL_RATIO_THRESHOLDS = {
    'sparse': 0.20,
    'low': 0.35,
    'acceptable': 0.55,
    'good': 0.70,
}

# Font-size bounds as pptx lengths; rules V002/V003 describe the same limits.
FONT_SIZE_MIN = Pt(8)
FONT_SIZE_MAX = Pt(60)
# NOTE(review): rule C005's description says "<100字" -- confirm which applies.
TEXT_MIN_LENGTH = 80
INSIGHT_MIN_COUNT = 2
PAGE_MIN_TEXT_LENGTH = 50
# Geometry in EMU (12700 EMU per point, 914400 EMU per inch).
SAFE_MARGIN = Emu(762000)         # 60 pt
CONTENT_LEFT = Emu(762000)        # 60 pt, same value as SAFE_MARGIN
CONTENT_TOP_BASE = Emu(1524000)   # 120 pt
FOOTER_TOP = Emu(8824000)         # 320000 EMU above SLIDE_HEIGHT
# Slide size as plain ints (not Emu): about 17.78 in x 10 in, a 16:9 ratio.
SLIDE_WIDTH = 16256000
SLIDE_HEIGHT = 9144000

# Font family names; presumably applied by the font-consistency fix -- confirm.
DEFAULT_FONT = '微软雅黑'
DEFAULT_NUMBER_FONT = 'Arial'
			
 
				+
			
 
				+
			
 
				+def get_rules_by_category(category: str) -> list[QualityRule]:
			
 
				+    return [r for r in QUALITY_RULES if r.category == category]
			
 
				+
			
 
				+
			
 
				+def get_rules_by_severity(severity: str) -> list[QualityRule]:
			
 
				+    return [r for r in QUALITY_RULES if r.severity == severity]
			
 
				+
			
 
				+
			
 
				+def calculate_score(issues_by_severity: dict, issues_by_category: dict, total_pages: int) -> int:
			
 
				+    if total_pages <= 0:
			
 
				+        return 100
			
 
				+
			
 
				+    penalty = 0
			
 
				+    for sev, count in issues_by_severity.items():
			
 
				+        weight = SEVERITY_WEIGHTS.get(sev, 5)
			
 
				+        penalty += count * weight
			
 
				+
			
 
				+    per_page_penalty = min(penalty / total_pages, 80)
			
 
				+    score = max(0, 100 - per_page_penalty)
			
 
				+    return int(score)
			
 
				+
			
 
				+
			
 
				+def get_quality_label(score: int) -> str:
			
 
				+    if score >= 90:
			
 
				+        return '优质'
			
 
				+    elif score >= 75:
			
 
				+        return '良好'
			
 
				+    elif score >= 60:
			
 
				+        return '待改善'
			
 
				+    else:
			
 
				+        return '不合格'
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    print(f"Loaded {len(QUALITY_RULES)} quality rules")
			
 
				+    for cat in ['layout', 'visual', 'content', 'data']:
			
 
				+        rules = get_rules_by_category(cat)
			
 
				+        print(f"  {cat}: {len(rules)} rules")
			
 
				+    for sev in ['critical', 'major', 'minor']:
			
 
				+        rules = get_rules_by_severity(sev)
			
 
				+        print(f"  {sev}: {len(rules)} rules")
			
--- a/generate-data-report-ppt/scripts/report_config.py
+++ b/generate-data-report-ppt/scripts/report_config.py
@@ -0,0 +1,255 @@
 
				+"""
			
 
				+Report configuration data models for the universal data report generator.
			
 
				+Defines ReportConfig, MetricDef, PageDef, ThemeConfig, and related enums.
			
 
				+"""
			
 
				+from dataclasses import dataclass, field
			
 
				+from enum import Enum
			
 
				+from datetime import date
			
 
				+from typing import Optional
			
 
				+
			
 
				+
			
 
class PeriodType(str, Enum):
    """Reporting period options; the type of ``ReportConfig.period_type``.

    Mixing in ``str`` makes each member compare equal to its string value.
    """
    DAILY = 'daily'
    WEEKLY = 'weekly'
    MONTHLY = 'monthly'
    QUARTERLY = 'quarterly'
    CUSTOM = 'custom'
			
 
				+
			
 
				+
			
 
class AudienceType(str, Enum):
    """Target audience options; the type of ``ReportConfig.audience``.

    Mixing in ``str`` makes each member compare equal to its string value.
    """
    MANAGEMENT = 'management'
    OPERATION = 'operation'
    CLIENT = 'client'
    CUSTOM = 'custom'
			
 
				+
			
 
				+
			
 
class ComparisonType(str, Enum):
    """Comparison baseline options; the type of ``ReportConfig.comparison``.

    Mixing in ``str`` makes each member compare equal to its string value.
    """
    PREV_PERIOD = 'prev_period'
    YOY = 'yoy'
    NONE = 'none'
			
 
				+
			
 
				+
			
 
class ColumnRole(str, Enum):
    """Role assigned to a data column; the type of ``ColumnProfile.role``.

    Mixing in ``str`` makes each member compare equal to its string value.
    """
    TIME = 'time'
    NUMERIC = 'numeric'
    CATEGORY = 'category'
    TEXT = 'text'
    ID = 'id'
    BOOLEAN = 'boolean'
    UNKNOWN = 'unknown'
			
 
				+
			
 
				+
			
 
class AggregationType(str, Enum):
    """Aggregation options; the type of ``MetricDef.aggregation``.

    Mixing in ``str`` makes each member compare equal to its string value.
    """
    SUM = 'sum'
    COUNT = 'count'
    AVG = 'avg'
    MAX = 'max'
    MIN = 'min'
    DISTINCT_COUNT = 'distinct_count'
			
 
				+
			
 
				+
			
 
class MetricType(str, Enum):
    """Metric presentation kinds; the type of ``MetricDef.metric_type``.

    Mixing in ``str`` makes each member compare equal to its string value.
    """
    KPI = 'kpi'
    TREND = 'trend'
    DISTRIBUTION = 'distribution'
    RANKING = 'ranking'
    FUNNEL = 'funnel'
    ALERT = 'alert'
			
 
				+
			
 
				+
			
 
class ChartType(str, Enum):
    """Chart/visual element kinds.

    Mixing in ``str`` makes each member compare equal to its string value.
    Nothing else in this module references this enum.
    """
    COLUMN = 'column'
    BAR = 'bar'
    LINE = 'line'
    DOUGHNUT = 'doughnut'
    PIE = 'pie'
    FUNNEL = 'funnel'
    TABLE = 'table'
    GROUPED_BAR = 'grouped_bar'
			
 
				+
			
 
				+
			
 
class ThemePreset(str, Enum):
    """Identifiers of the built-in themes; the type of ``ThemeConfig.preset``.

    Mixing in ``str`` makes each member compare equal to its string value.
    """
    BUSINESS_CLASSIC = 'business_classic'
    FRESH_SIMPLE = 'fresh_simple'
    DARK_PROFESSIONAL = 'dark_professional'
    WARM_BRAND = 'warm_brand'
    CUSTOM = 'custom'
			
 
				+
			
 
				+
			
 
@dataclass
class ColumnProfile:
    """Profile of a single data column.

    The first six fields are required; the rest default to empty values.
    """
    column_name: str
    # Data type name as a string; presumably the pandas dtype -- confirm
    # against the profiler that builds these objects.
    dtype: str
    role: ColumnRole
    null_count: int
    # Presumably null_count divided by the row count -- confirm the scale
    # (fraction vs. percent) against the profiler.
    null_rate: float
    unique_count: int
    sample_values: list = field(default_factory=list)
    # Optional statistics dict; its keys are not defined in this module.
    numeric_stats: Optional[dict] = None
    inferred_label: str = ''
			
 
				+
			
 
				+
			
 
@dataclass
class MetricDef:
    """Definition of one report metric: a column plus an aggregation."""
    name: str
    # Display label; validate_six_confirmations() uses it in its messages.
    label: str
    # Source column name; validate_six_confirmations() checks it against the
    # available data columns when those are supplied.
    column: str
    aggregation: AggregationType
    metric_type: MetricType = MetricType.KPI
    unit: str = ''
    # Presumably a format() mini-language spec for the value -- confirm.
    format_spec: str = ',.0f'
    # Only metrics with selected=True count as confirmed core metrics in
    # validate_six_confirmations().
    selected: bool = True
    is_primary: bool = False
			
 
				+
			
 
				+
			
 
@dataclass
class PageDef:
    """Definition of one report page."""
    page_id: str
    title: str
    # Page kind as a free-form string; the allowed values are not defined in
    # this module.
    page_type: str
    order: int
    # Only pages with selected=True are counted against the confirmed page
    # range in validate_six_confirmations().
    selected: bool = True
    # Element descriptors as dicts; their schema is not defined in this module.
    elements: list[dict] = field(default_factory=list)
    conclusion_title: str = ''
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class ConfirmationSpec:
			
 
				+    """Six user confirmations required before building a report."""
			
 
				+    period_and_page_range: bool = False
			
 
				+    core_metrics: bool = False
			
 
				+    audience_and_decision: bool = False
			
 
				+    visual_style_and_palette: bool = False
			
 
				+    page_structure_and_template: bool = False
			
 
				+    data_scope_and_field_mapping: bool = False
			
 
				+
			
 
				+    def missing_items(self) -> list[str]:
			
 
				+        labels = {
			
 
				+            'period_and_page_range': '报告周期与页数范围',
			
 
				+            'core_metrics': '核心指标集',
			
 
				+            'audience_and_decision': '受众与决策场景',
			
 
				+            'visual_style_and_palette': '视觉风格与配色方向',
			
 
				+            'page_structure_and_template': '页面结构与模板方案',
			
 
				+            'data_scope_and_field_mapping': '数据范围与字段映射',
			
 
				+        }
			
 
				+        return [
			
 
				+            label for field_name, label in labels.items()
			
 
				+            if not getattr(self, field_name)
			
 
				+        ]
			
 
				+
			
 
				+    def is_complete(self) -> bool:
			
 
				+        return not self.missing_items()
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class ThemeConfig:
			
 
				+    preset: ThemePreset = ThemePreset.BUSINESS_CLASSIC
			
 
				+    name: str = '商务经典'
			
 
				+    primary: str = '#1E3A5F'
			
 
				+    accent: str = '#10B981'
			
 
				+    accent_neg: str = '#EF4444'
			
 
				+    secondary: str = '#64748B'
			
 
				+    dark: str = '#1F3A5C'
			
 
				+    white: str = '#FFFFFF'
			
 
				+    gray_bg: str = '#F2F2F2'
			
 
				+    card_bg: str = '#E7F0F7'
			
 
				+    text: str = '#333333'
			
 
				+    text_gray: str = '#666666'
			
 
				+    line: str = '#D9D9D9'
			
 
				+    chart_series: list[str] = field(default_factory=lambda: [
			
 
				+        '#1E3A5F', '#10B981', '#ED7D31', '#64748B',
			
 
				+        '#EF4444', '#707070', '#4472C4', '#10B981'
			
 
				+    ])
			
 
				+    title_font: str = '微软雅黑'
			
 
				+    body_font: str = '微软雅黑'
			
 
				+    number_font: str = 'Arial'
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class ReportConfig:
			
 
				+    title: str
			
 
				+    period_type: PeriodType
			
 
				+    date_range: tuple[date, date]
			
 
				+    period_str: str = ''
			
 
				+
			
 
				+    metrics: list[MetricDef] = field(default_factory=list)
			
 
				+    pages: list[PageDef] = field(default_factory=list)
			
 
				+
			
 
				+    audience: AudienceType = AudienceType.MANAGEMENT
			
 
				+    decision_scenario: str = ''
			
 
				+    custom_audience: str = ''
			
 
				+
			
 
				+    theme: ThemeConfig = field(default_factory=ThemeConfig)
			
 
				+    template_path: str = ''
			
 
				+    visual_style_direction: str = ''
			
 
				+    page_structure_template: str = ''
			
 
				+
			
 
				+    filters: dict = field(default_factory=dict)
			
 
				+    comparison: ComparisonType = ComparisonType.PREV_PERIOD
			
 
				+    page_count_range: tuple[int, int] = (6, 15)
			
 
				+
			
 
				+    source_label: str = '数据报告系统'
			
 
				+
			
 
				+    data_scope: str = ''
			
 
				+    data_field_mapping: dict = field(default_factory=dict)
			
 
				+
			
 
				+    data_profiling: Optional[dict] = None
			
 
				+    agent_recommendations: Optional[dict] = None
			
 
				+    user_confirmation: ConfirmationSpec = field(default_factory=ConfirmationSpec)
			
 
				+    require_six_confirmations: bool = True
			
 
				+
			
 
				+    quality_threshold: int = 85
			
 
				+    max_fix_iterations: int = 5
			
 
				+
			
 
				+    def to_dict(self) -> dict:
			
 
				+        return {
			
 
				+            'title': self.title,
			
 
				+            'period_type': self.period_type.value,
			
 
				+            'period_str': self.period_str,
			
 
				+            'page_count_range': list(self.page_count_range),
			
 
				+            'audience': self.audience.value,
			
 
				+            'theme_preset': self.theme.preset.value,
			
 
				+            'metrics_count': len(self.metrics),
			
 
				+            'pages_count': len(self.pages),
			
 
				+            'six_confirmations_complete': self.user_confirmation.is_complete(),
			
 
				+        }
			
 
				+
			
 
				+
			
 
				+def validate_six_confirmations(config: ReportConfig, data_columns: Optional[list[str]] = None) -> list[str]:
			
 
				+    """Return validation gaps for the six confirmation contract."""
			
 
				+    issues = []
			
 
				+
			
 
				+    missing = config.user_confirmation.missing_items()
			
 
				+    if missing:
			
 
				+        issues.append('六项确认未完成：' + '、'.join(missing))
			
 
				+
			
 
				+    if not config.period_str and not config.date_range:
			
 
				+        issues.append('缺少报告周期。')
			
 
				+    if not config.page_count_range or len(config.page_count_range) != 2:
			
 
				+        issues.append('缺少页数范围。')
			
 
				+    if not [m for m in config.metrics if m.selected]:
			
 
				+        issues.append('缺少已确认的核心指标集。')
			
 
				+    if not config.decision_scenario:
			
 
				+        issues.append('缺少受众与决策场景说明。')
			
 
				+    if not config.visual_style_direction and not config.theme:
			
 
				+        issues.append('缺少视觉风格与配色方向。')
			
 
				+    if not config.pages:
			
 
				+        issues.append('缺少页面结构与模板方案。')
			
 
				+    if not config.data_field_mapping:
			
 
				+        issues.append('缺少数据范围与字段映射。')
			
 
				+
			
 
				+    if data_columns:
			
 
				+        missing_cols = []
			
 
				+        for metric in config.metrics:
			
 
				+            if metric.selected and metric.column and metric.column not in data_columns:
			
 
				+                missing_cols.append(f'{metric.label} -> {metric.column}')
			
 
				+        if missing_cols:
			
 
				+            issues.append('核心指标字段映射不存在：' + '、'.join(missing_cols[:8]))
			
 
				+
			
 
				+    selected_pages = [p for p in config.pages if p.selected]
			
 
				+    if config.page_count_range and selected_pages:
			
 
				+        low, high = config.page_count_range
			
 
				+        if len(selected_pages) < low - 1 or len(selected_pages) > high + 1:
			
 
				+            issues.append(f'页面数量 {len(selected_pages)} 不在确认范围 {low}-{high} 页附近。')
			
 
				+
			
 
				+    return issues
			
--- a/generate-data-report-ppt/scripts/theme_manager.py
+++ b/generate-data-report-ppt/scripts/theme_manager.py
@@ -0,0 +1,146 @@
 
				+"""
			
 
				+Multi-theme color and visual style manager for the universal data report generator.
			
 
				+"""
			
 
				+from pptx.dml.color import RGBColor
			
 
				+from report_config import ThemeConfig, ThemePreset
			
 
				+
			
 
				+
			
 
# Built-in themes keyed by ThemePreset. ThemePreset.CUSTOM has no entry
# here; get_theme() builds custom themes separately. Fields not passed below
# (the three font fields) keep their ThemeConfig defaults.
PRESETS = {
    ThemePreset.BUSINESS_CLASSIC: ThemeConfig(
        preset=ThemePreset.BUSINESS_CLASSIC,
        name='商务经典',
        primary='#1E3A5F',
        accent='#10B981',
        accent_neg='#EF4444',
        secondary='#64748B',
        dark='#1F3A5C',
        white='#FFFFFF',
        gray_bg='#F2F2F2',
        card_bg='#E7F0F7',
        text='#333333',
        text_gray='#666666',
        line='#D9D9D9',
        chart_series=[
            '#1E3A5F', '#10B981', '#ED7D31', '#64748B',
            '#EF4444', '#707070', '#4472C4', '#5B9BD5',
        ],
    ),
    ThemePreset.FRESH_SIMPLE: ThemeConfig(
        preset=ThemePreset.FRESH_SIMPLE,
        name='清新简约',
        primary='#1B8A5E',
        accent='#10B981',
        accent_neg='#EF4444',
        secondary='#94A3B8',
        dark='#0F5C3B',
        white='#FFFFFF',
        gray_bg='#F8FAFC',
        card_bg='#ECFDF5',
        text='#1E293B',
        text_gray='#64748B',
        line='#E2E8F0',
        chart_series=[
            '#1B8A5E', '#3B82F6', '#F59E0B', '#94A3B8',
            '#EF4444', '#8B5CF6', '#06B6D4', '#10B981',
        ],
    ),
    # NOTE(review): despite the name, gray_bg/card_bg here are light and
    # `white` is still '#FFFFFF' -- confirm where the dark colours are applied.
    ThemePreset.DARK_PROFESSIONAL: ThemeConfig(
        preset=ThemePreset.DARK_PROFESSIONAL,
        name='深色专业',
        primary='#1E293B',
        accent='#38BDF8',
        accent_neg='#F87171',
        secondary='#94A3B8',
        dark='#0F172A',
        white='#FFFFFF',
        gray_bg='#F1F5F9',
        card_bg='#E2E8F0',
        text='#1E293B',
        text_gray='#475569',
        line='#CBD5E1',
        chart_series=[
            '#1E293B', '#38BDF8', '#F59E0B', '#94A3B8',
            '#F87171', '#A78BFA', '#34D399', '#FB923C',
        ],
    ),
    ThemePreset.WARM_BRAND: ThemeConfig(
        preset=ThemePreset.WARM_BRAND,
        name='温暖品牌',
        primary='#C2410C',
        accent='#F97316',
        accent_neg='#DC2626',
        secondary='#78716C',
        dark='#7C2D12',
        white='#FFFFFF',
        gray_bg='#FFFBEB',
        card_bg='#FFF7ED',
        text='#292524',
        text_gray='#78716C',
        line='#D6D3D1',
        chart_series=[
            '#C2410C', '#F97316', '#EAB308', '#78716C',
            '#DC2626', '#84CC16', '#06B6D4', '#A855F7',
        ],
    ),
}
			
 
				+
			
 
				+
			
 
				+def get_theme(preset: ThemePreset, custom_overrides: dict = None) -> ThemeConfig:
			
 
				+    if preset == ThemePreset.CUSTOM:
			
 
				+        config = ThemeConfig(preset=ThemePreset.CUSTOM, name='自定义主题')
			
 
				+        if custom_overrides:
			
 
				+            for k, v in custom_overrides.items():
			
 
				+                if hasattr(config, k):
			
 
				+                    setattr(config, k, v)
			
 
				+        return config
			
 
				+    return PRESETS.get(preset, PRESETS[ThemePreset.BUSINESS_CLASSIC])
			
 
				+
			
 
				+
			
 
				+def theme_to_rgb_colors(theme: ThemeConfig) -> dict:
			
 
				+    return {
			
 
				+        'primary': _hex_to_rgb(theme.primary),
			
 
				+        'accent': _hex_to_rgb(theme.accent),
			
 
				+        'accent_neg': _hex_to_rgb(theme.accent_neg),
			
 
				+        'secondary': _hex_to_rgb(theme.secondary),
			
 
				+        'dark': _hex_to_rgb(theme.dark),
			
 
				+        'white': _hex_to_rgb(theme.white),
			
 
				+        'gray_bg': _hex_to_rgb(theme.gray_bg),
			
 
				+        'card_bg': _hex_to_rgb(theme.card_bg),
			
 
				+        'text': _hex_to_rgb(theme.text),
			
 
				+        'text_gray': _hex_to_rgb(theme.text_gray),
			
 
				+        'line': _hex_to_rgb(theme.line),
			
 
				+        'green': _hex_to_rgb(theme.accent),
			
 
				+        'red': _hex_to_rgb(theme.accent_neg),
			
 
				+        'orange': _hex_to_rgb(theme.chart_series[2]) if len(theme.chart_series) > 2 else RGBColor(0xED, 0x7D, 0x31),
			
 
				+        'series': [_hex_to_rgb(c) for c in theme.chart_series],
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def _hex_to_rgb(hex_str: str) -> RGBColor:
			
 
				+    hex_str = hex_str.lstrip('#')
			
 
				+    if len(hex_str) == 6:
			
 
				+        return RGBColor(int(hex_str[0:2], 16), int(hex_str[2:4], 16), int(hex_str[4:6], 16))
			
 
				+    return RGBColor(0x33, 0x33, 0x33)
			
 
				+
			
 
				+
			
 
				+def list_themes() -> list[dict]:
			
 
				+    result = []
			
 
				+    for preset, config in PRESETS.items():
			
 
				+        result.append({
			
 
				+            'key': preset.value,
			
 
				+            'name': config.name,
			
 
				+            'primary': config.primary,
			
 
				+            'accent': config.accent,
			
 
				+        })
			
 
				+    result.append({
			
 
				+        'key': 'custom',
			
 
				+        'name': '自定义主题',
			
 
				+        'primary': '自定义',
			
 
				+        'accent': '自定义',
			
 
				+    })
			
 
				+    return result
			
 
				+
			
 
				+
			
 
if __name__ == '__main__':
    # Script mode: print one line per theme returned by list_themes(),
    # showing its key, name, primary colour and accent colour.
    for t in list_themes():
        print(f"{t['key']}: {t['name']} (primary={t['primary']}, accent={t['accent']})")
			
--- a/~$5月6日质检测试_v2.pptx
+++ b/~$5月6日质检测试_v2.pptx