""" Agent analyzer: intelligent analysis of data profile to generate recommendations for metrics, pages, charts, and overall report structure. Uses rule-based heuristics for analysis and generates structured recommendations. """ from report_config import ( MetricDef, PageDef, MetricType, AggregationType, ChartType, PeriodType, ColumnRole ) def analyze_and_recommend(profile: dict, period_type: PeriodType = PeriodType.MONTHLY) -> dict: recommendations = { 'suggested_metrics': _recommend_metrics(profile), 'suggested_pages': _recommend_pages(profile, period_type), 'suggested_period': period_type.value, 'suggested_page_range': (6, 15), 'data_summary': _build_summary(profile), 'chart_mapping': _build_chart_mapping(profile), 'analysis_notes': _build_analysis_notes(profile), } recommendations.update(_suggest_period_and_range(profile)) return recommendations def _recommend_metrics(profile: dict) -> list[dict]: metrics = [] num_cols = profile.get('numeric_columns', []) cat_cols = profile.get('category_columns', []) for i, col in enumerate(num_cols): ns = col.get('numeric_stats', {}) or {} label = col.get('inferred_label', col['column_name']) unit = _infer_unit(col['column_name']) is_primary = i < 4 metrics.append({ 'name': f"{label}_{col['column_name']}", 'label': label, 'column': col['column_name'], 'aggregation': 'sum', 'metric_type': 'kpi', 'unit': unit, 'selected': is_primary, 'is_primary': is_primary, 'sample_value': ns.get('sum', 0), }) if len(num_cols) <= 4 and ns.get('sum', 0) > 100: metrics.append({ 'name': f"日均{label}", 'label': f"日均{label}", 'column': col['column_name'], 'aggregation': 'avg', 'metric_type': 'kpi', 'unit': unit, 'selected': False, 'is_primary': False, 'sample_value': ns.get('mean', 0), }) if cat_cols: top_cat = cat_cols[0] metrics.append({ 'name': f"覆盖{top_cat['inferred_label']}数", 'label': f"覆盖{top_cat['inferred_label']}数", 'column': top_cat['column_name'], 'aggregation': 'distinct_count', 'metric_type': 'kpi', 'unit': '个', 'selected': True, 'is_primary': False, 'sample_value': top_cat.get('unique_count', 0), }) return metrics def _recommend_pages(profile: dict, period_type: PeriodType) -> list[dict]: pages = [] order = 0 pages.append({ 'page_id': 'cover', 'title': '封面', 'page_type': 'cover', 'order': order, 'selected': True, 'elements': [], 'conclusion_title': '', }) order += 1 num_cols = profile.get('numeric_columns', []) if period_type in (PeriodType.MONTHLY, PeriodType.QUARTERLY): pages.append({ 'page_id': 'toc', 'title': '目录', 'page_type': 'toc', 'order': order, 'selected': True, 'elements': [], }) order += 1 pages.append({ 'page_id': 'kpi_overview', 'title': '核心指标概览', 'page_type': 'kpi_overview', 'order': order, 'selected': True, 'elements': [{'type': 'kpi_cards', 'count': min(6, len(num_cols))}], 'conclusion_title': '核心指标概览', }) order += 1 time_cols = profile.get('time_columns', []) if time_cols and num_cols: top_num = num_cols[0] pages.append({ 'page_id': 'trend', 'title': f'{top_num["inferred_label"]}趋势', 'page_type': 'trend', 'order': order, 'selected': True, 'elements': [ {'type': 'line_chart', 'metric': top_num['column_name'], 'dimension': time_cols[0]['column_name'], 'title': f'{top_num["inferred_label"]}趋势'} ], 'conclusion_title': f'{top_num["inferred_label"]}趋势', }) order += 1 cat_cols = profile.get('category_columns', []) if cat_cols and num_cols: top_cat = cat_cols[0] top_num = num_cols[0] pages.append({ 'page_id': 'distribution', 'title': f'{top_cat["inferred_label"]}分布', 'page_type': 'distribution', 'order': order, 'selected': True, 'elements': [ {'type': 'doughnut_chart', 'metric': top_num['column_name'], 'dimension': top_cat['column_name'], 'title': f'{top_cat["inferred_label"]}占比'} ], 'conclusion_title': f'{top_cat["inferred_label"]}分布', }) order += 1 if len(cat_cols) >= 2: cat2 = cat_cols[1] if len(cat_cols) > 1 else cat_cols[0] pages.append({ 'page_id': 'ranking', 'title': f'{cat2["inferred_label"]}排行', 'page_type': 'ranking', 'order': order, 'selected': True, 'elements': [ {'type': 'bar_chart', 'metric': num_cols[0]['column_name'], 'dimension': cat2['column_name'], 'title': f'{cat2["inferred_label"]}TOP排行'} ], 'conclusion_title': f'{cat2["inferred_label"]}TOP排行', }) order += 1 pages.append({ 'page_id': 'summary', 'title': '总结与建议', 'page_type': 'summary', 'order': order, 'selected': True, 'elements': [{'type': 'insight_block', 'title': '总结与建议'}], 'conclusion_title': '总结与建议', }) order += 1 pages.append({ 'page_id': 'end', 'title': '尾页', 'page_type': 'end', 'order': order, 'selected': True, 'elements': [], }) return pages def _suggest_period_and_range(profile: dict) -> dict: granularity = profile.get('time_granularity', 'monthly') dr = profile.get('date_range', (None, None)) period_map = { 'daily': PeriodType.DAILY, 'weekly': PeriodType.WEEKLY, 'monthly': PeriodType.MONTHLY, 'quarterly': PeriodType.QUARTERLY, 'yearly': PeriodType.MONTHLY, } suggested = period_map.get(granularity, PeriodType.MONTHLY) page_range_map = { 'daily': (6, 9), 'weekly': (7, 11), 'monthly': (8, 14), 'quarterly': (10, 18), 'yearly': (12, 20), } page_range = page_range_map.get(granularity, (8, 14)) return { 'suggested_period': suggested.value, 'suggested_page_range': page_range, } def _build_chart_mapping(profile: dict) -> list[dict]: mapping = [] num_cols = profile.get('numeric_columns', []) time_cols = profile.get('time_columns', []) cat_cols = profile.get('category_columns', []) if time_cols and num_cols: for nc in num_cols[:3]: mapping.append({ 'metric': nc['inferred_label'], 'metric_col': nc['column_name'], 'dimension': time_cols[0]['column_name'], 'dimension_label': '时间', 'chart_type': ChartType.LINE.value, 'rationale': f'{nc["inferred_label"]}随时间变化趋势', }) if cat_cols and num_cols: top_num = num_cols[0] for cc in cat_cols[:3]: chart_type = ChartType.DOUGHNUT.value if cc['unique_count'] <= 8 else ChartType.BAR.value mapping.append({ 'metric': top_num['inferred_label'], 'metric_col': top_num['column_name'], 'dimension': cc['column_name'], 'dimension_label': cc['inferred_label'], 'chart_type': chart_type, 'rationale': f'{top_num["inferred_label"]}按{cc["inferred_label"]}的分布', }) return mapping def _build_summary(profile: dict) -> str: lines = [] lines.append(f"数据量: {profile['total_rows']:,} 行 × {profile['total_columns']} 列") num_cols = profile.get('numeric_columns', []) cat_cols = profile.get('category_columns', []) time_cols = profile.get('time_columns', []) lines.append(f"可计算指标: {len(num_cols)} 个数值列") lines.append(f"可分析维度: {len(cat_cols)} 个分类列") if time_cols: lines.append(f"时间列: {time_cols[0]['column_name']}") lines.append(f"数据粒度: {profile.get('time_granularity', 'unknown')}") dr = profile.get('date_range', (None, None)) if dr[0]: lines.append(f"时间范围: {dr[0]} ~ {dr[1]}") q = profile.get('data_quality', {}) lines.append(f"质量评分: {q.get('score', 0)}/100") return '\n'.join(lines) def _build_analysis_notes(profile: dict) -> list[str]: notes = [] num_cols = profile.get('numeric_columns', []) cat_cols = profile.get('category_columns', []) if not cat_cols: notes.append('数据中缺少分类维度列,报告将以数值汇总为主,建议补充分类字段以增强分析深度。') if len(num_cols) >= 4: names = [c['inferred_label'] for c in num_cols[:4]] notes.append(f'核心数值指标: {", ".join(names)}') if len(cat_cols) == 1: notes.append(f'仅有一个分类维度列 ({cat_cols[0]["inferred_label"]}),报告分析维度较窄。') elif len(cat_cols) >= 3: names = [c['inferred_label'] for c in cat_cols[:3]] notes.append(f'分类维度丰富 ({", ".join(names)}),可支撑多维交叉分析。') q = profile.get('data_quality', {}) if q.get('score', 100) < 85: notes.append(f'数据质量评分偏低 ({q["score"]}/100),建议在生成前检查缺失值与异常值。') return notes def _infer_unit(col_name: str) -> str: col_lower = col_name.lower().strip() unit_map = { '金额': '元', '销售额': '元', '收入': '元', '利润': '元', '成本': '元', '费用': '元', '台数': '台', '件数': '件', '数量': '', '人数': '人', '天数': '天', '占比': '%', '比率': '%', '比例': '%', '率': '%', } for kw, unit in unit_map.items(): if kw in col_lower: return unit return '' def generate_interaction_prompts(recommendations: dict, profile: dict) -> dict: return { 'period': { 'question': '报告周期与页数范围', 'detail': f"建议周期: {recommendations['suggested_period']}报\n建议页数: {recommendations['suggested_page_range'][0]}-{recommendations['suggested_page_range'][1]} 页\n请确认或调整", }, 'metrics': { 'question': '核心指标集', 'detail': f"检测到 {len(recommendations['suggested_metrics'])} 个可计算指标\n已自动推荐主要的 {min(6, len(recommendations['suggested_metrics']))} 个\n请确认或增删", }, 'audience': { 'question': '受众与决策场景', 'detail': '请选择: 管理层汇报 | 运营分析会 | 对外客户报告 | 自定义描述', }, 'style': { 'question': '视觉风格与配色方向', 'detail': '推荐方案: 商务经典(深蓝) | 清新简约(绿色) | 深色专业 | 温暖品牌\n请选择配色方案', }, 'pages': { 'question': '页面结构与模板方案', 'detail': f'推荐 {len(recommendations["suggested_pages"])} 个页面\n可增删调整页面顺序', }, } if __name__ == '__main__': profile = { 'total_rows': 3240, 'total_columns': 15, 'numeric_columns': [ {'column_name': '销售额', 'inferred_label': '销售额', 'numeric_stats': {'sum': 500000, 'mean': 154}}, {'column_name': '订单量', 'inferred_label': '订单量', 'numeric_stats': {'sum': 3240, 'mean': 1.0}}, {'column_name': '利润', 'inferred_label': '利润', 'numeric_stats': {'sum': 80000, 'mean': 25}}, ], 'category_columns': [ {'column_name': '区域', 'inferred_label': '区域', 'unique_count': 5}, {'column_name': '产品', 'inferred_label': '产品', 'unique_count': 12}, ], 'time_columns': [{'column_name': '日期', 'inferred_label': '日期'}], 'time_granularity': 'monthly', 'date_range': ('2026-01-01', '2026-04-30'), 'data_quality': {'score': 92}, } recs = analyze_and_recommend(profile, PeriodType.MONTHLY) prompts = generate_interaction_prompts(recs, profile) for k, v in prompts.items(): print(f"\n{k}: {v['question']}\n{v['detail']}")