# agent_analyzer.py
"""
Agent analyzer: analyzes a data profile with rule-based heuristics and
produces structured recommendations for metrics, pages, charts, and the
overall report structure.
"""
  6. from report_config import (
  7. MetricDef, PageDef, MetricType, AggregationType, ChartType,
  8. PeriodType, ColumnRole
  9. )
  10. def analyze_and_recommend(profile: dict, period_type: PeriodType = PeriodType.MONTHLY) -> dict:
  11. recommendations = {
  12. 'suggested_metrics': _recommend_metrics(profile),
  13. 'suggested_pages': _recommend_pages(profile, period_type),
  14. 'suggested_period': period_type.value,
  15. 'suggested_page_range': (6, 15),
  16. 'data_summary': _build_summary(profile),
  17. 'chart_mapping': _build_chart_mapping(profile),
  18. 'analysis_notes': _build_analysis_notes(profile),
  19. }
  20. recommendations.update(_suggest_period_and_range(profile))
  21. return recommendations
  22. def _recommend_metrics(profile: dict) -> list[dict]:
  23. metrics = []
  24. num_cols = profile.get('numeric_columns', [])
  25. cat_cols = profile.get('category_columns', [])
  26. for i, col in enumerate(num_cols):
  27. ns = col.get('numeric_stats', {}) or {}
  28. label = col.get('inferred_label', col['column_name'])
  29. unit = _infer_unit(col['column_name'])
  30. is_primary = i < 4
  31. metrics.append({
  32. 'name': f"{label}_{col['column_name']}",
  33. 'label': label,
  34. 'column': col['column_name'],
  35. 'aggregation': 'sum',
  36. 'metric_type': 'kpi',
  37. 'unit': unit,
  38. 'selected': is_primary,
  39. 'is_primary': is_primary,
  40. 'sample_value': ns.get('sum', 0),
  41. })
  42. if len(num_cols) <= 4 and ns.get('sum', 0) > 100:
  43. metrics.append({
  44. 'name': f"日均{label}",
  45. 'label': f"日均{label}",
  46. 'column': col['column_name'],
  47. 'aggregation': 'avg',
  48. 'metric_type': 'kpi',
  49. 'unit': unit,
  50. 'selected': False,
  51. 'is_primary': False,
  52. 'sample_value': ns.get('mean', 0),
  53. })
  54. if cat_cols:
  55. top_cat = cat_cols[0]
  56. metrics.append({
  57. 'name': f"覆盖{top_cat['inferred_label']}数",
  58. 'label': f"覆盖{top_cat['inferred_label']}数",
  59. 'column': top_cat['column_name'],
  60. 'aggregation': 'distinct_count',
  61. 'metric_type': 'kpi',
  62. 'unit': '个',
  63. 'selected': True,
  64. 'is_primary': False,
  65. 'sample_value': top_cat.get('unique_count', 0),
  66. })
  67. return metrics
  68. def _recommend_pages(profile: dict, period_type: PeriodType) -> list[dict]:
  69. pages = []
  70. order = 0
  71. pages.append({
  72. 'page_id': 'cover',
  73. 'title': '封面',
  74. 'page_type': 'cover',
  75. 'order': order,
  76. 'selected': True,
  77. 'elements': [],
  78. 'conclusion_title': '',
  79. })
  80. order += 1
  81. num_cols = profile.get('numeric_columns', [])
  82. if period_type in (PeriodType.MONTHLY, PeriodType.QUARTERLY):
  83. pages.append({
  84. 'page_id': 'toc',
  85. 'title': '目录',
  86. 'page_type': 'toc',
  87. 'order': order,
  88. 'selected': True,
  89. 'elements': [],
  90. })
  91. order += 1
  92. pages.append({
  93. 'page_id': 'kpi_overview',
  94. 'title': '核心指标概览',
  95. 'page_type': 'kpi_overview',
  96. 'order': order,
  97. 'selected': True,
  98. 'elements': [{'type': 'kpi_cards', 'count': min(6, len(num_cols))}],
  99. 'conclusion_title': '核心指标概览',
  100. })
  101. order += 1
  102. time_cols = profile.get('time_columns', [])
  103. if time_cols and num_cols:
  104. top_num = num_cols[0]
  105. pages.append({
  106. 'page_id': 'trend',
  107. 'title': f'{top_num["inferred_label"]}趋势',
  108. 'page_type': 'trend',
  109. 'order': order,
  110. 'selected': True,
  111. 'elements': [
  112. {'type': 'line_chart', 'metric': top_num['column_name'],
  113. 'dimension': time_cols[0]['column_name'], 'title': f'{top_num["inferred_label"]}趋势'}
  114. ],
  115. 'conclusion_title': f'{top_num["inferred_label"]}趋势',
  116. })
  117. order += 1
  118. cat_cols = profile.get('category_columns', [])
  119. if cat_cols and num_cols:
  120. top_cat = cat_cols[0]
  121. top_num = num_cols[0]
  122. pages.append({
  123. 'page_id': 'distribution',
  124. 'title': f'{top_cat["inferred_label"]}分布',
  125. 'page_type': 'distribution',
  126. 'order': order,
  127. 'selected': True,
  128. 'elements': [
  129. {'type': 'doughnut_chart', 'metric': top_num['column_name'],
  130. 'dimension': top_cat['column_name'], 'title': f'{top_cat["inferred_label"]}占比'}
  131. ],
  132. 'conclusion_title': f'{top_cat["inferred_label"]}分布',
  133. })
  134. order += 1
  135. if len(cat_cols) >= 2:
  136. cat2 = cat_cols[1] if len(cat_cols) > 1 else cat_cols[0]
  137. pages.append({
  138. 'page_id': 'ranking',
  139. 'title': f'{cat2["inferred_label"]}排行',
  140. 'page_type': 'ranking',
  141. 'order': order,
  142. 'selected': True,
  143. 'elements': [
  144. {'type': 'bar_chart', 'metric': num_cols[0]['column_name'],
  145. 'dimension': cat2['column_name'], 'title': f'{cat2["inferred_label"]}TOP排行'}
  146. ],
  147. 'conclusion_title': f'{cat2["inferred_label"]}TOP排行',
  148. })
  149. order += 1
  150. pages.append({
  151. 'page_id': 'summary',
  152. 'title': '总结与建议',
  153. 'page_type': 'summary',
  154. 'order': order,
  155. 'selected': True,
  156. 'elements': [{'type': 'insight_block', 'title': '总结与建议'}],
  157. 'conclusion_title': '总结与建议',
  158. })
  159. order += 1
  160. pages.append({
  161. 'page_id': 'end',
  162. 'title': '尾页',
  163. 'page_type': 'end',
  164. 'order': order,
  165. 'selected': True,
  166. 'elements': [],
  167. })
  168. return pages
  169. def _suggest_period_and_range(profile: dict) -> dict:
  170. granularity = profile.get('time_granularity', 'monthly')
  171. dr = profile.get('date_range', (None, None))
  172. period_map = {
  173. 'daily': PeriodType.DAILY,
  174. 'weekly': PeriodType.WEEKLY,
  175. 'monthly': PeriodType.MONTHLY,
  176. 'quarterly': PeriodType.QUARTERLY,
  177. 'yearly': PeriodType.MONTHLY,
  178. }
  179. suggested = period_map.get(granularity, PeriodType.MONTHLY)
  180. page_range_map = {
  181. 'daily': (6, 9),
  182. 'weekly': (7, 11),
  183. 'monthly': (8, 14),
  184. 'quarterly': (10, 18),
  185. 'yearly': (12, 20),
  186. }
  187. page_range = page_range_map.get(granularity, (8, 14))
  188. return {
  189. 'suggested_period': suggested.value,
  190. 'suggested_page_range': page_range,
  191. }
  192. def _build_chart_mapping(profile: dict) -> list[dict]:
  193. mapping = []
  194. num_cols = profile.get('numeric_columns', [])
  195. time_cols = profile.get('time_columns', [])
  196. cat_cols = profile.get('category_columns', [])
  197. if time_cols and num_cols:
  198. for nc in num_cols[:3]:
  199. mapping.append({
  200. 'metric': nc['inferred_label'],
  201. 'metric_col': nc['column_name'],
  202. 'dimension': time_cols[0]['column_name'],
  203. 'dimension_label': '时间',
  204. 'chart_type': ChartType.LINE.value,
  205. 'rationale': f'{nc["inferred_label"]}随时间变化趋势',
  206. })
  207. if cat_cols and num_cols:
  208. top_num = num_cols[0]
  209. for cc in cat_cols[:3]:
  210. chart_type = ChartType.DOUGHNUT.value if cc['unique_count'] <= 8 else ChartType.BAR.value
  211. mapping.append({
  212. 'metric': top_num['inferred_label'],
  213. 'metric_col': top_num['column_name'],
  214. 'dimension': cc['column_name'],
  215. 'dimension_label': cc['inferred_label'],
  216. 'chart_type': chart_type,
  217. 'rationale': f'{top_num["inferred_label"]}按{cc["inferred_label"]}的分布',
  218. })
  219. return mapping
  220. def _build_summary(profile: dict) -> str:
  221. lines = []
  222. lines.append(f"数据量: {profile['total_rows']:,} 行 × {profile['total_columns']} 列")
  223. num_cols = profile.get('numeric_columns', [])
  224. cat_cols = profile.get('category_columns', [])
  225. time_cols = profile.get('time_columns', [])
  226. lines.append(f"可计算指标: {len(num_cols)} 个数值列")
  227. lines.append(f"可分析维度: {len(cat_cols)} 个分类列")
  228. if time_cols:
  229. lines.append(f"时间列: {time_cols[0]['column_name']}")
  230. lines.append(f"数据粒度: {profile.get('time_granularity', 'unknown')}")
  231. dr = profile.get('date_range', (None, None))
  232. if dr[0]:
  233. lines.append(f"时间范围: {dr[0]} ~ {dr[1]}")
  234. q = profile.get('data_quality', {})
  235. lines.append(f"质量评分: {q.get('score', 0)}/100")
  236. return '\n'.join(lines)
  237. def _build_analysis_notes(profile: dict) -> list[str]:
  238. notes = []
  239. num_cols = profile.get('numeric_columns', [])
  240. cat_cols = profile.get('category_columns', [])
  241. if not cat_cols:
  242. notes.append('数据中缺少分类维度列,报告将以数值汇总为主,建议补充分类字段以增强分析深度。')
  243. if len(num_cols) >= 4:
  244. names = [c['inferred_label'] for c in num_cols[:4]]
  245. notes.append(f'核心数值指标: {", ".join(names)}')
  246. if len(cat_cols) == 1:
  247. notes.append(f'仅有一个分类维度列 ({cat_cols[0]["inferred_label"]}),报告分析维度较窄。')
  248. elif len(cat_cols) >= 3:
  249. names = [c['inferred_label'] for c in cat_cols[:3]]
  250. notes.append(f'分类维度丰富 ({", ".join(names)}),可支撑多维交叉分析。')
  251. q = profile.get('data_quality', {})
  252. if q.get('score', 100) < 85:
  253. notes.append(f'数据质量评分偏低 ({q["score"]}/100),建议在生成前检查缺失值与异常值。')
  254. return notes
  255. def _infer_unit(col_name: str) -> str:
  256. col_lower = col_name.lower().strip()
  257. unit_map = {
  258. '金额': '元', '销售额': '元', '收入': '元', '利润': '元',
  259. '成本': '元', '费用': '元', '台数': '台', '件数': '件',
  260. '数量': '', '人数': '人', '天数': '天', '占比': '%',
  261. '比率': '%', '比例': '%', '率': '%',
  262. }
  263. for kw, unit in unit_map.items():
  264. if kw in col_lower:
  265. return unit
  266. return ''
  267. def generate_interaction_prompts(recommendations: dict, profile: dict) -> dict:
  268. return {
  269. 'period': {
  270. 'question': '报告周期与页数范围',
  271. 'detail': f"建议周期: {recommendations['suggested_period']}报\n建议页数: {recommendations['suggested_page_range'][0]}-{recommendations['suggested_page_range'][1]} 页\n请确认或调整",
  272. },
  273. 'metrics': {
  274. 'question': '核心指标集',
  275. 'detail': f"检测到 {len(recommendations['suggested_metrics'])} 个可计算指标\n已自动推荐主要的 {min(6, len(recommendations['suggested_metrics']))} 个\n请确认或增删",
  276. },
  277. 'audience': {
  278. 'question': '受众与决策场景',
  279. 'detail': '请选择: 管理层汇报 | 运营分析会 | 对外客户报告 | 自定义描述',
  280. },
  281. 'style': {
  282. 'question': '视觉风格与配色方向',
  283. 'detail': '推荐方案: 商务经典(深蓝) | 清新简约(绿色) | 深色专业 | 温暖品牌\n请选择配色方案',
  284. },
  285. 'pages': {
  286. 'question': '页面结构与模板方案',
  287. 'detail': f'推荐 {len(recommendations["suggested_pages"])} 个页面\n可增删调整页面顺序',
  288. },
  289. }
  290. if __name__ == '__main__':
  291. profile = {
  292. 'total_rows': 3240,
  293. 'total_columns': 15,
  294. 'numeric_columns': [
  295. {'column_name': '销售额', 'inferred_label': '销售额', 'numeric_stats': {'sum': 500000, 'mean': 154}},
  296. {'column_name': '订单量', 'inferred_label': '订单量', 'numeric_stats': {'sum': 3240, 'mean': 1.0}},
  297. {'column_name': '利润', 'inferred_label': '利润', 'numeric_stats': {'sum': 80000, 'mean': 25}},
  298. ],
  299. 'category_columns': [
  300. {'column_name': '区域', 'inferred_label': '区域', 'unique_count': 5},
  301. {'column_name': '产品', 'inferred_label': '产品', 'unique_count': 12},
  302. ],
  303. 'time_columns': [{'column_name': '日期', 'inferred_label': '日期'}],
  304. 'time_granularity': 'monthly',
  305. 'date_range': ('2026-01-01', '2026-04-30'),
  306. 'data_quality': {'score': 92},
  307. }
  308. recs = analyze_and_recommend(profile, PeriodType.MONTHLY)
  309. prompts = generate_interaction_prompts(recs, profile)
  310. for k, v in prompts.items():
  311. print(f"\n{k}: {v['question']}\n{v['detail']}")