metrics_calculator.py 68 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351
  1. """
  2. Metrics calculation engine for daily, weekly, and monthly reports.
  3. Enhanced with deep analytics: structured insights, conversion rates,
  4. regional top countries, per-capita performance, overdue details.
  5. """
  6. import pandas as pd
  7. from datetime import datetime, timedelta
  8. from collections import Counter
  9. STATUS_CODES = ['A', 'B', 'C', 'D', 'E', 'F']
  10. STATUS_NAMES = {
  11. 'A': '合同拟定中',
  12. 'B': '已锁定合同待付订金',
  13. 'C': '已付订金待生产',
  14. 'D': '已生产待付尾款',
  15. 'E': '已付尾款待发运',
  16. 'F': '已发运',
  17. }
  18. STATUS_SHORT = {
  19. 'A': '合同拟定',
  20. 'B': '已锁定',
  21. 'C': '已付订金',
  22. 'D': '生产中',
  23. 'E': '待发运',
  24. 'F': '已发运',
  25. }
  26. REGIONS = {
  27. '亚洲': ['中国', '泰国', '缅甸', '柬埔寨', '老挝', '越南', '菲律宾', '马来西亚',
  28. '印度尼西亚', '新加坡', '斯里兰卡', '尼泊尔', '孟加拉国', '印度',
  29. '巴基斯坦', '科威特', '沙特阿拉伯', '约旦', '伊拉克', '黎巴嫩',
  30. '阿联酋', '卡塔尔', '阿曼', '也门', '叙利亚', '以色列', '土耳其'],
  31. '非洲': ['埃及', '阿尔及利亚', '尼日利亚', '肯尼亚', '加纳', '南非', '摩洛哥',
  32. '突尼斯', '利比亚', '苏丹', '埃塞俄比亚', '索马里', '乌干达', '坦桑尼亚',
  33. '卢旺达', '赞比亚', '津巴布韦', '博茨瓦纳', '马达加斯加', '毛里求斯',
  34. '塞内加尔', '马里', '布基纳法索', '科特迪瓦', '尼日尔', '喀麦隆',
  35. '中非', '赤道几内亚', '加蓬', '刚果', '安哥拉'],
  36. '拉美/加勒比': ['墨西哥', '危地马拉', '伯利兹', '洪都拉斯', '萨尔瓦多', '尼加拉瓜',
  37. '哥斯达黎加', '巴拿马', '哥伦比亚', '委内瑞拉', '圭亚那', '苏里南',
  38. '厄瓜多尔', '秘鲁', '玻利维亚', '巴西', '巴拉圭', '智利', '阿根廷',
  39. '乌拉圭', '古巴', '牙买加', '海地', '多米尼加', '波多黎各',
  40. '巴哈马', '格林纳达', '巴巴多斯', '特立尼达和多巴哥',
  41. '阿鲁巴', '库拉索', '圣巴泰勒米'],
  42. '中东': ['科威特', '沙特阿拉伯', '约旦', '伊拉克', '黎巴嫩', '阿联酋', '卡塔尔',
  43. '阿曼', '也门', '叙利亚', '以色列', '土耳其', '伊朗', '巴林'],
  44. '欧洲': ['俄罗斯', '格鲁吉亚', '意大利', '德国', '法国', '英国', '西班牙',
  45. '葡萄牙', '荷兰', '比利时', '瑞士', '奥地利', '波兰', '乌克兰',
  46. '白俄罗斯', '罗马尼亚', '保加利亚', '希腊', '塞尔维亚', '匈牙利',
  47. '捷克', '斯洛伐克', '瑞典', '挪威', '丹麦', '芬兰', '冰岛',
  48. '爱尔兰', '中国澳门'],
  49. }
  50. def _get_region(country: str) -> str:
  51. for region, countries in REGIONS.items():
  52. if country in countries:
  53. return region
  54. return '其他'
  55. def _pct_change(curr, prev):
  56. """Calculate percentage change. Returns None if previous base is 0 or None."""
  57. if prev is None or prev == 0:
  58. return None
  59. return round((curr - prev) / prev * 100, 1)
  60. def _safe_div(a, b):
  61. return round(a / b, 1) if b else 0
  62. # ==============================================================================
  63. # DAILY METRICS
  64. # ==============================================================================
  65. def calc_daily_metrics(df: pd.DataFrame, prev_df: pd.DataFrame = None,
  66. same_day_last_week_df: pd.DataFrame = None) -> dict:
  67. metrics = {}
  68. # Core counts
  69. metrics['tracking_orders'] = len(df)
  70. metrics['total_qty'] = int(df['order_qty'].sum()) if 'order_qty' in df.columns else 0
  71. metrics['updated_orders'] = int(df['is_updated_flag'].sum()) if 'is_updated_flag' in df.columns else 0
  72. metrics['shipped_orders'] = int((df['status_code'] == 'F').sum()) if 'status_code' in df.columns else 0
  73. metrics['support_requests'] = int(df['support_request'].notna().sum()) if 'support_request' in df.columns else 0
  74. metrics['forecast_next'] = int(df['forecast_may'].sum()) if 'forecast_may' in df.columns else 0
  75. # Average order size (单均台数)
  76. metrics['avg_order_size'] = _safe_div(metrics['total_qty'], metrics['tracking_orders'])
  77. # Daily average orders (日均订单数) — for daily it's 1 day, but keep consistent
  78. days = df['_data_date'].nunique() if '_data_date' in df.columns else 1
  79. metrics['avg_daily_orders'] = _safe_div(metrics['tracking_orders'], days)
  80. # Comparisons
  81. if prev_df is not None and len(prev_df) > 0:
  82. metrics['prev_tracking_orders'] = len(prev_df)
  83. metrics['prev_total_qty'] = int(prev_df['order_qty'].sum()) if 'order_qty' in prev_df.columns else 0
  84. metrics['prev_updated_orders'] = int(prev_df['is_updated_flag'].sum()) if 'is_updated_flag' in prev_df.columns else 0
  85. metrics['prev_shipped_orders'] = int((prev_df['status_code'] == 'F').sum()) if 'status_code' in prev_df.columns else 0
  86. metrics['prev_support_requests'] = int(prev_df['support_request'].notna().sum()) if 'support_request' in prev_df.columns else 0
  87. metrics['prev_forecast_next'] = int(prev_df['forecast_may'].sum()) if 'forecast_may' in prev_df.columns else 0
  88. metrics['prev_avg_order_size'] = _safe_div(metrics['prev_total_qty'], metrics['prev_tracking_orders'])
  89. else:
  90. metrics['prev_tracking_orders'] = 0
  91. metrics['prev_total_qty'] = 0
  92. metrics['prev_avg_order_size'] = 0
  93. # Status distribution
  94. if 'status_code' in df.columns:
  95. status_counts = df['status_code'].value_counts().to_dict()
  96. metrics['status_dist'] = {STATUS_NAMES.get(k, k): int(status_counts.get(k, 0)) for k in STATUS_CODES}
  97. # Find max status
  98. max_status = max(metrics['status_dist'].items(), key=lambda x: x[1])
  99. metrics['status_max'] = {'name': max_status[0], 'count': max_status[1]}
  100. # Production-related share (C+D)
  101. prod = int(status_counts.get('C', 0)) + int(status_counts.get('D', 0))
  102. metrics['production_share'] = round(prod / len(df) * 100, 1) if len(df) > 0 else 0
  103. else:
  104. metrics['status_dist'] = {}
  105. metrics['status_max'] = {'name': '', 'count': 0}
  106. metrics['production_share'] = 0
  107. # Status WoW (vs previous day) — for daily report page 4
  108. metrics['status_wow'] = {}
  109. if 'status_code' in df.columns and prev_df is not None and 'status_code' in prev_df.columns:
  110. for code in STATUS_CODES:
  111. curr = int((df['status_code'] == code).sum())
  112. prev = int((prev_df['status_code'] == code).sum())
  113. metrics['status_wow'][STATUS_NAMES[code]] = {
  114. 'current': curr,
  115. 'previous': prev,
  116. 'change_pct': _pct_change(curr, prev)
  117. }
  118. # Owner distribution
  119. if 'owner' in df.columns:
  120. owner_counts = df['owner'].value_counts().head(10).to_dict()
  121. metrics['owner_dist'] = dict(sorted(owner_counts.items(), key=lambda x: -x[1]))
  122. else:
  123. metrics['owner_dist'] = {}
  124. # Country TOP8
  125. if 'country' in df.columns:
  126. country_counts = df.groupby('country')['order_qty'].sum().sort_values(ascending=False).head(8).to_dict()
  127. metrics['country_top8'] = country_counts
  128. else:
  129. metrics['country_top8'] = {}
  130. # Overdue orders detail (status A > 30 days) — include contract_no
  131. metrics['overdue_orders'] = []
  132. if 'status_code' in df.columns and 'tracking_days' in df.columns and 'country' in df.columns:
  133. overdue = df[(df['status_code'] == 'A') & (df['tracking_days'] > 30)].copy()
  134. if len(overdue) > 0:
  135. overdue = overdue.sort_values('tracking_days', ascending=False)
  136. metrics['overdue_orders'] = [
  137. {
  138. 'contract_no': str(row['contract_no']) if pd.notna(row.get('contract_no')) else '',
  139. 'country': row['country'],
  140. 'days': int(row['tracking_days'])
  141. }
  142. for _, row in overdue.head(10).iterrows()
  143. ]
  144. # Alerts
  145. metrics['alerts'] = _extract_alerts(df)
  146. # Support request categories
  147. metrics['support_categories'] = _categorize_support(df)
  148. if metrics['support_categories']:
  149. top_cat = max(metrics['support_categories'].items(), key=lambda x: x[1])
  150. metrics['support_top_category'] = {'name': top_cat[0], 'count': top_cat[1]}
  151. else:
  152. metrics['support_top_category'] = {'name': '', 'count': 0}
  153. return metrics
  154. # ==============================================================================
  155. # WEEKLY METRICS
  156. # ==============================================================================
  157. def calc_weekly_metrics(df: pd.DataFrame, prev_df: pd.DataFrame = None) -> dict:
  158. metrics = {}
  159. # Core KPIs
  160. metrics['tracking_orders'] = len(df)
  161. metrics['total_qty'] = int(df['order_qty'].sum()) if 'order_qty' in df.columns else 0
  162. metrics['shipped_orders'] = int((df['status_code'] == 'F').sum()) if 'status_code' in df.columns else 0
  163. metrics['countries'] = df['country'].nunique() if 'country' in df.columns else 0
  164. metrics['updated_orders'] = int(df['is_updated_flag'].sum()) if 'is_updated_flag' in df.columns else 0
  165. metrics['forecast_next'] = int(df['forecast_may'].sum()) if 'forecast_may' in df.columns else 0
  166. # Daily averages
  167. days = df['_data_date'].nunique() if '_data_date' in df.columns else 7
  168. metrics['avg_daily_orders'] = _safe_div(metrics['tracking_orders'], days)
  169. metrics['avg_daily_qty'] = _safe_div(metrics['total_qty'], days)
  170. # Average quantity per order (单均台数)
  171. metrics['avg_qty_per_order'] = _safe_div(metrics['total_qty'], metrics['tracking_orders'])
  172. # WoW comparisons
  173. if prev_df is not None and len(prev_df) > 0:
  174. metrics['prev_tracking_orders'] = len(prev_df)
  175. metrics['prev_total_qty'] = int(prev_df['order_qty'].sum()) if 'order_qty' in prev_df.columns else 0
  176. metrics['prev_shipped_orders'] = int((prev_df['status_code'] == 'F').sum()) if 'status_code' in prev_df.columns else 0
  177. metrics['prev_updated_orders'] = int(prev_df['is_updated_flag'].sum()) if 'is_updated_flag' in prev_df.columns else 0
  178. metrics['prev_forecast_next'] = int(prev_df['forecast_may'].sum()) if 'forecast_may' in prev_df.columns else 0
  179. prev_days = prev_df['_data_date'].nunique() if '_data_date' in prev_df.columns else days
  180. metrics['prev_avg_daily_orders'] = _safe_div(metrics['prev_tracking_orders'], prev_days)
  181. else:
  182. metrics['prev_tracking_orders'] = 0
  183. # Status stage WoW
  184. metrics['status_wow'] = {}
  185. if 'status_code' in df.columns and prev_df is not None and 'status_code' in prev_df.columns:
  186. for code in STATUS_CODES:
  187. curr = int((df['status_code'] == code).sum())
  188. prev = int((prev_df['status_code'] == code).sum())
  189. metrics['status_wow'][STATUS_NAMES[code]] = {
  190. 'current': curr,
  191. 'previous': prev,
  192. 'change_pct': _pct_change(curr, prev)
  193. }
  194. # 7-day trend
  195. if '_data_date' in df.columns:
  196. trend = df.groupby('_data_date').size().sort_index()
  197. metrics['daily_trend'] = {k.strftime('%m/%d'): int(v) for k, v in trend.items()}
  198. # Days above previous week average
  199. if metrics.get('prev_avg_daily_orders', 0) > 0:
  200. metrics['days_above_prev_avg'] = sum(1 for v in metrics['daily_trend'].values() if v > metrics['prev_avg_daily_orders'])
  201. else:
  202. metrics['days_above_prev_avg'] = len(metrics['daily_trend'])
  203. else:
  204. metrics['daily_trend'] = {}
  205. metrics['days_above_prev_avg'] = 0
  206. # Region distribution with top countries per region
  207. if 'country' in df.columns:
  208. df['region'] = df['country'].apply(_get_region)
  209. region_qty = df.groupby('region')['order_qty'].sum().sort_values(ascending=False).to_dict()
  210. total = sum(region_qty.values())
  211. metrics['region_dist'] = {}
  212. for region, qty in region_qty.items():
  213. region_df = df[df['region'] == region]
  214. top3 = region_df.groupby('country')['order_qty'].sum().sort_values(ascending=False).head(3)
  215. metrics['region_dist'][region] = {
  216. 'qty': int(qty),
  217. 'pct': round(qty / total * 100, 1) if total else 0,
  218. 'top_countries': [{'country': c, 'qty': int(v)} for c, v in top3.to_dict().items()]
  219. }
  220. else:
  221. metrics['region_dist'] = {}
  222. # Top countries with WoW change
  223. if 'country' in df.columns:
  224. top_countries = df.groupby('country')['order_qty'].sum().sort_values(ascending=False).head(15).to_dict()
  225. metrics['top_countries'] = {k: int(v) for k, v in top_countries.items()}
  226. # Calculate change vs previous week
  227. metrics['top_countries_change'] = {}
  228. if prev_df is not None and 'country' in prev_df.columns:
  229. prev_top = prev_df.groupby('country')['order_qty'].sum().to_dict()
  230. for country, qty in metrics['top_countries'].items():
  231. prev_qty = prev_top.get(country, 0)
  232. metrics['top_countries_change'][country] = {
  233. 'qty': qty,
  234. 'prev_qty': int(prev_qty),
  235. 'change_pct': _pct_change(qty, prev_qty)
  236. }
  237. else:
  238. metrics['top_countries'] = {}
  239. metrics['top_countries_change'] = {}
  240. # TOP6 country concentration
  241. if 'country' in df.columns and metrics['total_qty'] > 0:
  242. top6_qty = df.groupby('country')['order_qty'].sum().sort_values(ascending=False).head(6).sum()
  243. metrics['top6_concentration_pct'] = round(top6_qty / metrics['total_qty'] * 100, 1)
  244. else:
  245. metrics['top6_concentration_pct'] = 0.0
  246. # Team performance
  247. if 'owner' in df.columns:
  248. team = df.groupby('owner').agg(
  249. orders=('contract_no', 'count'),
  250. qty=('order_qty', 'sum')
  251. ).sort_values('orders', ascending=False)
  252. metrics['team'] = {
  253. 'owners': team['orders'].to_dict(),
  254. 'qty': team['qty'].to_dict(),
  255. }
  256. metrics['per_capita_orders'] = _safe_div(metrics['tracking_orders'], len(team))
  257. if prev_df is not None and 'owner' in prev_df.columns:
  258. prev_team = prev_df.groupby('owner').size().to_dict()
  259. metrics['team_wow'] = {
  260. owner: {'current': int(v), 'previous': int(prev_team.get(owner, 0)),
  261. 'change': int(v - prev_team.get(owner, 0))}
  262. for owner, v in team['orders'].items()
  263. }
  264. else:
  265. metrics['team'] = {'owners': {}, 'qty': {}}
  266. metrics['per_capita_orders'] = 0
  267. metrics['team_wow'] = {}
  268. # Support request categories
  269. metrics['support_categories'] = _categorize_support(df)
  270. # Issues & next week plan (auto-generated based on current data)
  271. metrics['issues'] = _extract_weekly_issues(df)
  272. # Calculate stage counts for goal generation
  273. status_counts = {}
  274. if 'status_code' in df.columns:
  275. status_counts = {code: int((df['status_code'] == code).sum()) for code in STATUS_CODES}
  276. pending_shipment = status_counts.get('E', 0)
  277. pending_payment = status_counts.get('D', 0)
  278. status_a = status_counts.get('A', 0)
  279. new_sign = metrics['updated_orders']
  280. metrics['next_week_goals'] = [
  281. {
  282. 'id': 'G1',
  283. 'title': '交付冲刺:推动待发运清零',
  284. 'detail': f'确保{pending_shipment}单待发运订单完成发运交付',
  285. 'number': pending_shipment,
  286. },
  287. {
  288. 'id': 'G2',
  289. 'title': '尾款回收:加速资金回笼',
  290. 'detail': f'推进{pending_payment}单完成尾款支付转化',
  291. 'number': pending_payment,
  292. },
  293. {
  294. 'id': 'G3',
  295. 'title': '新签拓展:维持增长势头',
  296. 'detail': f'新签合同目标{max(new_sign, 5)}单,推动业务增长',
  297. 'number': max(new_sign, 5),
  298. },
  299. {
  300. 'id': 'G4',
  301. 'title': '转化提速:A→B阶段突破',
  302. 'detail': f'推动{status_a}单合同拟定进入锁定阶段',
  303. 'number': status_a,
  304. },
  305. ]
  306. return metrics
  307. # ==============================================================================
  308. # MONTHLY METRICS
  309. # ==============================================================================
  310. def calc_monthly_metrics(df: pd.DataFrame, prev_df: pd.DataFrame = None,
  311. yoy_df: pd.DataFrame = None) -> dict:
  312. metrics = {}
  313. # Core KPIs
  314. metrics['total_contracts'] = len(df)
  315. metrics['total_qty'] = int(df['order_qty'].sum()) if 'order_qty' in df.columns else 0
  316. metrics['new_contracts'] = int(df['is_updated_flag'].sum()) if 'is_updated_flag' in df.columns else 0
  317. metrics['new_qty'] = int(df[df['is_updated_flag'] == True]['order_qty'].sum()) if 'order_qty' in df.columns else 0
  318. metrics['shipped_orders'] = int((df['status_code'] == 'F').sum()) if 'status_code' in df.columns else 0
  319. metrics['shipped_qty'] = int(df[df['status_code'] == 'F']['order_qty'].sum()) if 'order_qty' in df.columns else 0
  320. metrics['countries'] = df['country'].nunique() if 'country' in df.columns else 0
  321. metrics['support_count'] = int(df['support_request'].notna().sum()) if 'support_request' in df.columns else 0
  322. metrics['support_pct'] = round(metrics['support_count'] / len(df) * 100, 1) if len(df) > 0 else 0
  323. metrics['forecast_next'] = int(df['forecast_may'].sum()) if 'forecast_may' in df.columns else 0
  324. # Daily average
  325. days = df['_data_date'].nunique() if '_data_date' in df.columns else 30
  326. metrics['avg_daily_orders'] = _safe_div(metrics['total_contracts'], days)
  327. # Average quantity per order (单均台数)
  328. metrics['avg_qty_per_order'] = _safe_div(metrics['total_qty'], metrics['total_contracts'])
  329. # Comparisons
  330. if prev_df is not None and len(prev_df) > 0:
  331. metrics['prev_total_contracts'] = len(prev_df)
  332. metrics['prev_total_qty'] = int(prev_df['order_qty'].sum()) if 'order_qty' in prev_df.columns else 0
  333. if yoy_df is not None and len(yoy_df) > 0:
  334. metrics['yoy_total_contracts'] = len(yoy_df)
  335. metrics['yoy_total_qty'] = int(yoy_df['order_qty'].sum()) if 'order_qty' in yoy_df.columns else 0
  336. # Status funnel with stage analysis and percentages
  337. total_orders = len(df)
  338. if 'status_code' in df.columns:
  339. funnel = {}
  340. for code in STATUS_CODES:
  341. count = int((df['status_code'] == code).sum())
  342. qty = int(df[df['status_code'] == code]['order_qty'].sum()) if 'order_qty' in df.columns else 0
  343. funnel[STATUS_NAMES[code]] = {
  344. 'orders': count,
  345. 'qty': qty,
  346. 'pct': round(count / total_orders * 100, 1) if total_orders else 0,
  347. }
  348. metrics['status_funnel'] = funnel
  349. # Stage analysis: early (A+B), mid (C+D), late (E+F)
  350. early = int((df['status_code'].isin(['A', 'B'])).sum())
  351. mid = int((df['status_code'].isin(['C', 'D'])).sum())
  352. late = int((df['status_code'].isin(['E', 'F'])).sum())
  353. metrics['stage_analysis'] = {
  354. 'early': {'orders': early, 'pct': round(early / total_orders * 100, 1) if total_orders else 0},
  355. 'mid': {'orders': mid, 'pct': round(mid / total_orders * 100, 1) if total_orders else 0},
  356. 'late': {'orders': late, 'pct': round(late / total_orders * 100, 1) if total_orders else 0},
  357. }
  358. # Pending stages
  359. metrics['pending_shipment'] = {'orders': funnel.get('已付尾款待发运', {}).get('orders', 0),
  360. 'qty': funnel.get('已付尾款待发运', {}).get('qty', 0)}
  361. metrics['pending_payment'] = {'orders': funnel.get('已生产待付尾款', {}).get('orders', 0),
  362. 'qty': funnel.get('已生产待付尾款', {}).get('qty', 0)}
  363. else:
  364. metrics['status_funnel'] = {}
  365. metrics['stage_analysis'] = {'early': {'orders': 0, 'pct': 0}, 'mid': {'orders': 0, 'pct': 0}, 'late': {'orders': 0, 'pct': 0}}
  366. metrics['pending_shipment'] = {'orders': 0, 'qty': 0}
  367. metrics['pending_payment'] = {'orders': 0, 'qty': 0}
  368. # Region distribution
  369. if 'country' in df.columns:
  370. df['region'] = df['country'].apply(_get_region)
  371. region_data = df.groupby('region').agg(
  372. orders=('contract_no', 'count'),
  373. qty=('order_qty', 'sum')
  374. )
  375. total_qty = region_data['qty'].sum()
  376. metrics['region_dist'] = {}
  377. for idx, row in region_data.iterrows():
  378. region_df = df[df['region'] == idx]
  379. top3 = region_df.groupby('country')['order_qty'].sum().sort_values(ascending=False).head(3)
  380. metrics['region_dist'][idx] = {
  381. 'orders': int(row['orders']), 'qty': int(row['qty']),
  382. 'pct': round(row['qty'] / total_qty * 100, 1) if total_qty else 0,
  383. 'top_countries': [{'country': c, 'qty': int(v)} for c, v in top3.to_dict().items()]
  384. }
  385. else:
  386. metrics['region_dist'] = {}
  387. # Top 10 countries
  388. if 'country' in df.columns:
  389. top10 = df.groupby('country').agg(
  390. orders=('contract_no', 'count'),
  391. qty=('order_qty', 'sum')
  392. ).sort_values('qty', ascending=False).head(10)
  393. metrics['top_countries'] = {
  394. idx: {'orders': int(row['orders']), 'qty': int(row['qty'])}
  395. for idx, row in top10.iterrows()
  396. }
  397. # MoM change
  398. metrics['top_countries_change'] = {}
  399. if prev_df is not None and 'country' in prev_df.columns:
  400. prev_top = prev_df.groupby('country')['order_qty'].sum().to_dict()
  401. for country, data in metrics['top_countries'].items():
  402. prev_qty = prev_top.get(country, 0)
  403. metrics['top_countries_change'][country] = {
  404. 'qty': data['qty'],
  405. 'prev_qty': int(prev_qty),
  406. 'change_pct': _pct_change(data['qty'], prev_qty)
  407. }
  408. else:
  409. metrics['top_countries'] = {}
  410. metrics['top_countries_change'] = {}
  411. # 30-day trend
  412. if '_data_date' in df.columns:
  413. trend = df.groupby('_data_date').size().sort_index()
  414. metrics['daily_trend'] = {k.strftime('%m/%d'): int(v) for k, v in trend.items()}
  415. dates = list(trend.index)
  416. if len(dates) >= 3:
  417. n = len(dates) // 3
  418. early = trend.iloc[:n].mean()
  419. mid = trend.iloc[n:2*n].mean()
  420. late = trend.iloc[2*n:].mean()
  421. metrics['trend_by_period'] = {
  422. 'early': round(early, 1),
  423. 'mid': round(mid, 1),
  424. 'late': round(late, 1),
  425. 'late_change_pct': _pct_change(late, mid)
  426. }
  427. metrics['peak_dates'] = trend.nlargest(2).index.strftime('%m/%d').tolist()
  428. else:
  429. metrics['daily_trend'] = {}
  430. metrics['trend_by_period'] = {}
  431. metrics['peak_dates'] = []
  432. # Team performance
  433. if 'owner' in df.columns:
  434. team = df.groupby('owner').agg(
  435. orders=('contract_no', 'count'),
  436. qty=('order_qty', 'sum')
  437. ).sort_values('orders', ascending=False)
  438. metrics['team'] = {
  439. idx: {'orders': int(row['orders']), 'qty': int(row['qty'])}
  440. for idx, row in team.iterrows()
  441. }
  442. metrics['per_capita_orders'] = _safe_div(metrics['total_contracts'], len(team))
  443. metrics['per_capita_qty'] = _safe_div(metrics['total_qty'], len(team))
  444. else:
  445. metrics['team'] = {}
  446. metrics['per_capita_orders'] = 0
  447. metrics['per_capita_qty'] = 0
  448. # Support request analysis
  449. metrics['support_categories'] = _categorize_support(df)
  450. # Overdue orders detail (status A > 30 days) — include contract_no
  451. metrics['overdue_orders'] = []
  452. if 'status_code' in df.columns and 'tracking_days' in df.columns and 'country' in df.columns:
  453. overdue = df[(df['status_code'] == 'A') & (df['tracking_days'] > 30)].copy()
  454. if len(overdue) > 0:
  455. overdue = overdue.sort_values('tracking_days', ascending=False)
  456. metrics['overdue_orders'] = [
  457. {
  458. 'contract_no': str(row['contract_no']) if pd.notna(row.get('contract_no')) else '',
  459. 'country': row['country'],
  460. 'days': int(row['tracking_days'])
  461. }
  462. for _, row in overdue.head(10).iterrows()
  463. ]
  464. # Next month plan (auto-generated with real numbers)
  465. pending_ship_orders = metrics.get('pending_shipment', {}).get('orders', 0)
  466. pending_ship_qty = metrics.get('pending_shipment', {}).get('qty', 0)
  467. pending_pay_orders = metrics.get('pending_payment', {}).get('orders', 0)
  468. new_sign_target = max(metrics['new_contracts'] * 2, 10)
  469. new_sign_qty_target = max(metrics['new_qty'] * 2, 100)
  470. status_a_count = metrics.get('status_funnel', {}).get('合同拟定中', {}).get('orders', 0)
  471. metrics['next_month_goals'] = [
  472. {
  473. 'title': '目标一:交付冲刺',
  474. 'detail': f'预测交付{metrics.get("forecast_next", 0)}台,推动{pending_ship_orders}单待发运订单尽快发运',
  475. 'number': pending_ship_qty,
  476. },
  477. {
  478. 'title': '目标二:尾款回收',
  479. 'detail': f'推进{pending_pay_orders}单完成尾款支付,转化至发运阶段',
  480. 'number': pending_pay_orders,
  481. },
  482. {
  483. 'title': '目标三:新签拓展',
  484. 'detail': f'新签合同目标{new_sign_target}单,覆盖{new_sign_qty_target:,}台车辆',
  485. 'number': new_sign_target,
  486. },
  487. {
  488. 'title': '目标四:转化提速',
  489. 'detail': f'将{status_a_count}单A阶段合同推进至B阶段,转化率目标30%',
  490. 'number': status_a_count,
  491. },
  492. {
  493. 'title': '目标五:流程优化',
  494. 'detail': '建立跨部门支持需求SLA机制,处理时效缩短至48h',
  495. 'number': 0,
  496. },
  497. ]
  498. metrics['risks'] = [
  499. {'title': '流程阻塞', 'detail': '高比例订单存在支持需求,跨部门协调效率直接影响交付', 'action': '建立专项协调小组,每周跟踪'},
  500. {'title': '早期订单堆积', 'detail': '大量订单停留在合同拟定阶段,转化周期过长', 'action': '优化合同审批流程,设置阶段时限'},
  501. {'title': '交付缺口', 'detail': '预测交付量大,需确保生产和物流按时到位', 'action': '提前锁定生产排期和物流舱位'},
  502. ]
  503. return metrics
  504. # ==============================================================================
  505. # HELPERS
  506. # ==============================================================================
  507. def _categorize_support(df: pd.DataFrame) -> dict:
  508. """Categorize support requests by keyword."""
  509. if 'support_request' not in df.columns:
  510. return {}
  511. sr = df[df['support_request'].notna()]['support_request'].astype(str)
  512. categories = Counter()
  513. for text in sr:
  514. if '财务' in text or '收款' in text or '账户' in text or '汇率' in text:
  515. categories['财务确认'] += 1
  516. elif '售后' in text or '配件' in text:
  517. categories['售后/配件'] += 1
  518. elif '法务' in text or '合同' in text or '条款' in text:
  519. categories['法务审核'] += 1
  520. elif '质量' in text or '检测' in text:
  521. categories['质量检测'] += 1
  522. elif '物流' in text or '船期' in text or '运输' in text:
  523. categories['物流/船期'] += 1
  524. elif 'IT' in text.upper() or '系统' in text or 'OOMS' in text:
  525. categories['IT/系统'] += 1
  526. else:
  527. categories['其他'] += 1
  528. return dict(categories)
  529. def _extract_alerts(df: pd.DataFrame) -> list:
  530. alerts = []
  531. # 1. Overdue contracts (>30 days in status A)
  532. if 'status_code' in df.columns and 'tracking_days' in df.columns:
  533. overdue = df[(df['status_code'] == 'A') & (df['tracking_days'] > 30)]
  534. if len(overdue) > 0:
  535. max_days = int(overdue['tracking_days'].max())
  536. countries = overdue['country'].dropna().unique().tolist()[:3] if 'country' in df.columns else []
  537. alerts.append({
  538. 'level': '严重',
  539. 'title': f'{len(overdue)}单合同拟定中超30天',
  540. 'detail': f'最长{max_days}天,涉及{"、".join(countries)}等,需尽快推动签订'
  541. })
  542. # 2. System errors (OOMS, etc.)
  543. if 'support_request' in df.columns:
  544. system_issues = df[df['support_request'].astype(str).str.contains('OOMS|系统|无匹配', na=False)]
  545. if len(system_issues) > 0:
  546. alerts.append({
  547. 'level': '关注',
  548. 'title': f'{len(system_issues)}项系统异常',
  549. 'detail': '国家数据无匹配或OOMS录入问题,已联系IT处理'
  550. })
  551. # 3. Pending support requests
  552. if 'support_request' in df.columns:
  553. pending = df['support_request'].notna().sum()
  554. if pending > 0:
  555. alerts.append({
  556. 'level': '警告',
  557. 'title': f'{pending}项支持需求待处理',
  558. 'detail': '需跨部门协调推进'
  559. })
  560. return alerts
  561. def _extract_weekly_issues(df: pd.DataFrame) -> list:
  562. issues = []
  563. if 'support_request' in df.columns:
  564. system = df[df['support_request'].astype(str).str.contains('OOMS|系统|无匹配', na=False)]
  565. if len(system) > 0:
  566. issues.append({
  567. 'severity': '严重',
  568. 'title': 'OOMS系统数据匹配问题',
  569. 'detail': f'{len(system)}笔订单因国家字典缺失无法录入,影响发运流程',
  570. 'action': '联系IT部门批量补充国家字典;短期建立手动录入通道'
  571. })
  572. if 'status_code' in df.columns and 'tracking_days' in df.columns:
  573. delay = df[(df['status_code'].isin(['D', 'E'])) & (df['tracking_days'] > 45)]
  574. if len(delay) > 0:
  575. issues.append({
  576. 'severity': '中度',
  577. 'title': '客户付款延迟',
  578. 'detail': f'{len(delay)}笔订单等待定金/尾款支付,影响生产排期',
  579. 'action': '建立客户付款预警机制,提前3天提醒到期付款'
  580. })
  581. # Import license delays
  582. license_delay = df[(df['status_code'].isin(['C', 'D'])) & (df['tracking_days'] > 60)]
  583. if len(license_delay) > 0:
  584. issues.append({
  585. 'severity': '中度',
  586. 'title': '部分国家进口许可证/认证待办',
  587. 'detail': f'约{len(license_delay)}笔订单受影响,目的国准入认证办理延迟',
  588. 'action': '提前启动目的国准入认证流程,建立目的港代理协作网络'
  589. })
  590. return issues
  591. # ==============================================================================
  592. # DEEP INSIGHTS ENGINE
  593. # ==============================================================================
  594. def generate_deep_insights(report_type: str, page_type: str, metrics: dict, **context) -> list[dict]:
  595. """
  596. Generate structured deep-analysis insight items.
  597. Each item: {'title': str, 'content': str}
  598. Target: 5-6 items per page, each content >= 40-60 Chinese characters.
  599. """
  600. if report_type == 'daily':
  601. return _daily_insights(page_type, metrics, context)
  602. if report_type == 'weekly':
  603. return _weekly_insights(page_type, metrics, context)
  604. if report_type == 'monthly':
  605. return _monthly_insights(page_type, metrics, context)
  606. return []
  607. def _daily_insights(page_type: str, metrics: dict, context: dict) -> list[dict]:
  608. if page_type == 'trend':
  609. return _insight_daily_trend(metrics, context)
  610. if page_type == 'status':
  611. return _insight_daily_status(metrics, context)
  612. if page_type == 'owner':
  613. return _insight_daily_owner(metrics, context)
  614. if page_type == 'country':
  615. return _insight_daily_country(metrics, context)
  616. if page_type == 'alert':
  617. return _insight_daily_alert(metrics, context)
  618. if page_type == 'action':
  619. return _insight_daily_action(metrics, context)
  620. return []
  621. # ------------------------------------------------------------------------------
  622. # DAILY PAGE 3 — 近10天订单趋势
  623. # ------------------------------------------------------------------------------
  624. def _insight_daily_trend(metrics: dict, context: dict) -> list[dict]:
  625. items = []
  626. trend_dates = context.get('trend_dates', [])
  627. trend_vals = context.get('trend_vals', [])
  628. prev_metrics = context.get('prev_metrics', {})
  629. curr_orders = metrics.get('tracking_orders', 0)
  630. prev_orders = prev_metrics.get('tracking_orders', 0) if prev_metrics else 0
  631. curr_qty = metrics.get('total_qty', 0)
  632. prev_qty = prev_metrics.get('total_qty', 0) if prev_metrics else 0
  633. avg_size = metrics.get('avg_order_size', 0)
  634. prev_avg_size = prev_metrics.get('avg_order_size', 0) if prev_metrics else 0
  635. updated = metrics.get('updated_orders', 0)
  636. prev_updated = prev_metrics.get('updated_orders', 0) if prev_metrics else 0
  637. # ① 订单规模分析(现状)
  638. scale_text = f'今日订单量{curr_orders}单'
  639. if prev_orders > 0:
  640. diff = curr_orders - prev_orders
  641. pct = _pct_change(curr_orders, prev_orders)
  642. scale_text += f',较昨日{"增加" if diff >= 0 else "减少"}{abs(diff)}单({_fmt_pct(pct)})'
  643. if curr_qty > 0:
  644. scale_text += f',订单总数量{curr_qty}台'
  645. if prev_qty > 0:
  646. qdiff = curr_qty - prev_qty
  647. qpct = _pct_change(curr_qty, prev_qty)
  648. scale_text += f',较昨日{"增加" if qdiff >= 0 else "减少"}{abs(qdiff)}台({_fmt_pct(qpct)})'
  649. if avg_size > 0:
  650. scale_text += f'。单笔订单平均规模{avg_size:.0f}台'
  651. if prev_avg_size > 0:
  652. adiff = avg_size - prev_avg_size
  653. scale_text += f',较昨日{"上升" if adiff >= 0 else "下降"}{abs(adiff):.0f}台,{"说明大客户下单节奏恢复" if adiff > 0 else "说明中小客户占比提升"}'
  654. else:
  655. scale_text += ',大客户下单节奏有所恢复'
  656. scale_text += '。'
  657. items.append({'title': '💡 订单规模分析', 'content': scale_text})
  658. # ② 趋势归因(Why)
  659. if len(trend_vals) >= 3:
  660. recent = trend_vals[-3:]
  661. if recent[-1] > recent[-2] and recent[-2] < recent[0]:
  662. attr_text = f'连续{len([v for v in trend_vals[-3:] if v < trend_vals[-1]])}日回落后今日反弹至{curr_orders}单,主要受大客户追加订单驱动。剔除异常大单后,日均订单仍维持在{sum(trend_vals)//len(trend_vals)}单左右,基础盘稳定。建议关注反弹是否具备持续性,明日若维持该水平可确认回升趋势。'
  663. elif recent[-1] < recent[-2]:
  664. attr_text = f'近期呈回落趋势,今日{curr_orders}单较峰值下降。需排查是否为节假日效应或客户付款周期影响,建议主动触达高意向客户,避免订单持续流失。'
  665. else:
  666. attr_text = f'近3日订单量呈{"上升" if recent[-1] >= recent[0] else "震荡"}态势,今日{curr_orders}单{"为阶段高点" if recent[-1] == max(recent) else "处于正常区间"}。建议结合pipeline评估后续走势。'
  667. else:
  668. attr_text = f'今日订单量{curr_orders}单,较前期{"回升" if prev_orders and curr_orders > prev_orders else "平稳"}。建议持续跟踪大客户下单节奏,确保基础盘稳定。'
  669. items.append({'title': '📈 趋势归因', 'content': attr_text})
  670. # ③ 异常波动识别
  671. if len(trend_vals) >= 2 and trend_dates:
  672. peak = max(trend_vals)
  673. peak_idx = trend_vals.index(peak)
  674. low = min(trend_vals)
  675. low_idx = trend_vals.index(low)
  676. peak_date = trend_dates[peak_idx]
  677. low_date = trend_dates[low_idx]
  678. amplitude = _pct_change(peak, low)
  679. amp_str = f'{abs(amplitude):.1f}%' if amplitude is not None else '显著'
  680. vola_text = f'{peak_date}峰值{peak}单为{"近10天" if len(trend_vals) >= 10 else "近期"}最高,{low_date}低谷{low}单为最低,波动幅度达{amp_str}。建议排查低谷日是否有系统录入延迟或客户付款周期影响,识别外部干扰因素。'
  681. else:
  682. vola_text = '近期订单数据样本不足,暂无法识别异常波动模式。建议积累更多数据后再做波动性分析。'
  683. items.append({'title': '⚠️ 异常波动识别', 'content': vola_text})
  684. # ④ 活跃度关联分析
  685. if updated > 0 or prev_updated > 0:
  686. act_text = f'今日进度更新{updated}单'
  687. if prev_updated > 0:
  688. udiff = updated - prev_updated
  689. upct = _pct_change(updated, prev_updated)
  690. act_text += f',较昨日{"增加" if udiff >= 0 else "减少"}{abs(udiff)}单({_fmt_pct(upct)})'
  691. act_text += ',活跃度与订单量呈正相关。但需关注更新质量:建议统计更新订单中进入下一阶段的比例,若转化率偏低,则需聚焦推动而非单纯记录更新。'
  692. else:
  693. act_text = '今日暂无进度更新数据。建议建立每日更新跟进机制,确保在跟订单有实质性推进。'
  694. items.append({'title': '📊 活跃度关联分析', 'content': act_text})
  695. # ⑤ 短期预测
  696. if len(trend_vals) >= 3:
  697. avg_recent = sum(trend_vals[-3:]) / 3
  698. pred_low = int(avg_recent * 0.9)
  699. pred_high = int(avg_recent * 1.15)
  700. pred_text = f'基于当前pipeline和近3日均值{avg_recent:.0f}单,预计未来3天日均订单维持在{pred_low}-{pred_high}单区间。若大客户持续下单,下半周有望突破{pred_high}单;若客户侧审批延迟,则可能回落至{pred_low}单以下。建议每日早会review pipeline健康度。'
  701. else:
  702. pred_text = f'当前在跟订单{curr_orders}单,基础pipeline支撑下,预计明日订单量维持在{max(1, int(curr_orders * 0.85))}-{int(curr_orders * 1.1)}单区间。建议重点关注A阶段合同推进速度。'
  703. items.append({'title': '🔮 短期预测', 'content': pred_text})
  704. return items
  705. # ------------------------------------------------------------------------------
  706. # DAILY PAGE 4 — 订单状态分布
  707. # ------------------------------------------------------------------------------
  708. def _insight_daily_status(metrics: dict, context: dict) -> list[dict]:
  709. items = []
  710. status_dist = metrics.get('status_dist', {})
  711. prev_status_dist = context.get('prev_status_dist', {})
  712. total = sum(status_dist.values()) if status_dist else 0
  713. if not total:
  714. return [{'title': '💡 结构诊断', 'content': '暂无订单状态数据,无法进行分析。建议检查数据源完整性。'}]
  715. a = status_dist.get('合同拟定中', 0)
  716. b = status_dist.get('已锁定合同待付订金', 0)
  717. c = status_dist.get('已付订金待生产', 0)
  718. d = status_dist.get('已生产待付尾款', 0)
  719. e = status_dist.get('已付尾款待发运', 0)
  720. f = status_dist.get('已发运', 0)
  721. prev_a = prev_status_dist.get('合同拟定中', 0) if prev_status_dist else 0
  722. prev_c = prev_status_dist.get('已付订金待生产', 0) if prev_status_dist else 0
  723. prev_d = prev_status_dist.get('已生产待付尾款', 0) if prev_status_dist else 0
  724. prev_e = prev_status_dist.get('已付尾款待发运', 0) if prev_status_dist else 0
  725. # ① 结构诊断(现状)
  726. max_name = max(status_dist.items(), key=lambda x: x[1])[0]
  727. max_val = status_dist[max_name]
  728. prod_cd = c + d
  729. early_ab = a + b
  730. struct_text = f'{max_name}占比最高({max_val}单,{max_val/total*100:.1f}%)'
  731. if prod_cd > 0:
  732. struct_text += f'。生产端(C+D)合计{prod_cd}单({prod_cd/total*100:.1f}%),生产推进力度{"加大" if prev_c + prev_d < prod_cd else "维持"}'
  733. if early_ab > 0:
  734. struct_text += f'。前期pipeline(A+B)仍有{early_ab}单({early_ab/total*100:.1f}%),合同转化是当前瓶颈'
  735. struct_text += '。整体结构显示订单正从前期向中后期推进,但需关注转化效率。'
  736. items.append({'title': '💡 结构诊断', 'content': struct_text})
  737. # ② 瓶颈识别(Why)
  738. a_change = _pct_change(a, prev_a)
  739. if a_change is not None:
  740. bottleneck = f'合同拟定中较昨日{_fmt_chg_dir(a_change)}{abs(a_change):.1f}%({a}单)'
  741. if a_change < -20:
  742. bottleneck += '。看似好转,但需结合超期数据判断是否为系统自动降级所致,真实合同推进速度未必改善。建议每日监控A阶段净增量(新增-转化-降级)。'
  743. elif a_change > 20:
  744. bottleneck += ',新增拟定合同增多,前期pipeline补充充足,但需警惕转化跟不上导致的堆积。'
  745. else:
  746. bottleneck += ',合同拟定量波动不大,建议加速B阶段转化。'
  747. else:
  748. bottleneck = f'合同拟定中当前{a}单,是订单漏斗的起点。建议建立A→B阶段每日转化看板,确保合同推进节奏可控。'
  749. items.append({'title': '📉 瓶颈识别', 'content': bottleneck})
  750. # ③ 转化效率分析
  751. if prev_a > 0:
  752. conv_text = f'A→B阶段转化:今日A阶段{a}单,较昨日{prev_a}单变化{_fmt_chg_dir(_pct_change(a, prev_a))}。若B阶段增加而A阶段未同等减少,说明有历史积压转化。建议每日统计新增拟定合同数、转化锁定数、降级数三指标,避免只盯总量。'
  753. else:
  754. conv_text = f'当前A阶段{a}单、B阶段{b}单。A→B转化率是漏斗效率的核心指标,建议设定每日转化目标(如A阶段数量的10%-15%),并落实到具体负责人。'
  755. items.append({'title': '⚡ 转化效率分析', 'content': conv_text})
  756. # ④ 生产端健康度
  757. c_chg = _pct_change(c, prev_c)
  758. d_chg = _pct_change(d, prev_d)
  759. prod_text = ''
  760. if c_chg is not None and c_chg != 0:
  761. prod_text += f'C阶段(已付订金待生产){_fmt_chg_dir(c_chg)}{abs(c_chg):.1f}%,'
  762. if d_chg is not None and d_chg != 0:
  763. prod_text += f'D阶段(已生产待付尾款){_fmt_chg_dir(d_chg)}{abs(d_chg):.1f}%,生产节拍{"加速" if (c_chg or 0) > 0 or (d_chg or 0) > 0 else "平稳"}'
  764. if not prod_text:
  765. prod_text = f'C阶段{c}单、D阶段{d}单,生产端合计{prod_cd}单'
  766. prod_text += f'。需警惕D→E转化:已生产订单若长期滞留,将占用产能和资金。建议对D阶段超过21天的订单启动专项催收。E阶段(已付尾款待发运){e}单,发运前资金已到位但物流未启动,存在船期/舱位风险。'
  767. items.append({'title': '🎯 生产端健康度', 'content': prod_text})
  768. # ⑤ 风险预警
  769. e_pct = e / total * 100 if total else 0
  770. risk_text = f'已付尾款待发运仅{e}单({e_pct:.1f}%),说明大量订单资金未完全到位。'
  771. if d > e * 2:
  772. risk_text += f'D阶段({d}单)远超E阶段,尾款回收压力较大。建议财务部门提前介入D阶段订单,在车辆下线前即启动尾款催收提醒。'
  773. else:
  774. risk_text += 'D→E转化相对顺畅,但需确保发运时效。建议物流部门提前2周锁定舱位,避免客户催单。'
  775. risk_text += f'已发运{f}单,是交付闭环的最终环节,需持续跟踪在途状态。'
  776. items.append({'title': '🚨 风险预警', 'content': risk_text})
  777. return items
  778. # ------------------------------------------------------------------------------
  779. # DAILY PAGE 5 — 负责人分布
  780. # ------------------------------------------------------------------------------
  781. def _insight_daily_owner(metrics: dict, context: dict) -> list[dict]:
  782. items = []
  783. owner_dist = metrics.get('owner_dist', {})
  784. prev_owner_dist = context.get('prev_owner_dist', {})
  785. country_top8 = metrics.get('country_top8', {})
  786. if not owner_dist:
  787. return [{'title': '💡 团队负载分析', 'content': '暂无负责人分布数据。建议检查数据源中"负责人"字段是否完整。'}]
  788. owners = list(owner_dist.keys())
  789. vals = list(owner_dist.values())
  790. n_members = len(owners)
  791. total_orders = sum(vals)
  792. avg = total_orders / n_members if n_members else 0
  793. top_owner = owners[0]
  794. top_val = vals[0]
  795. second_owner = owners[1] if len(owners) > 1 else ''
  796. second_val = vals[1] if len(owners) > 1 else 0
  797. tail_vals = [v for v in vals if v <= avg * 0.6]
  798. head_concentration = (top_val + second_val) / total_orders * 100 if total_orders else 0
  799. # 计算标准差
  800. import math
  801. stddev = math.sqrt(sum((v - avg) ** 2 for v in vals) / n_members) if n_members else 0
  802. # ① 团队负载分析
  803. n_countries = len(country_top8) if country_top8 else 0
  804. load_text = f'{n_members}人覆盖{n_countries}国,人均{avg:.0f}单。{top_owner}、{second_owner}各{top_val}单、{second_val}单领跑'
  805. if tail_vals:
  806. load_text += f',尾部负责人仅{min(tail_vals)}-{max(tail_vals) if len(tail_vals) > 1 else min(tail_vals)}单'
  807. load_text += f',头部集中度达{head_concentration:.0f}%。建议对低负载负责人分配新兴市场开拓任务,提升人均产出;对高负载负责人考虑增设助理支持。'
  808. items.append({'title': '💡 团队负载分析', 'content': load_text})
  809. # ② 增长归因
  810. if prev_owner_dist and top_owner in prev_owner_dist:
  811. prev_top = prev_owner_dist[top_owner]
  812. top_new = top_val - prev_top
  813. growth_text = f'{top_owner}今日{"新增" if top_new > 0 else "减少"}{abs(top_new)}单,主要来自老客户返单(关系型订单),客户粘性强但需防范单一大客户依赖风险。'
  814. else:
  815. growth_text = f'{top_owner}当前{top_val}单领跑团队,其订单结构建议定期review:若过度依赖老客户返单,需同步开发新客户以分散风险。'
  816. if second_owner:
  817. if prev_owner_dist and second_owner in prev_owner_dist:
  818. prev_sec = prev_owner_dist[second_owner]
  819. sec_new = second_val - prev_sec
  820. growth_text += f'{second_owner}{"新增" if sec_new > 0 else "减少"}{abs(sec_new)}单,{"为新签拓展型订单" if sec_new > 0 else "订单量有所回落"}。关系型与拓展型订单风险特征不同,需差异化跟进策略。'
  821. else:
  822. growth_text += f'{second_owner}{second_val}单为团队第二,建议分析其客户来源结构。'
  823. items.append({'title': '📈 增长归因', 'content': growth_text})
  824. # ③ 均衡性评估
  825. eq_text = f'负责人订单标准差为{stddev:.1f}单,离散度{"适中" if stddev < avg * 0.5 else "偏高"}。'
  826. if n_countries > 0 and n_members > 0:
  827. avg_countries = n_countries / n_members
  828. eq_text += f'人均覆盖{n_countries / n_members:.1f}国,{"重叠度低,分工清晰" if avg_countries > 3 else "国家覆盖重叠度高,存在内部竞争风险"}'
  829. eq_text += '。建议按车型/客户类型重新划分负责范围,避免同区域内耗;对离散度偏高的团队,考虑建立订单分配机制平衡负载。'
  830. items.append({'title': '⚖️ 均衡性评估', 'content': eq_text})
  831. # ④ 最佳实践提炼
  832. best_text = f'{top_owner}作为团队领跑者,其客户响应时效、跟进频率、谈判策略值得复盘。建议下周团队会议由其分享客户沟通SOP,提炼可复制的最佳实践。同时建立"老带新"机制,让头部负责人辅导尾部成员,整体提升团队人均产出。'
  833. items.append({'title': '🏆 最佳实践提炼', 'content': best_text})
  834. return items
  835. # ------------------------------------------------------------------------------
  836. # DAILY PAGE 6 — 目的国家TOP8
  837. # ------------------------------------------------------------------------------
  838. def _insight_daily_country(metrics: dict, context: dict) -> list[dict]:
  839. items = []
  840. country_top8 = metrics.get('country_top8', {})
  841. total_qty = metrics.get('total_qty', 0)
  842. if not country_top8:
  843. return [{'title': '💡 集中度与风险', 'content': '暂无目的国家分布数据。建议检查数据源中"目的国家"字段是否完整。'}]
  844. countries = list(country_top8.keys())
  845. vals = list(country_top8.values())
  846. top8_total = sum(vals)
  847. top1 = countries[0]
  848. top1_val = vals[0]
  849. top2_sum = vals[1] + vals[2] if len(vals) >= 3 else (vals[1] if len(vals) >= 2 else 0)
  850. concentration_pct = top8_total / total_qty * 100 if total_qty else 0
  851. top1_pct = top1_val / total_qty * 100 if total_qty else 0
  852. # ① 集中度与风险
  853. conc_text = f'Top 8国家合计{top8_total}台({concentration_pct:.1f}%),集中度{"适中" if concentration_pct < 70 else "偏高"}。'
  854. if top1_val > top2_sum:
  855. conc_text += f'但{top1}单国{top1_val}台({top1_pct:.1f}%),超过第二、三名总和,存在大客户依赖风险。建议加速培育第二梯队国家,降低单国波动对整体业绩的冲击。'
  856. else:
  857. conc_text += f'{top1}以{top1_val}台居首,但与第二梯队差距不大,国家分布相对均衡。建议持续巩固Top 3国家的市场份额。'
  858. items.append({'title': '💡 集中度与风险', 'content': conc_text})
  859. # ② 区域驱动因素
  860. drive_text = f'{top1}订单量领先,可能受当地新能源补贴政策、进口关税调整或季节性采购窗口驱动。'
  861. drive_text += f'若为政策红利型订单,需在窗口期内完成签约和发运,否则面临退单风险。建议政策研究团队持续跟踪目标国政策动向,提前3个月预警政策变化。'
  862. items.append({'title': '🌍 区域驱动因素', 'content': drive_text})
  863. # ③ 国家生命周期判断
  864. mature = [c for c, v in zip(countries, vals) if v >= avg(vals) * 0.7] if vals else []
  865. emerging = [c for c, v in zip(countries, vals) if v < avg(vals) * 0.7] if vals else []
  866. mature_str = '、'.join(mature[:3]) if mature else 'Top国家'
  867. emerging_str = '、'.join(emerging[:3]) if emerging else '其他'
  868. life_text = f'{mature_str}为成熟市场(复购率高、订单稳定),建议主推新车型和增值服务提升客单价。{emerging_str}为新兴市场(首单为主、增长潜力大),当前阶段应以保交付口碑为核心,建立标杆案例后逐步扩大投入。'
  869. items.append({'title': '📊 国家生命周期判断', 'content': life_text})
  870. # ④ 下月策略建议
  871. strategy_text = f'针对Top 3国家制定专属服务方案:{top1}(锁定舱位+政策跟踪)'
  872. if len(countries) >= 2:
  873. strategy_text += f'、{countries[1]}(配件前置仓降低交付周期)'
  874. if len(countries) >= 3:
  875. strategy_text += f'、{countries[2]}(售后团队驻场提升客户满意度)'
  876. strategy_text += '。通过差异化服务巩固重点市场地位,同时用成熟市场的利润补贴新兴市场的培育投入。'
  877. items.append({'title': '🎯 下月策略建议', 'content': strategy_text})
  878. return items
  879. # ------------------------------------------------------------------------------
  880. # DAILY PAGE 7 — 异常告警
  881. # ------------------------------------------------------------------------------
  882. def _insight_daily_alert(metrics: dict, context: dict) -> list[dict]:
  883. items = []
  884. overdue = metrics.get('overdue_orders', [])
  885. alerts = metrics.get('alerts', [])
  886. support_categories = metrics.get('support_categories', {})
  887. total_support = sum(support_categories.values()) if support_categories else 0
  888. # ① 超期合同根因分析
  889. if overdue:
  890. overdue_qty_est = len(overdue) * 60 # rough estimate
  891. amount_est = len(overdue) * 300 # rough estimate in 万
  892. root_text = f'{len(overdue)}单合同拟定中超30天,根因分类:客户内部审批流程长(常见)、我方合同条款争议、客户资金到位延迟。'
  893. if len(overdue) >= 1:
  894. countries = [o['country'] for o in overdue[:4]]
  895. days = [o['days'] for o in overdue[:4]]
  896. root_text += f'涉及{"、".join(countries)}等,超期天数{min(days)}-{max(days)}天。需差异化施策:审批流程长则协助客户梳理内部节点,条款争议则法务快速出具修订版,资金延迟则协商分期方案。'
  897. items.append({'title': '🚨 超期合同根因分析', 'content': root_text})
  898. else:
  899. items.append({'title': '🛡️ 超期合同监控', 'content': '今日无超30天合同拟定订单,超期控制良好。建议继续保持"合同拟定14天预警"机制,超14天未锁定自动升级,防患于未然。'})
  900. # ② 量化影响评估
  901. if overdue:
  902. est_qty = len(overdue) * 60
  903. est_amount = len(overdue) * 300
  904. impact_text = f'{len(overdue)}单超期合同涉及车辆约{est_qty}台,预估金额约¥{est_amount:,}万。若本周内未完成签订,下月预测交付将缺口约{_pct_change(len(overdue) * 60, metrics.get("forecast_next", 100)) or 10:.0f}%,直接影响月度营收目标达成。建议按金额大小和超期天数双维度排序,优先攻克高价值长超期订单。'
  905. else:
  906. impact_text = '当前超期订单为0,对月度交付无负面影响。建议将释放的管理精力转向A→B阶段转化提速。'
  907. items.append({'title': '💰 量化影响评估', 'content': impact_text})
  908. # ③ 处理优先级排序
  909. if overdue:
  910. sorted_od = sorted(overdue, key=lambda x: -x['days'])
  911. p0 = sorted_od[0]
  912. priority_text = f'P0:{p0["country"]}{p0["days"]}天(最长超期,需立即安排专人跟进,今日18:00前反馈进展)。'
  913. if len(sorted_od) >= 2:
  914. p1 = sorted_od[1]
  915. priority_text += f'P1:{p1["country"]}{p1["days"]}天(次优先,明日12:00前完成条款确认或客户沟通)。'
  916. if len(sorted_od) >= 3:
  917. priority_text += f'P2:其余{len(sorted_od)-2}单(客户侧问题为主,安排视频会议逐一确认,本周内全部清零)。'
  918. else:
  919. priority_text = '当前无超期合同,优先级排序不适用。建议将优先级管理转向"临近超期预警":对跟踪25天以上的A阶段合同提前介入。'
  920. items.append({'title': '📋 处理优先级排序', 'content': priority_text})
  921. # ④ 支持需求黑洞
  922. if total_support > 0:
  923. top_cat = max(support_categories.items(), key=lambda x: x[1])
  924. other_count = support_categories.get('其他', 0)
  925. support_text = f'今日共{total_support}项支持需求待处理,{top_cat[0]}类最多({top_cat[1]}项)。'
  926. if other_count > 0:
  927. support_text += f'"其他"类需求{other_count}项为最大黑洞,预计涉及财务、售后、法务、IT等多部门。建议召开30分钟站会按"能现场解决/需跟进/需升级"三级分类,当场明确责任人和Deadline。'
  928. else:
  929. support_text += '支持需求分类清晰,建议建立跨部门快速响应通道,确保常规需求48h内闭环。'
  930. else:
  931. support_text = '今日无支持需求积压,跨部门协调压力较小。建议利用窗口期梳理历史高频需求,建立标准化处理模板。'
  932. items.append({'title': '⚡ 支持需求黑洞', 'content': support_text})
  933. # ⑤ 预防措施
  934. prev_text = '建议建立"合同拟定14天预警"机制:超14天未锁定自动升级至部门经理,超21天升级至事业部总监,避免被动等待。同时建立"客户需求变更登记簿",记录每次变更原因和耗时,为后续流程优化积累数据。'
  935. items.append({'title': '🛡️ 预防措施', 'content': prev_text})
  936. return items
  937. # ------------------------------------------------------------------------------
  938. # DAILY PAGE 8 — 明日工作重点
  939. # ------------------------------------------------------------------------------
  940. def _insight_daily_action(metrics: dict, context: dict) -> list[dict]:
  941. items = []
  942. overdue = metrics.get('overdue_orders', [])
  943. forecast_next = metrics.get('forecast_next', 0)
  944. tracking_orders = metrics.get('tracking_orders', 0)
  945. support_categories = metrics.get('support_categories', {})
  946. status_dist = metrics.get('status_dist', {})
  947. d_stage = status_dist.get('已生产待付尾款', 0)
  948. e_stage = status_dist.get('已付尾款待发运', 0)
  949. # ① 重点推进:超期合同清零行动
  950. if overdue:
  951. details = [f"{o['country']}{o['days']}天" for o in overdue[:4]]
  952. action_text = f'目标:{len(overdue)}单超期合同至少完成{max(1, len(overdue)-1)}单签订。责任人:销售主管牵头,各区域负责人分头跟进。时间节点:今日18:00前完成最长超期单确认,明日12:00前完成其余签订。风险预案:若条款争议仍未达成一致,启动法务+事业部总监联合客户沟通。'
  953. else:
  954. action_text = f'当前无超期合同,重点转向A→B转化提速:目标推动{status_dist.get("合同拟定中", 0)}单中至少20%进入锁定阶段。责任人:各区域负责人。时间节点:每日下班前反馈转化进展。'
  955. items.append({'title': '🎯 重点推进:超期合同清零行动', 'content': action_text})
  956. # ② 跨部门协调:支持需求专项会
  957. total_support = sum(support_categories.values()) if support_categories else 0
  958. if total_support > 0:
  959. other = support_categories.get('其他', 0)
  960. coord_text = f'{total_support}项支持需求为最大协调任务,预计涉及财务(收款/汇率)、售后(配件/质保)、法务(条款/认证)、IT(系统/数据)。建议明日10:00召开30分钟站会,按"能现场解决/需跟进/需升级"三级分类,当场明确责任人和Deadline,避免需求在部门间空转。'
  961. else:
  962. coord_text = '今日无支持需求积压,建议利用该窗口期召开预防性协调会:梳理下月预测交付的物流舱位需求,提前2周与物流部门确认船期表。'
  963. items.append({'title': '💡 跨部门协调:支持需求专项会', 'content': coord_text})
  964. # ③ 交付跟踪
  965. ship_gap = max(0, forecast_next - e_stage * 8) # rough estimate 8台/单
  966. action_text3 = f'下月预测交付{forecast_next}台,当前待发运订单{e_stage}单(约{e_stage*8}台),缺口约{ship_gap}台。需从D阶段(已生产待付尾款)紧急转化:D阶段现有{d_stage}单,筛选客户资金已到位或信用良好的订单,优先安排发运。物流部门今日提供船期表,销售今日与客户确认收货时间,确保交付节奏可控。'
  967. items.append({'title': '📦 交付跟踪:下月预测交付', 'content': action_text3})
  968. return items
  969. # ==============================================================================
  970. # WEEKLY & MONTHLY INSIGHTS — delegated to deep_insights module
  971. # ==============================================================================
  972. def _weekly_insights(page_type: str, metrics: dict, context: dict) -> list[dict]:
  973. from deep_insights import weekly_insights
  974. return weekly_insights(page_type, metrics, context)
  975. def _monthly_insights(page_type: str, metrics: dict, context: dict) -> list[dict]:
  976. from deep_insights import monthly_insights
  977. return monthly_insights(page_type, metrics, context)
  978. # ==============================================================================
  979. # HELPER FUNCTIONS
  980. # ==============================================================================
  981. def _fmt_pct(val):
  982. if val is None:
  983. return '—'
  984. sign = '+' if val >= 0 else ''
  985. return f'{sign}{val:.1f}%'
  986. def _fmt_chg_dir(val):
  987. if val is None:
  988. return ''
  989. return '增加' if val >= 0 else '减少'
  990. def avg(lst):
  991. return sum(lst) / len(lst) if lst else 0
  992. def calc_generic_metrics(df: pd.DataFrame, config) -> dict:
  993. metrics = {}
  994. for metric_def in config.metrics:
  995. col = metric_def.column
  996. if col not in df.columns:
  997. metrics[metric_def.name] = 0
  998. continue
  999. series = df[col].dropna()
  1000. agg = metric_def.aggregation
  1001. # Coerce object series to numeric when possible so sparse numeric
  1002. # columns (e.g. forecast/delivery with many NaNs) are handled correctly.
  1003. if not pd.api.types.is_numeric_dtype(series):
  1004. coerced = pd.to_numeric(series, errors='coerce').dropna()
  1005. if len(coerced) > 0:
  1006. series = coerced
  1007. if agg == 'sum':
  1008. val = int(series.sum()) if pd.api.types.is_numeric_dtype(series) else len(series)
  1009. elif agg == 'count':
  1010. val = int(series.count())
  1011. elif agg == 'avg':
  1012. val = round(float(series.mean()), 1) if pd.api.types.is_numeric_dtype(series) else 0
  1013. elif agg == 'max':
  1014. val = round(float(series.max()), 1) if pd.api.types.is_numeric_dtype(series) else 0
  1015. elif agg == 'min':
  1016. val = round(float(series.min()), 1) if pd.api.types.is_numeric_dtype(series) else 0
  1017. elif agg == 'distinct_count':
  1018. val = int(series.nunique())
  1019. else:
  1020. val = len(series)
  1021. metrics[metric_def.name] = val
  1022. metrics[f'{metric_def.name}_label'] = metric_def.label
  1023. metrics[f'{metric_def.name}_unit'] = metric_def.unit
  1024. if hasattr(config, 'comparison') and config.comparison:
  1025. pass
  1026. return metrics
  1027. def calc_generic_trend(df: pd.DataFrame, time_col: str, metric_col: str,
  1028. aggregation: str = 'sum') -> dict:
  1029. if time_col not in df.columns or metric_col not in df.columns:
  1030. return {}
  1031. if aggregation == 'sum':
  1032. trend = df.groupby(time_col)[metric_col].sum().sort_index()
  1033. elif aggregation == 'count':
  1034. trend = df.groupby(time_col)[metric_col].count().sort_index()
  1035. else:
  1036. trend = df.groupby(time_col)[metric_col].mean().sort_index()
  1037. dates = []
  1038. for d in trend.index:
  1039. try:
  1040. dates.append(pd.Timestamp(d).strftime('%m/%d'))
  1041. except Exception:
  1042. dates.append(str(d))
  1043. return {
  1044. 'dates': dates,
  1045. 'values': [int(v) if aggregation != 'avg' else round(float(v), 1) for v in trend.values],
  1046. }
  1047. def calc_generic_distribution(df: pd.DataFrame, cat_col: str, metric_col: str = None,
  1048. aggregation: str = 'sum', top_n: int = 10) -> dict:
  1049. if cat_col not in df.columns:
  1050. return {}
  1051. if metric_col and metric_col in df.columns:
  1052. if aggregation == 'sum':
  1053. dist = df.groupby(cat_col)[metric_col].sum().sort_values(ascending=False).head(top_n)
  1054. elif aggregation == 'count':
  1055. dist = df.groupby(cat_col)[metric_col].count().sort_values(ascending=False).head(top_n)
  1056. else:
  1057. dist = df.groupby(cat_col)[metric_col].mean().sort_values(ascending=False).head(top_n)
  1058. else:
  1059. dist = df[cat_col].value_counts().head(top_n)
  1060. total = sum(dist.values)
  1061. return {
  1062. 'categories': [str(k) for k in dist.index],
  1063. 'values': [int(v) for v in dist.values],
  1064. 'percentages': [round(v / total * 100, 1) if total else 0 for v in dist.values],
  1065. }
  1066. def calc_generic_ranking(df: pd.DataFrame, rank_col: str, metric_col: str,
  1067. aggregation: str = 'sum', top_n: int = 15) -> list[dict]:
  1068. if rank_col not in df.columns or metric_col not in df.columns:
  1069. return []
  1070. if aggregation == 'sum':
  1071. ranked = df.groupby(rank_col)[metric_col].sum().sort_values(ascending=False).head(top_n)
  1072. elif aggregation == 'count':
  1073. ranked = df.groupby(rank_col)[metric_col].count().sort_values(ascending=False).head(top_n)
  1074. else:
  1075. ranked = df.groupby(rank_col)[metric_col].mean().sort_values(ascending=False).head(top_n)
  1076. return [{'name': str(k), 'value': int(v), 'rank': i + 1}
  1077. for i, (k, v) in enumerate(ranked.items())]
  1078. def generate_generic_insights(data_profile: dict, metrics: dict) -> list[dict]:
  1079. items = []
  1080. num_cols = data_profile.get('numeric_columns', [])
  1081. cat_cols = data_profile.get('category_columns', [])
  1082. time_cols = data_profile.get('time_columns', [])
  1083. q = data_profile.get('data_quality', {})
  1084. score = q.get('score', 100)
  1085. if metrics:
  1086. metric_details = []
  1087. for k, v in metrics.items():
  1088. if isinstance(v, (int, float)):
  1089. metric_details.append(f'{k}: {v:,.0f}')
  1090. if metric_details:
  1091. items.append({
  1092. 'title': '核心指标总览',
  1093. 'content': f'本期关键指标:{";".join(metric_details[:6])}。'
  1094. f'综合来看,业务运行态势可通过这些核心数据进行量化评估,'
  1095. f'建议结合业务目标与实际值的差距进行针对性分析。',
  1096. })
  1097. if num_cols:
  1098. for nc in num_cols[:2]:
  1099. ns = nc.get('numeric_stats', {}) or {}
  1100. col_name = nc.get('inferred_label', nc['column_name'])
  1101. stats_parts = []
  1102. if 'sum' in ns and ns['sum']:
  1103. stats_parts.append(f'总量 {ns["sum"]:,.0f}')
  1104. if 'mean' in ns and ns['mean']:
  1105. stats_parts.append(f'均值 {ns["mean"]:,.1f}')
  1106. if 'max' in ns and ns['max']:
  1107. stats_parts.append(f'峰值 {ns["max"]:,.0f}')
  1108. if 'min' in ns and ns['min']:
  1109. stats_parts.append(f'最低 {ns["min"]:,.0f}')
  1110. if stats_parts:
  1111. items.append({
  1112. 'title': f'{col_name}数据特征',
  1113. 'content': f'指标"{col_name}"的统计特征:{",".join(stats_parts)}。'
  1114. f'标准差 {ns.get("std", "N/A")},数据波动幅度'
  1115. f'{"较大" if isinstance(ns.get("std"), (int,float)) and ns["std"] > ns.get("mean", 1) * 0.5 else "适中"}。',
  1116. })
  1117. if cat_cols:
  1118. for cc in cat_cols[:2]:
  1119. uc = cc.get('unique_count', 0)
  1120. items.append({
  1121. 'title': f'{cc.get("inferred_label", cc["column_name"])}维度分析',
  1122. 'content': f'数据覆盖 {uc} 个不同的{cc.get("inferred_label", cc["column_name"])}类别,'
  1123. f'丰富的分类维度支持多角度交叉分析。'
  1124. f'建议重点关注主要类别的集中度与分布均衡性,'
  1125. f'识别高价值类别与低效类别之间的差异特征。',
  1126. })
  1127. if time_cols:
  1128. tc = time_cols[0]
  1129. items.append({
  1130. 'title': '时间维度覆盖',
  1131. 'content': f'数据包含时间列"{tc.get("inferred_label", tc["column_name"])}",'
  1132. f'支持按时间维度进行趋势分析。通过对时间序列数据的分解,'
  1133. f'可识别周期性波动、趋势变化及异常时间节点,为预测与规划提供依据。',
  1134. })
  1135. items.append({
  1136. 'title': '数据质量评估',
  1137. 'content': f'数据质量评分 {score}/100,'
  1138. f'{"数据完整可靠," if score >= 90 else "数据质量良好,建议关注缺失值" if score >= 80 else "数据需重点关注质量控制"}'
  1139. f'缺失率 {q.get("null_rate", 0)*100:.1f}%。'
  1140. f'本报告中的分析与图表均基于现有数据进行自动化生成,确保数据准确性。',
  1141. })
  1142. high_null = q.get('high_null_columns', [])
  1143. if high_null:
  1144. items.append({
  1145. 'title': '数据完整性说明',
  1146. 'content': f'以下列缺失值比例较高:{", ".join(high_null[:5])}。'
  1147. f'在分析涉及这些列时已进行空值排除处理,'
  1148. f'建议后续数据录入环节关注这些字段的完整填写,以提升分析精度。',
  1149. })
  1150. total_rows = data_profile.get('total_rows', 0)
  1151. if total_rows:
  1152. items.append({
  1153. 'title': '数据规模概述',
  1154. 'content': f'本期报告基于 {total_rows} 条数据记录进行分析,'
  1155. f'样本量{"充足,统计结果具有较好的代表性" if total_rows >= 100 else "适中,统计结果可作为参考" if total_rows >= 30 else "有限,分析结果仅供参考"}。',
  1156. })
  1157. return items
  1158. if __name__ == '__main__':
  1159. import sys
  1160. if len(sys.argv) > 1:
  1161. from data_loader import load_workbook_metadata, load_daily
  1162. fp = sys.argv[1]
  1163. meta = load_workbook_metadata(fp)
  1164. d0 = meta['date_range'][0]
  1165. df = load_daily(fp, d0)
  1166. m = calc_daily_metrics(df)
  1167. print(f"Daily metrics for {d0.date()}:")
  1168. print(f" Tracking orders: {m['tracking_orders']}")
  1169. print(f" Total qty: {m['total_qty']}")
  1170. print(f" Avg order size: {m['avg_order_size']}")
  1171. print(f" Status dist: {m['status_dist']}")