|
@@ -54,8 +54,19 @@ def parse_structured_order(text: str) -> dict:
|
|
|
i = 0
|
|
i = 0
|
|
|
while i < len(lines):
|
|
while i < len(lines):
|
|
|
line = lines[i].strip()
|
|
line = lines[i].strip()
|
|
|
- # 匹配车型起始行: (数字)... 型号:XXX
|
|
|
|
|
|
|
+ # 匹配车型起始行: (数字)... 型号:XXX 或 N.意向车型:... 型号:XXX
|
|
|
m = re.match(r'[((](\d+)[))]\s*(.+?)\s*型号[::]\s*(\w+)', line)
|
|
m = re.match(r'[((](\d+)[))]\s*(.+?)\s*型号[::]\s*(\w+)', line)
|
|
|
|
|
+ if not m:
|
|
|
|
|
+ m = re.match(r'\d+[..]\s*意向车型[::]\s*(.+?)\s*型号[::]\s*(\w+)', line)
|
|
|
|
|
+ if m:
|
|
|
|
|
+ # Adapt to the primary pattern's layout: group(1)='' (no sequence number), group(2)=name_part, group(3)=model_code
|
|
|
|
|
+ # Rewrite as a 3-group match for downstream compatibility
|
|
|
|
|
+ class _M:
|
|
|
|
|
+ def __init__(self, real_m):
|
|
|
|
|
+ self._m = real_m
|
|
|
|
|
+ # group(1)=name_part, group(2)=model_code → map to group(2), group(3)
|
|
|
|
|
+ self.group = lambda n: ['', '', real_m.group(1), real_m.group(2)][n]
|
|
|
|
|
+ m = _M(m)
|
|
|
if m:
|
|
if m:
|
|
|
seq = m.group(1)
|
|
seq = m.group(1)
|
|
|
name_part = m.group(2).strip()
|
|
name_part = m.group(2).strip()
|
|
@@ -64,13 +75,19 @@ def parse_structured_order(text: str) -> dict:
|
|
|
# 解析车型名称(中英文分离)
|
|
# 解析车型名称(中英文分离)
|
|
|
name_cn, name_en = split_name_cn_en(name_part)
|
|
name_cn, name_en = split_name_cn_en(name_part)
|
|
|
|
|
|
|
|
- # 下一行提取排放、发动机代码、数量、颜色
|
|
|
|
|
|
|
+ # 先从当前行提取代码/数量/颜色,再尝试下一行
|
|
|
engine_code = ''
|
|
engine_code = ''
|
|
|
emission = ''
|
|
emission = ''
|
|
|
quantity = 1
|
|
quantity = 1
|
|
|
color = ''
|
|
color = ''
|
|
|
|
|
|
|
|
- if i + 1 < len(lines):
|
|
|
|
|
|
|
+ # 当前行可能包含: 代码:VS03 5台蓝色
|
|
|
|
|
+ m2 = re.search(r'代码[::]\s*(\w+)\s+(\d+)台(\w+)', line)
|
|
|
|
|
+ if m2:
|
|
|
|
|
+ engine_code = m2.group(1).strip()
|
|
|
|
|
+ quantity = int(m2.group(2))
|
|
|
|
|
+ color = m2.group(3).strip()
|
|
|
|
|
+ elif i + 1 < len(lines):
|
|
|
next_line = lines[i + 1].strip()
|
|
next_line = lines[i + 1].strip()
|
|
|
# 模式: 排放标准 代码:XXX X台颜色
|
|
# 模式: 排放标准 代码:XXX X台颜色
|
|
|
m2 = re.search(r'(.+?)\s+代码[::]\s*(\w+)\s+(\d+)台(\w+)', next_line)
|
|
m2 = re.search(r'(.+?)\s+代码[::]\s*(\w+)\s+(\d+)台(\w+)', next_line)
|