Files

44 lines
1.7 KiB
Python

from __future__ import annotations
import re
# Substrings that identify each workflow type. _detect_workflow_type walks
# the entries in insertion order, so the order of the keys is significant.
# The last item of every tuple is the batch-number prefix for that workflow
# (the same prefixes that the _extract_batch_no pattern accepts).
WORKFLOW_KEYWORDS = {
    "regulatory_review": (
        "法规核查",
        "风险",
        "整改",
        "RR-",
    ),
    "application_form_fill": (
        "自动填表",
        "填表",
        "申报文件",
        "AFF-",
    ),
    "file_summary": (
        "自动汇总",
        "文件汇总",
        "目录",
        "页数",
        "FS-",
    ),
}
def parse_question_intent(text: str) -> dict[str, object]:
    """Classify a free-text question into intent, workflow type and batch number.

    Args:
        text: The raw question. A falsy value is treated as an empty string;
            surrounding whitespace is stripped before analysis.

    Returns:
        A dict with four keys:

        * ``intent`` - ``"risk_summary"``, ``"export_summary"`` or
          ``"missing_summary"`` when the detected workflow's refinement
          keywords occur in the text; otherwise ``"batch_status"`` if a batch
          number or a recency word was found, else ``"unknown"``.
        * ``workflow_type`` - the result of ``_detect_workflow_type``.
        * ``batch_no`` - the result of ``_extract_batch_no``.
        * ``latest`` - ``True`` when a recency word is present or when no
          batch number was found.
    """
    normalized = (text or "").strip()
    batch_no = _extract_batch_no(normalized)
    workflow_type = _detect_workflow_type(normalized, batch_no)
    latest = bool(re.search(r"(最新|最近|上一个|最后一个)", normalized))

    # Baseline intent; may be refined below for a recognised workflow.
    intent = "batch_status" if batch_no or latest else "unknown"

    # workflow type -> (refined intent, keywords that trigger it).
    refinements = {
        "regulatory_review": ("risk_summary", ("风险", "阻断", "整改")),
        "application_form_fill": ("export_summary", ("导出", "文件", "word")),
        "file_summary": ("missing_summary", ("缺失", "目录", "页数")),
    }
    refinement = refinements.get(workflow_type)
    if refinement is not None:
        refined_intent, keywords = refinement
        # Match against a lower-cased copy so that "word" is found regardless
        # of capitalisation ("Word", "WORD", ...). The Chinese keywords have
        # no case, so lower-casing does not affect them.
        lowered = normalized.lower()
        if any(keyword in lowered for keyword in keywords):
            intent = refined_intent

    return {
        "intent": intent,
        "workflow_type": workflow_type,
        "batch_no": batch_no,
        # With no explicit batch number the question is treated as "latest".
        "latest": latest or not batch_no,
    }
def _extract_batch_no(text: str) -> str:
match = re.search(r"\b(?:RR|AFF|FS)-[A-Za-z0-9-]+", text, flags=re.IGNORECASE)
return match.group(0).upper() if match else ""
def _detect_workflow_type(text: str, batch_no: str = "") -> str:
    """Return the workflow type implied by *batch_no* and *text*.

    Args:
        text: The question text to scan for workflow keywords.
        batch_no: An already-extracted batch number, if any.

    Returns:
        A key of ``WORKFLOW_KEYWORDS``, or ``""`` when nothing matches.
    """
    # An explicit batch number is the most specific signal: if it starts with
    # one of a workflow's keywords (in practice its "XX-" prefix), that
    # workflow wins even when the text also contains a keyword of a workflow
    # listed earlier. Without this, "FS-001 有什么风险" would be reported as
    # regulatory_review while carrying a file_summary batch number.
    if batch_no:
        for workflow_type, keywords in WORKFLOW_KEYWORDS.items():
            if batch_no.startswith(tuple(keywords)):
                return workflow_type

    # Fallback: first workflow (in dict order) with any keyword occurring in
    # the text or the batch number.
    source = f"{text} {batch_no}"
    for workflow_type, keywords in WORKFLOW_KEYWORDS.items():
        if any(keyword in source for keyword in keywords):
            return workflow_type
    return ""