"""Rule-based intent parsing for free-text questions about workflow batches.

A question is scanned for three things: an explicit batch number
(``RR-…``, ``AFF-…`` or ``FS-…``), the workflow type it refers to, and
whether it asks about the latest batch.  Those are combined into a small
intent dictionary by :func:`parse_question_intent`.
"""

from __future__ import annotations

import re

WORKFLOW_KEYWORDS = {
    "regulatory_review": ("法规核查", "风险", "整改", "RR-"),
    "application_form_fill": ("自动填表", "填表", "申报文件", "AFF-"),
    "file_summary": ("自动汇总", "文件汇总", "目录", "页数", "FS-"),
}

# An ASCII-only lookbehind is used instead of ``\b``: for ``str`` patterns
# ``\b`` is Unicode-aware, so CJK characters count as word characters and a
# batch number written directly after Chinese text ("查看RR-1") would have no
# word boundary in front of it and would never be found.
_BATCH_NO_RE = re.compile(
    r"(?<![A-Za-z0-9_])(?:RR|AFF|FS)-[A-Za-z0-9-]+",
    flags=re.IGNORECASE,
)

# Phrases that ask for the most recent batch.
_LATEST_RE = re.compile(r"(最新|最近|上一个|最后一个)")

# workflow type -> (specialised intent, keywords that trigger it).
# Keywords are compared against the lower-cased question, so ASCII entries
# must be written in lower case.
_INTENT_RULES = {
    "regulatory_review": ("risk_summary", ("风险", "阻断", "整改")),
    "application_form_fill": ("export_summary", ("导出", "文件", "word")),
    "file_summary": ("missing_summary", ("缺失", "目录", "页数")),
}


def parse_question_intent(text: str) -> dict[str, object]:
    """Parse a free-text question into an intent dictionary.

    Args:
        text: The raw question.  ``None`` or an empty string is treated as
            an empty question.

    Returns:
        A dict with the keys:

        * ``"intent"`` - ``"risk_summary"``, ``"export_summary"`` or
          ``"missing_summary"`` when the detected workflow type has a
          matching trigger keyword in the question; otherwise
          ``"batch_status"`` if a batch number or a "latest" phrase was
          found, else ``"unknown"``.
        * ``"workflow_type"`` - a key of ``WORKFLOW_KEYWORDS`` or ``""``.
        * ``"batch_no"`` - the upper-cased batch number or ``""``.
        * ``"latest"`` - ``True`` when the question contains a "latest"
          phrase or names no batch number at all.
    """
    normalized = (text or "").strip()
    batch_no = _extract_batch_no(normalized)
    workflow_type = _detect_workflow_type(normalized, batch_no)
    latest = bool(_LATEST_RE.search(normalized))

    intent = "batch_status" if batch_no or latest else "unknown"

    rule = _INTENT_RULES.get(workflow_type)
    if rule is not None:
        summary_intent, trigger_keywords = rule
        # Lower-casing only affects the ASCII keyword ("word"); it lets
        # "Word", "WORD", ... all match without listing every variant.
        lowered = normalized.lower()
        if any(keyword in lowered for keyword in trigger_keywords):
            intent = summary_intent

    return {
        "intent": intent,
        "workflow_type": workflow_type,
        "batch_no": batch_no,
        # Without an explicit batch number the question is taken to refer
        # to the latest batch.
        "latest": latest or not batch_no,
    }


def _extract_batch_no(text: str) -> str:
    """Return the first batch number found in *text*, upper-cased.

    A batch number is ``RR``, ``AFF`` or ``FS`` (any letter case) followed
    by ``-`` and one or more ASCII letters, digits or hyphens.  It must not
    be directly preceded by an ASCII letter, digit or underscore, but may
    directly follow non-ASCII text such as Chinese characters.

    Returns:
        The upper-cased batch number, or ``""`` when none is present.
    """
    match = _BATCH_NO_RE.search(text)
    return match.group(0).upper() if match else ""


def _detect_workflow_type(text: str, batch_no: str = "") -> str:
    """Return the workflow type that *text* / *batch_no* refers to.

    An explicit batch number is the most specific signal, so its prefix
    (``RR-``, ``AFF-``, ``FS-``) decides the workflow type whenever it is
    listed in ``WORKFLOW_KEYWORDS``.  Otherwise the first workflow type (in
    ``WORKFLOW_KEYWORDS`` order) with a keyword occurring in the text wins.

    Returns:
        A key of ``WORKFLOW_KEYWORDS``, or ``""`` when nothing matches.
    """
    if batch_no:
        prefix = batch_no.upper().partition("-")[0] + "-"
        for workflow_type, keywords in WORKFLOW_KEYWORDS.items():
            if prefix in keywords:
                return workflow_type

    # Fallback: plain substring scan over the question (and the batch
    # number, in case it carries an unrecognised prefix).
    source = f"{text} {batch_no}"
    for workflow_type, keywords in WORKFLOW_KEYWORDS.items():
        if any(keyword in source for keyword in keywords):
            return workflow_type
    return ""