44 lines
1.7 KiB
Python
44 lines
1.7 KiB
Python
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
|
|
# Trigger phrases per workflow type. Each tuple holds the Chinese keywords
# plus an upper-case prefix ("RR-", "AFF-", "FS-") matching the batch-number
# prefixes recognised by _extract_batch_no.
# Insertion order matters: _detect_workflow_type scans this mapping top to
# bottom and returns the first workflow with a matching keyword.
WORKFLOW_KEYWORDS: dict[str, tuple[str, ...]] = {
    "regulatory_review": (
        "法规核查",
        "风险",
        "整改",
        "RR-",
    ),
    "application_form_fill": (
        "自动填表",
        "填表",
        "申报文件",
        "AFF-",
    ),
    "file_summary": (
        "自动汇总",
        "文件汇总",
        "目录",
        "页数",
        "FS-",
    ),
}
|
|
|
|
|
|
def parse_question_intent(text: str) -> dict[str, object]:
    """Classify a free-text question into an intent descriptor.

    Args:
        text: Raw question text. A falsy value (e.g. ``None`` or ``""``) is
            treated as an empty string; surrounding whitespace is stripped.

    Returns:
        A dict with four keys:

        * ``intent`` -- ``"batch_status"`` when a batch number or a
          "latest"-style word is present, otherwise ``"unknown"``; replaced
          by ``"risk_summary"``, ``"export_summary"`` or ``"missing_summary"``
          when the detected workflow and one of its follow-up keywords match.
        * ``workflow_type`` -- result of ``_detect_workflow_type``.
        * ``batch_no`` -- result of ``_extract_batch_no``.
        * ``latest`` -- ``True`` when a "latest"-style word is present or
          when no batch number was found.
    """
    question = (text or "").strip()
    batch_no = _extract_batch_no(question)
    workflow_type = _detect_workflow_type(question, batch_no)
    wants_latest = re.search(r"(最新|最近|上一个|最后一个)", question) is not None

    # Baseline intent; may be overridden by a workflow-specific one below.
    if batch_no or wants_latest:
        intent = "batch_status"
    else:
        intent = "unknown"

    # workflow_type holds a single value, so at most one entry can apply.
    refinements = {
        "regulatory_review": (("风险", "阻断", "整改"), "risk_summary"),
        "application_form_fill": (("导出", "文件", "word", "Word"), "export_summary"),
        "file_summary": (("缺失", "目录", "页数"), "missing_summary"),
    }
    refinement = refinements.get(workflow_type)
    if refinement is not None:
        triggers, refined_intent = refinement
        if any(trigger in question for trigger in triggers):
            intent = refined_intent

    return {
        "intent": intent,
        "workflow_type": workflow_type,
        "batch_no": batch_no,
        # Without an explicit batch number the question is treated as
        # referring to the latest batch.
        "latest": wants_latest or not batch_no,
    }
|
|
|
|
|
|
def _extract_batch_no(text: str) -> str:
|
|
match = re.search(r"\b(?:RR|AFF|FS)-[A-Za-z0-9-]+", text, flags=re.IGNORECASE)
|
|
return match.group(0).upper() if match else ""
|
|
|
|
|
|
def _detect_workflow_type(text: str, batch_no: str = "") -> str:
    """Return the first workflow type whose keywords occur in the input.

    Args:
        text: Question text to scan.
        batch_no: Optional batch number; it is appended to *text* before
            scanning so that its prefix can also trigger a match.

    Returns:
        The key of the first ``WORKFLOW_KEYWORDS`` entry (in mapping order)
        with at least one keyword contained in the combined string, or ``""``
        when nothing matches. Matching is a case-sensitive substring test.
    """
    haystack = f"{text} {batch_no}"
    candidates = (
        name
        for name, triggers in WORKFLOW_KEYWORDS.items()
        if any(trigger in haystack for trigger in triggers)
    )
    return next(candidates, "")
|