from __future__ import annotations

import re


# Maps each workflow type to the substrings that identify it in a user question.
# Entries ending in "-" are batch-number prefixes; the others are Chinese
# trigger phrases. Insertion order matters: _detect_workflow_type iterates this
# dict and returns the first workflow that has a matching keyword.
WORKFLOW_KEYWORDS = {
    # regulatory check, risk, rectification, "RR-" batch prefix
    "regulatory_review": ("法规核查", "风险", "整改", "RR-"),
    # automatic form filling, form filling, application documents, "AFF-" batch prefix
    "application_form_fill": ("自动填表", "填表", "申报文件", "AFF-"),
    # automatic summary, file summary, table of contents, page count, "FS-" batch prefix
    "file_summary": ("自动汇总", "文件汇总", "目录", "页数", "FS-"),
}


# Per-workflow refinement of the generic intent: (trigger keywords, refined intent).
# Keywords are lowercase because they are matched against the lowercased question,
# which makes Latin keywords such as "word" case-insensitive.
_INTENT_REFINEMENTS = {
    "regulatory_review": (("风险", "阻断", "整改"), "risk_summary"),
    "application_form_fill": (("导出", "文件", "word"), "export_summary"),
    "file_summary": (("缺失", "目录", "页数"), "missing_summary"),
}


def parse_question_intent(text: str) -> dict[str, object]:
    """Parse a free-text question into a structured intent description.

    Args:
        text: The user's question. ``None`` or an empty string is treated as
            an empty question.

    Returns:
        A dict with the keys:

        * ``intent`` -- ``"batch_status"`` when a batch number or a
          "latest"-style word is present, otherwise ``"unknown"``; replaced by
          ``"risk_summary"``, ``"export_summary"`` or ``"missing_summary"``
          when the detected workflow's trigger keywords occur in the question.
        * ``workflow_type`` -- the detected workflow type, or ``""``.
        * ``batch_no`` -- the upper-cased batch number, or ``""``.
        * ``latest`` -- ``True`` when the question asks for the latest batch,
          and also whenever no batch number was found.
    """
    normalized = (text or "").strip()
    batch_no = _extract_batch_no(normalized)
    workflow_type = _detect_workflow_type(normalized, batch_no)
    latest = bool(re.search(r"(最新|最近|上一个|最后一个)", normalized))

    intent = "batch_status" if batch_no or latest else "unknown"

    # An unknown workflow yields an empty keyword tuple, so the generic intent
    # computed above is kept.
    keywords, refined_intent = _INTENT_REFINEMENTS.get(workflow_type, ((), intent))
    # Lowercasing leaves the Chinese keywords untouched and lets "WORD",
    # "Word", "word", ... all trigger the export intent.
    lowered = normalized.lower()
    if any(keyword in lowered for keyword in keywords):
        intent = refined_intent

    return {
        "intent": intent,
        "workflow_type": workflow_type,
        "batch_no": batch_no,
        "latest": latest or not batch_no,
    }


def _extract_batch_no(text: str) -> str:
    match = re.search(r"\b(?:RR|AFF|FS)-[A-Za-z0-9-]+", text, flags=re.IGNORECASE)
    return match.group(0).upper() if match else ""


def _detect_workflow_type(text: str, batch_no: str = "") -> str:
    """Return the workflow type a question refers to, or ``""`` if none matches.

    An explicit batch number is authoritative: when ``batch_no`` starts with
    one of the keywords in ``WORKFLOW_KEYWORDS`` (the ``RR-`` / ``AFF-`` /
    ``FS-`` prefixes), that workflow is returned even if the free text also
    contains keywords of a workflow listed earlier. Otherwise the first
    workflow whose keyword occurs anywhere in the text or batch number wins.

    Args:
        text: The question text to scan for keywords.
        batch_no: Batch number already extracted from the question, if any.

    Returns:
        A key of ``WORKFLOW_KEYWORDS``, or ``""`` when nothing matches.
    """
    # Step 1: trust the batch prefix. Upper-casing makes a lowercase batch
    # number passed by a caller match the upper-case prefixes.
    batch_prefix_source = (batch_no or "").upper()
    if batch_prefix_source:
        for workflow_type, keywords in WORKFLOW_KEYWORDS.items():
            if batch_prefix_source.startswith(tuple(keywords)):
                return workflow_type

    # Step 2: fall back to a substring scan; dict order decides ties.
    source = f"{text} {batch_no}"
    for workflow_type, keywords in WORKFLOW_KEYWORDS.items():
        if any(keyword in source for keyword in keywords):
            return workflow_type
    return ""