import copy
import json

from .schemas.outputs import SUPPORTED_OUTPUT_TYPES

# Structured fields, declared per output type, that the page and the audit log
# actually need to consume.
# No heavyweight schema framework is used on purpose: the priority is that the
# fields stay stable, readable, and easy to explain.
#
# NOTE: the values below contain mutable defaults (lists / dicts). Never hand
# them out directly -- always go through `_new_structured_output`, which deep
# copies them, so callers cannot corrupt these templates.
OUTPUT_FIELD_TEMPLATES = {
    "general_answer": {
        "answer": "",
        "confidence": "medium",
        "references": [],
    },
    "document_review_report": {
        "summary": "",
        "issues": [],
        "risk_level": "medium",
        "suggestions": [],
        "missing_items": [],
        "references": [],
    },
    "ticket_response": {
        "reply": "",
        "category": "general",
        "priority": "medium",
        "suggested_action": "",
        "need_human_review": False,
    },
    "quality_report": {
        "summary": "",
        "possible_causes": [],
        "evidence": [],
        "risk_level": "medium",
        "suggested_actions": [],
        "references": [],
    },
    "risk_audit_report": {
        "summary": "",
        "risk_points": [],
        "risk_level": "medium",
        "suggestions": [],
        "references": [],
    },
    "registration_overview_report": {
        "batch_id": "",
        "product_name": "",
        "file_count": 0,
        "total_page_count": 0,
        "chapter_summary": [],
        "documents": [],
        "warnings": [],
    },
    "registration_completeness_report": {
        "summary": "",
        "missing_items": [],
        "misplaced_items": [],
        "risk_level": "medium",
        "references": [],
    },
    "registration_field_extraction_report": {
        "summary": "",
        "field_items": [],
        "low_confidence_items": [],
        "references": [],
    },
    "registration_consistency_report": {
        "summary": "",
        "conflict_items": [],
        "mixed_document_risks": [],
        "risk_level": "medium",
        "references": [],
    },
    "registration_risk_report": {
        "summary": "",
        "risk_items": [],
        "highest_risk_level": "medium",
        "pass_status": "review_required",
        "manual_review_items": [],
        "owner_roles": [],
        "suggestions": [],
        "notify_reason": "task_completed",
    },
    "registration_word_export_report": {
        "summary": "",
        "export_status": "draft_only",
        "can_export_formally": False,
        "blocked_items": [],
        "download_url": "",
    },
    "feishu_notification_report": {
        "batch_id": "",
        "conversation_id": "",
        "notify_reason": "task_completed",
        "mentioned_users": [],
        "message_status": "pending",
        "web_detail_url": "",
        "receipt": {},
    },
}

# Field that receives the raw model text when JSON parsing fails. Output types
# not listed here fall back to "summary".
_FALLBACK_TEXT_FIELDS = {
    "general_answer": "answer",
    "ticket_response": "reply",
}


def _new_structured_output(normalized_output_type: str, parse_mode: str) -> dict:
    """Build a fresh result dict: metadata keys first, then the type's defaults.

    The template is deep-copied so that the list/dict defaults in the returned
    dict are never the same objects as those stored in
    ``OUTPUT_FIELD_TEMPLATES``.

    Raises:
        KeyError: if ``normalized_output_type`` has no entry in
            ``OUTPUT_FIELD_TEMPLATES``.
    """
    structured_output = {
        "output_type": normalized_output_type,
        "parse_mode": parse_mode,
    }
    structured_output.update(
        copy.deepcopy(OUTPUT_FIELD_TEMPLATES[normalized_output_type])
    )
    return structured_output


def build_response_schema_hint(output_type: str) -> dict:
    """Return the structured hint given to the LLM to encourage stable JSON.

    Returns:
        A dict with the normalized ``output_type`` and ``fields``, the list of
        field names declared for that type in ``OUTPUT_FIELD_TEMPLATES``.
    """
    normalized_output_type = normalize_output_type(output_type)
    return {
        "output_type": normalized_output_type,
        "fields": list(OUTPUT_FIELD_TEMPLATES[normalized_output_type]),
    }


def normalize_output_type(output_type: str) -> str:
    """Lightly normalize externally configured output types.

    Any value that is not in ``SUPPORTED_OUTPUT_TYPES`` is mapped to
    ``"general_answer"`` so that spelling differences do not scatter the
    parsing branches.
    """
    # NOTE(review): this assumes every supported type also has an entry in
    # OUTPUT_FIELD_TEMPLATES; callers index the templates with the result.
    return output_type if output_type in SUPPORTED_OUTPUT_TYPES else "general_answer"


def parse_structured_output(raw_content: str, output_type: str) -> tuple[dict, str]:
    """Parse the model output as JSON, falling back to a stable structure.

    Args:
        raw_content: Raw text produced by the model.
        output_type: Requested output type; normalized before use.

    Returns:
        A ``(structured_output, parse_mode)`` tuple:

        - ``structured_output``: the standard structure the page and the audit
          log can consume directly.
        - ``parse_mode``: ``"json"`` when the content parsed as a JSON object,
          ``"fallback"`` when the degraded path was taken.
    """
    normalized_output_type = normalize_output_type(output_type)
    parsed = _try_parse_json_object(raw_content)
    if parsed is None:
        fallback = build_fallback_structured_output(
            output_type=normalized_output_type,
            raw_content=raw_content,
        )
        return fallback, "fallback"

    structured_output = _new_structured_output(normalized_output_type, "json")
    structured_output.update(parsed)
    # "output_type" and "parse_mode" are reserved metadata. Re-assert them
    # after merging so keys echoed (or invented) by the model cannot make the
    # structure disagree with the parse mode returned alongside it.
    structured_output["output_type"] = normalized_output_type
    structured_output["parse_mode"] = "json"
    return structured_output, "json"


def build_fallback_structured_output(output_type: str, raw_content: str) -> dict:
    """Build a stable display structure when the model did not emit valid JSON.

    The raw text is stored in the main text field of the output type:
    ``answer`` for ``general_answer``, ``reply`` for ``ticket_response`` and
    ``summary`` for every other type (the key is added even when the type's
    template does not declare it).
    """
    normalized_output_type = normalize_output_type(output_type)
    structured_output = _new_structured_output(normalized_output_type, "fallback")
    text_field = _FALLBACK_TEXT_FIELDS.get(normalized_output_type, "summary")
    structured_output[text_field] = raw_content
    return structured_output


def extract_answer_from_structured_output(structured_output: dict, raw_content: str) -> str:
    """Extract the main answer for the page from a structured result.

    Gives every output type a single entry point: the first of ``answer``,
    ``reply`` and ``summary`` that holds a non-blank string is returned with
    surrounding whitespace stripped. If none qualifies, the stripped
    ``raw_content`` is returned instead.
    """
    for field_name in ("answer", "reply", "summary"):
        value = structured_output.get(field_name)
        if isinstance(value, str) and value.strip():
            return value.strip()
    return raw_content.strip()


def _try_parse_json_object(raw_content: str) -> dict | None:
    """Parse plain JSON, or JSON wrapped in a Markdown code fence.

    Returns:
        The parsed dict, or ``None`` when the content is empty, is not valid
        JSON, or parses to something other than a JSON object.
    """
    content = raw_content.strip()
    if not content:
        return None

    candidates = [content]
    if content.startswith("```"):
        # Drop the surrounding backticks, then an optional "json" language tag.
        stripped = content.strip("`").strip()
        if stripped.lower().startswith("json"):
            stripped = stripped[4:].strip()
        candidates.append(stripped)

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Only JSON objects are accepted; arrays and scalars are skipped.
        if isinstance(parsed, dict):
            return parsed
    return None