198 lines
6.1 KiB
Python
198 lines
6.1 KiB
Python
import copy
import json

from .schemas.outputs import SUPPORTED_OUTPUT_TYPES
|
|
|
|
|
|
# Declares, per output type, the structured fields that the page and the
# audit log actually consume.
# Deliberately not a heavyweight schema framework: the priority is fields
# that stay stable, readable and easy to explain.
#
# Each inner mapping is "field name -> default value". Insertion order is
# significant because the field list sent to the model is derived from it.
# The list/dict defaults are module-level objects: copy before mutating.
OUTPUT_FIELD_TEMPLATES = {
    # --- general-purpose outputs ---
    "general_answer": {
        "answer": "",
        "confidence": "medium",
        "references": [],
    },
    "document_review_report": {
        "summary": "",
        "issues": [],
        "risk_level": "medium",
        "suggestions": [],
        "missing_items": [],
        "references": [],
    },
    "ticket_response": {
        "reply": "",
        "category": "general",
        "priority": "medium",
        "suggested_action": "",
        "need_human_review": False,
    },
    "quality_report": {
        "summary": "",
        "possible_causes": [],
        "evidence": [],
        "risk_level": "medium",
        "suggested_actions": [],
        "references": [],
    },
    "risk_audit_report": {
        "summary": "",
        "risk_points": [],
        "risk_level": "medium",
        "suggestions": [],
        "references": [],
    },
    # --- registration_* report family ---
    "registration_overview_report": {
        "batch_id": "",
        "product_name": "",
        "file_count": 0,
        "total_page_count": 0,
        "chapter_summary": [],
        "documents": [],
        "warnings": [],
    },
    "registration_completeness_report": {
        "summary": "",
        "missing_items": [],
        "misplaced_items": [],
        "risk_level": "medium",
        "references": [],
    },
    "registration_field_extraction_report": {
        "summary": "",
        "field_items": [],
        "low_confidence_items": [],
        "references": [],
    },
    "registration_consistency_report": {
        "summary": "",
        "conflict_items": [],
        "mixed_document_risks": [],
        "risk_level": "medium",
        "references": [],
    },
    "registration_risk_report": {
        "summary": "",
        "risk_items": [],
        "highest_risk_level": "medium",
        "pass_status": "review_required",
        "manual_review_items": [],
        "owner_roles": [],
        "suggestions": [],
        "notify_reason": "task_completed",
    },
    "registration_word_export_report": {
        "summary": "",
        "export_status": "draft_only",
        "can_export_formally": False,
        "blocked_items": [],
        "download_url": "",
    },
    # --- notification output ---
    "feishu_notification_report": {
        "batch_id": "",
        "conversation_id": "",
        "notify_reason": "task_completed",
        "mentioned_users": [],
        "message_status": "pending",
        "web_detail_url": "",
        "receipt": {},
    },
}
|
|
|
|
|
|
def build_response_schema_hint(output_type: str) -> dict:
    """Build the structured hint handed to the LLM so it emits stable JSON.

    Args:
        output_type: Requested output type; it is passed through
            ``normalize_output_type`` before the template lookup.

    Returns:
        A dict with ``output_type`` (the normalized type) and ``fields``
        (the template's field names, in declaration order).
    """
    resolved_type = normalize_output_type(output_type)
    # Iterating a dict yields its keys in insertion order.
    field_names = list(OUTPUT_FIELD_TEMPLATES[resolved_type])
    return {"output_type": resolved_type, "fields": field_names}
|
|
|
|
|
|
def normalize_output_type(output_type: str) -> str:
    """Lightly normalize an externally configured output type.

    Any value that is not a member of ``SUPPORTED_OUTPUT_TYPES`` collapses to
    ``"general_answer"``, so spelling variations cannot send parsing down an
    unexpected branch.

    Args:
        output_type: The output type as supplied by configuration.

    Returns:
        ``output_type`` unchanged when it is supported, otherwise
        ``"general_answer"``.
    """
    return output_type if output_type in SUPPORTED_OUTPUT_TYPES else "general_answer"
|
|
|
|
|
|
def parse_structured_output(raw_content: str, output_type: str) -> tuple[dict, str]:
    """Parse the model output as JSON first, degrading to a fallback structure.

    Args:
        raw_content: Raw text returned by the model.
        output_type: Requested output type; normalized before use.

    Returns:
        A ``(structured_output, parse_mode)`` tuple:

        - ``structured_output``: the standard structure that the page and the
          audit log can consume directly.
        - ``parse_mode``: ``"json"`` when the content was parsed successfully,
          ``"fallback"`` when the degraded path was taken.
    """
    normalized_output_type = normalize_output_type(output_type)
    parsed = _try_parse_json_object(raw_content)
    if parsed is None:
        fallback_output = build_fallback_structured_output(
            output_type=normalized_output_type,
            raw_content=raw_content,
        )
        return fallback_output, "fallback"

    metadata = {
        "output_type": normalized_output_type,
        "parse_mode": "json",
    }
    # Start from the metadata so these keys keep their leading position.
    structured_output = dict(metadata)
    # Deep-copy the defaults: the template's lists/dicts are module-level
    # objects, and sharing them would let one caller's mutation leak into
    # every later result.
    structured_output.update(copy.deepcopy(OUTPUT_FIELD_TEMPLATES[normalized_output_type]))
    structured_output.update(parsed)
    # Re-assert the reserved keys last so model-supplied "output_type" or
    # "parse_mode" values cannot contradict what this function returns.
    structured_output.update(metadata)
    return structured_output, "json"
|
|
|
|
|
|
def build_fallback_structured_output(output_type: str, raw_content: str) -> dict:
    """Build a stable display structure when the model did not emit valid JSON.

    The raw model text is stored in the output type's primary text field:
    ``answer`` for ``general_answer``, ``reply`` for ``ticket_response`` and
    ``summary`` for every other type (the key is added even when the type's
    template does not declare it).

    Args:
        output_type: Requested output type; normalized before use.
        raw_content: Raw text returned by the model.

    Returns:
        A dict holding ``output_type``, ``parse_mode`` (``"fallback"``), the
        template defaults for the type, and the raw text in the primary field.
    """
    normalized_output_type = normalize_output_type(output_type)
    structured_output = {
        "output_type": normalized_output_type,
        "parse_mode": "fallback",
    }
    # Deep-copy the defaults: the template's lists/dicts are module-level
    # objects, and sharing them would let one caller's mutation leak into
    # every later result.
    structured_output.update(copy.deepcopy(OUTPUT_FIELD_TEMPLATES[normalized_output_type]))

    # Only two types use a primary field other than "summary".
    primary_text_fields = {
        "general_answer": "answer",
        "ticket_response": "reply",
    }
    primary_field = primary_text_fields.get(normalized_output_type, "summary")
    structured_output[primary_field] = raw_content
    return structured_output
|
|
|
|
|
|
def extract_answer_from_structured_output(structured_output: dict, raw_content: str) -> str:
    """Pick the main answer text out of a structured result.

    Gives every output type a single entry point: the fields ``answer``,
    ``reply`` and ``summary`` are checked in that order.

    Args:
        structured_output: Structured result to read from.
        raw_content: Raw model text, used when no field qualifies.

    Returns:
        The first field value that is a non-blank string, stripped of
        surrounding whitespace; otherwise ``raw_content`` stripped.
    """
    candidate_values = (
        structured_output.get(key) for key in ("answer", "reply", "summary")
    )
    for text in candidate_values:
        if not isinstance(text, str):
            continue
        trimmed = text.strip()
        if trimmed:
            return trimmed
    return raw_content.strip()
|
|
|
|
|
|
def _try_parse_json_object(raw_content: str) -> dict | None:
|
|
"""支持纯 JSON 或被 Markdown 代码块包裹的 JSON。"""
|
|
content = raw_content.strip()
|
|
if not content:
|
|
return None
|
|
candidates = [content]
|
|
if content.startswith("```"):
|
|
stripped = content.strip("`").strip()
|
|
if stripped.lower().startswith("json"):
|
|
stripped = stripped[4:].strip()
|
|
candidates.append(stripped)
|
|
|
|
for candidate in candidates:
|
|
try:
|
|
parsed = json.loads(candidate)
|
|
except json.JSONDecodeError:
|
|
continue
|
|
if isinstance(parsed, dict):
|
|
return parsed
|
|
return None
|