feat(agent-core): 补齐提示词编排与结构化解析

2026-05-30 00:20:40 +08:00
parent ba3f5fc584
commit df45a89eb1
5 changed files with 421 additions and 29 deletions
--- a/agent_core/structured_output.py
+++ b/agent_core/structured_output.py
@@ -1,8 +1,142 @@
-def build_mock_structured_output(output_type: str, user_input: str, references: list) -> dict:
+import copy
+import json
+
+from .schemas.outputs import SUPPORTED_OUTPUT_TYPES
+
+
+# Per output type, the structured fields that the page and the audit log
+# actually consume, together with the default value of each field.
+# Deliberately not a full schema framework: the priority is keeping the
+# fields stable, readable and easy to explain.
+# Key order matters: build_response_schema_hint exposes the keys of each
+# template, in this order, as the `fields` list of the hint it returns.
+OUTPUT_FIELD_TEMPLATES = {
+    "general_answer": {
+        "answer": "",
+        "confidence": "medium",
+        "references": [],
+    },
+    "document_review_report": {
+        "summary": "",
+        "issues": [],
+        "risk_level": "medium",
+        "suggestions": [],
+        "missing_items": [],
+        "references": [],
+    },
+    "ticket_response": {
+        "reply": "",
+        "category": "general",
+        "priority": "medium",
+        "suggested_action": "",
+        "need_human_review": False,
+    },
+    "quality_report": {
+        "summary": "",
+        "possible_causes": [],
+        "evidence": [],
+        "risk_level": "medium",
+        "suggested_actions": [],
+        "references": [],
+    },
+    "risk_audit_report": {
+        "summary": "",
+        "risk_points": [],
+        "risk_level": "medium",
+        "suggestions": [],
+        "references": [],
+    },
+}
+
+
+def build_response_schema_hint(output_type: str) -> dict:
+    """Build the structure hint handed to the LLM to encourage stable JSON.
+
+    Args:
+        output_type: Requested output type; it is passed through
+            ``normalize_output_type`` before the template lookup.
+
+    Returns:
+        A dict with the normalized ``output_type`` and ``fields``, the list
+        of field names declared for that type in ``OUTPUT_FIELD_TEMPLATES``.
+    """
+    normalized_output_type = normalize_output_type(output_type)
    return {
-        "output_type": output_type,
-        "summary": f"模拟结构化输出：{user_input}",
-        "references_count": len(references),
-        "risk_level": "low",
-        "suggested_actions": ["补充真实 LLM Provider 后替换模拟结果"],
+        "output_type": normalized_output_type,
+        "fields": list(OUTPUT_FIELD_TEMPLATES[normalized_output_type].keys()),
    }
+
+
+def normalize_output_type(output_type: str) -> str:
+    """Collapse an externally configured output type onto a supported one.
+
+    Returns ``output_type`` unchanged when it is a member of
+    ``SUPPORTED_OUTPUT_TYPES``; every other value (for example a misspelled
+    type) becomes ``"general_answer"`` so that later parsing only ever sees
+    a known type.
+    """
+    is_supported = output_type in SUPPORTED_OUTPUT_TYPES
+    return output_type if is_supported else "general_answer"
+
+
+def parse_structured_output(raw_content: str, output_type: str) -> tuple[dict, str]:
+    """Parse model output into the standard structure for ``output_type``.
+
+    JSON is tried first. When no JSON object can be extracted from
+    ``raw_content`` the result is built by
+    ``build_fallback_structured_output`` instead.
+
+    Args:
+        raw_content: Raw text returned by the model.
+        output_type: Requested output type; normalized with
+            ``normalize_output_type`` before use.
+
+    Returns:
+        A ``(structured_output, parse_mode)`` tuple. ``structured_output`` is
+        the dict consumed by the page and the audit log; ``parse_mode`` is
+        ``"json"`` when a JSON object was parsed and ``"fallback"`` otherwise.
+    """
+    normalized_output_type = normalize_output_type(output_type)
+    parsed = _try_parse_json_object(raw_content)
+    if parsed is None:
+        fallback_output = build_fallback_structured_output(
+            output_type=normalized_output_type,
+            raw_content=raw_content,
+        )
+        return fallback_output, "fallback"
+
+    metadata = {
+        "output_type": normalized_output_type,
+        "parse_mode": "json",
+    }
+    structured_output = dict(metadata)
+    # Deep-copy the defaults: the templates hold list values, and a shallow
+    # update() would hand the module-level lists to the caller, so appending
+    # to e.g. "references" would leak into every later result.
+    structured_output.update(
+        copy.deepcopy(OUTPUT_FIELD_TEMPLATES[normalized_output_type])
+    )
+    structured_output.update(parsed)
+    # The model may emit its own "output_type" / "parse_mode" keys; re-apply
+    # the trusted metadata so the dict always agrees with the returned mode.
+    structured_output.update(metadata)
+    return structured_output, "json"
+
+
+def build_fallback_structured_output(output_type: str, raw_content: str) -> dict:
+    """Build a stable display structure when the model produced no valid JSON.
+
+    Args:
+        output_type: Requested output type; normalized with
+            ``normalize_output_type`` before use.
+        raw_content: Raw model text, stored verbatim in the main text field.
+
+    Returns:
+        A dict holding ``output_type``, ``parse_mode`` set to ``"fallback"``,
+        the default fields of the type, and ``raw_content`` placed in
+        ``answer`` (general_answer), ``reply`` (ticket_response) or
+        ``summary`` (every other type).
+    """
+    normalized_output_type = normalize_output_type(output_type)
+    structured_output = {
+        "output_type": normalized_output_type,
+        "parse_mode": "fallback",
+    }
+    # Deep-copy the defaults so the returned dict never shares the
+    # module-level list objects stored in the templates.
+    structured_output.update(
+        copy.deepcopy(OUTPUT_FIELD_TEMPLATES[normalized_output_type])
+    )
+
+    # Only two types use a main text field other than "summary".
+    primary_field_by_type = {
+        "general_answer": "answer",
+        "ticket_response": "reply",
+    }
+    primary_field = primary_field_by_type.get(normalized_output_type, "summary")
+    structured_output[primary_field] = raw_content
+    return structured_output
+
+
+def extract_answer_from_structured_output(structured_output: dict, raw_content: str) -> str:
+    """Pick the main answer text shown on the page, whatever the output type.
+
+    The fields ``answer``, ``reply`` and ``summary`` are checked in that
+    order; the first one holding a non-blank string wins and is returned
+    stripped. When none qualifies, the stripped ``raw_content`` is returned.
+    """
+    field_values = (
+        structured_output.get(key) for key in ("answer", "reply", "summary")
+    )
+    stripped_texts = (text.strip() for text in field_values if isinstance(text, str))
+    first_non_blank = next((text for text in stripped_texts if text), None)
+    # raw_content is only touched when no structured field qualified.
+    return first_non_blank if first_non_blank is not None else raw_content.strip()
+
+
+def _try_parse_json_object(raw_content: str) -> dict | None:
+    """Extract a JSON object from model output, or return ``None``.
+
+    Candidates are tried in this order:
+
+    1. the whole (stripped) text;
+    2. the body of the first Markdown code fence, wherever it appears, with
+       an optional ``json`` language tag removed;
+    3. the span from the first ``{`` to the last ``}``, which covers an
+       object embedded in surrounding prose.
+
+    The first candidate that is valid JSON decides the result: it is
+    returned when it is an object, otherwise ``None`` is returned.
+
+    Args:
+        raw_content: Raw text returned by the model.
+
+    Returns:
+        The parsed dict, or ``None`` when ``raw_content`` is not a string,
+        is blank, or contains no parseable JSON object.
+    """
+    if not isinstance(raw_content, str):
+        return None
+    content = raw_content.strip()
+    if not content:
+        return None
+
+    candidates = [content]
+
+    fence_open = content.find("```")
+    if fence_open != -1:
+        body_start = fence_open + 3
+        fence_close = content.find("```", body_start)
+        # An unterminated fence still yields everything after the opener.
+        body_end = len(content) if fence_close == -1 else fence_close
+        # Stray backticks (e.g. a four-backtick fence) are stripped as well.
+        fenced = content[body_start:body_end].strip("`").strip()
+        if fenced.lower().startswith("json"):
+            fenced = fenced[4:].strip()
+        candidates.append(fenced)
+
+    brace_open = content.find("{")
+    brace_close = content.rfind("}")
+    if brace_open != -1 and brace_close > brace_open:
+        candidates.append(content[brace_open:brace_close + 1])
+
+    for candidate in candidates:
+        try:
+            parsed = json.loads(candidate)
+        except (json.JSONDecodeError, RecursionError):
+            # RecursionError: pathologically nested input from the model.
+            continue
+        # Valid JSON that is not an object (list, number, ...) is rejected
+        # outright rather than mined for an inner object.
+        return parsed if isinstance(parsed, dict) else None
+    return None