"""Merge rule-extracted and LLM-extracted field values and persist the result."""

from __future__ import annotations

import json
import math
from pathlib import Path

from review_agent.regulatory_info_package.schemas import MergedField

# Field keys that always get an entry in the merged output (a "missing"
# placeholder is written when neither source has a value), mapped to the
# label used when neither source supplies one.
REQUIRED_FIELDS = {
    "product_name": "产品名称",
    "applicant_name": "申请人名称",
    "package_specification": "包装规格",
    "intended_use": "预期用途",
    "storage_condition": "储存条件",
}


def _clean_text(value: object) -> str:
    """Return *value* as a stripped string; any falsy value becomes ``""``."""
    return str(value or "").strip()


def _safe_confidence(raw: object) -> float:
    """Convert *raw* to a finite float, falling back to ``0.0``.

    A non-numeric confidence (for example free text) must not abort the whole
    merge, and NaN/infinity would be serialized by ``json.dumps`` as tokens
    that are not valid strict JSON, so both cases collapse to ``0.0``.
    """
    try:
        score = float(raw or 0.0)
    except (TypeError, ValueError, OverflowError):
        return 0.0
    return score if math.isfinite(score) else 0.0


def merge_fields(
    rule_results: dict[str, dict],
    llm_results: dict[str, dict],
) -> tuple[dict[str, MergedField], dict[str, list[dict]]]:
    """Combine per-field rule results and LLM results into merged fields.

    Every key found in ``REQUIRED_FIELDS``, ``rule_results`` or
    ``llm_results`` is processed in sorted order and resolved as follows:

    * both sources have a value and they differ -> the rule value is kept,
      the field is marked ``rule_conflict`` and recorded as a conflict;
    * the rule source has a value -> the rule value is used as-is;
    * only the LLM source has a value -> the LLM value is used and the field
      is flagged for review;
    * neither has a value -> a ``"/"`` placeholder is written and the field
      is flagged as missing.

    Args:
        rule_results: Mapping of field key to a dict read via the keys
            ``value``, ``label``, ``evidence`` and ``confidence``.
        llm_results: Same shape as ``rule_results``, from the LLM source.

    Returns:
        A ``(merged, summary)`` tuple. ``merged`` maps each field key to its
        ``MergedField``; ``summary`` holds the lists ``missing_fields``,
        ``llm_only_fields`` and ``conflict_fields``.
    """
    merged: dict[str, MergedField] = {}
    missing_fields: list[dict] = []
    llm_only_fields: list[dict] = []
    conflict_fields: list[dict] = []

    keys = set(REQUIRED_FIELDS) | set(rule_results) | set(llm_results)
    for key in sorted(keys):
        # A key may be absent or explicitly mapped to None in either source.
        rule = rule_results.get(key) or {}
        llm = llm_results.get(key) or {}
        rule_value = _clean_text(rule.get("value"))
        llm_value = _clean_text(llm.get("value"))
        label = str(
            rule.get("label")
            or llm.get("label")
            or REQUIRED_FIELDS.get(key)
            or key
        )

        if rule_value and llm_value and rule_value != llm_value:
            # Disagreement: the rule value wins, but both are kept for review.
            field = MergedField(
                key=key,
                label=label,
                value=rule_value,
                source="rule_conflict",
                evidence=str(rule.get("evidence") or ""),
                confidence=_safe_confidence(rule.get("confidence")),
                highlight_reason="conflict",
                needs_review=True,
                rule_value=rule_value,
                llm_value=llm_value,
            )
            conflict_fields.append(
                {
                    "field_key": key,
                    "field_label": label,
                    "rule_value": rule_value,
                    "llm_value": llm_value,
                    "selected_value": rule_value,
                    "handling": "规则优先,写入值高亮并进入追溯清单",
                }
            )
        elif rule_value:
            # Rule value present; the LLM value is either empty or identical.
            field = MergedField(
                key=key,
                label=label,
                value=rule_value,
                source="rule",
                evidence=str(rule.get("evidence") or ""),
                confidence=_safe_confidence(rule.get("confidence")),
            )
        elif llm_value:
            field = MergedField(
                key=key,
                label=label,
                value=llm_value,
                source="llm",
                evidence=str(llm.get("evidence") or ""),
                confidence=_safe_confidence(llm.get("confidence")),
                highlight_reason="llm_only",
                needs_review=True,
                llm_value=llm_value,
            )
            llm_only_fields.append(_review_dict(field))
        else:
            field = MergedField(
                key=key,
                label=label,
                value="/",
                source="missing",
                evidence="",
                confidence=0.0,
                highlight_reason="missing",
                needs_review=True,
            )
            missing_fields.append(_review_dict(field))

        merged[key] = field

    return merged, {
        "missing_fields": missing_fields,
        "llm_only_fields": llm_only_fields,
        "conflict_fields": conflict_fields,
    }


def save_merged_fields(
    path: str | Path,
    merged: dict[str, MergedField],
    summary: dict[str, list[dict]],
) -> Path:
    """Write the merged fields and summary lists to *path* as UTF-8 JSON.

    Parent directories are created when needed. The payload has a ``fields``
    entry (each field's instance attributes keyed by field key) followed by
    the entries of ``summary``.

    Args:
        path: Destination file.
        merged: Field key to ``MergedField`` mapping to serialize.
        summary: Extra top-level entries; a ``fields`` key here would replace
            the generated ``fields`` entry because it is unpacked last.

    Returns:
        The destination as a ``Path``.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        "fields": {key: vars(field) for key, field in merged.items()},
        **summary,
    }
    target.write_text(
        json.dumps(payload, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    return target


def _review_dict(field: MergedField) -> dict:
    """Build the review-list entry for *field*; ``target_file`` starts empty."""
    return {
        "target_file": "",
        "field_key": field.key,
        "field_label": field.label,
        "final_value": field.value,
        "highlight_reason": field.highlight_reason,
        "needs_review": field.needs_review,
    }