Files
DEMO-AGENT/review_agent/regulatory_info_package/services/field_merge.py

116 lines
3.9 KiB
Python

from __future__ import annotations
import json
from pathlib import Path
from review_agent.regulatory_info_package.schemas import MergedField
# Field keys that merge_fields() always emits, even when neither the rule
# results nor the LLM results mention them.  Each key maps to the Chinese
# display label used as a fallback when no result supplies its own label.
REQUIRED_FIELDS: dict[str, str] = {
    "product_name": "产品名称",  # product name
    "applicant_name": "申请人名称",  # applicant name
    "package_specification": "包装规格",  # package specification
    "intended_use": "预期用途",  # intended use
    "storage_condition": "储存条件",  # storage condition
}
def merge_fields(
    rule_results: dict[str, dict],
    llm_results: dict[str, dict],
) -> tuple[dict[str, MergedField], dict[str, list[dict]]]:
    """Merge rule-based and LLM-based extraction results field by field.

    Every key found in ``REQUIRED_FIELDS``, *rule_results* or *llm_results*
    is processed in sorted order.  Values are converted to ``str`` and
    stripped; a falsy or blank value counts as absent.  For each field:

    * both values present and different -> the rule value is kept, the field
      gets source ``rule_conflict``, is flagged for review and is recorded in
      ``conflict_fields``;
    * rule value present (LLM value absent or identical) -> source ``rule``;
    * only the LLM value present -> source ``llm``, flagged for review and
      recorded in ``llm_only_fields``;
    * neither present -> placeholder value ``"/"``, source ``missing``,
      flagged for review and recorded in ``missing_fields``.

    The label is taken from the rule result, then the LLM result, then
    ``REQUIRED_FIELDS``, and finally falls back to the key itself.

    A confidence that cannot be converted to ``float`` is treated as 0.0
    instead of raising, so one malformed entry does not abort the whole merge.

    Args:
        rule_results: Per-key dicts read via ``value``, ``label``,
            ``evidence`` and ``confidence``.
        llm_results: Per-key dicts with the same entries.

    Returns:
        ``(merged, summary)`` where *merged* maps each field key to its
        ``MergedField`` and *summary* holds the ``missing_fields``,
        ``llm_only_fields`` and ``conflict_fields`` lists.
    """
    merged: dict[str, MergedField] = {}
    missing_fields: list[dict] = []
    llm_only_fields: list[dict] = []
    conflict_fields: list[dict] = []

    keys = set(REQUIRED_FIELDS) | set(rule_results) | set(llm_results)
    # Sorted iteration keeps the output order independent of input dict order.
    for key in sorted(keys):
        rule = rule_results.get(key) or {}
        llm = llm_results.get(key) or {}
        rule_value = str(rule.get("value") or "").strip()
        llm_value = str(llm.get("value") or "").strip()
        label = str(rule.get("label") or llm.get("label") or REQUIRED_FIELDS.get(key) or key)

        if rule_value and llm_value and rule_value != llm_value:
            # Disagreement: the rule value wins, but both values are kept on
            # the field so the conflict can be traced afterwards.
            field = MergedField(
                key=key,
                label=label,
                value=rule_value,
                source="rule_conflict",
                evidence=str(rule.get("evidence") or ""),
                confidence=_parse_confidence(rule.get("confidence")),
                highlight_reason="conflict",
                needs_review=True,
                rule_value=rule_value,
                llm_value=llm_value,
            )
            conflict_fields.append(
                {
                    "field_key": key,
                    "field_label": label,
                    "rule_value": rule_value,
                    "llm_value": llm_value,
                    "selected_value": rule_value,
                    "handling": "规则优先,写入值高亮并进入追溯清单",
                }
            )
        elif rule_value:
            # Rule value alone, or both sources agree.
            field = MergedField(
                key=key,
                label=label,
                value=rule_value,
                source="rule",
                evidence=str(rule.get("evidence") or ""),
                confidence=_parse_confidence(rule.get("confidence")),
            )
        elif llm_value:
            # Only the LLM produced a value: accept it but flag it for review.
            field = MergedField(
                key=key,
                label=label,
                value=llm_value,
                source="llm",
                evidence=str(llm.get("evidence") or ""),
                confidence=_parse_confidence(llm.get("confidence")),
                highlight_reason="llm_only",
                needs_review=True,
                llm_value=llm_value,
            )
            llm_only_fields.append(_review_dict(field))
        else:
            # Nothing extracted: emit the "/" placeholder and flag it.
            field = MergedField(
                key=key,
                label=label,
                value="/",
                source="missing",
                evidence="",
                confidence=0.0,
                highlight_reason="missing",
                needs_review=True,
            )
            missing_fields.append(_review_dict(field))
        merged[key] = field

    return merged, {
        "missing_fields": missing_fields,
        "llm_only_fields": llm_only_fields,
        "conflict_fields": conflict_fields,
    }


def _parse_confidence(raw: object) -> float:
    """Convert a raw confidence value to ``float``, defaulting to 0.0.

    Falsy input (``None``, ``""``, ``0``) yields 0.0.  Input that ``float()``
    rejects, such as the string ``"high"`` or a list, also yields 0.0 instead
    of propagating the exception.
    """
    try:
        return float(raw or 0.0)
    except (TypeError, ValueError, OverflowError):
        return 0.0
def save_merged_fields(path: str | Path, merged: dict[str, MergedField], summary: dict[str, list[dict]]) -> Path:
    """Write the merged fields and their review summary to *path* as JSON.

    Missing parent directories are created first.  The JSON document has a
    ``"fields"`` object holding each field's instance ``__dict__`` under its
    key, followed by every entry of *summary* at the top level.  The text is
    written as UTF-8 with two-space indentation and non-ASCII characters left
    unescaped.

    Returns:
        The destination as a ``Path``.
    """
    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    serialized_fields = {}
    for name, merged_field in merged.items():
        serialized_fields[name] = merged_field.__dict__

    document = {"fields": serialized_fields, **summary}
    text = json.dumps(document, ensure_ascii=False, indent=2)
    destination.write_text(text, encoding="utf-8")
    return destination
def _review_dict(field: MergedField) -> dict:
    """Build the review-list entry describing *field*.

    The entry starts with ``target_file`` set to the empty string, then
    copies the field's key, label, value, highlight reason and review flag
    under the names ``field_key``, ``field_label``, ``final_value``,
    ``highlight_reason`` and ``needs_review``.
    """
    # (output key, attribute on the field), in output order.
    copied_attributes = (
        ("field_key", "key"),
        ("field_label", "label"),
        ("final_value", "value"),
        ("highlight_reason", "highlight_reason"),
        ("needs_review", "needs_review"),
    )
    entry = {"target_file": ""}
    for output_key, attribute in copied_attributes:
        entry[output_key] = getattr(field, attribute)
    return entry