116 lines
3.9 KiB
Python
116 lines
3.9 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
from review_agent.regulatory_info_package.schemas import MergedField
|
|
|
|
|
|
# Field keys that are always included in the merge, mapped to the display
# label used when neither the rule result nor the LLM result supplies one.
REQUIRED_FIELDS: dict[str, str] = {
    "product_name": "产品名称",  # product name
    "applicant_name": "申请人名称",  # applicant name
    "package_specification": "包装规格",  # package specification
    "intended_use": "预期用途",  # intended use
    "storage_condition": "储存条件",  # storage condition
}
|
|
|
|
|
|
def merge_fields(rule_results: dict[str, dict], llm_results: dict[str, dict]) -> tuple[dict[str, MergedField], dict[str, list[dict]]]:
    """Combine rule results and LLM results into one ``MergedField`` per key.

    Every key found in ``REQUIRED_FIELDS``, ``rule_results`` or ``llm_results``
    is visited in sorted order. Each entry is read as a mapping with optional
    ``value``, ``label``, ``evidence`` and ``confidence`` items. A value counts
    as present only when it is truthy and still non-empty after
    ``str(...).strip()``.

    Resolution per key:

    * neither value present -> placeholder value ``"/"``, source ``"missing"``,
      flagged for review and listed under ``missing_fields``;
    * only the LLM value present -> source ``"llm"``, flagged for review and
      listed under ``llm_only_fields``;
    * both present but different -> the rule value is kept, source
      ``"rule_conflict"``, flagged for review and listed under
      ``conflict_fields``;
    * otherwise (rule value present, LLM value absent or identical) -> source
      ``"rule"`` with no review flag passed.

    Args:
        rule_results: Per-key entries whose values take precedence.
        llm_results: Per-key entries used when no rule value is present.

    Returns:
        A ``(merged, summary)`` pair. ``merged`` maps each key to its
        ``MergedField``; ``summary`` holds the ``missing_fields``,
        ``llm_only_fields`` and ``conflict_fields`` lists.
    """
    merged: dict[str, MergedField] = {}
    summary: dict[str, list[dict]] = {
        "missing_fields": [],
        "llm_only_fields": [],
        "conflict_fields": [],
    }

    for key in sorted({*REQUIRED_FIELDS, *rule_results, *llm_results}):
        # A key may be absent or mapped to None on either side.
        rule_entry = rule_results.get(key) or {}
        llm_entry = llm_results.get(key) or {}

        rule_text = str(rule_entry.get("value") or "").strip()
        llm_text = str(llm_entry.get("value") or "").strip()

        # Label preference: rule entry, LLM entry, known label, then raw key.
        label = str(
            rule_entry.get("label")
            or llm_entry.get("label")
            or REQUIRED_FIELDS.get(key)
            or key
        )

        if not (rule_text or llm_text):
            # Nothing usable from either side: emit a placeholder.
            field = MergedField(
                key=key,
                label=label,
                value="/",
                source="missing",
                evidence="",
                confidence=0.0,
                highlight_reason="missing",
                needs_review=True,
            )
            summary["missing_fields"].append(_review_dict(field))
        elif not rule_text:
            # Only the LLM produced a value.
            field = MergedField(
                key=key,
                label=label,
                value=llm_text,
                source="llm",
                evidence=str(llm_entry.get("evidence") or ""),
                confidence=float(llm_entry.get("confidence") or 0.0),
                highlight_reason="llm_only",
                needs_review=True,
                llm_value=llm_text,
            )
            summary["llm_only_fields"].append(_review_dict(field))
        elif llm_text and llm_text != rule_text:
            # Both sides disagree: keep the rule value, record both candidates.
            field = MergedField(
                key=key,
                label=label,
                value=rule_text,
                source="rule_conflict",
                evidence=str(rule_entry.get("evidence") or ""),
                confidence=float(rule_entry.get("confidence") or 0.0),
                highlight_reason="conflict",
                needs_review=True,
                rule_value=rule_text,
                llm_value=llm_text,
            )
            summary["conflict_fields"].append(
                {
                    "field_key": key,
                    "field_label": label,
                    "rule_value": rule_text,
                    "llm_value": llm_text,
                    "selected_value": rule_text,
                    # Handling note: rule wins; the written value is
                    # highlighted and added to the traceability list.
                    "handling": "规则优先,写入值高亮并进入追溯清单",
                }
            )
        else:
            # Rule value stands alone or matches the LLM value.
            field = MergedField(
                key=key,
                label=label,
                value=rule_text,
                source="rule",
                evidence=str(rule_entry.get("evidence") or ""),
                confidence=float(rule_entry.get("confidence") or 0.0),
            )

        merged[key] = field

    return merged, summary
|
|
|
|
|
|
def save_merged_fields(path: str | Path, merged: dict[str, MergedField], summary: dict[str, list[dict]]) -> Path:
    """Write the merged fields and their review summary to a JSON file.

    Missing parent directories are created. The JSON object holds a
    ``"fields"`` entry (each field's ``__dict__`` keyed by field key) followed
    by the entries of ``summary``. Output is UTF-8, indented by two spaces,
    with non-ASCII characters written unescaped.

    Args:
        path: Destination file path.
        merged: Field objects keyed by field key.
        summary: Extra top-level entries merged into the payload.

    Returns:
        The destination as a ``Path``.
    """
    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    serialized_fields = {name: item.__dict__ for name, item in merged.items()}
    # Summary entries are spread last, so they sit beside "fields" at top level.
    payload = {"fields": serialized_fields, **summary}

    document = json.dumps(payload, ensure_ascii=False, indent=2)
    destination.write_text(document, encoding="utf-8")
    return destination
|
|
|
|
|
|
def _review_dict(field: MergedField) -> dict:
    """Build the review-list entry for ``field``.

    The entry copies the field's key, label, value, highlight reason and
    review flag. ``target_file`` is always written as an empty string here.
    """
    entry = {
        "target_file": "",
        "field_key": field.key,
        "field_label": field.label,
        "final_value": field.value,
        "highlight_reason": field.highlight_reason,
        "needs_review": field.needs_review,
    }
    return entry
|
|
|