feat(regulatory-info-package): 实现材料包生成工作流
This commit is contained in:
115
review_agent/regulatory_info_package/services/field_merge.py
Normal file
115
review_agent/regulatory_info_package/services/field_merge.py
Normal file
@@ -0,0 +1,115 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from review_agent.regulatory_info_package.schemas import MergedField
|
||||
|
||||
|
||||
# Field keys that merge_fields always includes in its output, even when neither
# extractor produced them, mapped to the display label used as a fallback when
# the extraction results carry no label of their own.
# Labels, in order: product name, applicant name, package specification,
# intended use, storage condition.
REQUIRED_FIELDS = dict(
    product_name="产品名称",
    applicant_name="申请人名称",
    package_specification="包装规格",
    intended_use="预期用途",
    storage_condition="储存条件",
)
|
||||
|
||||
|
||||
def _coerce_confidence(raw: object) -> float:
    """Return *raw* as a float, falling back to 0.0 when it is unusable.

    Missing, empty and zero values map to 0.0. Values that ``float()``
    rejects (for example the string "high", a nested list, or an integer too
    large for a float) also map to 0.0 instead of raising, so one malformed
    confidence cannot abort the merge of every other field.
    """
    if not raw:
        return 0.0
    try:
        return float(raw)
    except (TypeError, ValueError, OverflowError):
        return 0.0


def merge_fields(
    rule_results: dict[str, dict],
    llm_results: dict[str, dict],
) -> tuple[dict[str, MergedField], dict[str, list[dict]]]:
    """Merge rule-based and LLM extraction results into one field per key.

    Every key found in ``REQUIRED_FIELDS``, ``rule_results`` or ``llm_results``
    is processed in sorted order. Each side's ``"value"`` is stringified and
    stripped; a falsy or blank value counts as absent. The outcome per key is:

    * both sides present and different -> the rule value wins, the field is
      marked ``source="rule_conflict"`` and flagged for review, and an entry
      is added to ``conflict_fields``;
    * rule value present (LLM absent or identical) -> ``source="rule"``;
    * only the LLM value present -> ``source="llm"``, flagged for review and
      listed in ``llm_only_fields``;
    * neither present -> placeholder value ``"/"`` with ``source="missing"``,
      flagged for review and listed in ``missing_fields``.

    The label is taken from the rule entry, then the LLM entry, then
    ``REQUIRED_FIELDS``, and finally falls back to the key itself.

    Args:
        rule_results: Mapping of field key to a dict that may carry
            ``"value"``, ``"label"``, ``"evidence"`` and ``"confidence"``.
        llm_results: Same shape as ``rule_results``, from the LLM extractor.

    Returns:
        A tuple ``(merged, summary)``. ``merged`` maps each key to its
        ``MergedField``. ``summary`` is a dict with the lists
        ``"missing_fields"``, ``"llm_only_fields"`` and ``"conflict_fields"``.
    """
    merged: dict[str, MergedField] = {}
    missing_fields: list[dict] = []
    llm_only_fields: list[dict] = []
    conflict_fields: list[dict] = []

    keys = set(REQUIRED_FIELDS) | set(rule_results) | set(llm_results)
    for key in sorted(keys):
        # A key mapped to None (or any falsy entry) is treated like an absent key.
        rule = rule_results.get(key) or {}
        llm = llm_results.get(key) or {}

        rule_value = str(rule.get("value") or "").strip()
        llm_value = str(llm.get("value") or "").strip()
        label = str(rule.get("label") or llm.get("label") or REQUIRED_FIELDS.get(key) or key)

        if rule_value:
            rule_evidence = str(rule.get("evidence") or "")
            rule_confidence = _coerce_confidence(rule.get("confidence"))
            if llm_value and rule_value != llm_value:
                # Disagreement: keep the rule value but surface both for review.
                field = MergedField(
                    key=key,
                    label=label,
                    value=rule_value,
                    source="rule_conflict",
                    evidence=rule_evidence,
                    confidence=rule_confidence,
                    highlight_reason="conflict",
                    needs_review=True,
                    rule_value=rule_value,
                    llm_value=llm_value,
                )
                conflict_fields.append(
                    {
                        "field_key": key,
                        "field_label": label,
                        "rule_value": rule_value,
                        "llm_value": llm_value,
                        "selected_value": rule_value,
                        "handling": "规则优先,写入值高亮并进入追溯清单",
                    }
                )
            else:
                # Rule value alone, or both extractors agree.
                field = MergedField(
                    key=key,
                    label=label,
                    value=rule_value,
                    source="rule",
                    evidence=rule_evidence,
                    confidence=rule_confidence,
                )
        elif llm_value:
            field = MergedField(
                key=key,
                label=label,
                value=llm_value,
                source="llm",
                evidence=str(llm.get("evidence") or ""),
                confidence=_coerce_confidence(llm.get("confidence")),
                highlight_reason="llm_only",
                needs_review=True,
                llm_value=llm_value,
            )
            llm_only_fields.append(_review_dict(field))
        else:
            # Nothing extracted: emit the "/" placeholder so the key still exists.
            field = MergedField(
                key=key,
                label=label,
                value="/",
                source="missing",
                evidence="",
                confidence=0.0,
                highlight_reason="missing",
                needs_review=True,
            )
            missing_fields.append(_review_dict(field))

        merged[key] = field

    return merged, {
        "missing_fields": missing_fields,
        "llm_only_fields": llm_only_fields,
        "conflict_fields": conflict_fields,
    }
|
||||
|
||||
|
||||
def save_merged_fields(path: str | Path, merged: dict[str, MergedField], summary: dict[str, list[dict]]) -> Path:
    """Write the merged fields and their review summary to *path* as JSON.

    Missing parent directories are created. The document holds a ``"fields"``
    object (each field's instance ``__dict__`` keyed by field key) followed by
    the entries of ``summary`` at the top level. The file is UTF-8 encoded,
    indented by two spaces, and non-ASCII characters are written unescaped.

    Args:
        path: Destination file path.
        merged: Mapping of field key to merged field object.
        summary: Extra top-level entries to store alongside ``"fields"``.

    Returns:
        The destination as a ``Path``.
    """
    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    serialised: dict = {}
    for name, item in merged.items():
        serialised[name] = item.__dict__

    document: dict = {"fields": serialised}
    document.update(summary)

    text = json.dumps(document, ensure_ascii=False, indent=2)
    destination.write_text(text, encoding="utf-8")
    return destination
|
||||
|
||||
|
||||
def _review_dict(field: MergedField) -> dict:
    """Build the review-list entry for *field*.

    The entry copies the field's key, label, value, highlight reason and
    review flag. ``"target_file"`` is always an empty string here.
    """
    key = field.key
    label = field.label
    final_value = field.value
    reason = field.highlight_reason
    flagged = field.needs_review
    return dict(
        target_file="",
        field_key=key,
        field_label=label,
        final_value=final_value,
        highlight_reason=reason,
        needs_review=flagged,
    )
|
||||
|
||||
Reference in New Issue
Block a user