fix(application-form-fill): 代理人字段暂用生产企业信息
This commit is contained in:
@@ -17,6 +17,11 @@ from review_agent.regulatory_review.services.text_extract import extract_text
|
||||
|
||||
FIELD_ALIASES = {
|
||||
"product_name": ["产品名称"],
|
||||
"applicant_name": ["注册人名称", "生产企业名称", "企业名称", "生产企业"],
|
||||
"applicant_address": ["注册人住所", "生产企业住所", "企业住所", "住所"],
|
||||
"manufacturer_address": ["生产地址", "生产企业地址", "生产场所"],
|
||||
"agent_name": ["代理人名称", "生产企业名称", "企业名称", "生产企业", "注册人名称"],
|
||||
"agent_address": ["代理人住所", "生产企业住所", "企业住所", "住所", "注册人住所"],
|
||||
"package_specification": ["包装规格", "规格"],
|
||||
"main_components": ["主要组成成分", "主要组成", "组成成分"],
|
||||
"intended_use": ["预期用途"],
|
||||
@@ -41,7 +46,7 @@ def collect_document_texts(summary_batch: FileSummaryBatch) -> dict[str, str]:
|
||||
def extract_by_rules(texts: dict[str, str], specs: list[TemplateSpec]) -> dict[str, Any]:
|
||||
fields: list[dict[str, Any]] = []
|
||||
field_defs = _field_defs(specs)
|
||||
labels = [field["label"] for field in field_defs if field.get("label")]
|
||||
labels = _all_field_labels(field_defs)
|
||||
for file_name, text in texts.items():
|
||||
source_role = detect_source_role(file_name, text)
|
||||
for field in field_defs:
|
||||
@@ -174,6 +179,15 @@ def _field_aliases(field: dict[str, str]) -> list[str]:
|
||||
return result
|
||||
|
||||
|
||||
def _all_field_labels(fields: list[dict[str, str]]) -> list[str]:
    """Return every alias of every field, de-duplicated, in first-seen order.

    Aliases are obtained from ``_field_aliases`` for each field in turn; an
    alias that was already produced by an earlier field is not repeated.
    """
    # dict keys keep insertion order, so this is an ordered de-duplication.
    ordered_unique = dict.fromkeys(
        alias for field_def in fields for alias in _field_aliases(field_def)
    )
    return list(ordered_unique)
|
||||
|
||||
|
||||
def _extract_label_value(text: str, label: str, labels: list[str]) -> tuple[str, str]:
    """Extract the value for ``label`` from ``text``.

    Delegates unchanged to ``_extract_colon_label_value`` and returns its
    two-element result as-is.
    """
    extracted = _extract_colon_label_value(text, label, labels)
    return extracted
|
||||
|
||||
|
||||
@@ -81,8 +81,30 @@ def merge_fields(regex_results: dict[str, Any], llm_results: dict[str, Any]) ->
|
||||
"handling": "说明书优先,模板内黄底红字高亮" if rank_source(merged_field.source_file, merged_field.source_file) == 1 else "按来源优先级采用最高优先级字段",
|
||||
}
|
||||
)
|
||||
_apply_agent_company_fallbacks(merged)
|
||||
return merged, conflicts
|
||||
|
||||
|
||||
def _distinct_values(candidates: list[dict[str, Any]]) -> set[str]:
    """Collect the set of normalized, non-empty candidate values.

    Candidates whose ``"value"`` entry is missing or falsy are ignored; the
    rest are stringified and passed through ``normalize_field_value``.
    """
    normalized: set[str] = set()
    for candidate in candidates:
        raw_value = candidate.get("value")
        if not raw_value:
            continue
        normalized.add(normalize_field_value(str(raw_value)))
    return normalized
|
||||
|
||||
|
||||
def _apply_agent_company_fallbacks(merged: dict[str, MergedField]) -> None:
    """Fill missing agent fields from the corresponding applicant fields.

    For each (agent, applicant) pair, when the agent key is absent from
    ``merged`` and the applicant key is present, a new ``MergedField`` is
    stored under the agent key. It carries the agent key and label, and
    copies value, source file, evidence, confidence and conflict data from
    the applicant field. ``merged`` is modified in place.
    """
    # (agent key, applicant key to copy from, label for the agent field)
    fallbacks = (
        ("agent_name", "applicant_name", "代理人名称"),
        ("agent_address", "applicant_address", "代理人住所"),
    )
    for agent_key, applicant_key, agent_label in fallbacks:
        if agent_key not in merged and applicant_key in merged:
            applicant = merged[applicant_key]
            merged[agent_key] = MergedField(
                key=agent_key,
                label=agent_label,
                value=applicant.value,
                source_file=applicant.source_file,
                evidence=applicant.evidence,
                confidence=applicant.confidence,
                has_conflict=applicant.has_conflict,
                conflict_values=applicant.conflict_values,
            )
|
||||
|
||||
@@ -36,6 +36,24 @@ templates:
|
||||
source_roles:
|
||||
- 申请表
|
||||
- 质量管理体系文件
|
||||
- key: agent_name
|
||||
label: 代理人名称
|
||||
target:
|
||||
type: table_row
|
||||
row_label: 代理人名称
|
||||
source_roles:
|
||||
- 说明书
|
||||
- 企业信息
|
||||
- 申请表
|
||||
- key: agent_address
|
||||
label: 代理人住所
|
||||
target:
|
||||
type: table_row
|
||||
row_label: 代理人住所
|
||||
source_roles:
|
||||
- 说明书
|
||||
- 企业信息
|
||||
- 申请表
|
||||
- key: product_name
|
||||
label: 产品名称
|
||||
target:
|
||||
|
||||
@@ -84,6 +84,25 @@ def test_rule_extracts_bracket_sections_from_instructions():
|
||||
assert "-20±5℃" in values["storage_condition_and_validity"]
|
||||
|
||||
|
||||
def test_rule_maps_agent_fields_to_manufacturer_company_for_now():
    """Agent name/address are taken from the manufacturer lines of the text."""
    instruction_lines = [
        "生产企业名称:卡尤迪生物科技宜兴有限公司",
        "生产企业住所:江苏省宜兴经济技术开发区杏里路10号",
        "生产地址:江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
    ]
    texts = {"目标产品说明书.docx": "\n".join(instruction_lines)}

    result = extract_by_rules(texts, _registration_specs())

    values = {}
    for extracted in result["fields"]:
        values[extracted["key"]] = extracted["value"]

    assert values["agent_name"] == "卡尤迪生物科技宜兴有限公司"
    assert values["agent_address"] == "江苏省宜兴经济技术开发区杏里路10号"
    assert values["manufacturer_address"] == "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室"
|
||||
|
||||
|
||||
def test_llm_extract_parses_structured_json(monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"review_agent.application_form_fill.services.field_extract.generate_completion",
|
||||
|
||||
@@ -77,3 +77,35 @@ def test_merge_fields_combines_consistent_values_without_conflict():
|
||||
assert merged["product_name"].value == "甲胎蛋白检测试剂盒"
|
||||
assert merged["product_name"].has_conflict is False
|
||||
assert conflicts == []
|
||||
|
||||
|
||||
def test_merge_fields_fills_agent_from_applicant_for_now():
    """Merging applicant-only input also yields agent name and address."""
    applicant_name_field = {
        "key": "applicant_name",
        "label": "注册人名称",
        "value": "卡尤迪生物科技宜兴有限公司",
        "source_file": "目标产品说明书.docx",
        "source_role": "说明书",
        "evidence": "生产企业名称:卡尤迪生物科技宜兴有限公司",
        "confidence": 0.75,
    }
    applicant_address_field = {
        "key": "applicant_address",
        "label": "注册人住所",
        "value": "江苏省宜兴经济技术开发区杏里路10号",
        "source_file": "目标产品说明书.docx",
        "source_role": "说明书",
        "evidence": "生产企业住所:江苏省宜兴经济技术开发区杏里路10号",
        "confidence": 0.75,
    }
    regex_results = {"fields": [applicant_name_field, applicant_address_field]}

    merged, conflicts = merge_fields(regex_results, {"fields": []})

    agent_name = merged["agent_name"]
    assert agent_name.value == "卡尤迪生物科技宜兴有限公司"
    assert agent_name.label == "代理人名称"
    assert merged["agent_address"].value == "江苏省宜兴经济技术开发区杏里路10号"
    assert conflicts == []
|
||||
|
||||
Reference in New Issue
Block a user