diff --git a/review_agent/application_form_fill/services/field_extract.py b/review_agent/application_form_fill/services/field_extract.py
index 6f40833..82650a0 100644
--- a/review_agent/application_form_fill/services/field_extract.py
+++ b/review_agent/application_form_fill/services/field_extract.py
@@ -17,6 +17,11 @@ from review_agent.regulatory_review.services.text_extract import extract_text
 
 FIELD_ALIASES = {
     "product_name": ["产品名称"],
+    "applicant_name": ["注册人名称", "生产企业名称", "企业名称", "生产企业"],
+    "applicant_address": ["注册人住所", "生产企业住所", "企业住所", "住所"],
+    "manufacturer_address": ["生产地址", "生产企业地址", "生产场所"],
+    "agent_name": ["代理人名称", "生产企业名称", "企业名称", "生产企业", "注册人名称"],
+    "agent_address": ["代理人住所", "生产企业住所", "企业住所", "住所", "注册人住所"],
     "package_specification": ["包装规格", "规格"],
     "main_components": ["主要组成成分", "主要组成", "组成成分"],
     "intended_use": ["预期用途"],
@@ -41,7 +46,7 @@ def collect_document_texts(summary_batch: FileSummaryBatch) -> dict[str, str]:
 def extract_by_rules(texts: dict[str, str], specs: list[TemplateSpec]) -> dict[str, Any]:
     fields: list[dict[str, Any]] = []
     field_defs = _field_defs(specs)
-    labels = [field["label"] for field in field_defs if field.get("label")]
+    labels = _all_field_labels(field_defs)
     for file_name, text in texts.items():
         source_role = detect_source_role(file_name, text)
         for field in field_defs:
@@ -174,6 +179,15 @@ def _field_aliases(field: dict[str, str]) -> list[str]:
     return result
 
 
+def _all_field_labels(fields: list[dict[str, str]]) -> list[str]:
+    """Return the aliases of all fields, de-duplicated in first-seen order."""
+    # dict.fromkeys de-duplicates while keeping insertion order, avoiding the
+    # quadratic ``label not in labels`` list scan.
+    return list(
+        dict.fromkeys(label for field in fields for label in _field_aliases(field))
+    )
+
+
 def _extract_label_value(text: str, label: str, labels: list[str]) -> tuple[str, str]:
     return _extract_colon_label_value(text, label, labels)
 
diff --git a/review_agent/application_form_fill/services/field_merge.py b/review_agent/application_form_fill/services/field_merge.py
index b6c858a..bbc9eb5 100644
--- a/review_agent/application_form_fill/services/field_merge.py
+++ b/review_agent/application_form_fill/services/field_merge.py
@@ -81,8 +81,40 @@ def merge_fields(regex_results: dict[str, Any], llm_results: dict[str, Any]) ->
                     "handling": "说明书优先,模板内黄底红字高亮" if rank_source(merged_field.source_file, merged_field.source_file) == 1 else "按来源优先级采用最高优先级字段",
                 }
             )
+    _apply_agent_company_fallbacks(merged)
     return merged, conflicts
 
 
 def _distinct_values(candidates: list[dict[str, Any]]) -> set[str]:
     return {normalize_field_value(str(item.get("value") or "")) for item in candidates if item.get("value")}
+
+
+def _apply_agent_company_fallbacks(merged: dict[str, MergedField]) -> None:
+    """Fill missing agent fields in ``merged`` from the applicant fields.
+
+    ``agent_name`` falls back to ``applicant_name`` and ``agent_address`` to
+    ``applicant_address``. A fallback only applies when the agent key is
+    absent and the applicant key is present; ``merged`` is updated in place.
+    """
+    from copy import copy
+
+    fallback_pairs = {
+        "agent_name": ("applicant_name", "代理人名称"),
+        "agent_address": ("applicant_address", "代理人住所"),
+    }
+    for target_key, (source_key, target_label) in fallback_pairs.items():
+        if target_key in merged or source_key not in merged:
+            continue
+        source = merged[source_key]
+        merged[target_key] = MergedField(
+            key=target_key,
+            label=target_label,
+            value=source.value,
+            source_file=source.source_file,
+            evidence=source.evidence,
+            confidence=source.confidence,
+            has_conflict=source.has_conflict,
+            # Shallow-copy so the agent and applicant fields never share one
+            # mutable conflict container.
+            conflict_values=copy(source.conflict_values),
+        )
diff --git a/review_agent/application_form_fill/templates/application_form_templates_v1.yaml b/review_agent/application_form_fill/templates/application_form_templates_v1.yaml
index 9b106d7..75ef0a5 100644
--- a/review_agent/application_form_fill/templates/application_form_templates_v1.yaml
+++ b/review_agent/application_form_fill/templates/application_form_templates_v1.yaml
@@ -36,6 +36,24 @@ templates:
         source_roles:
           - 申请表
           - 质量管理体系文件
+      - key: agent_name
+        label: 代理人名称
+        target:
+          type: table_row
+          row_label: 代理人名称
+        source_roles:
+          - 说明书
+          - 企业信息
+          - 申请表
+      - key: agent_address
+        label: 代理人住所
+        target:
+          type: table_row
+          row_label: 代理人住所
+        source_roles:
+          - 说明书
+          - 企业信息
+          - 申请表
       - key: product_name
         label: 产品名称
         target:
diff --git a/tests/test_application_form_fill_field_extract.py b/tests/test_application_form_fill_field_extract.py
index 2ceea79..b1e2b01 100644
--- a/tests/test_application_form_fill_field_extract.py
+++ b/tests/test_application_form_fill_field_extract.py
@@ -84,6 +84,26 @@ def test_rule_extracts_bracket_sections_from_instructions():
     assert "-20±5℃" in values["storage_condition_and_validity"]
 
 
+def test_rule_maps_agent_fields_to_manufacturer_company_for_now():
+    """Agent name/address are read from the manufacturer lines of the instructions."""
+    texts = {
+        "目标产品说明书.docx": "\n".join(
+            [
+                "生产企业名称:卡尤迪生物科技宜兴有限公司",
+                "生产企业住所:江苏省宜兴经济技术开发区杏里路10号",
+                "生产地址:江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
+            ]
+        )
+    }
+
+    result = extract_by_rules(texts, _registration_specs())
+
+    values = {field["key"]: field["value"] for field in result["fields"]}
+    assert values["agent_name"] == "卡尤迪生物科技宜兴有限公司"
+    assert values["agent_address"] == "江苏省宜兴经济技术开发区杏里路10号"
+    assert values["manufacturer_address"] == "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室"
+
+
 def test_llm_extract_parses_structured_json(monkeypatch):
     monkeypatch.setattr(
         "review_agent.application_form_fill.services.field_extract.generate_completion",
diff --git a/tests/test_application_form_fill_field_merge.py b/tests/test_application_form_fill_field_merge.py
index a449ad6..261f612 100644
--- a/tests/test_application_form_fill_field_merge.py
+++ b/tests/test_application_form_fill_field_merge.py
@@ -77,3 +77,37 @@ def test_merge_fields_combines_consistent_values_without_conflict():
     assert merged["product_name"].value == "甲胎蛋白检测试剂盒"
     assert merged["product_name"].has_conflict is False
     assert conflicts == []
+
+
+def test_merge_fields_fills_agent_from_applicant_for_now():
+    """Missing agent fields are filled from the merged applicant fields."""
+    regex_results = {
+        "fields": [
+            {
+                "key": "applicant_name",
+                "label": "注册人名称",
+                "value": "卡尤迪生物科技宜兴有限公司",
+                "source_file": "目标产品说明书.docx",
+                "source_role": "说明书",
+                "evidence": "生产企业名称:卡尤迪生物科技宜兴有限公司",
+                "confidence": 0.75,
+            },
+            {
+                "key": "applicant_address",
+                "label": "注册人住所",
+                "value": "江苏省宜兴经济技术开发区杏里路10号",
+                "source_file": "目标产品说明书.docx",
+                "source_role": "说明书",
+                "evidence": "生产企业住所:江苏省宜兴经济技术开发区杏里路10号",
+                "confidence": 0.75,
+            },
+        ]
+    }
+
+    merged, conflicts = merge_fields(regex_results, {"fields": []})
+
+    assert merged["agent_name"].value == "卡尤迪生物科技宜兴有限公司"
+    assert merged["agent_name"].label == "代理人名称"
+    assert merged["agent_address"].value == "江苏省宜兴经济技术开发区杏里路10号"
+    assert merged["agent_address"].label == "代理人住所"
+    assert conflicts == []