From 003ff592686862c9c1f14e60a675525b46568adb Mon Sep 17 00:00:00 2001 From: bruce Date: Sun, 7 Jun 2026 20:34:24 +0800 Subject: [PATCH] =?UTF-8?q?fix(application-form-fill):=20=E8=BF=87?= =?UTF-8?q?=E6=BB=A4=E7=94=B3=E8=AF=B7=E8=A1=A8=E5=99=AA=E5=A3=B0=E5=86=B2?= =?UTF-8?q?=E7=AA=81=E5=86=85=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../services/field_extract.py | 16 ++++++-- .../application_form_fill/services/summary.py | 12 +++++- ...est_application_form_fill_field_extract.py | 20 ++++++++++ tests/test_application_form_fill_summary.py | 39 +++++++++++++++++++ 4 files changed, 81 insertions(+), 6 deletions(-) create mode 100644 tests/test_application_form_fill_summary.py diff --git a/review_agent/application_form_fill/services/field_extract.py b/review_agent/application_form_fill/services/field_extract.py index 7bb636f..35207da 100644 --- a/review_agent/application_form_fill/services/field_extract.py +++ b/review_agent/application_form_fill/services/field_extract.py @@ -17,11 +17,11 @@ from review_agent.regulatory_review.services.text_extract import extract_text FIELD_ALIASES = { "product_name": ["产品名称"], - "applicant_name": ["注册人名称", "生产企业名称", "企业名称", "生产企业"], - "applicant_address": ["注册人住所", "生产企业住所", "企业住所", "住所"], + "applicant_name": ["注册人名称", "申请人名称", "生产企业名称"], + "applicant_address": ["注册人住所", "申请人住所", "生产企业住所"], "manufacturer_address": ["生产地址", "生产企业地址", "生产场所"], - "agent_name": ["代理人名称", "生产企业名称", "企业名称", "生产企业", "注册人名称"], - "agent_address": ["代理人住所", "生产企业住所", "企业住所", "住所", "注册人住所"], + "agent_name": ["代理人名称", "生产企业名称", "注册人名称", "申请人名称"], + "agent_address": ["代理人住所", "生产企业住所", "注册人住所", "申请人住所"], "package_specification": ["包装规格", "规格"], "main_components": ["主要组成成分", "主要组成", "组成成分"], "intended_use": ["预期用途"], @@ -35,6 +35,14 @@ STATIC_STOP_LABELS = [ "注", "保证书", "应附资料", + "优先通道申请", + "分类编码", + "医疗器械唯一标识", + "注册产品目前是否", + "临床评价路径", + "临床试验", + "其他需要说明的问题", + "国家药监局器审中心医疗器械", ] diff --git a/review_agent/application_form_fill/services/summary.py b/review_agent/application_form_fill/services/summary.py index 7501d7b..bb4d663 100644 --- a/review_agent/application_form_fill/services/summary.py +++ b/review_agent/application_form_fill/services/summary.py @@ -22,10 +22,11 @@ def build_assistant_summary(batch: ApplicationFormFillBatch, exports: list[Expor lines.extend(["", "| 冲突字段 | 采用值 | 冲突来源 | 处理 |", "| --- | --- | --- | --- |"]) for item in conflicts: conflict_sources = ";".join( - f"{value.get('source_file', '')}:{value.get('value', '')}" for value in item.get("conflict_values", []) + f"{_compact_table_text(value.get('source_file', ''))}:{_compact_table_text(value.get('value', ''))}" + for value in item.get("conflict_values", []) ) lines.append( - f"| {item.get('field_label', item.get('field_key', ''))} | {item.get('selected_value', '')} | {conflict_sources or '-'} | {item.get('handling', '')} |" + f"| {_compact_table_text(item.get('field_label', item.get('field_key', '')))} | {_compact_table_text(item.get('selected_value', ''))} | {_compact_table_text(conflict_sources or '-')} | {_compact_table_text(item.get('handling', ''))} |" ) if trace_exports: @@ -33,3 +34,10 @@ def build_assistant_summary(batch: ApplicationFormFillBatch, exports: list[Expor for export in trace_exports: lines.append(f"[下载{export.file_name}](/api/review-agent/file-summary/exports/{export.pk}/download/)") return "\n".join(lines).strip() + + +def _compact_table_text(value: object, *, limit: int = 80) -> str: + text = " ".join(str(value or "").replace("|", " ").split()) + if len(text) <= limit: + return text + return f"{text[:limit]}..." diff --git a/tests/test_application_form_fill_field_extract.py b/tests/test_application_form_fill_field_extract.py index 28f020b..4b1494b 100644 --- a/tests/test_application_form_fill_field_extract.py +++ b/tests/test_application_form_fill_field_extract.py @@ -124,6 +124,26 @@ def test_rule_stops_product_name_before_application_form_instructions(): assert "填表说明" not in values["product_name"] +def test_rule_ignores_generic_enterprise_name_from_application_form(): + texts = { + "CH1.4 申请表.docx": "\n".join( + [ + "注册人制度\t是 企业名称:否", + "优先通道申请 应急通道 同品种首个产品首次申报", + "临床试验", + "临床试验机构名称: 中国医学科学院北京协和医院、晋中市第一人民医院", + "应附资料", + ] + ) + } + + result = extract_by_rules(texts, _registration_specs()) + + values = {field["key"]: field["value"] for field in result["fields"]} + assert "applicant_name" not in values + assert "agent_name" not in values + + def test_llm_extract_parses_structured_json(monkeypatch): monkeypatch.setattr( "review_agent.application_form_fill.services.field_extract.generate_completion", diff --git a/tests/test_application_form_fill_summary.py b/tests/test_application_form_fill_summary.py new file mode 100644 index 0000000..b9d66d2 --- /dev/null +++ b/tests/test_application_form_fill_summary.py @@ -0,0 +1,39 @@ +import pytest + +from review_agent.application_form_fill.services.summary import build_assistant_summary +from review_agent.models import ApplicationFormFillBatch, Conversation, FileSummaryBatch + + +pytestmark = pytest.mark.django_db + + +def test_assistant_summary_compacts_long_conflict_values(django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + summary = FileSummaryBatch.objects.create(conversation=conversation, user=user, batch_no="FS-SUMMARY") + batch = ApplicationFormFillBatch.objects.create( + conversation=conversation, + user=user, + source_summary_batch=summary, + batch_no="AFF-SUMMARY", + conflict_summary=[ + { + "field_key": "applicant_name", + "field_label": "注册人名称", + "selected_value": "卡尤迪生物科技宜兴有限公司", + "conflict_values": [ + { + "source_file": "CH1.4 申请表.docx", + "value": "否\n临床试验\n临床试验机构名称: 中国医学科学院北京协和医院、晋中市第一人民医院、北京市疾病预防控制中心 临床数据库.zip\n应附资料", + } + ], + "handling": "说明书优先,模板内黄底红字高亮", + } + ], + ) + + content = build_assistant_summary(batch, []) + + assert "临床试验机构名称" in content + assert len([line for line in content.splitlines() if "临床试验机构名称" in line][0]) < 220 + assert "\n临床试验\n" not in content