From d640ced7488f41e96c9b4d09f8406b8b2b4c7d23 Mon Sep 17 00:00:00 2001
From: bruce <sunzhiye01@outlook.com>
Date: Sun, 7 Jun 2026 20:26:32 +0800
Subject: [PATCH] =?UTF-8?q?fix(application-form-fill):=20=E6=B8=85?=
 =?UTF-8?q?=E7=90=86=E5=A1=AB=E8=A1=A8=E8=AF=B4=E6=98=8E=E5=B9=B6=E6=94=B6?=
 =?UTF-8?q?=E7=AA=84=E6=8C=89=E9=92=AE=E8=AF=9D=E6=9C=AF?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../services/field_extract.py                 | 13 ++++++--
 .../services/word_fill.py                     | 29 +++++++++++++++++
 templates/home.html                           |  2 +-
 ...est_application_form_fill_field_extract.py | 21 ++++++++++++
 tests/test_application_form_fill_word_fill.py | 32 +++++++++++++++++++
 tests/test_file_summary_frontend.py           |  3 +-
 6 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/review_agent/application_form_fill/services/field_extract.py b/review_agent/application_form_fill/services/field_extract.py
index 82650a0..7bb636f 100644
--- a/review_agent/application_form_fill/services/field_extract.py
+++ b/review_agent/application_form_fill/services/field_extract.py
@@ -28,6 +28,15 @@ FIELD_ALIASES = {
     "storage_condition_and_validity": ["产品储存条件及有效期", "储存条件及有效期", "储存条件", "有效期"],
 }
 
+STATIC_STOP_LABELS = [  # non-field headings; _all_field_labels seeds its label list with these
+    "申请人",
+    "国家药品监督管理局",
+    "填表说明",
+    "注",
+    "保证书",
+    "应附资料",
+]
+
 
 def collect_document_texts(summary_batch: FileSummaryBatch) -> dict[str, str]:
     texts: dict[str, str] = {}
@@ -180,7 +189,7 @@ def _field_aliases(field: dict[str, str]) -> list[str]:
 
 
 def _all_field_labels(fields: list[dict[str, str]]) -> list[str]:
-    labels: list[str] = []
+    labels: list[str] = list(STATIC_STOP_LABELS)
     for field in fields:
         for label in _field_aliases(field):
             if label not in labels:
@@ -194,7 +203,7 @@ def _extract_label_value(text: str, label: str, labels: list[str]) -> tuple[str,
 
 def _extract_colon_label_value(text: str, label: str, labels: list[str]) -> tuple[str, str]:
     escaped_labels = "|".join(re.escape(item) for item in labels if item != label)
-    stop_pattern = rf"(?=\n\s*(?:{escaped_labels})\s*[:：])" if escaped_labels else r"(?=\Z)"
+    stop_pattern = rf"(?=\n\s*(?:{escaped_labels})(?:\s*[:：]|[^\S\n]*(?:\n|\Z)))" if escaped_labels else r"(?=\Z)"  # a bare label line stops the value anywhere in the text
     pattern = re.compile(rf"{re.escape(label)}\s*[:：]\s*(.+?)(?:{stop_pattern}|\Z)", re.S)
     match = pattern.search(text or "")
     if not match:
diff --git a/review_agent/application_form_fill/services/word_fill.py b/review_agent/application_form_fill/services/word_fill.py
index 195b918..9a6e11a 100644
--- a/review_agent/application_form_fill/services/word_fill.py
+++ b/review_agent/application_form_fill/services/word_fill.py
@@ -22,6 +22,7 @@ def fill_template(
     conflicts: list[dict] | None = None,
 ) -> Path:
     document = Document(str(template_path))
+    remove_fill_instructions(document)
     conflict_keys = {item.get("field_key") for item in conflicts or []}
     for field_config in spec.fields:
         target = field_config.get("target") or {}
@@ -43,6 +44,25 @@ def fill_template(
     return output
 
 
+def remove_fill_instructions(document: Document) -> None:
+    """Remove the "填表说明" section and "注填表前…" notes from *document* in place."""
+    removing = False
+    for paragraph in list(document.paragraphs):
+        text = _normalize_label(paragraph.text)
+        # Once the "填表说明" heading is seen, every later body paragraph is dropped too.
+        removing = removing or text == "填表说明"
+        if removing or (text.startswith("注填表前") and "填表说明" in text):
+            _remove_paragraph(paragraph)
+
+    for table in document.tables:
+        for row in list(table.rows):
+            # Merged cells repeat in row.cells; key on the <w:tc> element to read each once.
+            cell_texts = {cell._tc: cell.text for cell in row.cells}
+            row_text = _normalize_label("".join(cell_texts.values()))
+            if row_text == "填表说明" or row_text.startswith("注填表前"):
+                _remove_row(row)
+
+
 def fill_table_row(document: Document, row_label: str, value: str, *, conflict: bool = False) -> bool:
     normalized_label = _normalize_label(row_label)
     for table in document.tables:
@@ -71,6 +91,15 @@ def apply_cell_shading(cell, fill: str) -> None:
     shading.set(qn("w:fill"), fill)
 
 
+def _remove_paragraph(paragraph) -> None:
+    """Detach the paragraph's XML element from its parent node."""
+    paragraph._element.getparent().remove(paragraph._element)
+
+
+def _remove_row(row) -> None:
+    row._tr.getparent().remove(row._tr)  # detach the row's XML element from its parent node
+
+
 def create_word_export(
     batch: ApplicationFormFillBatch,
     spec: TemplateSpec,
diff --git a/templates/home.html b/templates/home.html
index 38fa136..ef75d33 100644
--- a/templates/home.html
+++ b/templates/home.html
@@ -211,7 +211,7 @@
               <button
                 class="tool-chip"
                 type="button"
-                data-prompt-template="请基于当前对话最近成功汇总的产品资料，自动提取产品关键信息并填入申报文件模板，优先生成注册证 Word 和字段来源追溯清单。"
+                data-prompt-template="请基于当前对话最近成功汇总的产品资料，自动提取产品关键信息并填入申报文件模板"
               >申报文件填表</button>
             </div>
             <button class="send-button" type="submit" id="sendButton">发送</button>
diff --git a/tests/test_application_form_fill_field_extract.py b/tests/test_application_form_fill_field_extract.py
index b1e2b01..28f020b 100644
--- a/tests/test_application_form_fill_field_extract.py
+++ b/tests/test_application_form_fill_field_extract.py
@@ -103,6 +103,27 @@ def test_rule_maps_agent_fields_to_manufacturer_company_for_now():
     assert values["manufacturer_address"] == "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室"
 
 
+def test_rule_stops_product_name_before_application_form_instructions():
+    texts = {
+        "境内体外诊断试剂注册申请表.docx": "\n".join(
+            [
+                "产品名称：呼吸道合胞病毒、肺炎支原体核酸检测试剂盒（荧光PCR法）",
+                "申请人：",  # NOTE(review): presumably this colon-form label is what ends the capture
+                "卡尤迪生物科技宜兴有限公司",
+                "国家药品监督管理局",
+                "填表说明",  # NOTE(review): bare heading; never the first stop in this fixture
+                "1. 本表依据《体外诊断注册与备案管理办法》制定。",
+            ]
+        )
+    }
+
+    result = extract_by_rules(texts, _registration_specs())
+
+    values = {field["key"]: field["value"] for field in result["fields"]}
+    assert values["product_name"] == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒（荧光PCR法）"
+    assert "填表说明" not in values["product_name"]
+
+
 def test_llm_extract_parses_structured_json(monkeypatch):
     monkeypatch.setattr(
         "review_agent.application_form_fill.services.field_extract.generate_completion",
diff --git a/tests/test_application_form_fill_word_fill.py b/tests/test_application_form_fill_word_fill.py
index 04c918f..2708e5d 100644
--- a/tests/test_application_form_fill_word_fill.py
+++ b/tests/test_application_form_fill_word_fill.py
@@ -41,6 +41,17 @@ def _template(path):
     document.save(path)
 
 
+def _template_with_instructions(path):
+    document = Document()
+    table = document.add_table(rows=2, cols=2)  # NOTE(review): no row holds instruction text, so table-row removal is not exercised
+    table.rows[0].cells[0].text = "产品名称"
+    table.rows[1].cells[0].text = "预期用途"
+    document.add_paragraph("填表说明")  # heading followed by two numbered instruction paragraphs
+    document.add_paragraph("1. 本表依据《体外诊断注册与备案管理办法》制定。")
+    document.add_paragraph("2. 本表可从国家药品监督管理局网站下载。")
+    document.save(path)
+
+
 def test_word_fill_writes_table_rows(tmp_path):
     template_path = tmp_path / "template.docx"
     output_path = tmp_path / "filled.docx"
@@ -61,6 +72,27 @@ def test_word_fill_writes_table_rows(tmp_path):
     assert document.tables[0].rows[1].cells[1].text == "用于体外检测"
 
 
+def test_word_fill_removes_template_fill_instructions(tmp_path):
+    template_path = tmp_path / "template.docx"
+    output_path = tmp_path / "filled.docx"
+    _template_with_instructions(template_path)
+
+    fill_template(
+        template_path,
+        output_path,
+        _spec(),
+        {
+            "product_name": MergedField("product_name", "产品名称", "甲胎蛋白检测试剂盒", "说明书.txt", "证据", 0.8),
+        },
+    )
+
+    document = Document(output_path)
+    text = "\n".join(paragraph.text for paragraph in document.paragraphs)  # body paragraphs only, not table cells
+    assert "填表说明" not in text
+    assert "本表依据" not in text
+    assert document.tables[0].rows[0].cells[1].text == "甲胎蛋白检测试剂盒"  # filling still works after the cleanup
+
+
 def test_word_fill_highlights_conflict_in_docx_xml(tmp_path):
     template_path = tmp_path / "template.docx"
     output_path = tmp_path / "filled.docx"
diff --git a/tests/test_file_summary_frontend.py b/tests/test_file_summary_frontend.py
index 5481f5b..1355619 100644
--- a/tests/test_file_summary_frontend.py
+++ b/tests/test_file_summary_frontend.py
@@ -251,6 +251,7 @@ def test_workspace_tool_buttons_fill_default_prompts(client, django_user_model):
     assert ">风险预警</button>" not in content
     assert 'data-prompt-template="请对当前对话已上传的文件或压缩包自动汇总文件目录' in content
     assert 'data-prompt-template="请对当前对话最近成功汇总的注册资料发起 NMPA 法规核查与风险预警' in content
-    assert 'data-prompt-template="请基于当前对话最近成功汇总的产品资料，自动提取产品关键信息并填入申报文件模板' in content
+    assert 'data-prompt-template="请基于当前对话最近成功汇总的产品资料，自动提取产品关键信息并填入申报文件模板"' in content
+    assert "优先生成注册证 Word 和字段来源追溯清单" not in content
     assert "bindPromptTemplateButtons" in script
     assert "promptInput.value = template" in script