test(regulatory-info-package): 补充模板生成回归覆盖

2026-06-10 23:56:51 +08:00
parent 1bf8634373
commit 9c6cad481c
4 changed files with 200 additions and 24 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -0,0 +1,8 @@
 import pytest
@pytest.fixture(autouse=True)
 def mock_regulatory_info_package_page_count(monkeypatch):
    from review_agent.regulatory_info_package.services import package_generate
    monkeypatch.setattr(package_generate, "count_document_pages", lambda _path: 1)
--- a/tests/test_regulatory_info_package_field_extract.py
+++ b/tests/test_regulatory_info_package_field_extract.py
@@ -1,3 +1,5 @@
 import json
 from review_agent.regulatory_info_package.schemas import InstructionExtractResult
 from review_agent.regulatory_info_package.services.field_extract import extract_fields_by_rules, run_parallel_extract
@@ -18,6 +20,57 @@ def test_extract_fields_by_rules_finds_product_name_and_storage():
    assert result["storage_condition"]["value"] == "2-8℃保存"
 def test_extract_fields_by_rules_uses_registrant_or_manufacturer_for_applicant():
    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=[
            "注册人/售后服务单位名称：卡尤迪生物科技宜兴有限公司",
            "生产企业名称：卡尤迪生物科技宜兴有限公司",
            "生产企业住所：宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室",
            "联系方式： 0510-80330909, 0510-80330919",
            "生产地址：江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
        ],
        sections={},
        tables=[],
        component_tables=[],
        front_text="",
    )
    result = extract_fields_by_rules(instruction)
    assert result["applicant_name"]["value"] == "卡尤迪生物科技宜兴有限公司"
    assert result["manufacturer_name"]["value"] == "卡尤迪生物科技宜兴有限公司"
    assert result["applicant_address"]["value"] == "宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室"
    assert result["applicant_contact"]["value"] == "0510-80330909, 0510-80330919"
    assert result["production_address"]["value"] == "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室"
 def test_extract_fields_by_rules_serializes_component_table_and_notes():
    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=[],
        sections={"【主要组成成分】": "表1  规格A大包装试剂盒组成成分\n注：不同批号试剂盒中各组分不得互换使用。"},
        tables=[],
        component_tables=[
            {
                "header": ["组分", "主要组成成分", "规格（24人份/盒）", "规格（48人份/盒）"],
                "rows": [
                    ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"],
                    ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"],
                ],
            }
        ],
        front_text="",
    )
    result = extract_fields_by_rules(instruction)
    payload = json.loads(result["component_table"]["value"])
    assert payload["header"][0:2] == ["组分", "主要组成成分"]
    assert payload["rows"][0][0] == "PCR反应液 I"
    assert result["component_notes"]["value"] == "表1  规格A大包装试剂盒组成成分\n注：不同批号试剂盒中各组分不得互换使用。"
 def test_run_parallel_extract_keeps_rule_result_when_llm_fails():
    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
@@ -33,4 +86,3 @@ def test_run_parallel_extract_keeps_rule_result_when_llm_fails():
    assert result["regex_results"]["product_name"]["value"] == "测试产品"
    assert result["llm_results"] == {}
    assert result["llm_error"]
--- a/tests/test_regulatory_info_package_package_generate.py
+++ b/tests/test_regulatory_info_package_package_generate.py
@@ -1,10 +1,13 @@
 import json
 import pytest
 from docx import Document
 from pathlib import Path
 from django.conf import settings
 from django.utils import timezone
 from review_agent.models import Conversation, RegulatoryInfoPackageBatch
 from review_agent.regulatory_info_package.services.field_merge import merge_fields
 from review_agent.regulatory_info_package.services import package_generate
 from review_agent.regulatory_info_package.services.package_generate import generate_package_documents
 from review_agent.regulatory_info_package.services.template_config import load_template_config
@@ -18,7 +21,7 @@ def test_template_config_uses_clean_internal_templates():
    assert source_dir == settings.BASE_DIR / "review_agent" / "regulatory_info_package" / "templates" / "clean"
    assert source_dir.exists()
-    assert len(config["templates"]) == 7
+    assert len(config["templates"]) == 6
    assert all((source_dir / item["source_file"]).exists() for item in config["templates"])
@@ -26,13 +29,12 @@ def test_clean_templates_expose_stable_fill_placeholders():
    config = load_template_config()
    source_dir = Path(config["source_dir"])
    expected_by_code = {
-        "ch1_2_directory": {"{{product_name}}", "{{applicant_name}}"},
+        "ch1_2_directory": {"{{product_name}}"},
        "ch1_4_application_form": {"{{product_name}}", "{{applicant_name}}"},
-        "ch1_5_product_list": {"{{product_name}}", "{{package_specification}}"},
+        "ch1_5_product_list": {"{{product_name}}"},
-        "ch1_9_pre_submission": {"{{product_name}}", "{{applicant_name}}"},
+        "ch1_11_1_standards": {"{{product_name}}"},
-        "ch1_11_1_standards": {"{{standard_no}}", "{{product_name}}"},
+        "ch1_11_5_authenticity": {"{{product_name}}"},
-        "ch1_11_5_authenticity": {"{{product_name}}", "{{applicant_name}}"},
+        "ch1_11_6_conformity": {"{{product_name}}"},
-        "ch1_11_6_conformity": {"{{product_name}}", "{{applicant_name}}"},
    }
    for item in config["templates"]:
@@ -42,7 +44,29 @@ def test_clean_templates_expose_stable_fill_placeholders():
            assert placeholder in text
-def test_generate_package_documents_creates_seven_results(django_user_model, tmp_path):
+def test_directory_template_includes_page_numbers():
    config = load_template_config()
    source_dir = Path(config["source_dir"])
    item = next(template for template in config["templates"] if template["code"] == "ch1_2_directory")
    document = Document(source_dir / item["source_file"])
    page_numbers = [row.cells[4].text.strip() for row in document.tables[0].rows[1:]]
    assert page_numbers == ["1", "1", "1", "1", "1", "1"]
 def test_application_form_template_uses_real_checkbox_symbols():
    config = load_template_config()
    source_dir = Path(config["source_dir"])
    item = next(template for template in config["templates"] if template["code"] == "ch1_4_application_form")
    text = _document_text(Document(source_dir / item["source_file"]))
    assert "{{复选框}}" not in text
    assert "{{}}" not in text
    assert "☐" in text
    assert "☑" in text
 def test_generate_package_documents_creates_six_results(django_user_model, tmp_path):
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
@@ -55,14 +79,55 @@ def test_generate_package_documents_creates_seven_results(django_user_model, tmp
    results = generate_package_documents(batch, load_template_config(), merged)
-    assert len(results) == 7
+    assert len(results) == 6
    assert all(result.status in {"success", "fallback_success"} for result in results), [
        (result.template_code, result.status, result.error_message) for result in results
    ]
    assert all(result.path for result in results)
-def test_generated_docx_has_visible_prefill_block_near_top(django_user_model, tmp_path):
+def test_directory_is_generated_last_with_real_page_counts(django_user_model, tmp_path, monkeypatch):
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=user,
        batch_no="RIP-20260610154010-abcdef",
        work_dir=str(tmp_path),
    )
    merged, _summary = merge_fields({"product_name": {"value": "测试产品", "label": "产品名称"}}, {})
    page_counts = {
        "CH1.4 申请表.docx": 3,
        "CH1.5 产品列表.docx": 5,
        "CH1.11.1 符合标准的清单.docx": 2,
        "CH1.11.5 真实性声明.docx": 4,
        "CH1.11.6 符合性声明.docx": 6,
    }
    counted_files = []
    def fake_count(path):
        counted_files.append(Path(path).name)
        return page_counts[Path(path).name]
    monkeypatch.setattr(package_generate, "count_document_pages", fake_count, raising=False)
    results = generate_package_documents(batch, load_template_config(), merged)
    assert results[-1].template_code == "ch1_2_directory"
    assert set(counted_files) == set(page_counts)
    directory = Document(results[-1].path)
    directory_pages = {row.cells[0].text.strip(): row.cells[4].text.strip() for row in directory.tables[0].rows[1:]}
    assert directory_pages == {
        "CH1.2": "1",
        "CH1.4": "3",
        "CH1.5": "5",
        "CH1.11.1": "2",
        "CH1.11.5": "4",
        "CH1.11.6": "6",
    }
 def test_generated_docx_does_not_add_prefill_or_audit_blocks(django_user_model, tmp_path):
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
@@ -74,12 +139,15 @@ def test_generated_docx_has_visible_prefill_block_near_top(django_user_model, tm
    merged, _summary = merge_fields({"product_name": {"value": "测试产品", "label": "产品名称"}}, {})
    results = generate_package_documents(batch, load_template_config(), merged)
-    docx_result = next(result for result in results if result.template_code == "ch1_2_directory")
+    for result in results:
-    document = Document(docx_result.path)
+        document = Document(result.path)
-    first_text = "\n".join(paragraph.text for paragraph in document.paragraphs[:8])
+        text = _document_text(document)
-    assert "预生成版" in first_text
+        assert "预生成版" not in text
-    assert "测试产品" in first_text
+        assert "预生成字段" not in text
        assert "component_table" not in text
        assert '"header"' not in text
        assert "测试产品" in text
 def test_generated_docx_replaces_sample_case_content(django_user_model, tmp_path):
@@ -141,13 +209,18 @@ def test_generated_docs_fill_clean_template_body(django_user_model, tmp_path):
        result = next(item for item in results if item.template_code == code)
        text = _document_text(Document(result.path))
        assert "甲型流感病毒核酸检测试剂盒" in text
-        assert "星河医疗科技有限公司" in text
+        if code == "ch1_4_application_form":
            assert "星河医疗科技有限公司" in text
        assert "{{" not in text
        assert "}}" not in text
-    standards = next(item for item in results if item.template_code == "ch1_11_1_standards")
+    today = timezone.localdate().strftime("%Y年%m月%d日")
-    standards_text = _document_text(Document(standards.path))
+    for code in ["ch1_11_1_standards", "ch1_11_5_authenticity", "ch1_11_6_conformity"]:
-    assert "GB/T 29791.1-2013" in standards_text
+        result = next(item for item in results if item.template_code == code)
        text = _document_text(Document(result.path))
        assert today in text
        assert "xxxx年xx月xx日" not in text
        assert "星河医疗科技有限公司" not in text
    product_list = next(item for item in results if item.template_code == "ch1_5_product_list")
    product_text = _document_text(Document(product_list.path))
@@ -155,6 +228,51 @@ def test_generated_docs_fill_clean_template_body(django_user_model, tmp_path):
    assert "48人份/盒" in product_text
 def test_product_list_uses_component_table_from_instruction(django_user_model, tmp_path):
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=user,
        batch_no="RIP-20260610154400-abcdef",
        work_dir=str(tmp_path),
    )
    component_payload = {
        "header": ["组分", "主要组成成分", "规格（24人份/盒）", "规格（48人份/盒）"],
        "rows": [
            ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"],
            ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"],
        ],
    }
    merged, _summary = merge_fields(
        {
            "product_name": {"value": "新型冠状病毒核酸检测试剂盒", "label": "产品名称"},
            "package_specification": {"value": "24人份/盒；48人份/盒", "label": "包装规格"},
            "component_table": {
                "value": json.dumps(component_payload, ensure_ascii=False),
                "label": "主要组成成分",
            },
            "component_notes": {
                "value": "注：不同批号试剂盒中各组分不得互换使用。",
                "label": "主要组成成分备注",
            },
        },
        {},
    )
    results = generate_package_documents(batch, load_template_config(), merged)
    product_list = next(result for result in results if result.template_code == "ch1_5_product_list")
    document = Document(product_list.path)
    text = _document_text(document)
    assert "PCR反应液 I" in text
    assert "840μL/管×1管" in text
    assert "840μL/管×2管" in text
    assert "注：不同批号试剂盒中各组分不得互换使用。" in text
    assert "RSV&MP" not in text
    assert "6018003102" not in text
 def _document_text(document: Document) -> str:
    text = "\n".join(paragraph.text for paragraph in document.paragraphs)
    for table in document.tables:
--- a/tests/test_regulatory_info_package_template_config.py
+++ b/tests/test_regulatory_info_package_template_config.py
@@ -10,17 +10,16 @@ from review_agent.regulatory_info_package.services.template_config import (
 )
-def test_template_config_loads_seven_templates():
+def test_template_config_loads_six_templates():
    config = load_template_config()
    assert config["version"] == "regulatory_info_package_templates_v1"
    assert config["zip_name"] == DEFAULT_ZIP_NAME
-    assert len(config["templates"]) == 7
+    assert len(config["templates"]) == 6
    assert {template["code"] for template in config["templates"]} == {
        "ch1_2_directory",
        "ch1_4_application_form",
        "ch1_5_product_list",
-        "ch1_9_pre_submission",
        "ch1_11_1_standards",
        "ch1_11_5_authenticity",
        "ch1_11_6_conformity",
@@ -45,4 +44,3 @@ def test_template_config_sources_exist():
    assert source_dir.exists()
    for template in config["templates"]:
        assert (source_dir / template["source_file"]).exists()