From 9c6cad481cafadaf7c9e327be49c8f05f5348c4b Mon Sep 17 00:00:00 2001 From: bruce Date: Wed, 10 Jun 2026 23:56:51 +0800 Subject: [PATCH] =?UTF-8?q?test(regulatory-info-package):=20=E8=A1=A5?= =?UTF-8?q?=E5=85=85=E6=A8=A1=E6=9D=BF=E7=94=9F=E6=88=90=E5=9B=9E=E5=BD=92?= =?UTF-8?q?=E8=A6=86=E7=9B=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/conftest.py | 8 + ...t_regulatory_info_package_field_extract.py | 54 +++++- ...egulatory_info_package_package_generate.py | 156 +++++++++++++++--- ...regulatory_info_package_template_config.py | 6 +- 4 files changed, 200 insertions(+), 24 deletions(-) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..9912414 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,8 @@ +import pytest + + +@pytest.fixture(autouse=True) +def mock_regulatory_info_package_page_count(monkeypatch): + from review_agent.regulatory_info_package.services import package_generate + + monkeypatch.setattr(package_generate, "count_document_pages", lambda _path: 1) diff --git a/tests/test_regulatory_info_package_field_extract.py b/tests/test_regulatory_info_package_field_extract.py index 0d50569..b84754d 100644 --- a/tests/test_regulatory_info_package_field_extract.py +++ b/tests/test_regulatory_info_package_field_extract.py @@ -1,3 +1,5 @@ +import json + from review_agent.regulatory_info_package.schemas import InstructionExtractResult from review_agent.regulatory_info_package.services.field_extract import extract_fields_by_rules, run_parallel_extract @@ -18,6 +20,57 @@ def test_extract_fields_by_rules_finds_product_name_and_storage(): assert result["storage_condition"]["value"] == "2-8℃保存" +def test_extract_fields_by_rules_uses_registrant_or_manufacturer_for_applicant(): + instruction = InstructionExtractResult( + source_file_name="目标产品说明书.docx", + paragraphs=[ + "注册人/售后服务单位名称:卡尤迪生物科技宜兴有限公司", + "生产企业名称:卡尤迪生物科技宜兴有限公司", + "生产企业住所:宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室", + "联系方式: 0510-80330909, 0510-80330919", + "生产地址:江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室", + ], + sections={}, + tables=[], + component_tables=[], + front_text="", + ) + + result = extract_fields_by_rules(instruction) + + assert result["applicant_name"]["value"] == "卡尤迪生物科技宜兴有限公司" + assert result["manufacturer_name"]["value"] == "卡尤迪生物科技宜兴有限公司" + assert result["applicant_address"]["value"] == "宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室" + assert result["applicant_contact"]["value"] == "0510-80330909, 0510-80330919" + assert result["production_address"]["value"] == "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室" + + +def test_extract_fields_by_rules_serializes_component_table_and_notes(): + instruction = InstructionExtractResult( + source_file_name="目标产品说明书.docx", + paragraphs=[], + sections={"【主要组成成分】": "表1 规格A大包装试剂盒组成成分\n注:不同批号试剂盒中各组分不得互换使用。"}, + tables=[], + component_tables=[ + { + "header": ["组分", "主要组成成分", "规格(24人份/盒)", "规格(48人份/盒)"], + "rows": [ + ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"], + ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"], + ], + } + ], + front_text="", + ) + + result = extract_fields_by_rules(instruction) + payload = json.loads(result["component_table"]["value"]) + + assert payload["header"][0:2] == ["组分", "主要组成成分"] + assert payload["rows"][0][0] == "PCR反应液 I" + assert result["component_notes"]["value"] == "表1 规格A大包装试剂盒组成成分\n注:不同批号试剂盒中各组分不得互换使用。" + + def test_run_parallel_extract_keeps_rule_result_when_llm_fails(): instruction = InstructionExtractResult( source_file_name="目标产品说明书.docx", @@ -33,4 +86,3 @@ def test_run_parallel_extract_keeps_rule_result_when_llm_fails(): assert result["regex_results"]["product_name"]["value"] == "测试产品" assert result["llm_results"] == {} assert result["llm_error"] - diff --git a/tests/test_regulatory_info_package_package_generate.py b/tests/test_regulatory_info_package_package_generate.py index 6c47560..c1331a9 100644 --- a/tests/test_regulatory_info_package_package_generate.py +++ b/tests/test_regulatory_info_package_package_generate.py @@ -1,10 +1,13 @@ +import json import pytest from docx import Document from pathlib import Path from django.conf import settings +from django.utils import timezone from review_agent.models import Conversation, RegulatoryInfoPackageBatch from review_agent.regulatory_info_package.services.field_merge import merge_fields +from review_agent.regulatory_info_package.services import package_generate from review_agent.regulatory_info_package.services.package_generate import generate_package_documents from review_agent.regulatory_info_package.services.template_config import load_template_config @@ -18,7 +21,7 @@ def test_template_config_uses_clean_internal_templates(): assert source_dir == settings.BASE_DIR / "review_agent" / "regulatory_info_package" / "templates" / "clean" assert source_dir.exists() - assert len(config["templates"]) == 7 + assert len(config["templates"]) == 6 assert all((source_dir / item["source_file"]).exists() for item in config["templates"]) @@ -26,13 +29,12 @@ def test_clean_templates_expose_stable_fill_placeholders(): config = load_template_config() source_dir = Path(config["source_dir"]) expected_by_code = { - "ch1_2_directory": {"{{product_name}}", "{{applicant_name}}"}, + "ch1_2_directory": {"{{product_name}}"}, "ch1_4_application_form": {"{{product_name}}", "{{applicant_name}}"}, - "ch1_5_product_list": {"{{product_name}}", "{{package_specification}}"}, - "ch1_9_pre_submission": {"{{product_name}}", "{{applicant_name}}"}, - "ch1_11_1_standards": {"{{standard_no}}", "{{product_name}}"}, - "ch1_11_5_authenticity": {"{{product_name}}", "{{applicant_name}}"}, - "ch1_11_6_conformity": {"{{product_name}}", "{{applicant_name}}"}, + "ch1_5_product_list": {"{{product_name}}"}, + "ch1_11_1_standards": {"{{product_name}}"}, + "ch1_11_5_authenticity": {"{{product_name}}"}, + "ch1_11_6_conformity": {"{{product_name}}"}, } for item in config["templates"]: @@ -42,7 +44,29 @@ def test_clean_templates_expose_stable_fill_placeholders(): assert placeholder in text -def test_generate_package_documents_creates_seven_results(django_user_model, tmp_path): +def test_directory_template_includes_page_numbers(): + config = load_template_config() + source_dir = Path(config["source_dir"]) + item = next(template for template in config["templates"] if template["code"] == "ch1_2_directory") + document = Document(source_dir / item["source_file"]) + page_numbers = [row.cells[4].text.strip() for row in document.tables[0].rows[1:]] + + assert page_numbers == ["1", "1", "1", "1", "1", "1"] + + +def test_application_form_template_uses_real_checkbox_symbols(): + config = load_template_config() + source_dir = Path(config["source_dir"]) + item = next(template for template in config["templates"] if template["code"] == "ch1_4_application_form") + text = _document_text(Document(source_dir / item["source_file"])) + + assert "{{复选框}}" not in text + assert "{{}}" not in text + assert "☐" in text + assert "☑" in text + + +def test_generate_package_documents_creates_six_results(django_user_model, tmp_path): user = django_user_model.objects.create_user(username="owner", password="pass") conversation = Conversation.objects.create(user=user, title="会话") batch = RegulatoryInfoPackageBatch.objects.create( @@ -55,14 +79,55 @@ def test_generate_package_documents_creates_seven_results(django_user_model, tmp results = generate_package_documents(batch, load_template_config(), merged) - assert len(results) == 7 + assert len(results) == 6 assert all(result.status in {"success", "fallback_success"} for result in results), [ (result.template_code, result.status, result.error_message) for result in results ] assert all(result.path for result in results) -def test_generated_docx_has_visible_prefill_block_near_top(django_user_model, tmp_path): +def test_directory_is_generated_last_with_real_page_counts(django_user_model, tmp_path, monkeypatch): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + batch = RegulatoryInfoPackageBatch.objects.create( + conversation=conversation, + user=user, + batch_no="RIP-20260610154010-abcdef", + work_dir=str(tmp_path), + ) + merged, _summary = merge_fields({"product_name": {"value": "测试产品", "label": "产品名称"}}, {}) + page_counts = { + "CH1.4 申请表.docx": 3, + "CH1.5 产品列表.docx": 5, + "CH1.11.1 符合标准的清单.docx": 2, + "CH1.11.5 真实性声明.docx": 4, + "CH1.11.6 符合性声明.docx": 6, + } + counted_files = [] + + def fake_count(path): + counted_files.append(Path(path).name) + return page_counts[Path(path).name] + + monkeypatch.setattr(package_generate, "count_document_pages", fake_count, raising=False) + + results = generate_package_documents(batch, load_template_config(), merged) + + assert results[-1].template_code == "ch1_2_directory" + assert set(counted_files) == set(page_counts) + directory = Document(results[-1].path) + directory_pages = {row.cells[0].text.strip(): row.cells[4].text.strip() for row in directory.tables[0].rows[1:]} + assert directory_pages == { + "CH1.2": "1", + "CH1.4": "3", + "CH1.5": "5", + "CH1.11.1": "2", + "CH1.11.5": "4", + "CH1.11.6": "6", + } + + +def test_generated_docx_does_not_add_prefill_or_audit_blocks(django_user_model, tmp_path): user = django_user_model.objects.create_user(username="owner", password="pass") conversation = Conversation.objects.create(user=user, title="会话") batch = RegulatoryInfoPackageBatch.objects.create( @@ -74,12 +139,15 @@ def test_generated_docx_has_visible_prefill_block_near_top(django_user_model, tm merged, _summary = merge_fields({"product_name": {"value": "测试产品", "label": "产品名称"}}, {}) results = generate_package_documents(batch, load_template_config(), merged) - docx_result = next(result for result in results if result.template_code == "ch1_2_directory") - document = Document(docx_result.path) - first_text = "\n".join(paragraph.text for paragraph in document.paragraphs[:8]) + for result in results: + document = Document(result.path) + text = _document_text(document) - assert "预生成版" in first_text - assert "测试产品" in first_text + assert "预生成版" not in text + assert "预生成字段" not in text + assert "component_table" not in text + assert '"header"' not in text + assert "测试产品" in text def test_generated_docx_replaces_sample_case_content(django_user_model, tmp_path): @@ -141,13 +209,18 @@ def test_generated_docs_fill_clean_template_body(django_user_model, tmp_path): result = next(item for item in results if item.template_code == code) text = _document_text(Document(result.path)) assert "甲型流感病毒核酸检测试剂盒" in text - assert "星河医疗科技有限公司" in text + if code == "ch1_4_application_form": + assert "星河医疗科技有限公司" in text assert "{{" not in text assert "}}" not in text - standards = next(item for item in results if item.template_code == "ch1_11_1_standards") - standards_text = _document_text(Document(standards.path)) - assert "GB/T 29791.1-2013" in standards_text + today = timezone.localdate().strftime("%Y年%m月%d日") + for code in ["ch1_11_1_standards", "ch1_11_5_authenticity", "ch1_11_6_conformity"]: + result = next(item for item in results if item.template_code == code) + text = _document_text(Document(result.path)) + assert today in text + assert "xxxx年xx月xx日" not in text + assert "星河医疗科技有限公司" not in text product_list = next(item for item in results if item.template_code == "ch1_5_product_list") product_text = _document_text(Document(product_list.path)) @@ -155,6 +228,51 @@ def test_generated_docs_fill_clean_template_body(django_user_model, tmp_path): assert "48人份/盒" in product_text +def test_product_list_uses_component_table_from_instruction(django_user_model, tmp_path): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + batch = RegulatoryInfoPackageBatch.objects.create( + conversation=conversation, + user=user, + batch_no="RIP-20260610154400-abcdef", + work_dir=str(tmp_path), + ) + component_payload = { + "header": ["组分", "主要组成成分", "规格(24人份/盒)", "规格(48人份/盒)"], + "rows": [ + ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"], + ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"], + ], + } + merged, _summary = merge_fields( + { + "product_name": {"value": "新型冠状病毒核酸检测试剂盒", "label": "产品名称"}, + "package_specification": {"value": "24人份/盒;48人份/盒", "label": "包装规格"}, + "component_table": { + "value": json.dumps(component_payload, ensure_ascii=False), + "label": "主要组成成分", + }, + "component_notes": { + "value": "注:不同批号试剂盒中各组分不得互换使用。", + "label": "主要组成成分备注", + }, + }, + {}, + ) + + results = generate_package_documents(batch, load_template_config(), merged) + product_list = next(result for result in results if result.template_code == "ch1_5_product_list") + document = Document(product_list.path) + text = _document_text(document) + + assert "PCR反应液 I" in text + assert "840μL/管×1管" in text + assert "840μL/管×2管" in text + assert "注:不同批号试剂盒中各组分不得互换使用。" in text + assert "RSV&MP" not in text + assert "6018003102" not in text + + def _document_text(document: Document) -> str: text = "\n".join(paragraph.text for paragraph in document.paragraphs) for table in document.tables: diff --git a/tests/test_regulatory_info_package_template_config.py b/tests/test_regulatory_info_package_template_config.py index 506f9ab..ed4e132 100644 --- a/tests/test_regulatory_info_package_template_config.py +++ b/tests/test_regulatory_info_package_template_config.py @@ -10,17 +10,16 @@ from review_agent.regulatory_info_package.services.template_config import ( ) -def test_template_config_loads_seven_templates(): +def test_template_config_loads_six_templates(): config = load_template_config() assert config["version"] == "regulatory_info_package_templates_v1" assert config["zip_name"] == DEFAULT_ZIP_NAME - assert len(config["templates"]) == 7 + assert len(config["templates"]) == 6 assert {template["code"] for template in config["templates"]} == { "ch1_2_directory", "ch1_4_application_form", "ch1_5_product_list", - "ch1_9_pre_submission", "ch1_11_1_standards", "ch1_11_5_authenticity", "ch1_11_6_conformity", @@ -45,4 +44,3 @@ def test_template_config_sources_exist(): assert source_dir.exists() for template in config["templates"]: assert (source_dir / template["source_file"]).exists() -