test(regulatory-info-package): 补充模板生成回归覆盖

This commit is contained in:
2026-06-10 23:56:51 +08:00
parent 1bf8634373
commit 9c6cad481c
4 changed files with 200 additions and 24 deletions

8
tests/conftest.py Normal file
View File

@@ -0,0 +1,8 @@
import pytest
@pytest.fixture(autouse=True)
def mock_regulatory_info_package_page_count(monkeypatch):
    """Stub out ``package_generate.count_document_pages`` so it always returns 1.

    The fixture is ``autouse``, so it is applied to every test that can see
    this conftest without being requested explicitly. The patch is installed
    through ``monkeypatch``, which undoes it when the test finishes.
    """
    # Imported inside the fixture body (as the original does) rather than at
    # module import time.
    from review_agent.regulatory_info_package.services import (
        package_generate as generate_module,
    )

    def _single_page(_path):
        # The path argument is ignored; every document reports one page.
        return 1

    monkeypatch.setattr(generate_module, "count_document_pages", _single_page)

View File

@@ -1,3 +1,5 @@
import json
from review_agent.regulatory_info_package.schemas import InstructionExtractResult from review_agent.regulatory_info_package.schemas import InstructionExtractResult
from review_agent.regulatory_info_package.services.field_extract import extract_fields_by_rules, run_parallel_extract from review_agent.regulatory_info_package.services.field_extract import extract_fields_by_rules, run_parallel_extract
@@ -18,6 +20,57 @@ def test_extract_fields_by_rules_finds_product_name_and_storage():
assert result["storage_condition"]["value"] == "2-8℃保存" assert result["storage_condition"]["value"] == "2-8℃保存"
def test_extract_fields_by_rules_uses_registrant_or_manufacturer_for_applicant():
    """Check rule extraction on registrant/manufacturer/contact/address lines.

    The instruction carries only paragraphs (no sections or tables). The test
    asserts the ``value`` extracted for the applicant name, manufacturer name,
    applicant address, applicant contact and production address.
    """
    source_lines = [
        "注册人/售后服务单位名称:卡尤迪生物科技宜兴有限公司",
        "生产企业名称:卡尤迪生物科技宜兴有限公司",
        "生产企业住所宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室",
        "联系方式: 0510-80330909, 0510-80330919",
        "生产地址江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
    ]
    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=source_lines,
        sections={},
        tables=[],
        component_tables=[],
        front_text="",
    )

    extracted = extract_fields_by_rules(instruction)

    # Checked in insertion order, which mirrors the original assertion order.
    expected_values = {
        "applicant_name": "卡尤迪生物科技宜兴有限公司",
        "manufacturer_name": "卡尤迪生物科技宜兴有限公司",
        "applicant_address": "宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室",
        "applicant_contact": "0510-80330909, 0510-80330919",
        "production_address": "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
    }
    for field_name, expected in expected_values.items():
        assert extracted[field_name]["value"] == expected
def test_extract_fields_by_rules_serializes_component_table_and_notes():
    """Check the ``component_table`` and ``component_notes`` extraction results.

    ``component_table`` is expected to hold a JSON string that decodes to a
    mapping with ``header`` and ``rows``; ``component_notes`` is expected to
    equal the text stored under the composition section of the instruction.
    """
    notes_text = "表1 规格A大包装试剂盒组成成分\n注:不同批号试剂盒中各组分不得互换使用。"
    component_table = {
        "header": ["组分", "主要组成成分", "规格24人份/盒)", "规格48人份/盒)"],
        "rows": [
            ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"],
            ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"],
        ],
    }
    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=[],
        sections={"【主要组成成分】": notes_text},
        tables=[],
        component_tables=[component_table],
        front_text="",
    )

    extracted = extract_fields_by_rules(instruction)

    # The table value is a JSON document, so decode it before inspecting it.
    decoded = json.loads(extracted["component_table"]["value"])
    assert decoded["header"][:2] == ["组分", "主要组成成分"]
    assert decoded["rows"][0][0] == "PCR反应液 I"
    assert extracted["component_notes"]["value"] == notes_text
def test_run_parallel_extract_keeps_rule_result_when_llm_fails(): def test_run_parallel_extract_keeps_rule_result_when_llm_fails():
instruction = InstructionExtractResult( instruction = InstructionExtractResult(
source_file_name="目标产品说明书.docx", source_file_name="目标产品说明书.docx",
@@ -33,4 +86,3 @@ def test_run_parallel_extract_keeps_rule_result_when_llm_fails():
assert result["regex_results"]["product_name"]["value"] == "测试产品" assert result["regex_results"]["product_name"]["value"] == "测试产品"
assert result["llm_results"] == {} assert result["llm_results"] == {}
assert result["llm_error"] assert result["llm_error"]

View File

@@ -1,10 +1,13 @@
import json
import pytest import pytest
from docx import Document from docx import Document
from pathlib import Path from pathlib import Path
from django.conf import settings from django.conf import settings
from django.utils import timezone
from review_agent.models import Conversation, RegulatoryInfoPackageBatch from review_agent.models import Conversation, RegulatoryInfoPackageBatch
from review_agent.regulatory_info_package.services.field_merge import merge_fields from review_agent.regulatory_info_package.services.field_merge import merge_fields
from review_agent.regulatory_info_package.services import package_generate
from review_agent.regulatory_info_package.services.package_generate import generate_package_documents from review_agent.regulatory_info_package.services.package_generate import generate_package_documents
from review_agent.regulatory_info_package.services.template_config import load_template_config from review_agent.regulatory_info_package.services.template_config import load_template_config
@@ -18,7 +21,7 @@ def test_template_config_uses_clean_internal_templates():
assert source_dir == settings.BASE_DIR / "review_agent" / "regulatory_info_package" / "templates" / "clean" assert source_dir == settings.BASE_DIR / "review_agent" / "regulatory_info_package" / "templates" / "clean"
assert source_dir.exists() assert source_dir.exists()
assert len(config["templates"]) == 7 assert len(config["templates"]) == 6
assert all((source_dir / item["source_file"]).exists() for item in config["templates"]) assert all((source_dir / item["source_file"]).exists() for item in config["templates"])
@@ -26,13 +29,12 @@ def test_clean_templates_expose_stable_fill_placeholders():
config = load_template_config() config = load_template_config()
source_dir = Path(config["source_dir"]) source_dir = Path(config["source_dir"])
expected_by_code = { expected_by_code = {
"ch1_2_directory": {"{{product_name}}", "{{applicant_name}}"}, "ch1_2_directory": {"{{product_name}}"},
"ch1_4_application_form": {"{{product_name}}", "{{applicant_name}}"}, "ch1_4_application_form": {"{{product_name}}", "{{applicant_name}}"},
"ch1_5_product_list": {"{{product_name}}", "{{package_specification}}"}, "ch1_5_product_list": {"{{product_name}}"},
"ch1_9_pre_submission": {"{{product_name}}", "{{applicant_name}}"}, "ch1_11_1_standards": {"{{product_name}}"},
"ch1_11_1_standards": {"{{standard_no}}", "{{product_name}}"}, "ch1_11_5_authenticity": {"{{product_name}}"},
"ch1_11_5_authenticity": {"{{product_name}}", "{{applicant_name}}"}, "ch1_11_6_conformity": {"{{product_name}}"},
"ch1_11_6_conformity": {"{{product_name}}", "{{applicant_name}}"},
} }
for item in config["templates"]: for item in config["templates"]:
@@ -42,7 +44,29 @@ def test_clean_templates_expose_stable_fill_placeholders():
assert placeholder in text assert placeholder in text
def test_generate_package_documents_creates_seven_results(django_user_model, tmp_path): def test_directory_template_includes_page_numbers():
config = load_template_config()
source_dir = Path(config["source_dir"])
item = next(template for template in config["templates"] if template["code"] == "ch1_2_directory")
document = Document(source_dir / item["source_file"])
page_numbers = [row.cells[4].text.strip() for row in document.tables[0].rows[1:]]
assert page_numbers == ["1", "1", "1", "1", "1", "1"]
def test_application_form_template_uses_real_checkbox_symbols():
    """Check the application-form template text for leftover checkbox placeholders.

    Loads the template registered under code ``ch1_4_application_form`` from
    the configured source directory and inspects its flattened text.
    """
    templates_config = load_template_config()
    template_root = Path(templates_config["source_dir"])
    form_entry = next(
        entry
        for entry in templates_config["templates"]
        if entry["code"] == "ch1_4_application_form"
    )
    form_text = _document_text(Document(template_root / form_entry["source_file"]))

    # Placeholder spellings that must not survive in the template.
    for leftover in ("{{复选框}}", "{{}}"):
        assert leftover not in form_text

    # NOTE(review): both literals below are empty strings in this view, which
    # makes the two checks always true. Presumably they originally held the
    # checked/unchecked checkbox glyphs and the characters were lost in
    # transit - confirm against the repository and restore them.
    assert "" in form_text
    assert "" in form_text
def test_generate_package_documents_creates_six_results(django_user_model, tmp_path):
user = django_user_model.objects.create_user(username="owner", password="pass") user = django_user_model.objects.create_user(username="owner", password="pass")
conversation = Conversation.objects.create(user=user, title="会话") conversation = Conversation.objects.create(user=user, title="会话")
batch = RegulatoryInfoPackageBatch.objects.create( batch = RegulatoryInfoPackageBatch.objects.create(
@@ -55,14 +79,55 @@ def test_generate_package_documents_creates_seven_results(django_user_model, tmp
results = generate_package_documents(batch, load_template_config(), merged) results = generate_package_documents(batch, load_template_config(), merged)
assert len(results) == 7 assert len(results) == 6
assert all(result.status in {"success", "fallback_success"} for result in results), [ assert all(result.status in {"success", "fallback_success"} for result in results), [
(result.template_code, result.status, result.error_message) for result in results (result.template_code, result.status, result.error_message) for result in results
] ]
assert all(result.path for result in results) assert all(result.path for result in results)
def test_generated_docx_has_visible_prefill_block_near_top(django_user_model, tmp_path): def test_directory_is_generated_last_with_real_page_counts(django_user_model, tmp_path, monkeypatch):
user = django_user_model.objects.create_user(username="owner", password="pass")
conversation = Conversation.objects.create(user=user, title="会话")
batch = RegulatoryInfoPackageBatch.objects.create(
conversation=conversation,
user=user,
batch_no="RIP-20260610154010-abcdef",
work_dir=str(tmp_path),
)
merged, _summary = merge_fields({"product_name": {"value": "测试产品", "label": "产品名称"}}, {})
page_counts = {
"CH1.4 申请表.docx": 3,
"CH1.5 产品列表.docx": 5,
"CH1.11.1 符合标准的清单.docx": 2,
"CH1.11.5 真实性声明.docx": 4,
"CH1.11.6 符合性声明.docx": 6,
}
counted_files = []
def fake_count(path):
counted_files.append(Path(path).name)
return page_counts[Path(path).name]
monkeypatch.setattr(package_generate, "count_document_pages", fake_count, raising=False)
results = generate_package_documents(batch, load_template_config(), merged)
assert results[-1].template_code == "ch1_2_directory"
assert set(counted_files) == set(page_counts)
directory = Document(results[-1].path)
directory_pages = {row.cells[0].text.strip(): row.cells[4].text.strip() for row in directory.tables[0].rows[1:]}
assert directory_pages == {
"CH1.2": "1",
"CH1.4": "3",
"CH1.5": "5",
"CH1.11.1": "2",
"CH1.11.5": "4",
"CH1.11.6": "6",
}
def test_generated_docx_does_not_add_prefill_or_audit_blocks(django_user_model, tmp_path):
user = django_user_model.objects.create_user(username="owner", password="pass") user = django_user_model.objects.create_user(username="owner", password="pass")
conversation = Conversation.objects.create(user=user, title="会话") conversation = Conversation.objects.create(user=user, title="会话")
batch = RegulatoryInfoPackageBatch.objects.create( batch = RegulatoryInfoPackageBatch.objects.create(
@@ -74,12 +139,15 @@ def test_generated_docx_has_visible_prefill_block_near_top(django_user_model, tm
merged, _summary = merge_fields({"product_name": {"value": "测试产品", "label": "产品名称"}}, {}) merged, _summary = merge_fields({"product_name": {"value": "测试产品", "label": "产品名称"}}, {})
results = generate_package_documents(batch, load_template_config(), merged) results = generate_package_documents(batch, load_template_config(), merged)
docx_result = next(result for result in results if result.template_code == "ch1_2_directory") for result in results:
document = Document(docx_result.path) document = Document(result.path)
first_text = "\n".join(paragraph.text for paragraph in document.paragraphs[:8]) text = _document_text(document)
assert "预生成版" in first_text assert "预生成版" not in text
assert "测试产品" in first_text assert "预生成字段" not in text
assert "component_table" not in text
assert '"header"' not in text
assert "测试产品" in text
def test_generated_docx_replaces_sample_case_content(django_user_model, tmp_path): def test_generated_docx_replaces_sample_case_content(django_user_model, tmp_path):
@@ -141,13 +209,18 @@ def test_generated_docs_fill_clean_template_body(django_user_model, tmp_path):
result = next(item for item in results if item.template_code == code) result = next(item for item in results if item.template_code == code)
text = _document_text(Document(result.path)) text = _document_text(Document(result.path))
assert "甲型流感病毒核酸检测试剂盒" in text assert "甲型流感病毒核酸检测试剂盒" in text
assert "星河医疗科技有限公司" in text if code == "ch1_4_application_form":
assert "星河医疗科技有限公司" in text
assert "{{" not in text assert "{{" not in text
assert "}}" not in text assert "}}" not in text
standards = next(item for item in results if item.template_code == "ch1_11_1_standards") today = timezone.localdate().strftime("%Y年%m月%d")
standards_text = _document_text(Document(standards.path)) for code in ["ch1_11_1_standards", "ch1_11_5_authenticity", "ch1_11_6_conformity"]:
assert "GB/T 29791.1-2013" in standards_text result = next(item for item in results if item.template_code == code)
text = _document_text(Document(result.path))
assert today in text
assert "xxxx年xx月xx日" not in text
assert "星河医疗科技有限公司" not in text
product_list = next(item for item in results if item.template_code == "ch1_5_product_list") product_list = next(item for item in results if item.template_code == "ch1_5_product_list")
product_text = _document_text(Document(product_list.path)) product_text = _document_text(Document(product_list.path))
@@ -155,6 +228,51 @@ def test_generated_docs_fill_clean_template_body(django_user_model, tmp_path):
assert "48人份/盒" in product_text assert "48人份/盒" in product_text
def test_product_list_uses_component_table_from_instruction(django_user_model, tmp_path):
    """Check that the product-list document shows the supplied component table.

    Builds a batch rooted at ``tmp_path``, merges rule fields that include a
    JSON-encoded component table plus its notes, generates the package, and
    inspects the text of the ``ch1_5_product_list`` result.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="RIP-20260610154400-abcdef",
        work_dir=str(tmp_path),
    )

    component_payload = {
        "header": ["组分", "主要组成成分", "规格24人份/盒)", "规格48人份/盒)"],
        "rows": [
            ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"],
            ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"],
        ],
    }
    rule_fields = {
        "product_name": {"value": "新型冠状病毒核酸检测试剂盒", "label": "产品名称"},
        "package_specification": {"value": "24人份/盒48人份/盒", "label": "包装规格"},
        "component_table": {
            # Passed as a JSON string with non-ASCII characters kept as-is.
            "value": json.dumps(component_payload, ensure_ascii=False),
            "label": "主要组成成分",
        },
        "component_notes": {
            "value": "注:不同批号试剂盒中各组分不得互换使用。",
            "label": "主要组成成分备注",
        },
    }
    merged, _summary = merge_fields(rule_fields, {})

    results = generate_package_documents(batch, load_template_config(), merged)

    product_list = next(
        entry for entry in results if entry.template_code == "ch1_5_product_list"
    )
    text = _document_text(Document(product_list.path))

    # Content taken from the supplied component table and its notes.
    for fragment in (
        "PCR反应液 I",
        "840μL/管×1管",
        "840μL/管×2管",
        "注:不同批号试剂盒中各组分不得互换使用。",
    ):
        assert fragment in text

    # NOTE(review): presumably leftover sample content from the template that
    # must be replaced - confirm against the template file.
    for stale in ("RSV&MP", "6018003102"):
        assert stale not in text
def _document_text(document: Document) -> str: def _document_text(document: Document) -> str:
text = "\n".join(paragraph.text for paragraph in document.paragraphs) text = "\n".join(paragraph.text for paragraph in document.paragraphs)
for table in document.tables: for table in document.tables:

View File

@@ -10,17 +10,16 @@ from review_agent.regulatory_info_package.services.template_config import (
) )
def test_template_config_loads_seven_templates(): def test_template_config_loads_six_templates():
config = load_template_config() config = load_template_config()
assert config["version"] == "regulatory_info_package_templates_v1" assert config["version"] == "regulatory_info_package_templates_v1"
assert config["zip_name"] == DEFAULT_ZIP_NAME assert config["zip_name"] == DEFAULT_ZIP_NAME
assert len(config["templates"]) == 7 assert len(config["templates"]) == 6
assert {template["code"] for template in config["templates"]} == { assert {template["code"] for template in config["templates"]} == {
"ch1_2_directory", "ch1_2_directory",
"ch1_4_application_form", "ch1_4_application_form",
"ch1_5_product_list", "ch1_5_product_list",
"ch1_9_pre_submission",
"ch1_11_1_standards", "ch1_11_1_standards",
"ch1_11_5_authenticity", "ch1_11_5_authenticity",
"ch1_11_6_conformity", "ch1_11_6_conformity",
@@ -45,4 +44,3 @@ def test_template_config_sources_exist():
assert source_dir.exists() assert source_dir.exists()
for template in config["templates"]: for template in config["templates"]:
assert (source_dir / template["source_file"]).exists() assert (source_dir / template["source_file"]).exists()