test(regulatory-info-package): 补充模板生成回归覆盖
This commit is contained in:
8
tests/conftest.py
Normal file
8
tests/conftest.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def mock_regulatory_info_package_page_count(monkeypatch):
    """Stub out page counting so every document reports exactly one page.

    Applied automatically to every test (``autouse=True``). It replaces
    ``package_generate.count_document_pages`` with a function that ignores
    its argument and returns ``1``.
    """
    # Imported inside the fixture rather than at module import time.
    from review_agent.regulatory_info_package.services import package_generate

    def _single_page(_path):
        return 1

    monkeypatch.setattr(package_generate, "count_document_pages", _single_page)
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
from review_agent.regulatory_info_package.schemas import InstructionExtractResult
|
from review_agent.regulatory_info_package.schemas import InstructionExtractResult
|
||||||
from review_agent.regulatory_info_package.services.field_extract import extract_fields_by_rules, run_parallel_extract
|
from review_agent.regulatory_info_package.services.field_extract import extract_fields_by_rules, run_parallel_extract
|
||||||
|
|
||||||
@@ -18,6 +20,57 @@ def test_extract_fields_by_rules_finds_product_name_and_storage():
|
|||||||
assert result["storage_condition"]["value"] == "2-8℃保存"
|
assert result["storage_condition"]["value"] == "2-8℃保存"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_fields_by_rules_uses_registrant_or_manufacturer_for_applicant():
    """Rule extraction fills applicant and manufacturer fields from labelled paragraphs.

    The instruction carries registrant, manufacturer, address, contact and
    production-address paragraphs; each extracted field value must equal the
    text that follows the corresponding label.
    """
    labelled_paragraphs = [
        "注册人/售后服务单位名称:卡尤迪生物科技宜兴有限公司",
        "生产企业名称:卡尤迪生物科技宜兴有限公司",
        "生产企业住所:宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室",
        "联系方式: 0510-80330909, 0510-80330919",
        "生产地址:江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
    ]
    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=labelled_paragraphs,
        sections={},
        tables=[],
        component_tables=[],
        front_text="",
    )

    extracted = extract_fields_by_rules(instruction)

    company = "卡尤迪生物科技宜兴有限公司"
    assert extracted["applicant_name"]["value"] == company
    assert extracted["manufacturer_name"]["value"] == company
    assert (
        extracted["applicant_address"]["value"]
        == "宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室"
    )
    assert extracted["applicant_contact"]["value"] == "0510-80330909, 0510-80330919"
    assert (
        extracted["production_address"]["value"]
        == "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_fields_by_rules_serializes_component_table_and_notes():
    """Rule extraction emits the component table as JSON and keeps the section notes.

    ``component_table`` must be a JSON string whose decoded header and first
    row match the input table, and ``component_notes`` must equal the text of
    the main-components section.
    """
    section_notes = "表1 规格A大包装试剂盒组成成分\n注:不同批号试剂盒中各组分不得互换使用。"
    component_table = {
        "header": ["组分", "主要组成成分", "规格(24人份/盒)", "规格(48人份/盒)"],
        "rows": [
            ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"],
            ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"],
        ],
    }
    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=[],
        sections={"【主要组成成分】": section_notes},
        tables=[],
        component_tables=[component_table],
        front_text="",
    )

    extracted = extract_fields_by_rules(instruction)
    decoded = json.loads(extracted["component_table"]["value"])

    assert decoded["header"][:2] == ["组分", "主要组成成分"]
    first_row = decoded["rows"][0]
    assert first_row[0] == "PCR反应液 I"
    assert extracted["component_notes"]["value"] == section_notes
|
||||||
|
|
||||||
|
|
||||||
def test_run_parallel_extract_keeps_rule_result_when_llm_fails():
|
def test_run_parallel_extract_keeps_rule_result_when_llm_fails():
|
||||||
instruction = InstructionExtractResult(
|
instruction = InstructionExtractResult(
|
||||||
source_file_name="目标产品说明书.docx",
|
source_file_name="目标产品说明书.docx",
|
||||||
@@ -33,4 +86,3 @@ def test_run_parallel_extract_keeps_rule_result_when_llm_fails():
|
|||||||
assert result["regex_results"]["product_name"]["value"] == "测试产品"
|
assert result["regex_results"]["product_name"]["value"] == "测试产品"
|
||||||
assert result["llm_results"] == {}
|
assert result["llm_results"] == {}
|
||||||
assert result["llm_error"]
|
assert result["llm_error"]
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,13 @@
|
|||||||
|
import json
|
||||||
import pytest
|
import pytest
|
||||||
from docx import Document
|
from docx import Document
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
from django.utils import timezone
|
||||||
from review_agent.models import Conversation, RegulatoryInfoPackageBatch
|
from review_agent.models import Conversation, RegulatoryInfoPackageBatch
|
||||||
from review_agent.regulatory_info_package.services.field_merge import merge_fields
|
from review_agent.regulatory_info_package.services.field_merge import merge_fields
|
||||||
|
from review_agent.regulatory_info_package.services import package_generate
|
||||||
from review_agent.regulatory_info_package.services.package_generate import generate_package_documents
|
from review_agent.regulatory_info_package.services.package_generate import generate_package_documents
|
||||||
from review_agent.regulatory_info_package.services.template_config import load_template_config
|
from review_agent.regulatory_info_package.services.template_config import load_template_config
|
||||||
|
|
||||||
@@ -18,7 +21,7 @@ def test_template_config_uses_clean_internal_templates():
|
|||||||
|
|
||||||
assert source_dir == settings.BASE_DIR / "review_agent" / "regulatory_info_package" / "templates" / "clean"
|
assert source_dir == settings.BASE_DIR / "review_agent" / "regulatory_info_package" / "templates" / "clean"
|
||||||
assert source_dir.exists()
|
assert source_dir.exists()
|
||||||
assert len(config["templates"]) == 7
|
assert len(config["templates"]) == 6
|
||||||
assert all((source_dir / item["source_file"]).exists() for item in config["templates"])
|
assert all((source_dir / item["source_file"]).exists() for item in config["templates"])
|
||||||
|
|
||||||
|
|
||||||
@@ -26,13 +29,12 @@ def test_clean_templates_expose_stable_fill_placeholders():
|
|||||||
config = load_template_config()
|
config = load_template_config()
|
||||||
source_dir = Path(config["source_dir"])
|
source_dir = Path(config["source_dir"])
|
||||||
expected_by_code = {
|
expected_by_code = {
|
||||||
"ch1_2_directory": {"{{product_name}}", "{{applicant_name}}"},
|
"ch1_2_directory": {"{{product_name}}"},
|
||||||
"ch1_4_application_form": {"{{product_name}}", "{{applicant_name}}"},
|
"ch1_4_application_form": {"{{product_name}}", "{{applicant_name}}"},
|
||||||
"ch1_5_product_list": {"{{product_name}}", "{{package_specification}}"},
|
"ch1_5_product_list": {"{{product_name}}"},
|
||||||
"ch1_9_pre_submission": {"{{product_name}}", "{{applicant_name}}"},
|
"ch1_11_1_standards": {"{{product_name}}"},
|
||||||
"ch1_11_1_standards": {"{{standard_no}}", "{{product_name}}"},
|
"ch1_11_5_authenticity": {"{{product_name}}"},
|
||||||
"ch1_11_5_authenticity": {"{{product_name}}", "{{applicant_name}}"},
|
"ch1_11_6_conformity": {"{{product_name}}"},
|
||||||
"ch1_11_6_conformity": {"{{product_name}}", "{{applicant_name}}"},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for item in config["templates"]:
|
for item in config["templates"]:
|
||||||
@@ -42,7 +44,29 @@ def test_clean_templates_expose_stable_fill_placeholders():
|
|||||||
assert placeholder in text
|
assert placeholder in text
|
||||||
|
|
||||||
|
|
||||||
def test_generate_package_documents_creates_seven_results(django_user_model, tmp_path):
|
def test_directory_template_includes_page_numbers():
    """The clean directory template has "1" in the fifth column of all six body rows."""
    config = load_template_config()
    template_root = Path(config["source_dir"])
    directory_item = next(
        entry for entry in config["templates"] if entry["code"] == "ch1_2_directory"
    )
    document = Document(template_root / directory_item["source_file"])

    # Skip the header row; the fifth cell (index 4) holds the page number.
    body_rows = document.tables[0].rows[1:]
    page_numbers = []
    for row in body_rows:
        page_numbers.append(row.cells[4].text.strip())

    assert page_numbers == ["1"] * 6
|
||||||
|
|
||||||
|
|
||||||
|
def test_application_form_template_uses_real_checkbox_symbols():
    """The application-form template has real checkbox glyphs, not placeholder tokens."""
    config = load_template_config()
    template_root = Path(config["source_dir"])
    form_item = next(
        entry for entry in config["templates"] if entry["code"] == "ch1_4_application_form"
    )
    form_document = Document(template_root / form_item["source_file"])
    text = _document_text(form_document)

    # Placeholder tokens must be gone from the template text.
    for leftover in ("{{复选框}}", "{{}}"):
        assert leftover not in text
    # Both the unchecked and the checked glyph must be present.
    for glyph in ("☐", "☑"):
        assert glyph in text
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_package_documents_creates_six_results(django_user_model, tmp_path):
|
||||||
user = django_user_model.objects.create_user(username="owner", password="pass")
|
user = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
conversation = Conversation.objects.create(user=user, title="会话")
|
conversation = Conversation.objects.create(user=user, title="会话")
|
||||||
batch = RegulatoryInfoPackageBatch.objects.create(
|
batch = RegulatoryInfoPackageBatch.objects.create(
|
||||||
@@ -55,14 +79,55 @@ def test_generate_package_documents_creates_seven_results(django_user_model, tmp
|
|||||||
|
|
||||||
results = generate_package_documents(batch, load_template_config(), merged)
|
results = generate_package_documents(batch, load_template_config(), merged)
|
||||||
|
|
||||||
assert len(results) == 7
|
assert len(results) == 6
|
||||||
assert all(result.status in {"success", "fallback_success"} for result in results), [
|
assert all(result.status in {"success", "fallback_success"} for result in results), [
|
||||||
(result.template_code, result.status, result.error_message) for result in results
|
(result.template_code, result.status, result.error_message) for result in results
|
||||||
]
|
]
|
||||||
assert all(result.path for result in results)
|
assert all(result.path for result in results)
|
||||||
|
|
||||||
|
|
||||||
def test_generated_docx_has_visible_prefill_block_near_top(django_user_model, tmp_path):
|
def test_directory_is_generated_last_with_real_page_counts(django_user_model, tmp_path, monkeypatch):
    """The directory is produced last and lists the page counts of the other documents.

    ``count_document_pages`` is replaced with a recorder that returns a fixed
    count per file name. The test checks that the last result is the directory,
    that exactly the five non-directory files were counted, and that the
    directory table maps each chapter code to the expected page count.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="RIP-20260610154010-abcdef",
        work_dir=str(tmp_path),
    )
    rule_fields = {"product_name": {"value": "测试产品", "label": "产品名称"}}
    merged, _summary = merge_fields(rule_fields, {})

    page_counts = {
        "CH1.4 申请表.docx": 3,
        "CH1.5 产品列表.docx": 5,
        "CH1.11.1 符合标准的清单.docx": 2,
        "CH1.11.5 真实性声明.docx": 4,
        "CH1.11.6 符合性声明.docx": 6,
    }
    counted_files = []

    def fake_count(path):
        # Record the file name first, so an unexpected file still shows up
        # in ``counted_files`` before the lookup raises KeyError.
        file_name = Path(path).name
        counted_files.append(file_name)
        return page_counts[Path(path).name]

    monkeypatch.setattr(package_generate, "count_document_pages", fake_count, raising=False)

    results = generate_package_documents(batch, load_template_config(), merged)

    last_result = results[-1]
    assert last_result.template_code == "ch1_2_directory"
    assert set(counted_files) == set(page_counts)

    directory = Document(results[-1].path)
    # Map chapter code (first cell) to page number (fifth cell), skipping the header row.
    directory_pages = {}
    for row in directory.tables[0].rows[1:]:
        directory_pages[row.cells[0].text.strip()] = row.cells[4].text.strip()

    expected_pages = {
        "CH1.2": "1",
        "CH1.4": "3",
        "CH1.5": "5",
        "CH1.11.1": "2",
        "CH1.11.5": "4",
        "CH1.11.6": "6",
    }
    assert directory_pages == expected_pages
|
||||||
|
|
||||||
|
|
||||||
|
def test_generated_docx_does_not_add_prefill_or_audit_blocks(django_user_model, tmp_path):
|
||||||
user = django_user_model.objects.create_user(username="owner", password="pass")
|
user = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
conversation = Conversation.objects.create(user=user, title="会话")
|
conversation = Conversation.objects.create(user=user, title="会话")
|
||||||
batch = RegulatoryInfoPackageBatch.objects.create(
|
batch = RegulatoryInfoPackageBatch.objects.create(
|
||||||
@@ -74,12 +139,15 @@ def test_generated_docx_has_visible_prefill_block_near_top(django_user_model, tm
|
|||||||
merged, _summary = merge_fields({"product_name": {"value": "测试产品", "label": "产品名称"}}, {})
|
merged, _summary = merge_fields({"product_name": {"value": "测试产品", "label": "产品名称"}}, {})
|
||||||
|
|
||||||
results = generate_package_documents(batch, load_template_config(), merged)
|
results = generate_package_documents(batch, load_template_config(), merged)
|
||||||
docx_result = next(result for result in results if result.template_code == "ch1_2_directory")
|
for result in results:
|
||||||
document = Document(docx_result.path)
|
document = Document(result.path)
|
||||||
first_text = "\n".join(paragraph.text for paragraph in document.paragraphs[:8])
|
text = _document_text(document)
|
||||||
|
|
||||||
assert "预生成版" in first_text
|
assert "预生成版" not in text
|
||||||
assert "测试产品" in first_text
|
assert "预生成字段" not in text
|
||||||
|
assert "component_table" not in text
|
||||||
|
assert '"header"' not in text
|
||||||
|
assert "测试产品" in text
|
||||||
|
|
||||||
|
|
||||||
def test_generated_docx_replaces_sample_case_content(django_user_model, tmp_path):
|
def test_generated_docx_replaces_sample_case_content(django_user_model, tmp_path):
|
||||||
@@ -141,13 +209,18 @@ def test_generated_docs_fill_clean_template_body(django_user_model, tmp_path):
|
|||||||
result = next(item for item in results if item.template_code == code)
|
result = next(item for item in results if item.template_code == code)
|
||||||
text = _document_text(Document(result.path))
|
text = _document_text(Document(result.path))
|
||||||
assert "甲型流感病毒核酸检测试剂盒" in text
|
assert "甲型流感病毒核酸检测试剂盒" in text
|
||||||
|
if code == "ch1_4_application_form":
|
||||||
assert "星河医疗科技有限公司" in text
|
assert "星河医疗科技有限公司" in text
|
||||||
assert "{{" not in text
|
assert "{{" not in text
|
||||||
assert "}}" not in text
|
assert "}}" not in text
|
||||||
|
|
||||||
standards = next(item for item in results if item.template_code == "ch1_11_1_standards")
|
today = timezone.localdate().strftime("%Y年%m月%d日")
|
||||||
standards_text = _document_text(Document(standards.path))
|
for code in ["ch1_11_1_standards", "ch1_11_5_authenticity", "ch1_11_6_conformity"]:
|
||||||
assert "GB/T 29791.1-2013" in standards_text
|
result = next(item for item in results if item.template_code == code)
|
||||||
|
text = _document_text(Document(result.path))
|
||||||
|
assert today in text
|
||||||
|
assert "xxxx年xx月xx日" not in text
|
||||||
|
assert "星河医疗科技有限公司" not in text
|
||||||
|
|
||||||
product_list = next(item for item in results if item.template_code == "ch1_5_product_list")
|
product_list = next(item for item in results if item.template_code == "ch1_5_product_list")
|
||||||
product_text = _document_text(Document(product_list.path))
|
product_text = _document_text(Document(product_list.path))
|
||||||
@@ -155,6 +228,51 @@ def test_generated_docs_fill_clean_template_body(django_user_model, tmp_path):
|
|||||||
assert "48人份/盒" in product_text
|
assert "48人份/盒" in product_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_product_list_uses_component_table_from_instruction(django_user_model, tmp_path):
    """The product list is built from the supplied component table and notes.

    The generated product-list document must contain the component rows and the
    notes passed in through the merged fields, and must not contain the
    ``RSV&MP`` / ``6018003102`` strings.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="RIP-20260610154400-abcdef",
        work_dir=str(tmp_path),
    )

    component_payload = {
        "header": ["组分", "主要组成成分", "规格(24人份/盒)", "规格(48人份/盒)"],
        "rows": [
            ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"],
            ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"],
        ],
    }
    rule_fields = {
        "product_name": {"value": "新型冠状病毒核酸检测试剂盒", "label": "产品名称"},
        "package_specification": {"value": "24人份/盒;48人份/盒", "label": "包装规格"},
        "component_table": {
            "value": json.dumps(component_payload, ensure_ascii=False),
            "label": "主要组成成分",
        },
        "component_notes": {
            "value": "注:不同批号试剂盒中各组分不得互换使用。",
            "label": "主要组成成分备注",
        },
    }
    merged, _summary = merge_fields(rule_fields, {})

    results = generate_package_documents(batch, load_template_config(), merged)
    product_list = next(
        item for item in results if item.template_code == "ch1_5_product_list"
    )
    text = _document_text(Document(product_list.path))

    must_appear = (
        "PCR反应液 I",
        "840μL/管×1管",
        "840μL/管×2管",
        "注:不同批号试剂盒中各组分不得互换使用。",
    )
    for fragment in must_appear:
        assert fragment in text

    must_not_appear = ("RSV&MP", "6018003102")
    for fragment in must_not_appear:
        assert fragment not in text
|
||||||
|
|
||||||
|
|
||||||
def _document_text(document: Document) -> str:
|
def _document_text(document: Document) -> str:
|
||||||
text = "\n".join(paragraph.text for paragraph in document.paragraphs)
|
text = "\n".join(paragraph.text for paragraph in document.paragraphs)
|
||||||
for table in document.tables:
|
for table in document.tables:
|
||||||
|
|||||||
@@ -10,17 +10,16 @@ from review_agent.regulatory_info_package.services.template_config import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_template_config_loads_seven_templates():
|
def test_template_config_loads_six_templates():
|
||||||
config = load_template_config()
|
config = load_template_config()
|
||||||
|
|
||||||
assert config["version"] == "regulatory_info_package_templates_v1"
|
assert config["version"] == "regulatory_info_package_templates_v1"
|
||||||
assert config["zip_name"] == DEFAULT_ZIP_NAME
|
assert config["zip_name"] == DEFAULT_ZIP_NAME
|
||||||
assert len(config["templates"]) == 7
|
assert len(config["templates"]) == 6
|
||||||
assert {template["code"] for template in config["templates"]} == {
|
assert {template["code"] for template in config["templates"]} == {
|
||||||
"ch1_2_directory",
|
"ch1_2_directory",
|
||||||
"ch1_4_application_form",
|
"ch1_4_application_form",
|
||||||
"ch1_5_product_list",
|
"ch1_5_product_list",
|
||||||
"ch1_9_pre_submission",
|
|
||||||
"ch1_11_1_standards",
|
"ch1_11_1_standards",
|
||||||
"ch1_11_5_authenticity",
|
"ch1_11_5_authenticity",
|
||||||
"ch1_11_6_conformity",
|
"ch1_11_6_conformity",
|
||||||
@@ -45,4 +44,3 @@ def test_template_config_sources_exist():
|
|||||||
assert source_dir.exists()
|
assert source_dir.exists()
|
||||||
for template in config["templates"]:
|
for template in config["templates"]:
|
||||||
assert (source_dir / template["source_file"]).exists()
|
assert (source_dir / template["source_file"]).exists()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user