# DEMO-AGENT/tests/test_regulatory_info_package_package_generate.py

import json
import pytest
from docx import Document
from pathlib import Path

from django.conf import settings
from django.utils import timezone
from review_agent.models import Conversation, RegulatoryInfoPackageBatch
from review_agent.regulatory_info_package.services.field_merge import merge_fields
from review_agent.regulatory_info_package.services import package_generate
from review_agent.regulatory_info_package.services.package_generate import generate_package_documents
from review_agent.regulatory_info_package.services.template_config import load_template_config


# Apply pytest-django's ``django_db`` marker to every test in this module so
# each test is allowed to touch the database.
pytestmark = pytest.mark.django_db


def test_template_config_uses_clean_internal_templates():
    """The template config points at the in-repo ``templates/clean`` folder.

    Also checks that the folder exists, that exactly six templates are
    declared, and that every declared source file is present on disk.
    """
    template_config = load_template_config()
    templates_root = Path(template_config["source_dir"])
    expected_root = settings.BASE_DIR / "review_agent" / "regulatory_info_package" / "templates" / "clean"

    assert templates_root == expected_root
    assert templates_root.exists()

    entries = template_config["templates"]
    assert len(entries) == 6

    # Collect the declared files that are absent; the list must stay empty.
    missing_files = [
        entry["source_file"]
        for entry in entries
        if not (templates_root / entry["source_file"]).exists()
    ]
    assert not missing_files


def test_clean_templates_expose_stable_fill_placeholders():
    """Every clean template contains the ``{{...}}`` placeholders expected for its code."""
    template_config = load_template_config()
    templates_root = Path(template_config["source_dir"])

    # Placeholders that must appear in each template, keyed by template code.
    required_placeholders = {
        "ch1_2_directory": {"{{product_name}}"},
        "ch1_4_application_form": {"{{product_name}}", "{{applicant_name}}"},
        "ch1_5_product_list": {"{{product_name}}"},
        "ch1_11_1_standards": {"{{product_name}}"},
        "ch1_11_5_authenticity": {"{{product_name}}"},
        "ch1_11_6_conformity": {"{{product_name}}"},
    }

    for entry in template_config["templates"]:
        template_path = templates_root / entry["source_file"]
        body = _document_text(Document(template_path))
        required = required_placeholders[entry["code"]]
        assert all(marker in body for marker in required)


def test_directory_template_includes_page_numbers():
    """The directory template's first table has "1" in column 4 of each of its six data rows."""
    template_config = load_template_config()
    templates_root = Path(template_config["source_dir"])
    directory_entry = next(
        entry for entry in template_config["templates"] if entry["code"] == "ch1_2_directory"
    )
    directory_doc = Document(templates_root / directory_entry["source_file"])

    # Skip the first row (index 0) and read the fifth cell of every remaining row.
    data_rows = directory_doc.tables[0].rows[1:]
    page_column = []
    for row in data_rows:
        page_column.append(row.cells[4].text.strip())

    assert page_column == ["1"] * 6


def test_application_form_template_uses_real_checkbox_symbols():
    """The application-form template uses ☐/☑ glyphs rather than checkbox placeholders."""
    template_config = load_template_config()
    templates_root = Path(template_config["source_dir"])
    form_entry = next(
        entry for entry in template_config["templates"] if entry["code"] == "ch1_4_application_form"
    )
    form_doc = Document(templates_root / form_entry["source_file"])
    body = _document_text(form_doc)

    # Placeholder-style markers must be gone from the template text ...
    for leftover in ("{{复选框}}", "{{}}"):
        assert leftover not in body
    # ... and both the unchecked and checked box glyphs must be present.
    for glyph in ("☐", "☑"):
        assert glyph in body


def test_generate_package_documents_creates_six_results(django_user_model, tmp_path):
    """Generating from a minimal field set yields six results, each successful and with a path."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    package_batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="RIP-20260610154000-abcdef",
        work_dir=str(tmp_path),
    )
    raw_fields = {"product_name": {"value": "测试产品", "label": "产品名称"}}
    merged_fields, _ = merge_fields(raw_fields, {})

    outputs = generate_package_documents(package_batch, load_template_config(), merged_fields)

    assert len(outputs) == 6

    accepted_statuses = {"success", "fallback_success"}
    every_status_ok = all(output.status in accepted_statuses for output in outputs)
    # On failure, show each template's status and error message for diagnosis.
    assert every_status_ok, [
        (output.template_code, output.status, output.error_message) for output in outputs
    ]

    assert all(output.path for output in outputs)


def test_directory_is_generated_last_with_real_page_counts(django_user_model, tmp_path, monkeypatch):
    """The directory is the last result and its table shows the counted page numbers.

    ``package_generate.count_document_pages`` is replaced with a stub that
    records each file name it is asked about and returns a canned page count.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    package_batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="RIP-20260610154010-abcdef",
        work_dir=str(tmp_path),
    )
    raw_fields = {"product_name": {"value": "测试产品", "label": "产品名称"}}
    merged_fields, _ = merge_fields(raw_fields, {})

    # Canned page counts keyed by generated file name (the directory itself is not listed).
    pages_by_filename = {
        "CH1.4 申请表.docx": 3,
        "CH1.5 产品列表.docx": 5,
        "CH1.11.1 符合标准的清单.docx": 2,
        "CH1.11.5 真实性声明.docx": 4,
        "CH1.11.6 符合性声明.docx": 6,
    }
    seen_filenames = []

    def fake_count(path):
        filename = Path(path).name
        seen_filenames.append(filename)
        return pages_by_filename[filename]

    monkeypatch.setattr(package_generate, "count_document_pages", fake_count, raising=False)

    outputs = generate_package_documents(package_batch, load_template_config(), merged_fields)
    directory_result = outputs[-1]

    assert directory_result.template_code == "ch1_2_directory"
    assert set(seen_filenames) == set(pages_by_filename)

    # Map the first cell of each data row to the text of its fifth cell.
    directory_doc = Document(directory_result.path)
    pages_by_chapter = {}
    for row in directory_doc.tables[0].rows[1:]:
        pages_by_chapter[row.cells[0].text.strip()] = row.cells[4].text.strip()

    assert pages_by_chapter == {
        "CH1.2": "1",
        "CH1.4": "3",
        "CH1.5": "5",
        "CH1.11.1": "2",
        "CH1.11.5": "4",
        "CH1.11.6": "6",
    }


def test_generated_docx_does_not_add_prefill_or_audit_blocks(django_user_model, tmp_path):
    """Generated documents contain the product name and none of the unwanted marker strings."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    package_batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="RIP-20260610154100-abcdef",
        work_dir=str(tmp_path),
    )
    raw_fields = {"product_name": {"value": "测试产品", "label": "产品名称"}}
    merged_fields, _ = merge_fields(raw_fields, {})

    outputs = generate_package_documents(package_batch, load_template_config(), merged_fields)

    # Strings that must never show up in any generated document.
    unwanted_markers = ("预生成版", "预生成字段", "component_table", '"header"')
    for output in outputs:
        body = _document_text(Document(output.path))
        for marker in unwanted_markers:
            assert marker not in body
        assert "测试产品" in body


def test_generated_docx_replaces_sample_case_content(django_user_model, tmp_path):
    """Generated .docx files no longer carry the sample-case strings.

    Every result whose ``actual_format`` is ``"docx"`` must not contain the
    product name asserted below (presumably the sample case the templates were
    derived from). The product list's first table must show the first package
    specification in row 1, a "/" in the next cell, and must not contain
    "6018003102" (presumably a sample item number).
    """
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=user,
        batch_no="RIP-20260610154200-abcdef",
        work_dir=str(tmp_path),
    )
    merged, _summary = merge_fields(
        {
            "product_name": {"value": "测试产品", "label": "产品名称"},
            "package_specification": {"value": "24人份/盒；48人份/盒", "label": "包装规格"},
        },
        {},
    )

    results = generate_package_documents(batch, load_template_config(), merged)
    docx_results = [result for result in results if result.actual_format == "docx"]
    for result in docx_results:
        # Use the module's shared flattening helper instead of an inline copy
        # of the same paragraph/table traversal.
        text = _document_text(Document(result.path))
        assert "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒" not in text

    product_list = next(result for result in results if result.template_code == "ch1_5_product_list")
    table = Document(product_list.path).tables[0]
    assert table.rows[1].cells[0].text == "24人份/盒"
    assert table.rows[1].cells[1].text == "/"
    table_text = "\n".join(cell.text for row in table.rows for cell in row.cells)
    assert "6018003102" not in table_text


def test_generated_docs_fill_clean_template_body(django_user_model, tmp_path):
    """Generated documents have their placeholders filled with the merged field values.

    Checks the product name, the applicant name (application form only),
    the absence of leftover ``{{``/``}}`` markers, today's local date in the
    three CH1.11 documents, and both package specifications in the product list.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    package_batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="RIP-20260610154300-abcdef",
        work_dir=str(tmp_path),
    )
    raw_fields = {
        "product_name": {"value": "甲型流感病毒核酸检测试剂盒", "label": "产品名称"},
        "applicant_name": {"value": "星河医疗科技有限公司", "label": "申请人名称"},
        "package_specification": {"value": "24人份/盒；48人份/盒", "label": "包装规格"},
        "standard_no": {"value": "GB/T 29791.1-2013", "label": "标准号"},
    }
    merged_fields, _ = merge_fields(raw_fields, {})

    outputs = generate_package_documents(package_batch, load_template_config(), merged_fields)

    def rendered_text(template_code):
        # Flattened text of the generated document for the given template code.
        match = next(output for output in outputs if output.template_code == template_code)
        return _document_text(Document(match.path))

    named_documents = (
        "ch1_2_directory",
        "ch1_4_application_form",
        "ch1_11_5_authenticity",
        "ch1_11_6_conformity",
    )
    for template_code in named_documents:
        body = rendered_text(template_code)
        assert "甲型流感病毒核酸检测试剂盒" in body
        if template_code == "ch1_4_application_form":
            assert "星河医疗科技有限公司" in body
        assert "{{" not in body
        assert "}}" not in body

    today_label = timezone.localdate().strftime("%Y年%m月%d日")
    dated_documents = ("ch1_11_1_standards", "ch1_11_5_authenticity", "ch1_11_6_conformity")
    for template_code in dated_documents:
        body = rendered_text(template_code)
        assert today_label in body
        assert "xxxx年xx月xx日" not in body
        assert "星河医疗科技有限公司" not in body

    product_list_body = rendered_text("ch1_5_product_list")
    for specification in ("24人份/盒", "48人份/盒"):
        assert specification in product_list_body


def test_product_list_uses_component_table_from_instruction(django_user_model, tmp_path):
    """The product list renders the supplied component table and notes.

    The component table is passed to ``merge_fields`` as a JSON string with
    ``header`` and ``rows`` keys; its cell values and the notes line must
    appear in the generated product list, while "RSV&MP" and "6018003102"
    must not.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    package_batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="RIP-20260610154400-abcdef",
        work_dir=str(tmp_path),
    )

    component_rows = [
        ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"],
        ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"],
    ]
    component_header = ["组分", "主要组成成分", "规格（24人份/盒）", "规格（48人份/盒）"]
    component_json = json.dumps(
        {"header": component_header, "rows": component_rows},
        ensure_ascii=False,
    )
    raw_fields = {
        "product_name": {"value": "新型冠状病毒核酸检测试剂盒", "label": "产品名称"},
        "package_specification": {"value": "24人份/盒；48人份/盒", "label": "包装规格"},
        "component_table": {"value": component_json, "label": "主要组成成分"},
        "component_notes": {
            "value": "注：不同批号试剂盒中各组分不得互换使用。",
            "label": "主要组成成分备注",
        },
    }
    merged_fields, _ = merge_fields(raw_fields, {})

    outputs = generate_package_documents(package_batch, load_template_config(), merged_fields)
    product_list_result = next(
        output for output in outputs if output.template_code == "ch1_5_product_list"
    )
    body = _document_text(Document(product_list_result.path))

    must_appear = (
        "PCR反应液 I",
        "840μL/管×1管",
        "840μL/管×2管",
        "注：不同批号试剂盒中各组分不得互换使用。",
    )
    for fragment in must_appear:
        assert fragment in body

    for fragment in ("RSV&MP", "6018003102"):
        assert fragment not in body


def _document_text(document: Document) -> str:
    text = "\n".join(paragraph.text for paragraph in document.paragraphs)
    for table in document.tables:
        for row in table.rows:
            text += "\n" + "\t".join(cell.text for cell in row.cells)
    return text