# DEMO-AGENT/tests/test_application_form_fill_field_extract.py

import json

import pytest

from review_agent.application_form_fill.services.field_extract import (
    extract_by_llm,
    extract_by_rules,
    run_parallel_extract,
    save_field_extract_result,
)
from review_agent.application_form_fill.services.template_config import load_template_config
from review_agent.application_form_fill.services.template_select import select_templates
from review_agent.models import (
    ApplicationFormFillArtifact,
    ApplicationFormFillBatch,
    Conversation,
    FileSummaryBatch,
)


# Module-level ``pytestmark``: applies the ``django_db`` mark to every test
# collected from this file.
pytestmark = pytest.mark.django_db


def _registration_specs():
    """Return the template specs used by the extraction tests.

    Loads the template config and passes it to ``select_templates`` with the
    ``registration_certificate`` template key and the "首次注册" argument.
    ``select_templates`` returns a pair; only the first element (the specs)
    is returned here, the accompanying risk notes are dropped.
    """
    template_specs, _ = select_templates(
        load_template_config(),
        ["registration_certificate"],
        "首次注册",
    )
    return template_specs


def test_rule_extracts_registration_certificate_fields():
    """Rule-based extraction picks the labelled fields out of a manual text."""
    manual_lines = (
        "产品名称：甲胎蛋白检测试剂盒",
        "包装规格：20人份/盒",
        "预期用途：用于体外定量检测人血清中甲胎蛋白含量",
        "产品储存条件及有效期：2-8℃保存，有效期12个月",
    )
    documents = {"产品说明书.txt": "\n".join(manual_lines)}

    extracted = extract_by_rules(documents, _registration_specs())

    # Index the extracted entries by their key so each field can be
    # inspected individually below.
    by_key = {}
    for entry in extracted["fields"]:
        by_key[entry["key"]] = entry

    product_name = by_key["product_name"]
    assert product_name["value"] == "甲胎蛋白检测试剂盒"

    intended_use = by_key["intended_use"]
    assert intended_use["source_role"] == "说明书"

    storage = by_key["storage_condition_and_validity"]
    assert "2-8℃保存" in storage["value"]

    package_spec = by_key["package_specification"]
    assert package_spec["extractor"] == "rule"


def test_llm_extract_parses_structured_json(monkeypatch):
    """LLM extraction parses a JSON completion and tags fields as ``llm``."""
    product_name_field = {
        "key": "product_name",
        "label": "产品名称",
        "value": "甲胎蛋白检测试剂盒",
        "source_file": "说明书.txt",
        "source_role": "说明书",
        "evidence": "产品名称：甲胎蛋白检测试剂盒",
        "confidence": 0.9,
    }
    completion_payload = {"fields": [product_name_field], "checklist_items": []}

    def fake_generate_completion(messages, temperature=0.0):
        # Stand-in for the real completion call: always answers with the
        # canned payload serialised as JSON (non-ASCII kept verbatim).
        return json.dumps(completion_payload, ensure_ascii=False)

    monkeypatch.setattr(
        "review_agent.application_form_fill.services.field_extract.generate_completion",
        fake_generate_completion,
    )

    documents = {"说明书.txt": "产品名称：甲胎蛋白检测试剂盒"}
    result = extract_by_llm(documents, _registration_specs())

    first_field = result["fields"][0]
    assert first_field["extractor"] == "llm"
    assert first_field["value"] == "甲胎蛋白检测试剂盒"


def test_llm_extract_failure_returns_empty_result(monkeypatch):
    """A failing completion call yields no fields plus an error message."""

    def failing_generate_completion(messages, temperature=0.0):
        # Simulate the completion backend timing out.
        raise TimeoutError("timeout")

    monkeypatch.setattr(
        "review_agent.application_form_fill.services.field_extract.generate_completion",
        failing_generate_completion,
    )

    documents = {"说明书.txt": "产品名称：甲胎蛋白检测试剂盒"}
    result = extract_by_llm(documents, _registration_specs())

    assert result["fields"] == []
    assert "timeout" in result["error_message"]


def test_parallel_extract_preserves_rule_result_when_llm_fails(monkeypatch):
    """Rule results survive in the combined payload when the LLM call fails."""

    def failing_generate_completion(messages, temperature=0.0):
        # Simulate the completion backend timing out.
        raise TimeoutError("timeout")

    monkeypatch.setattr(
        "review_agent.application_form_fill.services.field_extract.generate_completion",
        failing_generate_completion,
    )

    documents = {"说明书.txt": "产品名称：甲胎蛋白检测试剂盒"}
    payload = run_parallel_extract(documents, _registration_specs())

    rule_fields = payload["regex_results"]["fields"]
    assert rule_fields

    llm_fields = payload["llm_results"]["fields"]
    assert llm_fields == []

    assert payload["selected_templates"] == ["registration_certificate"]


def test_save_field_extract_result_creates_json_artifact(settings, tmp_path, django_user_model):
    """Saving an extract payload produces a JSON artifact with a content hash."""
    # Point media storage at the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-FIELD",
    )
    work_dir = tmp_path / "aff" / "AFF-FIELD"
    fill_batch = ApplicationFormFillBatch.objects.create(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
        batch_no="AFF-FIELD",
        work_dir=str(work_dir),
    )
    extract_payload = {
        "regex_results": {"fields": []},
        "llm_results": {"fields": []},
    }

    artifact = save_field_extract_result(fill_batch, extract_payload)

    expected_type = ApplicationFormFillArtifact.ArtifactType.FIELD_EXTRACT_RESULT
    assert artifact.artifact_type == expected_type
    expected_format = ApplicationFormFillArtifact.FileFormat.JSON
    assert artifact.file_format == expected_format
    assert artifact.content_hash