Files
DEMO-AGENT/tests/test_application_form_fill_field_extract.py

122 lines
4.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import pytest
from review_agent.application_form_fill.services.field_extract import (
extract_by_llm,
extract_by_rules,
run_parallel_extract,
save_field_extract_result,
)
from review_agent.application_form_fill.services.template_config import load_template_config
from review_agent.application_form_fill.services.template_select import select_templates
from review_agent.models import (
ApplicationFormFillArtifact,
ApplicationFormFillBatch,
Conversation,
FileSummaryBatch,
)
# Module-level pytest mark: applies the `django_db` marker to every test in this file.
pytestmark = pytest.mark.django_db
def _registration_specs():
    """Return the template specs used by the tests in this module.

    Loads the template config and passes it to ``select_templates`` together
    with the ``"registration_certificate"`` template key and the ``"首次注册"``
    string. Only the first element of the returned pair is kept.
    """
    template_specs, _ = select_templates(
        load_template_config(),
        ["registration_certificate"],
        "首次注册",
    )
    return template_specs
def test_rule_extracts_registration_certificate_fields():
    """Check ``extract_by_rules`` against a four-line product manual text."""
    manual_lines = [
        "产品名称:甲胎蛋白检测试剂盒",
        "包装规格20人份/盒",
        "预期用途:用于体外定量检测人血清中甲胎蛋白含量",
        "产品储存条件及有效期2-8℃保存有效期12个月",
    ]
    documents = {"产品说明书.txt": "\n".join(manual_lines)}

    extracted = extract_by_rules(documents, _registration_specs())

    # Index the extracted field dicts by their "key" for direct lookup below.
    by_key = {}
    for entry in extracted["fields"]:
        by_key[entry["key"]] = entry

    assert by_key["product_name"]["value"] == "甲胎蛋白检测试剂盒"
    assert by_key["intended_use"]["source_role"] == "说明书"
    assert "2-8℃保存" in by_key["storage_condition_and_validity"]["value"]
    assert by_key["package_specification"]["extractor"] == "rule"
def test_llm_extract_parses_structured_json(monkeypatch):
    """Check ``extract_by_llm`` when the completion call returns a JSON document.

    ``generate_completion`` in the field_extract module is replaced with a stub
    that returns a JSON string holding one field; the test then inspects the
    first field in the result.
    """
    llm_field = {
        "key": "product_name",
        "label": "产品名称",
        "value": "甲胎蛋白检测试剂盒",
        "source_file": "说明书.txt",
        "source_role": "说明书",
        "evidence": "产品名称:甲胎蛋白检测试剂盒",
        "confidence": 0.9,
    }

    def fake_completion(messages, temperature=0.0):
        # Stub for the patched completion call: always answers with the same JSON.
        return json.dumps(
            {"fields": [llm_field], "checklist_items": []},
            ensure_ascii=False,
        )

    monkeypatch.setattr(
        "review_agent.application_form_fill.services.field_extract.generate_completion",
        fake_completion,
    )

    documents = {"说明书.txt": "产品名称:甲胎蛋白检测试剂盒"}
    outcome = extract_by_llm(documents, _registration_specs())

    first_field = outcome["fields"][0]
    assert first_field["extractor"] == "llm"
    assert first_field["value"] == "甲胎蛋白检测试剂盒"
def test_llm_extract_failure_returns_empty_result(monkeypatch):
    """Check ``extract_by_llm`` when the completion call raises ``TimeoutError``.

    ``generate_completion`` in the field_extract module is replaced with a stub
    that always raises ``TimeoutError("timeout")``. The result is expected to
    carry an empty ``fields`` list and an ``error_message`` containing "timeout".
    """

    def raise_timeout(messages, temperature=0.0):
        # A plain function with a `raise` statement replaces the former
        # lambda + generator `.throw(...)` trick; the signature is unchanged.
        raise TimeoutError("timeout")

    monkeypatch.setattr(
        "review_agent.application_form_fill.services.field_extract.generate_completion",
        raise_timeout,
    )

    result = extract_by_llm({"说明书.txt": "产品名称:甲胎蛋白检测试剂盒"}, _registration_specs())

    assert result["fields"] == []
    assert "timeout" in result["error_message"]
def test_parallel_extract_preserves_rule_result_when_llm_fails(monkeypatch):
    """Check ``run_parallel_extract`` when the completion call raises ``TimeoutError``.

    ``generate_completion`` in the field_extract module is replaced with a stub
    that always raises ``TimeoutError("timeout")``. The payload is expected to
    have non-empty ``regex_results`` fields, empty ``llm_results`` fields, and
    ``selected_templates`` equal to ``["registration_certificate"]``.
    """

    def raise_timeout(messages, temperature=0.0):
        # A plain function with a `raise` statement replaces the former
        # lambda + generator `.throw(...)` trick; the signature is unchanged.
        raise TimeoutError("timeout")

    monkeypatch.setattr(
        "review_agent.application_form_fill.services.field_extract.generate_completion",
        raise_timeout,
    )

    payload = run_parallel_extract({"说明书.txt": "产品名称:甲胎蛋白检测试剂盒"}, _registration_specs())

    assert payload["regex_results"]["fields"]
    assert payload["llm_results"]["fields"] == []
    assert payload["selected_templates"] == ["registration_certificate"]
def test_save_field_extract_result_creates_json_artifact(settings, tmp_path, django_user_model):
    """Check the artifact returned by ``save_field_extract_result``.

    Creates a user, conversation, summary batch and form-fill batch, with
    ``MEDIA_ROOT`` pointed at ``tmp_path``. The returned artifact is expected
    to have the FIELD_EXTRACT_RESULT type, the JSON file format, and a truthy
    ``content_hash``.
    """
    settings.MEDIA_ROOT = tmp_path

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-FIELD",
    )
    batch_dir = tmp_path / "aff" / "AFF-FIELD"
    fill_batch = ApplicationFormFillBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
        batch_no="AFF-FIELD",
        work_dir=str(batch_dir),
    )

    empty_payload = {
        "regex_results": {"fields": []},
        "llm_results": {"fields": []},
    }
    artifact = save_field_extract_result(fill_batch, empty_payload)

    assert artifact.artifact_type == ApplicationFormFillArtifact.ArtifactType.FIELD_EXTRACT_RESULT
    assert artifact.file_format == ApplicationFormFillArtifact.FileFormat.JSON
    assert artifact.content_hash