89 lines
4.0 KiB
Python
89 lines
4.0 KiB
Python
import json
|
||
|
||
from review_agent.regulatory_info_package.schemas import InstructionExtractResult
|
||
from review_agent.regulatory_info_package.services.field_extract import extract_fields_by_rules, run_parallel_extract
|
||
|
||
|
||
def test_extract_fields_by_rules_finds_product_name_and_storage():
    """Check that rule extraction yields the product name and storage condition.

    The fixture carries the same two labelled lines both as separate
    paragraphs and as a newline-joined ``front_text``.
    """
    name_line = "产品名称:新型冠状病毒检测试剂盒"
    storage_line = "储存条件:2-8℃保存"
    parsed_doc = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=[name_line, storage_line],
        sections={},
        tables=[],
        component_tables=[],
        front_text="\n".join((name_line, storage_line)),
    )

    fields = extract_fields_by_rules(parsed_doc)

    assert fields["product_name"]["value"] == "新型冠状病毒检测试剂盒"
    assert fields["storage_condition"]["value"] == "2-8℃保存"
|
||
|
||
|
||
def test_extract_fields_by_rules_uses_registrant_or_manufacturer_for_applicant():
    """Check the applicant/manufacturer fields extracted from labelled paragraphs.

    The fixture provides registrant, manufacturer name, manufacturer
    residence, contact and production-address lines; ``front_text`` is empty.
    """
    labelled_lines = [
        "注册人/售后服务单位名称:卡尤迪生物科技宜兴有限公司",
        "生产企业名称:卡尤迪生物科技宜兴有限公司",
        "生产企业住所:宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室",
        "联系方式: 0510-80330909, 0510-80330919",
        "生产地址:江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
    ]
    parsed_doc = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=labelled_lines,
        sections={},
        tables=[],
        component_tables=[],
        front_text="",
    )

    fields = extract_fields_by_rules(parsed_doc)

    # Checked in insertion order, one field key per expected value.
    expected_by_field = {
        "applicant_name": "卡尤迪生物科技宜兴有限公司",
        "manufacturer_name": "卡尤迪生物科技宜兴有限公司",
        "applicant_address": "宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室",
        "applicant_contact": "0510-80330909, 0510-80330919",
        "production_address": "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
    }
    for field_key, expected_value in expected_by_field.items():
        assert fields[field_key]["value"] == expected_value
|
||
|
||
|
||
def test_extract_fields_by_rules_serializes_component_table_and_notes():
    """Check the component table is returned as JSON and the notes as plain text.

    ``component_table`` must decode with ``json.loads`` into a mapping with
    ``header`` and ``rows``; ``component_notes`` must equal the text of the
    "【主要组成成分】" section.
    """
    notes_text = "表1 规格A大包装试剂盒组成成分\n注:不同批号试剂盒中各组分不得互换使用。"
    component_table = {
        "header": ["组分", "主要组成成分", "规格(24人份/盒)", "规格(48人份/盒)"],
        "rows": [
            ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"],
            ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"],
        ],
    }
    parsed_doc = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=[],
        sections={"【主要组成成分】": notes_text},
        tables=[],
        component_tables=[component_table],
        front_text="",
    )

    fields = extract_fields_by_rules(parsed_doc)
    decoded_table = json.loads(fields["component_table"]["value"])

    assert decoded_table["header"][:2] == ["组分", "主要组成成分"]
    first_row = decoded_table["rows"][0]
    assert first_row[0] == "PCR反应液 I"
    assert fields["component_notes"]["value"] == notes_text
|
||
|
||
|
||
def test_run_parallel_extract_keeps_rule_result_when_llm_fails():
    """Check the rule-based result is kept when the LLM extractor raises.

    ``run_parallel_extract`` receives an ``llm_extract_func`` that always
    raises ``ValueError``. The returned mapping must still carry the regex
    result, an empty ``llm_results`` and a truthy ``llm_error``.
    """

    def failing_llm_extract(_instruction):
        # Stand-in extractor that fails on every call.
        raise ValueError("bad llm")

    product_line = "产品名称:测试产品"
    parsed_doc = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=[product_line],
        sections={},
        tables=[],
        component_tables=[],
        front_text=product_line,
    )

    outcome = run_parallel_extract(parsed_doc, llm_extract_func=failing_llm_extract)

    assert outcome["regex_results"]["product_name"]["value"] == "测试产品"
    assert outcome["llm_results"] == {}
    assert outcome["llm_error"]
|