"""Tests for rule-based field extraction and the parallel extract wrapper.

The tests build ``InstructionExtractResult`` fixtures by hand and check the
dictionaries returned by ``extract_fields_by_rules`` and
``run_parallel_extract``.
"""

import json

from review_agent.regulatory_info_package.schemas import InstructionExtractResult
from review_agent.regulatory_info_package.services.field_extract import (
    extract_fields_by_rules,
    run_parallel_extract,
)


def test_extract_fields_by_rules_finds_product_name_and_storage() -> None:
    """Product name and storage condition are read from labelled paragraphs."""
    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=["产品名称:新型冠状病毒检测试剂盒", "储存条件:2-8℃保存"],
        sections={},
        tables=[],
        component_tables=[],
        front_text="产品名称:新型冠状病毒检测试剂盒\n储存条件:2-8℃保存",
    )

    result = extract_fields_by_rules(instruction)

    assert result["product_name"]["value"] == "新型冠状病毒检测试剂盒"
    assert result["storage_condition"]["value"] == "2-8℃保存"


def test_extract_fields_by_rules_uses_registrant_or_manufacturer_for_applicant() -> None:
    """Applicant fields are filled from the registrant / manufacturer lines."""
    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=[
            "注册人/售后服务单位名称:卡尤迪生物科技宜兴有限公司",
            "生产企业名称:卡尤迪生物科技宜兴有限公司",
            "生产企业住所:宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室",
            "联系方式: 0510-80330909, 0510-80330919",
            "生产地址:江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
        ],
        sections={},
        tables=[],
        component_tables=[],
        front_text="",
    )

    result = extract_fields_by_rules(instruction)

    assert result["applicant_name"]["value"] == "卡尤迪生物科技宜兴有限公司"
    assert result["manufacturer_name"]["value"] == "卡尤迪生物科技宜兴有限公司"
    assert result["applicant_address"]["value"] == "宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室"
    assert result["applicant_contact"]["value"] == "0510-80330909, 0510-80330919"
    assert result["production_address"]["value"] == "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室"


def test_extract_fields_by_rules_serializes_component_table_and_notes() -> None:
    """The component table is emitted as JSON text and the section text as notes."""
    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=[],
        sections={"【主要组成成分】": "表1 规格A大包装试剂盒组成成分\n注:不同批号试剂盒中各组分不得互换使用。"},
        tables=[],
        component_tables=[
            {
                "header": ["组分", "主要组成成分", "规格(24人份/盒)", "规格(48人份/盒)"],
                "rows": [
                    ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"],
                    ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"],
                ],
            }
        ],
        front_text="",
    )

    result = extract_fields_by_rules(instruction)

    # The table value is a JSON string, so decode it before inspecting it.
    payload = json.loads(result["component_table"]["value"])
    assert payload["header"][0:2] == ["组分", "主要组成成分"]
    assert payload["rows"][0][0] == "PCR反应液 I"
    assert result["component_notes"]["value"] == "表1 规格A大包装试剂盒组成成分\n注:不同批号试剂盒中各组分不得互换使用。"


def test_run_parallel_extract_keeps_rule_result_when_llm_fails() -> None:
    """A raising LLM extractor leaves the rule results intact and sets an error."""

    def failing_llm_extract(_instruction):
        # Stand-in for the LLM extractor that always fails.
        raise ValueError("bad llm")

    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=["产品名称:测试产品"],
        sections={},
        tables=[],
        component_tables=[],
        front_text="产品名称:测试产品",
    )

    result = run_parallel_extract(instruction, llm_extract_func=failing_llm_extract)

    assert result["regex_results"]["product_name"]["value"] == "测试产品"
    assert result["llm_results"] == {}
    assert result["llm_error"]