fix(application-form-fill): 抽取说明书章节和表格字段
This commit is contained in:
@@ -48,6 +48,42 @@ def test_rule_extracts_registration_certificate_fields():
|
||||
assert values["package_specification"]["extractor"] == "rule"
|
||||
|
||||
|
||||
def test_rule_extracts_bracket_sections_from_instructions():
    """Check rule-based extraction of 【...】-delimited instruction sections.

    The fixture is a single instructions document whose sections are
    introduced by full-width bracket headings. The test asserts that the
    extracted field values contain the text of their own section and that
    the intended-use value does not absorb the following section.
    """
    texts = {
        "目标产品说明书.docx": "\n".join(
            [
                "【产品名称】",
                "新型冠状病毒2019-nCoV核酸检测试剂盒(荧光PCR法)",
                "【包装规格】",
                "规格A:24人份/盒、48人份/盒、96人份/盒。",
                "规格B:24人份/盒、48人份/盒、96人份/盒。",
                "【预期用途】",
                "本试剂盒用于体外定性检测咽拭子、痰液样本中新型冠状病毒(2019-nCoV)ORF1ab和N基因。",
                "【检测原理】",
                "本段不应进入预期用途。",
                "【主要组成成分】",
                "表1 规格A大包装试剂盒组成成分",
                "组分\t规格\t数量",
                "PCR反应液\t24人份/盒\t1管",
                "【储存条件及有效期】",
                "-20±5℃的避光条件,有效期12个月。",
                "反复冻融次数不得超过4次。",
                "【样本要求】",
                "适用样本类型:咽拭子、痰液。",
            ]
        )
    }

    result = extract_by_rules(texts, _registration_specs())

    values = {field["key"]: field["value"] for field in result["fields"]}
    assert values["product_name"] == "新型冠状病毒2019-nCoV核酸检测试剂盒(荧光PCR法)"
    assert "规格A" in values["package_specification"]
    # Positive check first: without it, an empty intended_use value would make
    # the negative checks below pass vacuously.
    assert "体外定性检测" in values["intended_use"]
    # The next section must not leak in, neither its heading text ...
    assert "检测原理" not in values["intended_use"]
    # ... nor its body. The sentinel body line does not contain the heading
    # text, so the heading check alone would miss a body-only leak.
    assert "本段不应进入预期用途" not in values["intended_use"]
    assert "PCR反应液" in values["main_components"]
    assert "-20±5℃" in values["storage_condition_and_validity"]
|
||||
|
||||
|
||||
def test_llm_extract_parses_structured_json(monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"review_agent.application_form_fill.services.field_extract.generate_completion",
|
||||
|
||||
@@ -37,3 +37,25 @@ def test_extract_text_reports_unsupported_file(tmp_path):
|
||||
|
||||
assert result.status == "unsupported"
|
||||
assert result.text == ""
|
||||
|
||||
|
||||
def test_extract_text_from_docx_preserves_table_text(tmp_path):
    """Check that text extracted from a .docx keeps table cells in place.

    Builds a document with a heading paragraph, a 2x2 table and two more
    paragraphs, saves it under ``tmp_path`` and runs ``extract_text`` on it.
    The assertions require a successful status, a tab-joined header row, and
    the table content appearing before the heading that follows the table.
    """
    from docx import Document

    table_rows = (
        ("组分", "数量"),
        ("PCR反应液", "1管"),
    )
    trailing_paragraphs = (
        "【储存条件及有效期】",
        "-20±5℃保存,有效期12个月。",
    )

    path = tmp_path / "说明书.docx"
    document = Document()
    document.add_paragraph("【主要组成成分】")

    table = document.add_table(rows=2, cols=2)
    for row, row_values in zip(table.rows, table_rows):
        for cell, cell_text in zip(row.cells, row_values):
            cell.text = cell_text

    for paragraph_text in trailing_paragraphs:
        document.add_paragraph(paragraph_text)
    document.save(path)

    result = extract_text(path)

    assert result.status == "success"
    assert "组分\t数量" in result.text
    # Document order must survive extraction: table body before next heading.
    table_cell_position = result.text.index("PCR反应液")
    next_heading_position = result.text.index("【储存条件及有效期】")
    assert table_cell_position < next_heading_position
|
||||
|
||||
Reference in New Issue
Block a user