fix(application-form-fill): 抽取说明书章节和表格字段
This commit is contained in:
@@ -37,3 +37,25 @@ def test_extract_text_reports_unsupported_file(tmp_path):
|
||||
|
||||
assert result.status == "unsupported"
|
||||
assert result.text == ""
|
||||
|
||||
|
||||
def test_extract_text_from_docx_preserves_table_text(tmp_path):
    """Check that DOCX extraction keeps table text in document order.

    Builds a small document made of a heading paragraph, a 2x2 table and a
    trailing section, then asserts that the extracted text contains the
    header row's cells joined by a tab and that table content appears
    before the paragraph that follows the table.
    """
    from docx import Document

    docx_path = tmp_path / "说明书.docx"
    doc = Document()
    doc.add_paragraph("【主要组成成分】")

    # Fill the table row by row, left to right.
    cell_rows = (
        ("组分", "数量"),
        ("PCR反应液", "1管"),
    )
    grid = doc.add_table(rows=2, cols=2)
    for row, values in zip(grid.rows, cell_rows):
        for cell, value in zip(row.cells, values):
            cell.text = value

    doc.add_paragraph("【储存条件及有效期】")
    doc.add_paragraph("-20±5℃保存,有效期12个月。")
    doc.save(docx_path)

    result = extract_text(docx_path)

    assert result.status == "success"
    extracted = result.text
    # Cells of one row are expected to be tab-separated.
    assert "组分\t数量" in extracted
    # Table content must come before the section that follows it.
    table_pos = extracted.index("PCR反应液")
    next_section_pos = extracted.index("【储存条件及有效期】")
    assert table_pos < next_section_pos
|
||||
|
||||
Reference in New Issue
Block a user