feat(application-form-fill): 实现字段抽取与冲突合并

This commit is contained in:
2026-06-07 18:31:34 +08:00
parent 72890783b3
commit a48f778e09
5 changed files with 498 additions and 0 deletions

View File

@@ -0,0 +1,79 @@
import pytest
from review_agent.application_form_fill.services.field_merge import merge_fields, normalize_field_value, rank_source
def test_normalize_field_value_removes_whitespace():
assert normalize_field_value(" 2-8℃ 保存 \n 有效期12个月 ") == "2-8℃保存有效期12个月"
def test_rank_source_prefers_instructions():
assert rank_source("说明书") < rank_source("产品技术要求")
def test_merge_fields_prefers_instructions_and_marks_conflict():
regex_results = {
"fields": [
{
"key": "storage_condition_and_validity",
"label": "产品储存条件及有效期",
"value": "2-8℃保存有效期12个月",
"source_file": "说明书.txt",
"source_role": "说明书",
"evidence": "产品储存条件及有效期2-8℃保存有效期12个月",
"confidence": 0.75,
},
{
"key": "storage_condition_and_validity",
"label": "产品储存条件及有效期",
"value": "-20℃保存",
"source_file": "产品技术要求.txt",
"source_role": "产品技术要求",
"evidence": "产品储存条件及有效期:-20℃保存",
"confidence": 0.8,
},
]
}
merged, conflicts = merge_fields(regex_results, {"fields": []})
field = merged["storage_condition_and_validity"]
assert field.value == "2-8℃保存有效期12个月"
assert field.has_conflict is True
assert conflicts[0]["selected_value"] == "2-8℃保存有效期12个月"
assert conflicts[0]["conflict_values"][0]["value"] == "-20℃保存"
def test_merge_fields_combines_consistent_values_without_conflict():
regex_results = {
"fields": [
{
"key": "product_name",
"label": "产品名称",
"value": "甲胎蛋白检测试剂盒",
"source_file": "说明书.txt",
"source_role": "说明书",
"evidence": "产品名称:甲胎蛋白检测试剂盒",
"confidence": 0.75,
}
]
}
llm_results = {
"fields": [
{
"key": "product_name",
"label": "产品名称",
"value": "甲胎蛋白 检测试剂盒",
"source_file": "产品技术要求.txt",
"source_role": "产品技术要求",
"evidence": "产品名称:甲胎蛋白 检测试剂盒",
"confidence": 0.9,
}
]
}
merged, conflicts = merge_fields(regex_results, llm_results)
assert merged["product_name"].value == "甲胎蛋白检测试剂盒"
assert merged["product_name"].has_conflict is False
assert conflicts == []