fix(application-form-fill): 代理人字段暂用生产企业信息
This commit is contained in:
@@ -17,6 +17,11 @@ from review_agent.regulatory_review.services.text_extract import extract_text
|
|||||||
|
|
||||||
FIELD_ALIASES = {
|
FIELD_ALIASES = {
|
||||||
"product_name": ["产品名称"],
|
"product_name": ["产品名称"],
|
||||||
|
"applicant_name": ["注册人名称", "生产企业名称", "企业名称", "生产企业"],
|
||||||
|
"applicant_address": ["注册人住所", "生产企业住所", "企业住所", "住所"],
|
||||||
|
"manufacturer_address": ["生产地址", "生产企业地址", "生产场所"],
|
||||||
|
"agent_name": ["代理人名称", "生产企业名称", "企业名称", "生产企业", "注册人名称"],
|
||||||
|
"agent_address": ["代理人住所", "生产企业住所", "企业住所", "住所", "注册人住所"],
|
||||||
"package_specification": ["包装规格", "规格"],
|
"package_specification": ["包装规格", "规格"],
|
||||||
"main_components": ["主要组成成分", "主要组成", "组成成分"],
|
"main_components": ["主要组成成分", "主要组成", "组成成分"],
|
||||||
"intended_use": ["预期用途"],
|
"intended_use": ["预期用途"],
|
||||||
@@ -41,7 +46,7 @@ def collect_document_texts(summary_batch: FileSummaryBatch) -> dict[str, str]:
|
|||||||
def extract_by_rules(texts: dict[str, str], specs: list[TemplateSpec]) -> dict[str, Any]:
|
def extract_by_rules(texts: dict[str, str], specs: list[TemplateSpec]) -> dict[str, Any]:
|
||||||
fields: list[dict[str, Any]] = []
|
fields: list[dict[str, Any]] = []
|
||||||
field_defs = _field_defs(specs)
|
field_defs = _field_defs(specs)
|
||||||
labels = [field["label"] for field in field_defs if field.get("label")]
|
labels = _all_field_labels(field_defs)
|
||||||
for file_name, text in texts.items():
|
for file_name, text in texts.items():
|
||||||
source_role = detect_source_role(file_name, text)
|
source_role = detect_source_role(file_name, text)
|
||||||
for field in field_defs:
|
for field in field_defs:
|
||||||
@@ -174,6 +179,15 @@ def _field_aliases(field: dict[str, str]) -> list[str]:
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _all_field_labels(fields: list[dict[str, str]]) -> list[str]:
|
||||||
|
labels: list[str] = []
|
||||||
|
for field in fields:
|
||||||
|
for label in _field_aliases(field):
|
||||||
|
if label not in labels:
|
||||||
|
labels.append(label)
|
||||||
|
return labels
|
||||||
|
|
||||||
|
|
||||||
def _extract_label_value(text: str, label: str, labels: list[str]) -> tuple[str, str]:
    """Forward to ``_extract_colon_label_value`` and return its result unchanged."""
    extracted = _extract_colon_label_value(text, label, labels)
    return extracted
|
||||||
|
|
||||||
|
|||||||
@@ -81,8 +81,30 @@ def merge_fields(regex_results: dict[str, Any], llm_results: dict[str, Any]) ->
|
|||||||
"handling": "说明书优先,模板内黄底红字高亮" if rank_source(merged_field.source_file, merged_field.source_file) == 1 else "按来源优先级采用最高优先级字段",
|
"handling": "说明书优先,模板内黄底红字高亮" if rank_source(merged_field.source_file, merged_field.source_file) == 1 else "按来源优先级采用最高优先级字段",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
_apply_agent_company_fallbacks(merged)
|
||||||
return merged, conflicts
|
return merged, conflicts
|
||||||
|
|
||||||
|
|
||||||
def _distinct_values(candidates: list[dict[str, Any]]) -> set[str]:
|
def _distinct_values(candidates: list[dict[str, Any]]) -> set[str]:
|
||||||
return {normalize_field_value(str(item.get("value") or "")) for item in candidates if item.get("value")}
|
return {normalize_field_value(str(item.get("value") or "")) for item in candidates if item.get("value")}
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_agent_company_fallbacks(merged: dict[str, MergedField]) -> None:
|
||||||
|
fallback_pairs = {
|
||||||
|
"agent_name": ("applicant_name", "代理人名称"),
|
||||||
|
"agent_address": ("applicant_address", "代理人住所"),
|
||||||
|
}
|
||||||
|
for target_key, (source_key, target_label) in fallback_pairs.items():
|
||||||
|
if target_key in merged or source_key not in merged:
|
||||||
|
continue
|
||||||
|
source = merged[source_key]
|
||||||
|
merged[target_key] = MergedField(
|
||||||
|
key=target_key,
|
||||||
|
label=target_label,
|
||||||
|
value=source.value,
|
||||||
|
source_file=source.source_file,
|
||||||
|
evidence=source.evidence,
|
||||||
|
confidence=source.confidence,
|
||||||
|
has_conflict=source.has_conflict,
|
||||||
|
conflict_values=source.conflict_values,
|
||||||
|
)
|
||||||
|
|||||||
@@ -36,6 +36,24 @@ templates:
|
|||||||
source_roles:
|
source_roles:
|
||||||
- 申请表
|
- 申请表
|
||||||
- 质量管理体系文件
|
- 质量管理体系文件
|
||||||
|
- key: agent_name
|
||||||
|
label: 代理人名称
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 代理人名称
|
||||||
|
source_roles:
|
||||||
|
- 说明书
|
||||||
|
- 企业信息
|
||||||
|
- 申请表
|
||||||
|
- key: agent_address
|
||||||
|
label: 代理人住所
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 代理人住所
|
||||||
|
source_roles:
|
||||||
|
- 说明书
|
||||||
|
- 企业信息
|
||||||
|
- 申请表
|
||||||
- key: product_name
|
- key: product_name
|
||||||
label: 产品名称
|
label: 产品名称
|
||||||
target:
|
target:
|
||||||
|
|||||||
@@ -84,6 +84,25 @@ def test_rule_extracts_bracket_sections_from_instructions():
|
|||||||
assert "-20±5℃" in values["storage_condition_and_validity"]
|
assert "-20±5℃" in values["storage_condition_and_validity"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_rule_maps_agent_fields_to_manufacturer_company_for_now():
    """Agent name/address are extracted from the manufacturer lines of the instructions."""
    instruction_lines = [
        "生产企业名称:卡尤迪生物科技宜兴有限公司",
        "生产企业住所:江苏省宜兴经济技术开发区杏里路10号",
        "生产地址:江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
    ]
    texts = {"目标产品说明书.docx": "\n".join(instruction_lines)}

    extracted = extract_by_rules(texts, _registration_specs())
    value_by_key = {item["key"]: item["value"] for item in extracted["fields"]}

    expected = {
        "agent_name": "卡尤迪生物科技宜兴有限公司",
        "agent_address": "江苏省宜兴经济技术开发区杏里路10号",
        "manufacturer_address": "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
    }
    for key, expected_value in expected.items():
        assert value_by_key[key] == expected_value
|
||||||
|
|
||||||
|
|
||||||
def test_llm_extract_parses_structured_json(monkeypatch):
|
def test_llm_extract_parses_structured_json(monkeypatch):
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
"review_agent.application_form_fill.services.field_extract.generate_completion",
|
"review_agent.application_form_fill.services.field_extract.generate_completion",
|
||||||
|
|||||||
@@ -77,3 +77,35 @@ def test_merge_fields_combines_consistent_values_without_conflict():
|
|||||||
assert merged["product_name"].value == "甲胎蛋白检测试剂盒"
|
assert merged["product_name"].value == "甲胎蛋白检测试剂盒"
|
||||||
assert merged["product_name"].has_conflict is False
|
assert merged["product_name"].has_conflict is False
|
||||||
assert conflicts == []
|
assert conflicts == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_merge_fields_fills_agent_from_applicant_for_now():
    """With only applicant fields extracted, merging also yields the agent fields."""
    applicant_name_field = {
        "key": "applicant_name",
        "label": "注册人名称",
        "value": "卡尤迪生物科技宜兴有限公司",
        "source_file": "目标产品说明书.docx",
        "source_role": "说明书",
        "evidence": "生产企业名称:卡尤迪生物科技宜兴有限公司",
        "confidence": 0.75,
    }
    applicant_address_field = {
        "key": "applicant_address",
        "label": "注册人住所",
        "value": "江苏省宜兴经济技术开发区杏里路10号",
        "source_file": "目标产品说明书.docx",
        "source_role": "说明书",
        "evidence": "生产企业住所:江苏省宜兴经济技术开发区杏里路10号",
        "confidence": 0.75,
    }
    regex_results = {"fields": [applicant_name_field, applicant_address_field]}
    llm_results = {"fields": []}

    merged, conflicts = merge_fields(regex_results, llm_results)

    agent_name = merged["agent_name"]
    assert agent_name.value == "卡尤迪生物科技宜兴有限公司"
    assert agent_name.label == "代理人名称"
    assert merged["agent_address"].value == "江苏省宜兴经济技术开发区杏里路10号"
    assert conflicts == []
|
||||||
|
|||||||
Reference in New Issue
Block a user