fix(application-form-fill): 代理人字段暂用生产企业信息
This commit is contained in:
@@ -17,6 +17,11 @@ from review_agent.regulatory_review.services.text_extract import extract_text
|
||||
|
||||
FIELD_ALIASES = {
|
||||
"product_name": ["产品名称"],
|
||||
"applicant_name": ["注册人名称", "生产企业名称", "企业名称", "生产企业"],
|
||||
"applicant_address": ["注册人住所", "生产企业住所", "企业住所", "住所"],
|
||||
"manufacturer_address": ["生产地址", "生产企业地址", "生产场所"],
|
||||
"agent_name": ["代理人名称", "生产企业名称", "企业名称", "生产企业", "注册人名称"],
|
||||
"agent_address": ["代理人住所", "生产企业住所", "企业住所", "住所", "注册人住所"],
|
||||
"package_specification": ["包装规格", "规格"],
|
||||
"main_components": ["主要组成成分", "主要组成", "组成成分"],
|
||||
"intended_use": ["预期用途"],
|
||||
@@ -41,7 +46,7 @@ def collect_document_texts(summary_batch: FileSummaryBatch) -> dict[str, str]:
|
||||
def extract_by_rules(texts: dict[str, str], specs: list[TemplateSpec]) -> dict[str, Any]:
|
||||
fields: list[dict[str, Any]] = []
|
||||
field_defs = _field_defs(specs)
|
||||
labels = [field["label"] for field in field_defs if field.get("label")]
|
||||
labels = _all_field_labels(field_defs)
|
||||
for file_name, text in texts.items():
|
||||
source_role = detect_source_role(file_name, text)
|
||||
for field in field_defs:
|
||||
@@ -174,6 +179,15 @@ def _field_aliases(field: dict[str, str]) -> list[str]:
|
||||
return result
|
||||
|
||||
|
||||
def _all_field_labels(fields: list[dict[str, str]]) -> list[str]:
    """Collect the aliases of every field, de-duplicated in first-seen order.

    Args:
        fields: Field definitions; each one is handed to ``_field_aliases``.

    Returns:
        Every alias produced for ``fields``, keeping only the first
        occurrence of each one and preserving encounter order.
    """
    # dict keys keep insertion order and give O(1) membership checks, which
    # replaces the quadratic ``label not in labels`` scan over a list.
    unique_labels = dict.fromkeys(
        alias for field in fields for alias in _field_aliases(field)
    )
    return list(unique_labels)
|
||||
|
||||
|
||||
def _extract_label_value(text: str, label: str, labels: list[str]) -> tuple[str, str]:
    """Delegate label/value extraction to ``_extract_colon_label_value``.

    All three arguments are forwarded positionally and unchanged, and the
    helper's result is returned as-is.

    NOTE(review): the meaning of the two returned strings is defined by the
    helper and is not visible from this wrapper - confirm before relying on it.
    """
    extracted = _extract_colon_label_value(text, label, labels)
    return extracted
|
||||
|
||||
|
||||
@@ -81,8 +81,30 @@ def merge_fields(regex_results: dict[str, Any], llm_results: dict[str, Any]) ->
|
||||
"handling": "说明书优先,模板内黄底红字高亮" if rank_source(merged_field.source_file, merged_field.source_file) == 1 else "按来源优先级采用最高优先级字段",
|
||||
}
|
||||
)
|
||||
_apply_agent_company_fallbacks(merged)
|
||||
return merged, conflicts
|
||||
|
||||
|
||||
def _distinct_values(candidates: list[dict[str, Any]]) -> set[str]:
    """Return the set of normalized values found in ``candidates``.

    Candidates whose ``"value"`` entry is missing or falsy are ignored. Every
    other value is converted with ``str`` and passed through
    ``normalize_field_value`` before being added to the result set.
    """
    distinct: set[str] = set()
    for candidate in candidates:
        raw_value = candidate.get("value")
        if not raw_value:
            # Nothing usable to compare for this candidate.
            continue
        distinct.add(normalize_field_value(str(raw_value)))
    return distinct
|
||||
|
||||
|
||||
def _apply_agent_company_fallbacks(merged: dict[str, MergedField]) -> None:
    """Fill missing agent fields from the matching applicant fields.

    For ``agent_name`` and ``agent_address``: when the agent key is absent
    from ``merged`` and its applicant counterpart is present, a new
    ``MergedField`` is inserted under the agent key. It carries the agent key
    and label, and reuses the applicant entry's value, source file, evidence,
    confidence and conflict information.

    Args:
        merged: Merged fields keyed by field key; updated in place.

    Returns:
        None. Agent entries that already exist are never overwritten.
    """
    # (agent key, applicant key to borrow from, label for the agent field)
    fallback_table = (
        ("agent_name", "applicant_name", "代理人名称"),
        ("agent_address", "applicant_address", "代理人住所"),
    )
    for agent_key, company_key, agent_label in fallback_table:
        if agent_key not in merged and company_key in merged:
            company_field = merged[company_key]
            # conflict_values is passed through by reference, not copied.
            merged[agent_key] = MergedField(
                key=agent_key,
                label=agent_label,
                value=company_field.value,
                source_file=company_field.source_file,
                evidence=company_field.evidence,
                confidence=company_field.confidence,
                has_conflict=company_field.has_conflict,
                conflict_values=company_field.conflict_values,
            )
|
||||
|
||||
@@ -36,6 +36,24 @@ templates:
|
||||
source_roles:
|
||||
- 申请表
|
||||
- 质量管理体系文件
|
||||
- key: agent_name
|
||||
label: 代理人名称
|
||||
target:
|
||||
type: table_row
|
||||
row_label: 代理人名称
|
||||
source_roles:
|
||||
- 说明书
|
||||
- 企业信息
|
||||
- 申请表
|
||||
- key: agent_address
|
||||
label: 代理人住所
|
||||
target:
|
||||
type: table_row
|
||||
row_label: 代理人住所
|
||||
source_roles:
|
||||
- 说明书
|
||||
- 企业信息
|
||||
- 申请表
|
||||
- key: product_name
|
||||
label: 产品名称
|
||||
target:
|
||||
|
||||
Reference in New Issue
Block a user