feat(regulatory-info-package): 完善目录页码与组成成分填充
This commit is contained in:
@@ -13,6 +13,11 @@ from review_agent.regulatory_info_package.schemas import InstructionExtractResul
|
||||
|
||||
FIELD_PATTERNS = {
|
||||
"product_name": ("产品名称", r"产品名称[::\s]*([^\n\r]+)"),
|
||||
"applicant_name": ("申请人名称", r"(?:申请人名称|注册人/售后服务单位名称|注册人名称|售后服务单位名称|生产企业名称)[::\s]*([^\n\r]+)"),
|
||||
"manufacturer_name": ("生产企业名称", r"生产企业名称[::\s]*([^\n\r]+)"),
|
||||
"applicant_address": ("申请人住所", r"(?:申请人住所|注册人住所|生产企业住所)[::\s]*([^\n\r]+)"),
|
||||
"applicant_contact": ("申请人联系方式", r"(?:联系方式|联系电话|电话)[::\s]*([^\n\r]+)"),
|
||||
"production_address": ("生产地址", r"生产地址[::\s]*([^\n\r]+)"),
|
||||
"storage_condition": ("储存条件", r"(?:储存条件|贮存条件|保存条件)[::\s]*([^\n\r]+)"),
|
||||
"intended_use": ("预期用途", r"预期用途[::\s]*([^\n\r]+)"),
|
||||
"package_specification": ("包装规格", r"(?:包装规格|规格)[::\s]*([^\n\r]+)"),
|
||||
@@ -47,6 +52,24 @@ def extract_fields_by_rules(instruction: InstructionExtractResult) -> dict[str,
|
||||
"confidence": 0.75,
|
||||
"source": "rule",
|
||||
}
|
||||
component_table = _best_component_table(instruction.component_tables)
|
||||
if component_table:
|
||||
results["component_table"] = {
|
||||
"label": "主要组成成分",
|
||||
"value": json.dumps(component_table, ensure_ascii=False),
|
||||
"evidence": "说明书【主要组成成分】表格",
|
||||
"confidence": 0.86,
|
||||
"source": "rule",
|
||||
}
|
||||
component_notes = _component_notes(instruction.sections)
|
||||
if component_notes:
|
||||
results["component_notes"] = {
|
||||
"label": "主要组成成分备注",
|
||||
"value": component_notes,
|
||||
"evidence": "说明书【主要组成成分】段落",
|
||||
"confidence": 0.8,
|
||||
"source": "rule",
|
||||
}
|
||||
return results
|
||||
|
||||
|
||||
@@ -133,3 +156,16 @@ def _parse_json_object(raw: str) -> dict:
|
||||
if start == -1 or end == -1:
|
||||
return {}
|
||||
return json.loads(text[start : end + 1])
|
||||
|
||||
|
||||
def _best_component_table(component_tables: list[dict]) -> dict:
|
||||
if not component_tables:
|
||||
return {}
|
||||
return max(component_tables, key=lambda table: len(table.get("rows") or []))
|
||||
|
||||
|
||||
def _component_notes(sections: dict[str, str]) -> str:
|
||||
for key, value in sections.items():
|
||||
if "主要组成" in key:
|
||||
return value.strip()
|
||||
return ""
|
||||
|
||||
Reference in New Issue
Block a user