feat(regulatory-info-package): 完善目录页码与组成成分填充

This commit is contained in:
2026-06-10 23:56:40 +08:00
parent 3bcf9647a1
commit 1bf8634373
13 changed files with 296 additions and 82 deletions

View File

@@ -13,6 +13,11 @@ from review_agent.regulatory_info_package.schemas import InstructionExtractResul
FIELD_PATTERNS = {
"product_name": ("产品名称", r"产品名称[:\s]*([^\n\r]+)"),
"applicant_name": ("申请人名称", r"(?:申请人名称|注册人/售后服务单位名称|注册人名称|售后服务单位名称|生产企业名称)[:\s]*([^\n\r]+)"),
"manufacturer_name": ("生产企业名称", r"生产企业名称[:\s]*([^\n\r]+)"),
"applicant_address": ("申请人住所", r"(?:申请人住所|注册人住所|生产企业住所)[:\s]*([^\n\r]+)"),
"applicant_contact": ("申请人联系方式", r"(?:联系方式|联系电话|电话)[:\s]*([^\n\r]+)"),
"production_address": ("生产地址", r"生产地址[:\s]*([^\n\r]+)"),
"storage_condition": ("储存条件", r"(?:储存条件|贮存条件|保存条件)[:\s]*([^\n\r]+)"),
"intended_use": ("预期用途", r"预期用途[:\s]*([^\n\r]+)"),
"package_specification": ("包装规格", r"(?:包装规格|规格)[:\s]*([^\n\r]+)"),
@@ -47,6 +52,24 @@ def extract_fields_by_rules(instruction: InstructionExtractResult) -> dict[str,
"confidence": 0.75,
"source": "rule",
}
component_table = _best_component_table(instruction.component_tables)
if component_table:
results["component_table"] = {
"label": "主要组成成分",
"value": json.dumps(component_table, ensure_ascii=False),
"evidence": "说明书【主要组成成分】表格",
"confidence": 0.86,
"source": "rule",
}
component_notes = _component_notes(instruction.sections)
if component_notes:
results["component_notes"] = {
"label": "主要组成成分备注",
"value": component_notes,
"evidence": "说明书【主要组成成分】段落",
"confidence": 0.8,
"source": "rule",
}
return results
@@ -133,3 +156,16 @@ def _parse_json_object(raw: str) -> dict:
if start == -1 or end == -1:
return {}
return json.loads(text[start : end + 1])
def _best_component_table(component_tables: list[dict]) -> dict:
if not component_tables:
return {}
return max(component_tables, key=lambda table: len(table.get("rows") or []))
def _component_notes(sections: dict[str, str]) -> str:
for key, value in sections.items():
if "主要组成" in key:
return value.strip()
return ""