"""Cross-document consistency check for key registration fields.

Each field in ``FIELDS`` is located in every document with a
``label: value`` regular expression; a finding is reported whenever the
documents disagree on the (whitespace-normalized) value.
"""

from __future__ import annotations

import re
from collections import defaultdict

from review_agent.regulatory_review.schemas import Finding

# Labels of the fields whose values are compared across documents.
_FIELD_LABELS = (
    "产品名称",
    "型号规格",
    "预期用途",
    "管理类别",
    "分类编码",
    "注册类型",
    "临床评价路径",
)


def _field_pattern(label: str) -> str:
    """Return the regex source that captures the value following *label*.

    The separator class accepts both the ASCII colon and the full-width
    colon (U+FF1A).  The latter is written as a ``\\uff1a`` regex escape so
    it cannot be silently folded into an ASCII colon by editors or encoding
    conversions.  Group 1 captures the rest of the line.
    """
    # NOTE(review): ``\s*`` also matches line breaks, so a label with an
    # empty value captures the *next* non-empty line -- confirm whether that
    # is intended for documents that put the value on the following line.
    return rf"{re.escape(label)}[:\uff1a]\s*([^\n\r]+)"


# Field label -> regex source; group 1 is the field value.
FIELDS: dict[str, str] = {label: _field_pattern(label) for label in _FIELD_LABELS}


def run_consistency_check(document_texts: dict[str, str]) -> list[Finding]:
    """Report fields whose values differ between documents.

    Args:
        document_texts: Mapping of file name to the text of that file.

    Returns:
        One ``Finding`` per field in ``FIELDS`` for which more than one
        distinct normalized value was found.  Only the first occurrence of
        a field in each document is considered; documents in which the
        field does not occur are ignored for that field.
    """
    findings: list[Finding] = []
    for label, pattern in FIELDS.items():
        # Normalized value -> names of the files that state this value.
        files_by_value: dict[str, list[str]] = defaultdict(list)
        for file_name, text in document_texts.items():
            match = re.search(pattern, text)
            if match:
                files_by_value[_normalize(match.group(1))].append(file_name)

        # Zero or one distinct value means there is nothing to reconcile.
        if len(files_by_value) <= 1:
            continue

        findings.append(
            Finding(
                rule_code=f"consistency:{label}",
                category="consistency",
                severity="high",
                title=f"{label}在不同文件中不一致",
                detail=f"发现 {len(files_by_value)} 个不同的{label}取值。",
                suggestion=f"请统一各注册资料中的{label}。",
                # Plain dict so the evidence does not carry defaultdict
                # behavior along with it.
                evidence={"field": label, "values": dict(files_by_value)},
            )
        )
    return findings


def _normalize(value: str) -> str:
    """Strip *value* and collapse every whitespace run to a single space."""
    return " ".join(value.strip().split())