60 lines
2.0 KiB
Python
60 lines
2.0 KiB
Python
from __future__ import annotations
|
||
|
||
import re
|
||
from collections import defaultdict
|
||
from collections.abc import Callable
|
||
|
||
from review_agent.regulatory_review.schemas import Finding
|
||
|
||
|
||
# Maps each registration field label to the regex used to extract its value.
#
# Every pattern has the shape "<label><colon><optional whitespace><value>",
# where group 1 captures the value up to the end of its line.
#
# The colon class accepts both the ASCII colon ":" and the full-width colon
# ":" (U+FF1A). The previous class listed the ASCII colon twice, so labels
# written with the full-width colon never matched.
#
# NOTE: ``\s*`` also matches line breaks, so when nothing follows the colon on
# the label's own line, the next non-blank line is captured as the value.
FIELDS = {
    "产品名称": r"产品名称[::]\s*([^\n\r]+)",
    "型号规格": r"型号规格[::]\s*([^\n\r]+)",
    "预期用途": r"预期用途[::]\s*([^\n\r]+)",
    "管理类别": r"管理类别[::]\s*([^\n\r]+)",
    "分类编码": r"分类编码[::]\s*([^\n\r]+)",
    "注册类型": r"注册类型[::]\s*([^\n\r]+)",
    "临床评价路径": r"临床评价路径[::]\s*([^\n\r]+)",
}
|
||
|
||
|
||
def run_consistency_check(
    document_texts: dict[str, str],
    progress_callback: Callable[[dict[str, object]], None] | None = None,
) -> list[Finding]:
    """Report fields whose extracted value differs between documents.

    For every ``(label, pattern)`` entry in ``FIELDS`` the first regex match
    in each document is taken, its captured value is passed through
    ``_normalize``, and the file names are grouped by that normalized value.
    A field that ends up with more than one distinct value yields one
    ``Finding``. Documents in which the pattern does not match are left out
    of the comparison for that field.

    Args:
        document_texts: Mapping of file name to the text of that file.
        progress_callback: Optional callable invoked once after each field is
            processed. It receives a dict with the keys ``processed``
            (1-based index of the field), ``total`` (number of fields),
            ``label`` (the field label) and ``finding_count`` (findings
            collected so far).

    Returns:
        The findings in the order the fields appear in ``FIELDS``; empty when
        no field has conflicting values.
    """
    field_patterns = list(FIELDS.items())
    field_count = len(field_patterns)
    results: list[Finding] = []

    for position, (label, pattern) in enumerate(field_patterns, start=1):
        # Normalized value -> names of the files that declare that value.
        files_by_value: dict[str, list[str]] = {}
        for file_name, text in document_texts.items():
            found = re.search(pattern, text)
            if not found:
                continue
            key = _normalize(found.group(1))
            files_by_value.setdefault(key, []).append(file_name)

        distinct = len(files_by_value)
        if distinct > 1:
            finding = Finding(
                rule_code=f"consistency:{label}",
                category="consistency",
                severity="high",
                title=f"{label}在不同文件中不一致",
                detail=f"发现 {distinct} 个不同的{label}取值。",
                suggestion=f"请统一各注册资料中的{label}。",
                evidence={"field": label, "values": dict(files_by_value)},
            )
            results.append(finding)

        if not progress_callback:
            continue
        progress_callback(
            {
                "processed": position,
                "total": field_count,
                "label": label,
                "finding_count": len(results),
            }
        )

    return results
|
||
|
||
|
||
def _normalize(value: str) -> str:
|
||
return " ".join(value.strip().split())
|