Files
DEMO-AGENT/review_agent/regulatory_review/services/consistency_check.py

60 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import re
from collections import defaultdict
from collections.abc import Callable
from review_agent.regulatory_review.schemas import Finding
# Labels of the registration fields that are compared across documents.
# Order matters: it is the order in which the fields are checked and reported.
_FIELD_LABELS = (
    "产品名称",
    "型号规格",
    "预期用途",
    "管理类别",
    "分类编码",
    "注册类型",
    "临床评价路径",
)

# Maps each field label to a regex whose group(1) captures the field's value:
# the label, a colon, optional whitespace, then the rest of that line.
# Both the ASCII colon and the full-width colon (U+FF1A) are accepted; the
# full-width one is spelled as an escape so the two look-alike characters
# cannot be confused (or silently normalised) in the source.
FIELDS = {
    label: rf"{re.escape(label)}[:\uFF1A]\s*([^\n\r]+)" for label in _FIELD_LABELS
}
def run_consistency_check(
    document_texts: dict[str, str],
    progress_callback: Callable[[dict[str, object]], None] | None = None,
) -> list[Finding]:
    """Report fields whose declared value differs between documents.

    For every entry in ``FIELDS`` the first regex match in each document is
    taken as that document's value for the field. Values are passed through
    ``_normalize`` before comparison. Documents in which the pattern does not
    match are left out of the comparison for that field.

    Args:
        document_texts: Mapping of file name to the text of that file.
        progress_callback: Optional callable invoked once per field, after the
            field has been checked, with a dict holding ``processed`` (1-based
            index of the field), ``total`` (number of fields), ``label`` and
            ``finding_count`` (findings accumulated so far).

    Returns:
        One ``Finding`` per field for which more than one distinct normalised
        value was seen, in ``FIELDS`` order.
    """
    results: list[Finding] = []
    field_specs = tuple(FIELDS.items())
    field_count = len(field_specs)

    for position, (label, pattern) in enumerate(field_specs, start=1):
        # Group the file names by the normalised value each file declares.
        files_by_value: dict[str, list[str]] = {}
        for file_name, text in document_texts.items():
            hit = re.search(pattern, text)
            if hit is None:
                continue
            declared = _normalize(hit.group(1))
            files_by_value.setdefault(declared, []).append(file_name)

        distinct_count = len(files_by_value)
        if distinct_count > 1:
            finding = Finding(
                rule_code=f"consistency:{label}",
                category="consistency",
                severity="high",
                title=f"{label}在不同文件中不一致",
                detail=f"发现 {distinct_count} 个不同的{label}取值。",
                suggestion=f"请统一各注册资料中的{label}",
                evidence={"field": label, "values": dict(files_by_value)},
            )
            results.append(finding)

        if progress_callback:
            progress_event = {
                "processed": position,
                "total": field_count,
                "label": label,
                "finding_count": len(results),
            }
            progress_callback(progress_event)

    return results
def _normalize(value: str) -> str:
    """Return *value* trimmed, with each whitespace run collapsed to one space."""
    # split() with no separator already drops leading and trailing whitespace,
    # so the remaining tokens only need to be re-joined.
    tokens = value.split()
    return " ".join(tokens)