42 lines
1.7 KiB
Python
42 lines
1.7 KiB
Python
from __future__ import annotations
|
|
|
|
from review_agent.regulatory_review.schemas import Finding
|
|
|
|
|
|
def run_structure_check(document_texts: dict[str, str], rule_set: dict) -> list[Finding]:
    """Report required sections that are missing from the matching documents.

    For every entry under ``rule_set["requirements"]`` that declares
    ``required_sections``, the documents selected by the entry's
    ``file_keywords`` are joined into one text, and one ``Finding`` is
    produced for each required section whose name does not occur in that
    text.

    Args:
        document_texts: Mapping of document name to document text.
        rule_set: Rule configuration. Only the ``"requirements"`` key is read;
            each requirement is a dict that may provide ``required_sections``,
            ``file_keywords``, ``severity`` and ``suggestion``, and must
            provide ``code`` and ``title`` whenever a finding is produced.

    Returns:
        The findings in requirement order, then section order. Empty when
        nothing is missing or when no requirement applies.

    Raises:
        KeyError: If a requirement with a missing section has no ``"code"``
            or ``"title"`` entry.
    """
    findings: list[Finding] = []

    # ``or []`` (rather than a ``.get`` default) also covers an explicit
    # ``"requirements": None``, which would otherwise raise TypeError when
    # iterated. This mirrors the handling of ``required_sections`` below.
    for requirement in rule_set.get("requirements") or []:
        required_sections = requirement.get("required_sections") or []
        if not required_sections:
            continue

        matching_docs = _matching_documents(document_texts, requirement.get("file_keywords", []))
        if not matching_docs:
            # No document falls under this requirement, so there is nothing
            # to check; a missing document is not reported here.
            continue

        # A section counts as present if it appears in ANY matching document.
        combined_text = "\n".join(matching_docs.values())

        for section in required_sections:
            # Plain, case-sensitive substring test on the section name.
            if section in combined_text:
                continue
            findings.append(
                Finding(
                    rule_code=f"{requirement['code']}:{section}",
                    category="structure",
                    severity=requirement.get("severity", "medium"),
                    title=f"{requirement['title']}缺少{section}章节",
                    detail=f"已匹配{requirement['title']}文件,但未发现{section}相关内容。",
                    suggestion=requirement.get("suggestion", ""),
                    evidence={"section": section, "files": list(matching_docs)},
                )
            )

    return findings
|
|
|
|
|
|
def _matching_documents(document_texts: dict[str, str], keywords: list[str]) -> dict[str, str]:
|
|
if not keywords:
|
|
return document_texts
|
|
result = {}
|
|
for name, text in document_texts.items():
|
|
haystack = f"{name}\n{text}".lower()
|
|
if any(str(keyword).lower() in haystack for keyword in keywords):
|
|
result[name] = text
|
|
return result
|