from __future__ import annotations

from review_agent.regulatory_review.schemas import Finding


def run_structure_check(document_texts: dict[str, str], rule_set: dict) -> list[Finding]:
    """Check submitted documents against the structural rules in *rule_set*.

    Two kinds of checks are run for every entry of ``rule_set["requirements"]``:

    1. If the requirement has a truthy ``structure_required`` flag, its
       ``title`` (or any of its ``aliases``) must occur somewhere in the
       combined text of all documents; otherwise a finding is emitted with
       ``rule_code`` equal to the requirement's ``code``.
    2. If the requirement lists ``required_sections``, the documents selected
       by ``file_keywords`` are searched for each section name; every missing
       section yields a finding with ``rule_code`` ``"<code>:<section>"``.
       The check is skipped when no document matches the keywords.

    All comparisons go through ``_normalize_title`` (case, whitespace and
    slashes are ignored).

    Args:
        document_texts: Mapping of document name to its text.
        rule_set: Rule configuration; only its ``"requirements"`` list is read.

    Returns:
        The findings in the order they were detected (empty if none).

    Raises:
        KeyError: If a requirement that produces a finding lacks ``"code"``
            or ``"title"``.
    """
    findings: list[Finding] = []
    # Normalize the whole corpus once instead of once per requirement.
    normalized_all_text = _normalize_title("\n".join(document_texts.values()))

    for requirement in rule_set.get("requirements", []):
        # ``or []`` also covers an explicit ``"aliases": None`` in the rule set.
        aliases = requirement.get("aliases") or []

        if requirement.get("structure_required") and not _normalized_contains_any(
            normalized_all_text,
            [requirement.get("title", ""), *aliases],
        ):
            findings.append(
                Finding(
                    rule_code=requirement["code"],
                    category="structure",
                    severity=requirement.get("severity", "medium"),
                    title=f"申报资料目录缺少{requirement['title']}章节",
                    detail=f"未在申报资料目录或章节标题候选中发现{requirement['title']}。",
                    suggestion=requirement.get("suggestion", ""),
                    evidence={
                        "attachment4_code": requirement.get("attachment4_code"),
                        "expected_title": requirement["title"],
                        "aliases": aliases,
                    },
                )
            )

        required_sections = requirement.get("required_sections") or []
        if not required_sections:
            continue

        matching_docs = _matching_documents(
            document_texts, requirement.get("file_keywords", [])
        )
        if not matching_docs:
            # Nothing to inspect: the document itself is absent, which is not
            # reported as a missing section.
            continue

        # Normalize the matched documents once for all section lookups.
        normalized_matching_text = _normalize_title("\n".join(matching_docs.values()))
        for section in required_sections:
            if _normalized_contains_any(normalized_matching_text, [section]):
                continue
            findings.append(
                Finding(
                    rule_code=f"{requirement['code']}:{section}",
                    category="structure",
                    severity=requirement.get("severity", "medium"),
                    title=f"{requirement['title']}缺少{section}章节",
                    detail=f"已匹配{requirement['title']}文件,但未发现{section}相关内容。",
                    suggestion=requirement.get("suggestion", ""),
                    evidence={"section": section, "files": list(matching_docs)},
                )
            )

    return findings


def _matching_documents(document_texts: dict[str, str], keywords: list[str]) -> dict[str, str]:
    """Return the documents whose name or text contains any of *keywords*.

    Matching is a case-insensitive substring test against the document name
    followed by its text. When *keywords* is empty (or ``None``), the input
    mapping itself is returned unchanged, i.e. every document matches.
    """
    if not keywords:
        return document_texts

    lowered_keywords = [str(keyword).lower() for keyword in keywords]
    result: dict[str, str] = {}
    for name, text in document_texts.items():
        haystack = f"{name}\n{text}".lower()
        if any(keyword in haystack for keyword in lowered_keywords):
            result[name] = text
    return result


def _contains_any(text: str, needles: list[str]) -> bool:
    """Return True if the normalized *text* contains any non-empty needle.

    Both *text* and every needle are passed through ``_normalize_title``
    before the substring test; falsy needles are ignored.
    """
    return _normalized_contains_any(_normalize_title(text), needles)


def _normalized_contains_any(normalized_text: str, needles: list[str]) -> bool:
    """Like ``_contains_any`` but for text that is already normalized."""
    return any(
        _normalize_title(needle) in normalized_text for needle in needles if needle
    )


def _normalize_title(value: str) -> str:
    """Lower-case *value* and drop all whitespace and slash characters.

    Both the ASCII solidus ``/`` and the full-width solidus (U+FF0F) are
    removed so that titles typed with either form compare equal.
    """
    lowered = str(value).lower()
    # "\uff0f" is the full-width solidus; written as an escape so it cannot be
    # silently folded into the ASCII "/" by editors or re-encoding.
    without_slashes = lowered.replace("/", "").replace("\uff0f", "")
    return "".join(without_slashes.split())