feat(regulatory): 对齐附件4目录核查规则
This commit is contained in:
@@ -107,12 +107,19 @@ def collect_source_chunks(source_dir: Path) -> list[TextChunk]:
|
||||
try:
|
||||
text = extract_text_from_path(path)
|
||||
except RuntimeError as exc:
|
||||
if _is_attachment4(path):
|
||||
raise RuntimeError(f"附件 4 核心法规材料抽取失败:{path.name}") from exc
|
||||
logger.warning("Regulatory source extraction skipped", extra={"path": str(path), "error": str(exc)})
|
||||
continue
|
||||
chunks.extend(chunk_text(text, source=str(path.relative_to(source_dir))))
|
||||
return chunks
|
||||
|
||||
|
||||
def _is_attachment4(path: Path) -> bool:
    """Return whether the file name of *path* identifies the Attachment 4 document.

    Only ``path.name`` is inspected; parent directories are ignored. The name
    matches when, after normalization, it contains both the ``附件4`` marker
    and the document title ``体外诊断试剂注册申报资料要求及说明``.

    Args:
        path: Path of the candidate source file.

    Returns:
        ``True`` if both markers occur in the normalized file name,
        ``False`` otherwise.
    """
    # Function-scope import keeps this helper self-contained.
    import unicodedata

    # NFKC folds compatibility forms to their canonical equivalents, e.g. the
    # full-width digit "４" becomes "4" and the ideographic space U+3000
    # becomes an ASCII space, so such file-name variants are still recognised.
    folded = unicodedata.normalize("NFKC", path.name)
    # str.split() with no argument splits on any Unicode whitespace; joining
    # the pieces drops spaces, tabs, etc. that may separate "附件" from "4".
    compact = "".join(folded.split())
    return "附件4" in compact and "体外诊断试剂注册申报资料要求及说明" in compact
|
||||
|
||||
|
||||
def build_chroma_index(
|
||||
*,
|
||||
source_dir: Path,
|
||||
|
||||
Reference in New Issue
Block a user