125 lines
4.6 KiB
Python
125 lines
4.6 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
from django.conf import settings
|
|
|
|
from review_agent.models import FileSummaryBatch
|
|
from review_agent.regulatory_review.services.llm_review import review_condition_fields
|
|
from review_agent.regulatory_review.services.text_extract import extract_text
|
|
|
|
|
|
# Allowed choices for each select-type condition field, keyed by field name.
OPTION_FIELDS: dict[str, list[str]] = {
    "product_category": ["体外诊断试剂", "医疗器械", "其他"],
    "registration_type": ["首次注册", "变更注册", "延续注册"],
    "clinical_evaluation_path": ["临床试验", "免临床", "同品种比对", "待确认"],
}
|
|
|
|
|
|
def detect_regulatory_condition_candidates(summary_batch: FileSummaryBatch) -> dict[str, dict[str, object]]:
    """Infer review-scope conditions from the summary batch and its files.

    Batch items are visited in ``file_index`` order. For every item the
    directory level, file name and relative path are added to a keyword
    corpus, and the fields returned by ``_extract_item_fields`` are merged
    in: the first non-empty value seen for a field wins, and its source label
    is taken from that same item so the two always describe the same file.

    Args:
        summary_batch: Batch whose ``product_name`` and ``items`` are read.

    Returns:
        A mapping from condition key to a descriptor dict. Select-type
        entries carry ``label``, ``input_type``, ``options`` and
        ``suggested``; text-type entries carry ``label``, ``input_type``,
        ``suggested`` and ``source``.
    """
    corpus_parts = [summary_batch.product_name or ""]
    field_candidates: dict[str, str] = {}
    field_sources: dict[str, str] = {}

    for item in summary_batch.items.order_by("file_index"):
        corpus_parts.extend([item.directory_level, item.file_name, item.relative_path])
        review = _extract_item_fields(item)
        # ``or {}`` also covers a payload that stores an explicit None.
        extracted = review.get("selected_fields") or {}
        sources = review.get("selected_sources") or {}
        for key, value in extracted.items():
            if not value or key in field_candidates:
                continue
            field_candidates[key] = value
            # Take the source from the item that supplied the winning value;
            # collecting sources independently could pair a value from one
            # file with a source label from another.
            source = sources.get(key)
            if source:
                field_sources[key] = source
        corpus_parts.extend(extracted.values())

    # Empty/None parts are dropped before joining.
    corpus = "\n".join(part for part in corpus_parts if part)
    # An extracted product name takes precedence over the batch-level one.
    product_name = field_candidates.get("产品名称") or _safe_summary_product_name(summary_batch.product_name)

    return {
        "product_category": {
            "label": "产品类别",
            "input_type": "select",
            "options": OPTION_FIELDS["product_category"],
            "suggested": _detect_product_category(corpus),
        },
        "registration_type": {
            "label": "注册类型",
            "input_type": "select",
            "options": OPTION_FIELDS["registration_type"],
            "suggested": _detect_registration_type(corpus),
        },
        "clinical_evaluation_path": {
            "label": "临床评价路径",
            "input_type": "select",
            "options": OPTION_FIELDS["clinical_evaluation_path"],
            "suggested": _detect_clinical_path(corpus),
        },
        "product_name": {
            "label": "产品名称",
            "input_type": "text",
            "suggested": product_name,
            # Falls back to "summary" when no per-file source was recorded
            # and a product name is present.
            "source": field_sources.get("产品名称", "summary" if product_name else ""),
        },
        "model_spec": {
            "label": "型号规格",
            "input_type": "text",
            "suggested": field_candidates.get("型号规格", ""),
            "source": field_sources.get("型号规格", ""),
        },
        "intended_use": {
            "label": "预期用途",
            "input_type": "text",
            "suggested": field_candidates.get("预期用途", ""),
            "source": field_sources.get("预期用途", ""),
        },
    }
|
|
|
|
|
|
def _extract_item_fields(item) -> dict[str, object]:
|
|
path = Path(item.storage_path)
|
|
if not path.is_absolute():
|
|
path = Path(settings.MEDIA_ROOT) / item.storage_path
|
|
if not path.exists():
|
|
return {}
|
|
result = extract_text(path)
|
|
if result.status != "success" or not result.field_candidates:
|
|
return {}
|
|
return review_condition_fields(
|
|
text=result.front_text or result.text,
|
|
rule_fields=result.field_candidates,
|
|
file_context=f"{item.directory_level}\n{item.file_name}\n{item.relative_path}",
|
|
)
|
|
|
|
|
|
def _safe_summary_product_name(product_name: str) -> str:
|
|
value = (product_name or "").strip()
|
|
if not value:
|
|
return ""
|
|
if any(keyword in value for keyword in ["第1章", "第2章", "监管信息", "综述资料", "非临床资料", "章节目录"]):
|
|
return ""
|
|
return value
|
|
|
|
|
|
def _detect_product_category(corpus: str) -> str:
|
|
if any(keyword in corpus for keyword in ["体外诊断", "检测试剂", "试剂盒", "IVD"]):
|
|
return "体外诊断试剂"
|
|
if "医疗器械" in corpus:
|
|
return "医疗器械"
|
|
return "其他"
|
|
|
|
|
|
def _detect_registration_type(corpus: str) -> str:
|
|
if "延续" in corpus:
|
|
return "延续注册"
|
|
if "变更" in corpus:
|
|
return "变更注册"
|
|
return "首次注册"
|
|
|
|
|
|
def _detect_clinical_path(corpus: str) -> str:
|
|
if "免临床" in corpus or "免于临床" in corpus:
|
|
return "免临床"
|
|
if "同品种" in corpus or "同类" in corpus:
|
|
return "同品种比对"
|
|
if "临床试验" in corpus:
|
|
return "临床试验"
|
|
return "待确认"
|