fix(regulatory): 优先从附件字段识别适用条件
This commit is contained in:
@@ -1,6 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from review_agent.models import FileSummaryBatch
|
||||
from review_agent.regulatory_review.services.text_extract import extract_text
|
||||
|
||||
|
||||
OPTION_FIELDS = {
|
||||
@@ -14,9 +19,14 @@ def detect_regulatory_condition_candidates(summary_batch: FileSummaryBatch) -> d
|
||||
"""Infers review-scope conditions from the summary batch and file names."""
|
||||
|
||||
corpus_parts = [summary_batch.product_name or ""]
|
||||
field_candidates: dict[str, str] = {}
|
||||
for item in summary_batch.items.order_by("file_index"):
|
||||
corpus_parts.extend([item.directory_level, item.file_name, item.relative_path])
|
||||
extracted = _extract_item_fields(item)
|
||||
field_candidates.update({key: value for key, value in extracted.items() if value and key not in field_candidates})
|
||||
corpus_parts.extend(extracted.values())
|
||||
corpus = "\n".join(part for part in corpus_parts if part)
|
||||
product_name = field_candidates.get("产品名称") or _safe_summary_product_name(summary_batch.product_name)
|
||||
|
||||
return {
|
||||
"product_category": {
|
||||
@@ -40,21 +50,42 @@ def detect_regulatory_condition_candidates(summary_batch: FileSummaryBatch) -> d
|
||||
"product_name": {
|
||||
"label": "产品名称",
|
||||
"input_type": "text",
|
||||
"suggested": summary_batch.product_name or "",
|
||||
"suggested": product_name,
|
||||
},
|
||||
"model_spec": {
|
||||
"label": "型号规格",
|
||||
"input_type": "text",
|
||||
"suggested": "",
|
||||
"suggested": field_candidates.get("型号规格", ""),
|
||||
},
|
||||
"intended_use": {
|
||||
"label": "预期用途",
|
||||
"input_type": "text",
|
||||
"suggested": "",
|
||||
"suggested": field_candidates.get("预期用途", ""),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _extract_item_fields(item) -> dict[str, str]:
    """Return the field candidates extracted from one batch item's stored file.

    Args:
        item: Object exposing a ``storage_path`` attribute. A relative path is
            resolved against ``settings.MEDIA_ROOT``; an absolute path is used
            as given.

    Returns:
        ``result.field_candidates`` from ``extract_text`` when extraction
        reports ``status == "success"`` and candidates are non-empty;
        otherwise an empty dict. An empty dict is also returned when the item
        has no storage path or the resolved path is not a regular file.
    """
    storage_path = item.storage_path
    # An empty/None path would otherwise resolve to MEDIA_ROOT itself (or make
    # Path() raise TypeError), so treat it as "nothing to extract".
    if not storage_path:
        return {}

    path = Path(storage_path)
    if not path.is_absolute():
        path = Path(settings.MEDIA_ROOT) / path

    # is_file() rather than exists(): a directory exists but cannot be parsed.
    if not path.is_file():
        return {}

    result = extract_text(path)
    if result.status != "success" or not result.field_candidates:
        return {}
    return result.field_candidates
|
||||
|
||||
|
||||
def _safe_summary_product_name(product_name: str) -> str:
    """Return the trimmed summary product name, or "" when it is unusable.

    The value is rejected (empty string returned) when it is blank or when it
    contains any of the structural markers listed below — presumably because
    such a value is a dossier chapter/section heading rather than an actual
    product name (TODO confirm with the caller's data).

    Args:
        product_name: Raw product name from the summary batch; falsy values
            are treated as an empty string.

    Returns:
        The whitespace-stripped name, or "" if it is blank or contains a marker.
    """
    cleaned = (product_name or "").strip()
    structural_markers = ("第1章", "第2章", "监管信息", "综述资料", "非临床资料", "章节目录")
    for marker in structural_markers:
        if marker in cleaned:
            return ""
    # A blank value contains no marker and falls through as "".
    return cleaned
|
||||
|
||||
|
||||
def _detect_product_category(corpus: str) -> str:
|
||||
if any(keyword in corpus for keyword in ["体外诊断", "检测试剂", "试剂盒", "IVD"]):
|
||||
return "体外诊断试剂"
|
||||
|
||||
Reference in New Issue
Block a user