fix(regulatory): 优先从附件字段识别适用条件
This commit is contained in:
@@ -1,6 +1,11 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
from review_agent.models import FileSummaryBatch
|
from review_agent.models import FileSummaryBatch
|
||||||
|
from review_agent.regulatory_review.services.text_extract import extract_text
|
||||||
|
|
||||||
|
|
||||||
OPTION_FIELDS = {
|
OPTION_FIELDS = {
|
||||||
@@ -14,9 +19,14 @@ def detect_regulatory_condition_candidates(summary_batch: FileSummaryBatch) -> d
|
|||||||
"""Infers review-scope conditions from the summary batch and file names."""
|
"""Infers review-scope conditions from the summary batch and file names."""
|
||||||
|
|
||||||
corpus_parts = [summary_batch.product_name or ""]
|
corpus_parts = [summary_batch.product_name or ""]
|
||||||
|
field_candidates: dict[str, str] = {}
|
||||||
for item in summary_batch.items.order_by("file_index"):
|
for item in summary_batch.items.order_by("file_index"):
|
||||||
corpus_parts.extend([item.directory_level, item.file_name, item.relative_path])
|
corpus_parts.extend([item.directory_level, item.file_name, item.relative_path])
|
||||||
|
extracted = _extract_item_fields(item)
|
||||||
|
field_candidates.update({key: value for key, value in extracted.items() if value and key not in field_candidates})
|
||||||
|
corpus_parts.extend(extracted.values())
|
||||||
corpus = "\n".join(part for part in corpus_parts if part)
|
corpus = "\n".join(part for part in corpus_parts if part)
|
||||||
|
product_name = field_candidates.get("产品名称") or _safe_summary_product_name(summary_batch.product_name)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"product_category": {
|
"product_category": {
|
||||||
@@ -40,21 +50,42 @@ def detect_regulatory_condition_candidates(summary_batch: FileSummaryBatch) -> d
|
|||||||
"product_name": {
|
"product_name": {
|
||||||
"label": "产品名称",
|
"label": "产品名称",
|
||||||
"input_type": "text",
|
"input_type": "text",
|
||||||
"suggested": summary_batch.product_name or "",
|
"suggested": product_name,
|
||||||
},
|
},
|
||||||
"model_spec": {
|
"model_spec": {
|
||||||
"label": "型号规格",
|
"label": "型号规格",
|
||||||
"input_type": "text",
|
"input_type": "text",
|
||||||
"suggested": "",
|
"suggested": field_candidates.get("型号规格", ""),
|
||||||
},
|
},
|
||||||
"intended_use": {
|
"intended_use": {
|
||||||
"label": "预期用途",
|
"label": "预期用途",
|
||||||
"input_type": "text",
|
"input_type": "text",
|
||||||
"suggested": "",
|
"suggested": field_candidates.get("预期用途", ""),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_item_fields(item) -> dict[str, str]:
    """Return the field candidates extracted from one summary item's file.

    The item's ``storage_path`` is used as-is when absolute; a relative path
    is resolved against ``settings.MEDIA_ROOT``.

    Args:
        item: Object exposing a ``storage_path`` attribute (presumably a
            ``FileSummaryItem`` -- verify against the caller).

    Returns:
        The ``field_candidates`` reported by ``extract_text`` when extraction
        succeeded and produced candidates; otherwise an empty dict. An empty
        dict is also returned when the item has no storage path or the path
        does not point at a regular file.
    """
    storage_path = item.storage_path
    if not storage_path:
        # Without this guard ``Path(None)`` raises, and ``Path("")`` resolves
        # to MEDIA_ROOT itself (a directory) once joined below.
        return {}

    path = Path(storage_path)
    if not path.is_absolute():
        path = Path(settings.MEDIA_ROOT) / path

    # ``is_file`` rather than ``exists``: a directory must never be handed
    # to the text extractor.
    if not path.is_file():
        return {}

    result = extract_text(path)
    if result.status != "success" or not result.field_candidates:
        return {}
    return result.field_candidates
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_summary_product_name(product_name: str) -> str:
    """Return the summary's product name unless it looks like a section title.

    The value is stripped of surrounding whitespace. It is discarded (an
    empty string is returned) when it is empty, when it contains one of the
    known dossier section keywords, or when it contains a chapter heading of
    the form "第N章" (Arabic or Chinese numerals, optional inner spaces).

    Args:
        product_name: Product name recorded on the summary batch. Falsy
            values, including ``None``, are treated as empty.

    Returns:
        The stripped product name, or ``""`` when it should not be suggested.
    """
    import re  # local import keeps this helper self-contained

    value = (product_name or "").strip()
    if not value:
        return ""

    section_keywords = ("监管信息", "综述资料", "非临床资料", "章节目录")
    if any(keyword in value for keyword in section_keywords):
        return ""

    # Any chapter heading is rejected, not only chapters 1 and 2, so titles
    # such as "第4章 ..." or "第三章" are not mistaken for a product name.
    if re.search(r"第\s*[\d一二三四五六七八九十百零〇两]+\s*章", value):
        return ""
    return value
|
||||||
|
|
||||||
|
|
||||||
def _detect_product_category(corpus: str) -> str:
|
def _detect_product_category(corpus: str) -> str:
|
||||||
if any(keyword in corpus for keyword in ["体外诊断", "检测试剂", "试剂盒", "IVD"]):
|
if any(keyword in corpus for keyword in ["体外诊断", "检测试剂", "试剂盒", "IVD"]):
|
||||||
return "体外诊断试剂"
|
return "体外诊断试剂"
|
||||||
|
|||||||
@@ -49,6 +49,39 @@ def test_detect_regulatory_condition_candidates_from_summary_items(django_user_m
|
|||||||
assert candidates["product_name"]["suggested"] == "甲胎蛋白检测试剂盒"
|
assert candidates["product_name"]["suggested"] == "甲胎蛋白检测试剂盒"
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_regulatory_condition_prefers_attachment_fields_over_chapter_title(settings, tmp_path, django_user_model):
    """Fields parsed from an attachment win over a chapter-title product name."""
    settings.MEDIA_ROOT = tmp_path

    # Attachment whose content carries the real product fields.
    attachment = tmp_path / "application.txt"
    attachment.write_text(
        "产品名称：甲胎蛋白检测试剂盒\n型号规格：20人份/盒\n预期用途：用于人血清中甲胎蛋白检测\n注册类型：首次注册\n",
        encoding="utf-8",
    )

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    # The batch-level product name is a chapter title, not a real product name.
    batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )
    FileSummaryItem.objects.create(
        batch=batch,
        file_index=1,
        directory_level="1. 监管信息 / 1.2 申请表",
        file_name="申请表.txt",
        file_type="txt",
        relative_path="1.监管信息/申请表.txt",
        storage_path=str(attachment),
    )

    candidates = detect_regulatory_condition_candidates(batch)

    suggested = {
        key: candidates[key]["suggested"]
        for key in ("product_name", "model_spec", "intended_use")
    }
    assert suggested == {
        "product_name": "甲胎蛋白检测试剂盒",
        "model_spec": "20人份/盒",
        "intended_use": "用于人血清中甲胎蛋白检测",
    }
|
||||||
|
|
||||||
|
|
||||||
def test_workflow_pauses_before_rule_scope_until_conditions_confirmed(settings, tmp_path, django_user_model):
|
def test_workflow_pauses_before_rule_scope_until_conditions_confirmed(settings, tmp_path, django_user_model):
|
||||||
settings.MEDIA_ROOT = tmp_path
|
settings.MEDIA_ROOT = tmp_path
|
||||||
user = django_user_model.objects.create_user(username="owner", password="pass")
|
user = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
|
|||||||
Reference in New Issue
Block a user