fix(application-form-fill): 过滤申请表噪声冲突内容
This commit is contained in:
@@ -17,11 +17,11 @@ from review_agent.regulatory_review.services.text_extract import extract_text
|
||||
|
||||
FIELD_ALIASES = {
|
||||
"product_name": ["产品名称"],
|
||||
"applicant_name": ["注册人名称", "生产企业名称", "企业名称", "生产企业"],
|
||||
"applicant_address": ["注册人住所", "生产企业住所", "企业住所", "住所"],
|
||||
"applicant_name": ["注册人名称", "申请人名称", "生产企业名称"],
|
||||
"applicant_address": ["注册人住所", "申请人住所", "生产企业住所"],
|
||||
"manufacturer_address": ["生产地址", "生产企业地址", "生产场所"],
|
||||
"agent_name": ["代理人名称", "生产企业名称", "企业名称", "生产企业", "注册人名称"],
|
||||
"agent_address": ["代理人住所", "生产企业住所", "企业住所", "住所", "注册人住所"],
|
||||
"agent_name": ["代理人名称", "生产企业名称", "注册人名称", "申请人名称"],
|
||||
"agent_address": ["代理人住所", "生产企业住所", "注册人住所", "申请人住所"],
|
||||
"package_specification": ["包装规格", "规格"],
|
||||
"main_components": ["主要组成成分", "主要组成", "组成成分"],
|
||||
"intended_use": ["预期用途"],
|
||||
@@ -35,6 +35,14 @@ STATIC_STOP_LABELS = [
|
||||
"注",
|
||||
"保证书",
|
||||
"应附资料",
|
||||
"优先通道申请",
|
||||
"分类编码",
|
||||
"医疗器械唯一标识",
|
||||
"注册产品目前是否",
|
||||
"临床评价路径",
|
||||
"临床试验",
|
||||
"其他需要说明的问题",
|
||||
"国家药监局器审中心医疗器械",
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -22,10 +22,11 @@ def build_assistant_summary(batch: ApplicationFormFillBatch, exports: list[Expor
|
||||
lines.extend(["", "| 冲突字段 | 采用值 | 冲突来源 | 处理 |", "| --- | --- | --- | --- |"])
|
||||
for item in conflicts:
|
||||
conflict_sources = ";".join(
|
||||
f"{value.get('source_file', '')}:{value.get('value', '')}" for value in item.get("conflict_values", [])
|
||||
f"{_compact_table_text(value.get('source_file', ''))}:{_compact_table_text(value.get('value', ''))}"
|
||||
for value in item.get("conflict_values", [])
|
||||
)
|
||||
lines.append(
|
||||
f"| {item.get('field_label', item.get('field_key', ''))} | {item.get('selected_value', '')} | {conflict_sources or '-'} | {item.get('handling', '')} |"
|
||||
f"| {_compact_table_text(item.get('field_label', item.get('field_key', '')))} | {_compact_table_text(item.get('selected_value', ''))} | {_compact_table_text(conflict_sources or '-')} | {_compact_table_text(item.get('handling', ''))} |"
|
||||
)
|
||||
|
||||
if trace_exports:
|
||||
@@ -33,3 +34,10 @@ def build_assistant_summary(batch: ApplicationFormFillBatch, exports: list[Expor
|
||||
for export in trace_exports:
|
||||
lines.append(f"[下载{export.file_name}](/api/review-agent/file-summary/exports/{export.pk}/download/)")
|
||||
return "\n".join(lines).strip()
|
||||
|
||||
|
||||
def _compact_table_text(value: object, *, limit: int = 80) -> str:
|
||||
text = " ".join(str(value or "").replace("|", " ").split())
|
||||
if len(text) <= limit:
|
||||
return text
|
||||
return f"{text[:limit]}..."
|
||||
|
||||
@@ -124,6 +124,26 @@ def test_rule_stops_product_name_before_application_form_instructions():
|
||||
assert "填表说明" not in values["product_name"]
|
||||
|
||||
|
||||
def test_rule_ignores_generic_enterprise_name_from_application_form():
    """Rule extraction must not produce applicant/agent names from this form text.

    The sample application-form text only contains a generic enterprise-name
    label followed by unrelated sections, so neither ``applicant_name`` nor
    ``agent_name`` may appear among the extracted fields.
    """
    form_lines = [
        "注册人制度\t是 企业名称:否",
        "优先通道申请 应急通道 同品种首个产品首次申报",
        "临床试验",
        "临床试验机构名称: 中国医学科学院北京协和医院、晋中市第一人民医院",
        "应附资料",
    ]
    texts = {"CH1.4 申请表.docx": "\n".join(form_lines)}

    result = extract_by_rules(texts, _registration_specs())

    # Index the extracted fields by key so absence can be asserted directly.
    values = {}
    for field in result["fields"]:
        values[field["key"]] = field["value"]

    assert "applicant_name" not in values
    assert "agent_name" not in values
|
||||
|
||||
|
||||
def test_llm_extract_parses_structured_json(monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"review_agent.application_form_fill.services.field_extract.generate_completion",
|
||||
|
||||
39
tests/test_application_form_fill_summary.py
Normal file
39
tests/test_application_form_fill_summary.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import pytest
|
||||
|
||||
from review_agent.application_form_fill.services.summary import build_assistant_summary
|
||||
from review_agent.models import ApplicationFormFillBatch, Conversation, FileSummaryBatch
|
||||
|
||||
|
||||
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_assistant_summary_compacts_long_conflict_values(django_user_model):
    """The summary table keeps a long multi-line conflict value on one short row."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary = FileSummaryBatch.objects.create(conversation=conversation, user=owner, batch_no="FS-SUMMARY")

    # A conflict whose value spans several lines and carries trailing noise.
    noisy_conflict_value = {
        "source_file": "CH1.4 申请表.docx",
        "value": "否\n临床试验\n临床试验机构名称: 中国医学科学院北京协和医院、晋中市第一人民医院、北京市疾病预防控制中心 临床数据库.zip\n应附资料",
    }
    conflict_entry = {
        "field_key": "applicant_name",
        "field_label": "注册人名称",
        "selected_value": "卡尤迪生物科技宜兴有限公司",
        "conflict_values": [noisy_conflict_value],
        "handling": "说明书优先,模板内黄底红字高亮",
    }
    batch = ApplicationFormFillBatch.objects.create(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary,
        batch_no="AFF-SUMMARY",
        conflict_summary=[conflict_entry],
    )

    content = build_assistant_summary(batch, [])

    assert "临床试验机构名称" in content
    matching_rows = [row for row in content.splitlines() if "临床试验机构名称" in row]
    assert len(matching_rows[0]) < 220
    assert "\n临床试验\n" not in content
|
||||
Reference in New Issue
Block a user