fix(application-form-fill): 过滤申请表噪声冲突内容

This commit is contained in:
2026-06-07 20:34:24 +08:00
parent d640ced748
commit 003ff59268
4 changed files with 81 additions and 6 deletions

View File

@@ -124,6 +124,26 @@ def test_rule_stops_product_name_before_application_form_instructions():
assert "填表说明" not in values["product_name"]
def test_rule_ignores_generic_enterprise_name_from_application_form():
    """Rule extraction must not emit applicant/agent names for this form text.

    The sample application form contains an "企业名称" label inside an
    option row and a clinical-trial institution line, but no explicit
    applicant or agent entry; the test asserts that neither
    ``applicant_name`` nor ``agent_name`` appears among the extracted fields.
    """
    form_lines = (
        "注册人制度\t是 企业名称:否",
        "优先通道申请 应急通道 同品种首个产品首次申报",
        "临床试验",
        "临床试验机构名称: 中国医学科学院北京协和医院、晋中市第一人民医院",
        "应附资料",
    )
    documents = {"CH1.4 申请表.docx": "\n".join(form_lines)}

    extraction = extract_by_rules(documents, _registration_specs())

    # Index the extracted fields by key so absence can be checked directly.
    extracted = dict(
        (entry["key"], entry["value"]) for entry in extraction["fields"]
    )

    assert "applicant_name" not in extracted
    assert "agent_name" not in extracted
def test_llm_extract_parses_structured_json(monkeypatch):
monkeypatch.setattr(
"review_agent.application_form_fill.services.field_extract.generate_completion",