fix(application-form-fill): 清理填表说明并收窄按钮话术
This commit is contained in:
@@ -28,6 +28,15 @@ FIELD_ALIASES = {
|
|||||||
"storage_condition_and_validity": ["产品储存条件及有效期", "储存条件及有效期", "储存条件", "有效期"],
|
"storage_condition_and_validity": ["产品储存条件及有效期", "储存条件及有效期", "储存条件", "有效期"],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Fixed headings that are not configured fields themselves. They seed the
# label list built by `_all_field_labels`, so the colon-label extractor can
# use them as stop markers for the preceding field's value.
STATIC_STOP_LABELS = [
    "申请人", "国家药品监督管理局", "填表说明",
    "注", "保证书", "应附资料",
]
|
||||||
|
|
||||||
|
|
||||||
def collect_document_texts(summary_batch: FileSummaryBatch) -> dict[str, str]:
|
def collect_document_texts(summary_batch: FileSummaryBatch) -> dict[str, str]:
|
||||||
texts: dict[str, str] = {}
|
texts: dict[str, str] = {}
|
||||||
@@ -180,7 +189,7 @@ def _field_aliases(field: dict[str, str]) -> list[str]:
|
|||||||
|
|
||||||
|
|
||||||
def _all_field_labels(fields: list[dict[str, str]]) -> list[str]:
|
def _all_field_labels(fields: list[dict[str, str]]) -> list[str]:
|
||||||
labels: list[str] = []
|
labels: list[str] = list(STATIC_STOP_LABELS)
|
||||||
for field in fields:
|
for field in fields:
|
||||||
for label in _field_aliases(field):
|
for label in _field_aliases(field):
|
||||||
if label not in labels:
|
if label not in labels:
|
||||||
@@ -194,7 +203,7 @@ def _extract_label_value(text: str, label: str, labels: list[str]) -> tuple[str,
|
|||||||
|
|
||||||
def _extract_colon_label_value(text: str, label: str, labels: list[str]) -> tuple[str, str]:
|
def _extract_colon_label_value(text: str, label: str, labels: list[str]) -> tuple[str, str]:
|
||||||
escaped_labels = "|".join(re.escape(item) for item in labels if item != label)
|
escaped_labels = "|".join(re.escape(item) for item in labels if item != label)
|
||||||
stop_pattern = rf"(?=\n\s*(?:{escaped_labels})\s*[::])" if escaped_labels else r"(?=\Z)"
|
stop_pattern = rf"(?=\n\s*(?:{escaped_labels})(?:\s*[::]|\s*$))" if escaped_labels else r"(?=\Z)"
|
||||||
pattern = re.compile(rf"{re.escape(label)}\s*[::]\s*(.+?)(?:{stop_pattern}|\Z)", re.S)
|
pattern = re.compile(rf"{re.escape(label)}\s*[::]\s*(.+?)(?:{stop_pattern}|\Z)", re.S)
|
||||||
match = pattern.search(text or "")
|
match = pattern.search(text or "")
|
||||||
if not match:
|
if not match:
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ def fill_template(
|
|||||||
conflicts: list[dict] | None = None,
|
conflicts: list[dict] | None = None,
|
||||||
) -> Path:
|
) -> Path:
|
||||||
document = Document(str(template_path))
|
document = Document(str(template_path))
|
||||||
|
remove_fill_instructions(document)
|
||||||
conflict_keys = {item.get("field_key") for item in conflicts or []}
|
conflict_keys = {item.get("field_key") for item in conflicts or []}
|
||||||
for field_config in spec.fields:
|
for field_config in spec.fields:
|
||||||
target = field_config.get("target") or {}
|
target = field_config.get("target") or {}
|
||||||
@@ -43,6 +44,25 @@ def fill_template(
|
|||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
def remove_fill_instructions(document: Document) -> None:
    """Delete the template's "填表说明" (fill-in instructions) content in place.

    The following content is removed from ``document``:

    * the body paragraph whose normalized text is exactly "填表说明", together
      with every body paragraph that follows it;
    * any earlier body paragraph whose normalized text starts with "注填表前"
      and also contains "填表说明";
    * any table row whose normalized text is exactly "填表说明" or starts with
      "注填表前".

    Text is normalized with ``_normalize_label`` before comparison (defined
    elsewhere; presumably strips whitespace/punctuation -- confirm there).
    Only ``document.paragraphs`` and ``document.tables`` are scanned.

    Args:
        document: The opened Word document to clean; it is mutated.
    """
    heading = "填表说明"
    notice_prefix = "注填表前"

    removing = False
    # Iterate over a snapshot because paragraphs are detached while looping.
    for paragraph in list(document.paragraphs):
        text = _normalize_label(paragraph.text)
        if text == heading:
            # From the heading onwards everything is instruction text.
            removing = True
        if removing:
            _remove_paragraph(paragraph)
            continue
        if text.startswith(notice_prefix) and heading in text:
            _remove_paragraph(paragraph)

    for table in document.tables:
        for row in list(table.rows):
            # python-docx lists a horizontally merged cell once per grid column
            # it spans. Joining those duplicates would turn a merged heading
            # row into "填表说明填表说明" and defeat the exact-match check, so
            # keep only one cell per underlying <w:tc> element.
            distinct_cells = []
            for cell in row.cells:
                if not any(cell._tc is kept._tc for kept in distinct_cells):
                    distinct_cells.append(cell)
            row_text = _normalize_label("".join(cell.text for cell in distinct_cells))
            if row_text == heading or row_text.startswith(notice_prefix):
                _remove_row(row)
|
||||||
|
|
||||||
|
|
||||||
def fill_table_row(document: Document, row_label: str, value: str, *, conflict: bool = False) -> bool:
|
def fill_table_row(document: Document, row_label: str, value: str, *, conflict: bool = False) -> bool:
|
||||||
normalized_label = _normalize_label(row_label)
|
normalized_label = _normalize_label(row_label)
|
||||||
for table in document.tables:
|
for table in document.tables:
|
||||||
@@ -71,6 +91,15 @@ def apply_cell_shading(cell, fill: str) -> None:
|
|||||||
shading.set(qn("w:fill"), fill)
|
shading.set(qn("w:fill"), fill)
|
||||||
|
|
||||||
|
|
||||||
|
def _remove_paragraph(paragraph) -> None:
|
||||||
|
element = paragraph._element
|
||||||
|
element.getparent().remove(element)
|
||||||
|
|
||||||
|
|
||||||
|
def _remove_row(row) -> None:
|
||||||
|
row._tr.getparent().remove(row._tr)
|
||||||
|
|
||||||
|
|
||||||
def create_word_export(
|
def create_word_export(
|
||||||
batch: ApplicationFormFillBatch,
|
batch: ApplicationFormFillBatch,
|
||||||
spec: TemplateSpec,
|
spec: TemplateSpec,
|
||||||
|
|||||||
@@ -211,7 +211,7 @@
|
|||||||
<button
|
<button
|
||||||
class="tool-chip"
|
class="tool-chip"
|
||||||
type="button"
|
type="button"
|
||||||
data-prompt-template="请基于当前对话最近成功汇总的产品资料,自动提取产品关键信息并填入申报文件模板,优先生成注册证 Word 和字段来源追溯清单。"
|
data-prompt-template="请基于当前对话最近成功汇总的产品资料,自动提取产品关键信息并填入申报文件模板"
|
||||||
>申报文件填表</button>
|
>申报文件填表</button>
|
||||||
</div>
|
</div>
|
||||||
<button class="send-button" type="submit" id="sendButton">发送</button>
|
<button class="send-button" type="submit" id="sendButton">发送</button>
|
||||||
|
|||||||
@@ -103,6 +103,27 @@ def test_rule_maps_agent_fields_to_manufacturer_company_for_now():
|
|||||||
assert values["manufacturer_address"] == "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室"
|
assert values["manufacturer_address"] == "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室"
|
||||||
|
|
||||||
|
|
||||||
|
def test_rule_stops_product_name_before_application_form_instructions():
    """The extracted product name must not run on into the form boilerplate."""
    form_lines = [
        "产品名称:呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)",
        "申请人:",
        "卡尤迪生物科技宜兴有限公司",
        "国家药品监督管理局",
        "填表说明",
        "1. 本表依据《体外诊断注册与备案管理办法》制定。",
    ]
    documents = {"境内体外诊断试剂注册申请表.docx": "\n".join(form_lines)}

    extracted = extract_by_rules(documents, _registration_specs())

    by_key = {}
    for field in extracted["fields"]:
        by_key[field["key"]] = field["value"]
    product_name = by_key["product_name"]
    assert product_name == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)"
    assert "填表说明" not in product_name
|
||||||
|
|
||||||
|
|
||||||
def test_llm_extract_parses_structured_json(monkeypatch):
|
def test_llm_extract_parses_structured_json(monkeypatch):
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
"review_agent.application_form_fill.services.field_extract.generate_completion",
|
"review_agent.application_form_fill.services.field_extract.generate_completion",
|
||||||
|
|||||||
@@ -41,6 +41,17 @@ def _template(path):
|
|||||||
document.save(path)
|
document.save(path)
|
||||||
|
|
||||||
|
|
||||||
|
def _template_with_instructions(path):
    """Save a template with two labelled table rows plus a "填表说明" section.

    The section is a heading paragraph followed by two numbered instruction
    paragraphs, all placed after the table.
    """
    document = Document()
    table = document.add_table(rows=2, cols=2)
    for row, label in zip(table.rows, ("产品名称", "预期用途")):
        row.cells[0].text = label
    instruction_lines = (
        "填表说明",
        "1. 本表依据《体外诊断注册与备案管理办法》制定。",
        "2. 本表可从国家药品监督管理局网站下载。",
    )
    for line in instruction_lines:
        document.add_paragraph(line)
    document.save(path)
|
||||||
|
|
||||||
|
|
||||||
def test_word_fill_writes_table_rows(tmp_path):
|
def test_word_fill_writes_table_rows(tmp_path):
|
||||||
template_path = tmp_path / "template.docx"
|
template_path = tmp_path / "template.docx"
|
||||||
output_path = tmp_path / "filled.docx"
|
output_path = tmp_path / "filled.docx"
|
||||||
@@ -61,6 +72,27 @@ def test_word_fill_writes_table_rows(tmp_path):
|
|||||||
assert document.tables[0].rows[1].cells[1].text == "用于体外检测"
|
assert document.tables[0].rows[1].cells[1].text == "用于体外检测"
|
||||||
|
|
||||||
|
|
||||||
|
def test_word_fill_removes_template_fill_instructions(tmp_path):
    """fill_template drops the whole "填表说明" section and still fills the table."""
    template_path = tmp_path / "template.docx"
    output_path = tmp_path / "filled.docx"
    _template_with_instructions(template_path)

    fill_template(
        template_path,
        output_path,
        _spec(),
        {
            "product_name": MergedField("product_name", "产品名称", "甲胎蛋白检测试剂盒", "说明书.txt", "证据", 0.8),
        },
    )

    document = Document(output_path)
    text = "\n".join(paragraph.text for paragraph in document.paragraphs)
    assert "填表说明" not in text
    assert "本表依据" not in text
    # The fixture adds two instruction lines; the second one must go as well,
    # otherwise a regression that only strips the first line would pass.
    assert "本表可从" not in text
    assert document.tables[0].rows[0].cells[1].text == "甲胎蛋白检测试剂盒"
|
||||||
|
|
||||||
|
|
||||||
def test_word_fill_highlights_conflict_in_docx_xml(tmp_path):
|
def test_word_fill_highlights_conflict_in_docx_xml(tmp_path):
|
||||||
template_path = tmp_path / "template.docx"
|
template_path = tmp_path / "template.docx"
|
||||||
output_path = tmp_path / "filled.docx"
|
output_path = tmp_path / "filled.docx"
|
||||||
|
|||||||
@@ -251,6 +251,7 @@ def test_workspace_tool_buttons_fill_default_prompts(client, django_user_model):
|
|||||||
assert ">风险预警</button>" not in content
|
assert ">风险预警</button>" not in content
|
||||||
assert 'data-prompt-template="请对当前对话已上传的文件或压缩包自动汇总文件目录' in content
|
assert 'data-prompt-template="请对当前对话已上传的文件或压缩包自动汇总文件目录' in content
|
||||||
assert 'data-prompt-template="请对当前对话最近成功汇总的注册资料发起 NMPA 法规核查与风险预警' in content
|
assert 'data-prompt-template="请对当前对话最近成功汇总的注册资料发起 NMPA 法规核查与风险预警' in content
|
||||||
assert 'data-prompt-template="请基于当前对话最近成功汇总的产品资料,自动提取产品关键信息并填入申报文件模板' in content
|
assert 'data-prompt-template="请基于当前对话最近成功汇总的产品资料,自动提取产品关键信息并填入申报文件模板"' in content
|
||||||
|
assert "优先生成注册证 Word 和字段来源追溯清单" not in content
|
||||||
assert "bindPromptTemplateButtons" in script
|
assert "bindPromptTemplateButtons" in script
|
||||||
assert "promptInput.value = template" in script
|
assert "promptInput.value = template" in script
|
||||||
|
|||||||
Reference in New Issue
Block a user