From e48d44f832df8759b5396d85bb2a4b2fca92f239 Mon Sep 17 00:00:00 2001
From: bruce
Date: Sun, 7 Jun 2026 18:23:06 +0800
Subject: [PATCH] =?UTF-8?q?feat(application-form-fill):=20=E6=96=B0?=
 =?UTF-8?q?=E5=A2=9E=E6=A8=A1=E6=9D=BF=E9=85=8D=E7=BD=AE=E9=AA=A8=E6=9E=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../application_form_fill/__init__.py         |   1 +
 .../application_form_fill/constants.py        |  31 ++++
 review_agent/application_form_fill/schemas.py |  58 ++++++++
 .../services/__init__.py                      |   1 +
 .../services/template_config.py               | 134 ++++++++++++++++++
 review_agent/application_form_fill/storage.py |  83 +++++++++++
 .../application_form_templates_v1.yaml        | 112 +++++++++++++++
 review_agent/application_form_fill/views.py   |   7 +
 .../application_form_fill/workflow.py         |  24 ++++
 ...t_application_form_fill_template_config.py | 106 ++++++++++++++
 10 files changed, 557 insertions(+)
 create mode 100644 review_agent/application_form_fill/__init__.py
 create mode 100644 review_agent/application_form_fill/constants.py
 create mode 100644 review_agent/application_form_fill/schemas.py
 create mode 100644 review_agent/application_form_fill/services/__init__.py
 create mode 100644 review_agent/application_form_fill/services/template_config.py
 create mode 100644 review_agent/application_form_fill/storage.py
 create mode 100644 review_agent/application_form_fill/templates/application_form_templates_v1.yaml
 create mode 100644 review_agent/application_form_fill/views.py
 create mode 100644 review_agent/application_form_fill/workflow.py
 create mode 100644 tests/test_application_form_fill_template_config.py

diff --git a/review_agent/application_form_fill/__init__.py b/review_agent/application_form_fill/__init__.py
new file mode 100644
index 0000000..3a7b8c0
--- /dev/null
+++ b/review_agent/application_form_fill/__init__.py
@@ -0,0 +1 @@
+"""Application form auto-fill workflow package."""
diff --git a/review_agent/application_form_fill/constants.py b/review_agent/application_form_fill/constants.py
new file mode 100644
index 0000000..2fc91ba
--- /dev/null
+++ b/review_agent/application_form_fill/constants.py
@@ -0,0 +1,31 @@
+WORKFLOW_TYPE = "application_form_fill"
+
+TEMPLATE_REGISTRATION_CERTIFICATE = "registration_certificate"
+TEMPLATE_CHANGE_REGISTRATION = "change_registration"
+TEMPLATE_ESSENTIAL_PRINCIPLES = "essential_principles"
+
+DEFAULT_OUTPUT_TYPES = ["word", "excel", "json"]
+
+FORM_FILL_TRIGGER_KEYWORDS = [
+    "填注册证",
+    "对应的表格",
+    "生成申报模板",
+    "安全和性能基本原则清单",
+    "填到申报模板",
+    "自动填表",
+    "生成表格",
+]
+
+FORM_FILL_NODE_DEFINITIONS = [
+    ("prepare", "准备资料", "form_fill"),
+    ("template_select", "选择模板", "form_fill"),
+    ("template_copy", "复制模板", "form_fill"),
+    ("field_extract", "抽取字段", "form_fill"),
+    ("conflict_merge", "冲突归并", "form_fill"),
+    ("word_fill", "填写 Word", "form_fill"),
+    ("pdf_convert", "转换 PDF", "form_fill"),
+    ("trace_export", "追溯清单", "form_fill"),
+    ("output_export", "输出下载", "form_fill"),
+    ("notify", "飞书通知", "form_fill"),
+    ("completed", "完成", "completed"),
+]
diff --git a/review_agent/application_form_fill/schemas.py b/review_agent/application_form_fill/schemas.py
new file mode 100644
index 0000000..de89257
--- /dev/null
+++ b/review_agent/application_form_fill/schemas.py
@@ -0,0 +1,58 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+from review_agent.models import ApplicationFormFillBatch, ExportedSummaryFile, FileSummaryBatch, RegulatoryReviewBatch
+
+
+@dataclass(frozen=True)
+class TemplateSpec:
+    code: str
+    name: str
+    source_file: str
+    output_label: str
+    applies_when: dict[str, Any]
+    file_format: str
+    fields: list[dict[str, Any]]
+    checklist_items: list[dict[str, Any]] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class ExtractedField:
+    key: str
+    label: str
+    value: str
+    source_file: str
+    source_role: str
+    evidence: str
+    extractor: str
+    confidence: float
+
+
+@dataclass(frozen=True)
+class MergedField:
+    key: str
+    label: str
+    value: str
+    source_file: str
+    evidence: str
+    confidence: float
+    has_conflict: bool = False
+    conflict_values: list[dict[str, Any]] = field(default_factory=list)
+
+
+@dataclass
+class FormFillContext:
+    batch: ApplicationFormFillBatch
+    source_summary_batch: FileSummaryBatch
+    source_regulatory_batch: RegulatoryReviewBatch | None
+    template_config: dict[str, Any] = field(default_factory=dict)
+    selected_templates: list[TemplateSpec] = field(default_factory=list)
+    document_texts: dict[str, str] = field(default_factory=dict)
+    regex_results: dict[str, Any] = field(default_factory=dict)
+    llm_results: dict[str, Any] = field(default_factory=dict)
+    merged_fields: dict[str, MergedField] = field(default_factory=dict)
+    checklist_items: dict[str, Any] = field(default_factory=dict)
+    conflicts: list[dict[str, Any]] = field(default_factory=list)
+    exports: list[ExportedSummaryFile] = field(default_factory=list)
diff --git a/review_agent/application_form_fill/services/__init__.py b/review_agent/application_form_fill/services/__init__.py
new file mode 100644
index 0000000..d92b991
--- /dev/null
+++ b/review_agent/application_form_fill/services/__init__.py
@@ -0,0 +1 @@
+"""Application form auto-fill services."""
diff --git a/review_agent/application_form_fill/services/template_config.py b/review_agent/application_form_fill/services/template_config.py
new file mode 100644
index 0000000..b2538b1
--- /dev/null
+++ b/review_agent/application_form_fill/services/template_config.py
@@ -0,0 +1,134 @@
+from __future__ import annotations
+
+import hashlib
+from pathlib import Path
+from typing import Any
+
+import yaml
+from django.conf import settings
+
+
+DEFAULT_CONFIG_PATH = (
+    Path(settings.BASE_DIR)
+    / "review_agent"
+    / "application_form_fill"
+    / "templates"
+    / "application_form_templates_v1.yaml"
+)
+
+SUPPORTED_TARGET_TYPES = {"table_row", "placeholder"}
+SUPPORTED_FILE_FORMATS = {"doc", "docx"}
+
+
+def load_template_config(path: str | Path | None = None) -> dict[str, Any]:
+    """Parse the template YAML file and return its top-level payload.
+
+    Falls back to ``DEFAULT_CONFIG_PATH`` when ``path`` is empty. An empty
+    document yields ``{}``. The payload is not validated here; pass it to
+    ``validate_template_config``.
+    """
+    config_path = Path(path) if path else DEFAULT_CONFIG_PATH
+    with config_path.open("r", encoding="utf-8") as handle:
+        payload = yaml.safe_load(handle) or {}
+    return payload
+
+
+def compute_config_hash(path: str | Path | None = None) -> str:
+    """Return the SHA-256 hex digest of the template config file's bytes."""
+    config_path = Path(path) if path else DEFAULT_CONFIG_PATH
+    digest = hashlib.sha256()
+    with config_path.open("rb") as handle:
+        # Read in 1 MiB chunks so the whole file is never held in memory.
+        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def validate_template_config(config: dict[str, Any], *, base_dir: str | Path | None = None) -> list[str]:
+    """Check a loaded template config and return the problems found.
+
+    Malformed input is reported as messages rather than raised, so callers
+    can show every problem at once.
+
+    Args:
+        config: Parsed payload, normally from ``load_template_config``.
+        base_dir: Directory ``source_dir`` is resolved against; defaults to
+            ``settings.BASE_DIR``.
+
+    Returns:
+        Error messages in discovery order; empty when the config is valid.
+    """
+    if not isinstance(config, dict):
+        # A YAML document whose top level is a list or scalar has no keys.
+        return ["模板配置必须是对象。"]
+
+    errors: list[str] = []
+    root = Path(base_dir) if base_dir else Path(settings.BASE_DIR)
+
+    version = config.get("version")
+    if not version:
+        errors.append("模板配置缺少 version。")
+
+    source_dir_value = config.get("source_dir")
+    source_dir: Path | None = None
+    source_dir_exists = False
+    if not source_dir_value:
+        errors.append("模板配置缺少 source_dir。")
+    elif not isinstance(source_dir_value, str):
+        # Joining a non-string onto a Path raises TypeError; report instead.
+        errors.append(f"模板 source_dir 必须是字符串:{source_dir_value}")
+    else:
+        source_dir = root / source_dir_value
+        # Probe once; template source files are only checked when it exists.
+        source_dir_exists = source_dir.exists()
+        if not source_dir_exists:
+            errors.append(f"模板 source_dir 不存在:{source_dir_value}")
+
+    templates = config.get("templates")
+    if not isinstance(templates, list) or not templates:
+        errors.append("模板配置必须包含非空 templates 列表。")
+        return errors
+
+    seen_codes: set[str] = set()
+    for index, template in enumerate(templates, start=1):
+        if not isinstance(template, dict):
+            errors.append(f"第 {index} 个模板配置必须是对象。")
+            continue
+        code = str(template.get("code") or "").strip()
+        if not code:
+            errors.append(f"第 {index} 个模板缺少 code。")
+        elif code in seen_codes:
+            errors.append(f"模板 code 重复:{code}")
+        else:
+            seen_codes.add(code)
+
+        file_format = str(template.get("file_format") or "").strip().lower()
+        if file_format not in SUPPORTED_FILE_FORMATS:
+            errors.append(f"模板 {code or index} 的 file_format 不支持:{file_format or '空'}")
+
+        source_file = str(template.get("source_file") or "").strip()
+        if not source_file:
+            errors.append(f"模板 {code or index} 缺少 source_file。")
+        elif source_dir_exists and not (source_dir / source_file).exists():
+            errors.append(f"模板 {code or index} 的 source_file 不存在:{source_file}")
+
+        fields = template.get("fields") or []
+        if not isinstance(fields, list):
+            errors.append(f"模板 {code or index} 的 fields 必须是列表。")
+            continue
+        for field_index, field_spec in enumerate(fields, start=1):
+            target = field_spec.get("target") if isinstance(field_spec, dict) else None
+            # A non-mapping target (string, list, ...) has no type; report it
+            # as unsupported instead of failing on ``.get``.
+            raw_type = target.get("type") if isinstance(target, dict) else None
+            target_type = str(raw_type or "").strip()
+            if target_type not in SUPPORTED_TARGET_TYPES:
+                errors.append(
+                    f"模板 {code or index} 第 {field_index} 个字段 target.type 不支持:{target_type or '空'}"
+                )
+    return errors
+
+
+def template_specs(config: dict[str, Any]) -> list[dict[str, Any]]:
+    """Return a shallow copy of the ``templates`` list (empty if absent)."""
+    return list(config.get("templates") or [])
diff --git a/review_agent/application_form_fill/storage.py b/review_agent/application_form_fill/storage.py
new file mode 100644
index 0000000..eeba562
--- /dev/null
+++ b/review_agent/application_form_fill/storage.py
@@ -0,0 +1,83 @@
+from __future__ import annotations
+
+import hashlib
+from pathlib import Path
+
+from django.conf import settings
+
+from review_agent.models import ApplicationFormFillArtifact, ApplicationFormFillBatch
+
+
+def build_batch_work_dir(batch: ApplicationFormFillBatch | None = None, *, batch_no: str = "") -> Path:
+    """Return the working directory for a batch under ``MEDIA_ROOT``.
+
+    With ``batch`` the path is namespaced by user id, conversation id and
+    batch number; otherwise only ``batch_no`` is appended.
+    """
+    base = Path(settings.MEDIA_ROOT) / "application_form_fill"
+    if batch:
+        return base / str(batch.user_id) / str(batch.conversation_id) / batch.batch_no
+    return base / batch_no
+
+
+def compute_file_sha256(path: str | Path) -> str:
+    """Return the SHA-256 hex digest of the file at ``path``."""
+    file_path = Path(path)
+    digest = hashlib.sha256()
+    with file_path.open("rb") as handle:
+        # Read in 1 MiB chunks so the whole file is never held in memory.
+        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def ensure_batch_subdir(batch: ApplicationFormFillBatch, name: str) -> Path:
+    """Create (if needed) and return a direct sub-directory of the batch dir.
+
+    Only the final path component of ``name`` is used, so callers cannot
+    nest or escape the batch working directory.
+
+    Raises:
+        ValueError: If ``name`` resolves to the parent directory (``..``).
+    """
+    root = Path(batch.work_dir) if batch.work_dir else build_batch_work_dir(batch)
+    leaf = Path(name).name
+    if leaf == "..":
+        # ``Path("..").name`` is "..", which would point outside ``root``.
+        raise ValueError(f"invalid batch sub-directory name: {name!r}")
+    target = root / leaf
+    target.mkdir(parents=True, exist_ok=True)
+    return target
+
+
+def create_artifact_for_file(
+    batch: ApplicationFormFillBatch,
+    *,
+    path: str | Path,
+    artifact_type: str,
+    file_format: str,
+    name: str = "",
+    metadata: dict | None = None,
+    created_by_node: str = "",
+) -> ApplicationFormFillArtifact:
+    """Persist an artifact row describing the file at ``path``.
+
+    Size and content hash are recorded when the file exists; a missing file
+    is stored with size ``0`` and an empty hash. ``name`` defaults to the
+    file stem.
+    """
+    file_path = Path(path)
+    # Check existence once so size and hash describe the same state.
+    file_exists = file_path.exists()
+    return ApplicationFormFillArtifact.objects.create(
+        batch=batch,
+        artifact_type=artifact_type,
+        file_format=file_format,
+        name=name or file_path.stem,
+        file_name=file_path.name,
+        storage_path=str(file_path),
+        file_size=file_path.stat().st_size if file_exists else 0,
+        content_hash=compute_file_sha256(file_path) if file_exists else "",
+        metadata=metadata or {},
+        created_by_node=created_by_node,
+    )
diff --git a/review_agent/application_form_fill/templates/application_form_templates_v1.yaml b/review_agent/application_form_fill/templates/application_form_templates_v1.yaml
new file mode 100644
index 0000000..9b106d7
--- /dev/null
+++ b/review_agent/application_form_fill/templates/application_form_templates_v1.yaml
@@ -0,0 +1,112 @@
+version: application_form_templates_v1
+source_dir: docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
+templates:
+  - code: registration_certificate
+    name: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式)
+    source_file: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx
+    output_label: 注册证格式
+    applies_when:
+      registration_type:
+        - 首次注册
+        - unknown
+    file_format: docx
+    fields:
+      - key: applicant_name
+        label: 注册人名称
+        target:
+          type: table_row
+          row_label: 注册人名称
+        source_roles:
+          - 申请表
+          - 说明书
+          - 企业信息
+      - key: applicant_address
+        label: 注册人住所
+        target:
+          type: table_row
+          row_label: 注册人住所
+        source_roles:
+          - 申请表
+          - 企业信息
+      - key: manufacturer_address
+        label: 生产地址
+        target:
+          type: table_row
+          row_label: 生产地址
+        source_roles:
+          - 申请表
+          - 质量管理体系文件
+      - key: product_name
+        label: 产品名称
+        target:
+          type: table_row
+          row_label: 产品名称
+        source_roles:
+          - 说明书
+          - 产品技术要求
+          - 注册检验报告
+      - key: package_specification
+        label: 包装规格
+        target:
+          type: table_row
+          row_label: 包装规格
+        source_roles:
+          - 说明书
+          - 产品技术要求
+      - key: main_components
+        label: 主要组成成分
+        target:
+          type: table_row
+          row_label: 主要组成成分
+        source_roles:
+          - 说明书
+          - 产品技术要求
+      - key: intended_use
+        label: 预期用途
+        target:
+          type: table_row
+          row_label: 预期用途
+        source_roles:
+          - 说明书
+          - 临床评价资料
+          - 产品技术要求
+      - key: storage_condition_and_validity
+        label: 产品储存条件及有效期
+        target:
+          type: table_row
+          row_label: 产品储存条件及有效期
+        source_roles:
+          - 说明书
+          - 产品技术要求
+          - 稳定性研究资料
+      - key: attachments
+        label: 附件
+        target:
+          type: table_row
+          row_label: 附件
+        source_roles:
+          - 注册申报资料
+          - 说明书
+  - code: change_registration
+    name: 中华人民共和国医疗器械变更注册(备案)文件(体外诊断试剂)(格式)
+    source_file: 中华人民共和国医疗器械变更注册(备案)文件(体外诊断试剂)(格式).doc
+    output_label: 变更注册备案文件
+    applies_when:
+      registration_type:
+        - 变更注册
+        - 备案
+    file_format: doc
+    fields: []
+  - code: essential_principles
+    name: 体外诊断试剂安全和性能基本原则清单
+    source_file: 体外诊断试剂安全和性能基本原则清单.doc
+    output_label: 安全和性能基本原则清单
+    applies_when:
+      registration_type:
+        - 首次注册
+        - 变更注册
+        - 备案
+        - unknown
+    file_format: doc
+    fields: []
+    checklist_items: []
diff --git a/review_agent/application_form_fill/views.py b/review_agent/application_form_fill/views.py
new file mode 100644
index 0000000..510ac0d
--- /dev/null
+++ b/review_agent/application_form_fill/views.py
@@ -0,0 +1,7 @@
+from django.http import JsonResponse
+from django.views.decorators.http import require_http_methods
+
+
+@require_http_methods(["GET"])
+def health(request):
+    return JsonResponse({"workflow_type": "application_form_fill", "status": "available"})
diff --git a/review_agent/application_form_fill/workflow.py b/review_agent/application_form_fill/workflow.py
new file mode 100644
index 0000000..78ec271
--- /dev/null
+++ b/review_agent/application_form_fill/workflow.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from review_agent.application_form_fill.constants import FORM_FILL_NODE_DEFINITIONS, WORKFLOW_TYPE
+
+
+class FormFillWorkflowExecutor:
+    """Workflow executor scaffold filled in by later AFF stages."""
+
+    def __init__(self, batch):
+        self.batch = batch
+
+    def run(self) -> None:
+        raise NotImplementedError("application_form_fill workflow is implemented in later AFF stages.")
+
+
+def start_application_form_fill_workflow(batch, *, async_run: bool = True) -> None:
+    """Run the form-fill workflow for ``batch``.
+
+    ``async_run`` is accepted so callers can already pass it, but this
+    scaffold has no background dispatch yet: the workflow always runs
+    synchronously in the caller.
+    """
+    # TODO: hand off to a background worker when ``async_run`` is true.
+    FormFillWorkflowExecutor(batch).run()
diff --git a/tests/test_application_form_fill_template_config.py b/tests/test_application_form_fill_template_config.py
new file mode 100644
index 0000000..b8e8859
--- /dev/null
+++ b/tests/test_application_form_fill_template_config.py
@@ -0,0 +1,106 @@
+import copy
+
+import pytest
+
+from review_agent.application_form_fill.services.template_config import (
+    DEFAULT_CONFIG_PATH,
+    compute_config_hash,
+    load_template_config,
+    validate_template_config,
+)
+
+
+def test_template_config_loads_and_validates_default_yaml(settings):
+    config = load_template_config()
+    errors = validate_template_config(config)
+
+    assert errors == []
+    assert config["version"] == "application_form_templates_v1"
+    registration = next(item for item in config["templates"] if item["code"] == "registration_certificate")
+    assert registration["file_format"] == "docx"
+    assert {field["key"] for field in registration["fields"]} >= {
+        "applicant_name",
+        "product_name",
+        "package_specification",
+        "main_components",
+        "intended_use",
+        "storage_condition_and_validity",
+        "attachments",
+    }
+    assert all(field["target"]["type"] == "table_row" for field in registration["fields"])
+    assert len(compute_config_hash(DEFAULT_CONFIG_PATH)) == 64
+
+
+def test_template_config_reports_missing_source_dir():
+    config = load_template_config()
+    config["source_dir"] = "docs/not-exists"
+
+    errors = validate_template_config(config)
+
+    assert any("source_dir 不存在" in error for error in errors)
+
+
+def test_template_config_reports_duplicate_code():
+    config = load_template_config()
+    duplicate = copy.deepcopy(config["templates"][0])
+    config["templates"].append(duplicate)
+
+    errors = validate_template_config(config)
+
+    assert any("模板 code 重复" in error for error in errors)
+
+
+def test_template_config_reports_missing_source_file():
+    config = load_template_config()
+    config["templates"][0]["source_file"] = "missing.docx"
+
+    errors = validate_template_config(config)
+
+    assert any("source_file 不存在" in error for error in errors)
+
+
+def test_template_config_reports_unsupported_target_type():
+    config = load_template_config()
+    config["templates"][0]["fields"][0]["target"]["type"] = "content_control"
+
+    errors = validate_template_config(config)
+
+    assert any("target.type 不支持" in error for error in errors)
+
+
+def test_template_config_reports_non_mapping_target():
+    config = load_template_config()
+    config["templates"][0]["fields"][0]["target"] = "table_row"
+
+    errors = validate_template_config(config)
+
+    assert any("target.type 不支持" in error for error in errors)
+
+
+def test_template_config_loads_custom_path(tmp_path):
+    config_path = tmp_path / "templates.yaml"
+    config_path.write_text(
+        """
+version: custom
+source_dir: .
+templates:
+  - code: custom_template
+    name: Custom
+    source_file: source.docx
+    output_label: Custom
+    file_format: docx
+    fields:
+      - key: product_name
+        label: 产品名称
+        target:
+          type: table_row
+          row_label: 产品名称
+""".strip(),
+        encoding="utf-8",
+    )
+    (tmp_path / "source.docx").write_bytes(b"docx")
+
+    config = load_template_config(config_path)
+
+    assert validate_template_config(config, base_dir=tmp_path) == []
+    assert compute_config_hash(config_path)