feat(application-form-fill): 新增模板配置骨架

This commit is contained in:
2026-06-07 18:23:06 +08:00
parent 74cbe349a8
commit e48d44f832
10 changed files with 479 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""Application form auto-fill workflow package."""

View File

@@ -0,0 +1,31 @@
# Identifier of this workflow.
WORKFLOW_TYPE = "application_form_fill"

# Template codes; they mirror the ``code`` values used in the bundled
# template configuration.
TEMPLATE_REGISTRATION_CERTIFICATE = "registration_certificate"
TEMPLATE_CHANGE_REGISTRATION = "change_registration"
TEMPLATE_ESSENTIAL_PRINCIPLES = "essential_principles"

# Output formats produced when the caller does not choose any.
DEFAULT_OUTPUT_TYPES = ["word", "excel", "json"]

# Phrases that trigger the form-fill workflow.
# NOTE(review): how these are matched (substring, exact, ...) is decided by
# the consumer, which is not part of this module.
FORM_FILL_TRIGGER_KEYWORDS = [
    "填注册证",
    "对应的表格",
    "生成申报模板",
    "安全和性能基本原则清单",
    "填到申报模板",
    "自动填表",
    "生成表格",
]

# (node code, display label) of every step that shares the "form_fill" tag,
# in execution order.
_FORM_FILL_STEPS = (
    ("prepare", "准备资料"),
    ("template_select", "选择模板"),
    ("template_copy", "复制模板"),
    ("field_extract", "抽取字段"),
    ("conflict_merge", "冲突归并"),
    ("word_fill", "填写 Word"),
    ("pdf_convert", "转换 PDF"),
    ("trace_export", "追溯清单"),
    ("output_export", "输出下载"),
    ("notify", "飞书通知"),
)

# Ordered (node code, display label, tag) triples.
# NOTE(review): the third element looks like a stage/group name -- confirm
# against the code that consumes these definitions.
FORM_FILL_NODE_DEFINITIONS = [
    *((code, label, "form_fill") for code, label in _FORM_FILL_STEPS),
    ("completed", "完成", "completed"),
]

View File

@@ -0,0 +1,58 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
from review_agent.models import ApplicationFormFillBatch, ExportedSummaryFile, FileSummaryBatch, RegulatoryReviewBatch
@dataclass(frozen=True)
class TemplateSpec:
    """Immutable description of one fillable template.

    The attribute names match the keys of a template entry in the template
    configuration (``code``, ``name``, ``source_file``, ...).
    """

    # Identity of the template.
    code: str
    name: str

    # File the template is read from and the label used for its output.
    source_file: str
    output_label: str

    # Conditions under which the template is selected.
    applies_when: dict[str, Any]

    # Declared format of ``source_file``.
    file_format: str

    # Field definitions to fill in; each entry is a plain mapping.
    fields: list[dict[str, Any]]

    # Optional checklist entries; every instance gets its own empty list.
    checklist_items: list[dict[str, Any]] = field(default_factory=list)
@dataclass(frozen=True)
class ExtractedField:
    """Immutable record of a single extracted field value.

    All attributes are required; there are no defaults.
    """

    # Which field this is and the value found for it.
    key: str
    label: str
    value: str

    # Provenance of the value.
    source_file: str
    source_role: str
    evidence: str

    # Name of the extractor that produced the value, and its confidence.
    # NOTE(review): the confidence scale is not defined here -- confirm.
    extractor: str
    confidence: float
@dataclass(frozen=True)
class MergedField:
    """Immutable record of the value chosen for a field, plus conflict info.

    ``has_conflict`` defaults to ``False`` and ``conflict_values`` to a
    fresh empty list per instance.
    """

    # Which field this is and the value chosen for it.
    key: str
    label: str
    value: str

    # Provenance of the chosen value.
    source_file: str
    evidence: str
    confidence: float

    # Conflict bookkeeping.
    has_conflict: bool = False
    conflict_values: list[dict[str, Any]] = field(default_factory=list)
@dataclass
class FormFillContext:
    """Mutable state bag for one form-fill run.

    Only the three batch references are required; every other attribute
    starts as its own empty ``dict`` or ``list``.
    """

    # Batch being processed and the batches it draws from.
    batch: ApplicationFormFillBatch
    source_summary_batch: FileSummaryBatch
    source_regulatory_batch: RegulatoryReviewBatch | None

    # Template configuration and the templates chosen from it.
    template_config: dict[str, Any] = field(default_factory=dict)
    selected_templates: list[TemplateSpec] = field(default_factory=list)

    # Intermediate extraction data.
    document_texts: dict[str, str] = field(default_factory=dict)
    regex_results: dict[str, Any] = field(default_factory=dict)
    llm_results: dict[str, Any] = field(default_factory=dict)

    # Merge results.
    merged_fields: dict[str, MergedField] = field(default_factory=dict)
    checklist_items: dict[str, Any] = field(default_factory=dict)
    conflicts: list[dict[str, Any]] = field(default_factory=list)

    # Files produced by the run.
    exports: list[ExportedSummaryFile] = field(default_factory=list)

View File

@@ -0,0 +1 @@
"""Application form auto-fill services."""

View File

@@ -0,0 +1,96 @@
from __future__ import annotations
import hashlib
from pathlib import Path
from typing import Any
import yaml
from django.conf import settings
# Template configuration file used when callers do not pass a path; it is
# resolved relative to the Django ``BASE_DIR`` setting.
DEFAULT_CONFIG_PATH = Path(settings.BASE_DIR).joinpath(
    "review_agent",
    "application_form_fill",
    "templates",
    "application_form_templates_v1.yaml",
)

# ``target.type`` values accepted for a template field.
SUPPORTED_TARGET_TYPES = {"table_row", "placeholder"}

# ``file_format`` values accepted for a template.
SUPPORTED_FILE_FORMATS = {"doc", "docx"}
def load_template_config(path: str | Path | None = None) -> dict[str, Any]:
    """Parse the template configuration YAML and return its content.

    Args:
        path: File to read. Any falsy value (``None``, ``""``) selects
            ``DEFAULT_CONFIG_PATH``.

    Returns:
        The document as parsed by ``yaml.safe_load``, or an empty dict when
        the parsed document is empty or otherwise falsy.

    NOTE(review): the parsed value is returned as-is; nothing here checks
    that the top level of the document is a mapping.
    """
    config_path = DEFAULT_CONFIG_PATH if not path else Path(path)
    with config_path.open("r", encoding="utf-8") as stream:
        loaded = yaml.safe_load(stream)
    if not loaded:
        return {}
    return loaded
def compute_config_hash(path: str | Path | None = None) -> str:
    """Return the SHA-256 hex digest of the configuration file's bytes.

    Args:
        path: File to hash. Any falsy value (``None``, ``""``) selects
            ``DEFAULT_CONFIG_PATH``.

    Returns:
        The lowercase hexadecimal digest of the raw file content.
    """
    block_size = 1024 * 1024
    config_path = DEFAULT_CONFIG_PATH if not path else Path(path)
    hasher = hashlib.sha256()
    with config_path.open("rb") as stream:
        # Read in fixed-size pieces so the whole file is never held in memory.
        while True:
            piece = stream.read(block_size)
            if piece == b"":
                break
            hasher.update(piece)
    return hasher.hexdigest()
def validate_template_config(config: dict[str, Any], *, base_dir: str | Path | None = None) -> list[str]:
    """Validate a parsed template configuration.

    The function never raises for malformed configuration content; every
    problem found is reported as one message in the returned list.

    Args:
        config: Parsed configuration, expected to be a mapping with the keys
            ``version``, ``source_dir`` and ``templates``.
        base_dir: Directory that ``source_dir`` is resolved against. Any
            falsy value selects the Django ``BASE_DIR`` setting.

    Returns:
        Error messages in the order the problems were found; an empty list
        means the configuration passed every check.
    """
    # A document whose top level is a list or scalar has no ``.get``; report
    # it instead of crashing.
    if not isinstance(config, dict):
        return ["模板配置必须是对象。"]

    errors: list[str] = []
    root = Path(base_dir) if base_dir else Path(settings.BASE_DIR)

    if not config.get("version"):
        errors.append("模板配置缺少 version。")

    source_dir_value = config.get("source_dir")
    source_dir = None
    source_dir_exists = False
    if not source_dir_value:
        errors.append("模板配置缺少 source_dir。")
    else:
        # ``str()`` keeps a non-string value (e.g. a number) from raising
        # TypeError in the path join; it is then reported as missing.
        source_dir = root / str(source_dir_value)
        # Probed once here rather than once per template below.
        source_dir_exists = source_dir.exists()
        if not source_dir_exists:
            errors.append(f"模板 source_dir 不存在:{source_dir_value}")

    templates = config.get("templates")
    if not isinstance(templates, list) or not templates:
        errors.append("模板配置必须包含非空 templates 列表。")
        return errors

    seen_codes: set[str] = set()
    for index, template in enumerate(templates, start=1):
        if not isinstance(template, dict):
            errors.append(f"第 {index} 个模板配置必须是对象。")
            continue

        code = str(template.get("code") or "").strip()
        if not code:
            errors.append(f"第 {index} 个模板缺少 code。")
        elif code in seen_codes:
            errors.append(f"模板 code 重复:{code}")
        else:
            seen_codes.add(code)

        # Templates without a code are identified by their 1-based position.
        ident = code or index

        file_format = str(template.get("file_format") or "").strip().lower()
        if file_format not in SUPPORTED_FILE_FORMATS:
            errors.append(f"模板 {ident} 的 file_format 不支持:{file_format}")

        source_file = str(template.get("source_file") or "").strip()
        if not source_file:
            errors.append(f"模板 {ident} 缺少 source_file。")
        elif source_dir_exists and not (source_dir / source_file).exists():
            # Only checked when source_dir itself exists, so a missing
            # directory is not reported again for every template.
            errors.append(f"模板 {ident} 的 source_file 不存在:{source_file}")

        fields = template.get("fields") or []
        if not isinstance(fields, list):
            errors.append(f"模板 {ident} 的 fields 必须是列表。")
            continue

        for field_index, field in enumerate(fields, start=1):
            target = field.get("target") if isinstance(field, dict) else None
            # A missing or non-mapping target is treated as having no type,
            # so it is reported below rather than raising AttributeError.
            if isinstance(target, dict):
                target_type = str(target.get("type") or "").strip()
            else:
                target_type = ""
            if target_type not in SUPPORTED_TARGET_TYPES:
                errors.append(
                    f"模板 {ident} 第 {field_index} 个字段 target.type 不支持:{target_type}"
                )
    return errors
def template_specs(config: dict[str, Any]) -> list[dict[str, Any]]:
    """Return a new list holding the entries under ``config["templates"]``.

    A missing or falsy ``templates`` value yields an empty list. The entries
    themselves are not copied, only the containing list.
    """
    entries = config.get("templates")
    if not entries:
        return []
    return list(entries)

View File

@@ -0,0 +1,55 @@
from __future__ import annotations
import hashlib
from pathlib import Path
from django.conf import settings
from review_agent.models import ApplicationFormFillArtifact, ApplicationFormFillBatch
def build_batch_work_dir(batch: ApplicationFormFillBatch | None = None, *, batch_no: str = "") -> Path:
    """Compute the working directory of a batch under ``MEDIA_ROOT``.

    Args:
        batch: When truthy, the path is built from its ``user_id``,
            ``conversation_id`` and ``batch_no`` attributes.
        batch_no: Used only when ``batch`` is not given; the path is then
            ``<MEDIA_ROOT>/application_form_fill/<batch_no>``.

    Returns:
        The computed path. Nothing is created on disk.
    """
    base = Path(settings.MEDIA_ROOT) / "application_form_fill"
    if not batch:
        return base / batch_no
    owner_dir = base / str(batch.user_id)
    conversation_dir = owner_dir / str(batch.conversation_id)
    return conversation_dir / batch.batch_no
def compute_file_sha256(path: str | Path) -> str:
    """Return the SHA-256 hex digest of the bytes stored at ``path``.

    The file is read in 1 MiB pieces, so it is never loaded whole.
    """
    block_size = 1024 * 1024
    hasher = hashlib.sha256()
    with Path(path).open("rb") as stream:
        while True:
            piece = stream.read(block_size)
            if piece == b"":
                break
            hasher.update(piece)
    return hasher.hexdigest()
def ensure_batch_subdir(batch: ApplicationFormFillBatch, name: str) -> Path:
    """Create (if needed) and return a direct child of the batch directory.

    Only the final component of ``name`` is used, so ``"a/b/out"`` yields
    ``<batch dir>/out``.

    Args:
        batch: Batch whose ``work_dir`` is the parent directory; when that
            attribute is empty, ``build_batch_work_dir(batch)`` is used.
        name: Name of the sub-directory.

    Returns:
        Path of the existing or newly created sub-directory.

    Raises:
        ValueError: If ``name`` does not reduce to a real child name.
    """
    child_name = Path(name).name
    # Taking ``.name`` drops leading directories but leaves ".." intact, and
    # "" / "." reduce to ""; those would resolve to the parent directory or
    # to the batch directory itself instead of a child.
    if child_name in ("", ".."):
        raise ValueError(f"invalid batch sub-directory name: {name!r}")
    root = Path(batch.work_dir) if batch.work_dir else build_batch_work_dir(batch)
    target = root / child_name
    target.mkdir(parents=True, exist_ok=True)
    return target
def create_artifact_for_file(
    batch: ApplicationFormFillBatch,
    *,
    path: str | Path,
    artifact_type: str,
    file_format: str,
    name: str = "",
    metadata: dict | None = None,
    created_by_node: str = "",
) -> ApplicationFormFillArtifact:
    """Create and return an artifact record describing the file at ``path``.

    Args:
        batch: Batch the artifact is attached to.
        path: Location of the file; stored as ``storage_path``.
        artifact_type: Stored unchanged on the record.
        file_format: Stored unchanged on the record.
        name: Display name; the file stem is used when empty.
        metadata: Extra data; an empty dict is stored when omitted or empty.
        created_by_node: Stored unchanged on the record.

    Returns:
        The record created through ``ApplicationFormFillArtifact.objects``.
        For a path that does not exist the size is ``0`` and the content
        hash is an empty string.
    """
    file_path = Path(path)

    if file_path.exists():
        size_in_bytes = file_path.stat().st_size
        digest = compute_file_sha256(file_path)
    else:
        size_in_bytes = 0
        digest = ""

    display_name = name if name else file_path.stem
    extra = metadata if metadata else {}

    return ApplicationFormFillArtifact.objects.create(
        batch=batch,
        artifact_type=artifact_type,
        file_format=file_format,
        name=display_name,
        file_name=file_path.name,
        storage_path=str(file_path),
        file_size=size_in_bytes,
        content_hash=digest,
        metadata=extra,
        created_by_node=created_by_node,
    )

View File

@@ -0,0 +1,112 @@
# Template definitions for the application form-fill workflow.
# Each entry under `templates` names a source document inside `source_dir`
# and lists the fields to be filled into it.
version: application_form_templates_v1
source_dir: docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
templates:
  # Registration certificate; every field targets a labelled table row.
  - code: registration_certificate
    name: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式)
    source_file: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx
    output_label: 注册证格式
    applies_when:
      registration_type:
        - 首次注册
        - unknown
    file_format: docx
    fields:
      - key: applicant_name
        label: 注册人名称
        target:
          type: table_row
          row_label: 注册人名称
        source_roles:
          - 申请表
          - 说明书
          - 企业信息
      - key: applicant_address
        label: 注册人住所
        target:
          type: table_row
          row_label: 注册人住所
        source_roles:
          - 申请表
          - 企业信息
      - key: manufacturer_address
        label: 生产地址
        target:
          type: table_row
          row_label: 生产地址
        source_roles:
          - 申请表
          - 质量管理体系文件
      - key: product_name
        label: 产品名称
        target:
          type: table_row
          row_label: 产品名称
        source_roles:
          - 说明书
          - 产品技术要求
          - 注册检验报告
      - key: package_specification
        label: 包装规格
        target:
          type: table_row
          row_label: 包装规格
        source_roles:
          - 说明书
          - 产品技术要求
      - key: main_components
        label: 主要组成成分
        target:
          type: table_row
          row_label: 主要组成成分
        source_roles:
          - 说明书
          - 产品技术要求
      - key: intended_use
        label: 预期用途
        target:
          type: table_row
          row_label: 预期用途
        source_roles:
          - 说明书
          - 临床评价资料
          - 产品技术要求
      - key: storage_condition_and_validity
        label: 产品储存条件及有效期
        target:
          type: table_row
          row_label: 产品储存条件及有效期
        source_roles:
          - 说明书
          - 产品技术要求
          - 稳定性研究资料
      - key: attachments
        label: 附件
        target:
          type: table_row
          row_label: 附件
        source_roles:
          - 注册申报资料
          - 说明书
  # Change-registration / filing document; no fields defined yet.
  - code: change_registration
    name: 中华人民共和国医疗器械变更注册(备案)文件(体外诊断试剂)(格式)
    source_file: 中华人民共和国医疗器械变更注册(备案)文件(体外诊断试剂)(格式).doc
    output_label: 变更注册备案文件
    applies_when:
      registration_type:
        - 变更注册
        - 备案
    file_format: doc
    fields: []
  # Essential-principles checklist; no fields or checklist items defined yet.
  - code: essential_principles
    name: 体外诊断试剂安全和性能基本原则清单
    source_file: 体外诊断试剂安全和性能基本原则清单.doc
    output_label: 安全和性能基本原则清单
    applies_when:
      registration_type:
        - 首次注册
        - 变更注册
        - 备案
        - unknown
    file_format: doc
    fields: []
    checklist_items: []

View File

@@ -0,0 +1,7 @@
from django.http import JsonResponse
from django.views.decorators.http import require_http_methods
@require_http_methods(["GET"])
def health(request):
    """Answer GET requests with a fixed JSON availability payload."""
    payload = {
        "workflow_type": "application_form_fill",
        "status": "available",
    }
    return JsonResponse(payload)

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
from review_agent.application_form_fill.constants import FORM_FILL_NODE_DEFINITIONS, WORKFLOW_TYPE
class FormFillWorkflowExecutor:
    """Placeholder executor for the form-fill workflow.

    It only stores the batch; running it is not implemented yet.
    """

    def __init__(self, batch):
        """Remember the batch this executor is bound to."""
        self.batch = batch

    def run(self) -> None:
        """Always raise ``NotImplementedError``; the workflow is not built yet."""
        message = "application_form_fill workflow is implemented in later AFF stages."
        raise NotImplementedError(message)
def start_application_form_fill_workflow(batch, *, async_run: bool = True) -> None:
    """Build an executor for ``batch`` and run it in the calling thread.

    Args:
        batch: Batch handed to ``FormFillWorkflowExecutor``.
        async_run: Accepted for interface compatibility. It currently has no
            effect: the executor runs synchronously for either value.

    Raises:
        NotImplementedError: Propagated from the executor's ``run``.
    """
    # TODO: dispatch to a background runner when async_run is true, once the
    # executor is implemented.
    FormFillWorkflowExecutor(batch).run()