feat(application-form-fill): 生成填表 Word 和追溯清单

This commit is contained in:
2026-06-07 18:33:59 +08:00
parent a48f778e09
commit f35a3ba9b4
4 changed files with 462 additions and 0 deletions

View File

@@ -0,0 +1,145 @@
from __future__ import annotations
import json
from dataclasses import asdict
from pathlib import Path
from typing import Any
from openpyxl import Workbook
from review_agent.application_form_fill.constants import WORKFLOW_TYPE
from review_agent.application_form_fill.schemas import MergedField, TemplateSpec
from review_agent.application_form_fill.storage import create_artifact_for_file, ensure_batch_subdir
from review_agent.models import ApplicationFormFillArtifact, ApplicationFormFillBatch, ExportedSummaryFile
def build_traceability_workbook(
    batch: ApplicationFormFillBatch,
    merged_fields: dict[str, MergedField],
    conflicts: list[dict[str, Any]],
    specs: list[TemplateSpec],
    generation_results: list[dict[str, Any]] | None = None,
    *,
    low_confidence_threshold: float = 0.6,
) -> Workbook:
    """Build the in-memory traceability workbook for a form-fill batch.

    The workbook has four sheets: one row per merged field, one row per
    conflicting value, the fields whose confidence is below the threshold,
    and the per-template generation status.

    Args:
        batch: Currently unused by this function; kept so the signature
            stays compatible with existing callers.
        merged_fields: Merged field objects keyed by field key.
        conflicts: Conflict records; each may carry ``field_label``,
            ``selected_value``, ``handling`` and a ``conflict_values`` list
            of dicts with ``value`` / ``source_file``.
        specs: Template specs; ``spec.fields`` entries are dicts with a
            ``key`` and ``spec.output_label`` names the template.
        generation_results: Optional per-template result dicts. When empty
            or ``None`` a placeholder row is written for every spec.
        low_confidence_threshold: Fields with ``confidence`` strictly below
            this value are listed on the low-confidence sheet.

    Returns:
        The populated, unsaved ``Workbook``.
    """
    workbook = Workbook()

    # The same field key can be used by several templates. Collect every
    # distinct template label per key instead of letting the last spec win,
    # so the template column names all templates a value was written into.
    template_labels: dict[Any, list[str]] = {}
    for spec in specs:
        label = str(spec.output_label or "")
        for field_config in spec.fields:
            labels = template_labels.setdefault(field_config.get("key"), [])
            if label and label not in labels:
                labels.append(label)

    field_sheet = workbook.active
    field_sheet.title = "字段追溯"
    field_sheet.append(["模板", "字段", "填入值", "来源文件", "证据", "冲突状态"])
    for key, field in merged_fields.items():
        field_sheet.append(
            [
                "、".join(template_labels.get(key, [])),
                field.label,
                field.value,
                field.source_file,
                field.evidence,
                "冲突" if field.has_conflict else "一致",
            ]
        )

    conflict_sheet = workbook.create_sheet("冲突字段")
    conflict_sheet.append(["字段", "采用值", "冲突值", "冲突来源", "处理方式"])
    for conflict in conflicts:
        field_label = conflict.get("field_label", "")
        selected_value = conflict.get("selected_value", "")
        handling = conflict.get("handling", "")
        conflict_values = conflict.get("conflict_values") or []
        if not conflict_values:
            # Still record the conflict even when no competing values were kept.
            conflict_sheet.append([field_label, selected_value, "", "", handling])
            continue
        for value in conflict_values:
            conflict_sheet.append(
                [
                    field_label,
                    selected_value,
                    value.get("value", ""),
                    value.get("source_file", ""),
                    handling,
                ]
            )

    low_confidence_sheet = workbook.create_sheet("低置信度条目")
    low_confidence_sheet.append(["字段", "填入值", "置信度", "来源文件"])
    for field in merged_fields.values():
        if field.confidence < low_confidence_threshold:
            low_confidence_sheet.append(
                [field.label, field.value, field.confidence, field.source_file]
            )

    result_sheet = workbook.create_sheet("生成结果")
    result_sheet.append(["模板", "Word状态", "PDF状态", "错误说明"])
    if generation_results:
        for result in generation_results:
            result_sheet.append(
                [
                    result.get("template_label", ""),
                    result.get("word_status", ""),
                    result.get("pdf_status", "待增强"),
                    result.get("error_message", ""),
                ]
            )
    else:
        # Nothing generated yet: list every template as pending.
        for spec in specs:
            result_sheet.append([spec.output_label, "待生成", "待增强", ""])

    return workbook
def save_traceability_exports(
    batch: ApplicationFormFillBatch,
    merged_fields: dict[str, MergedField],
    conflicts: list[dict[str, Any]],
    specs: list[TemplateSpec],
    generation_results: list[dict[str, Any]] | None = None,
) -> list[ExportedSummaryFile]:
    """Write the traceability workbook and merged-fields JSON for a batch.

    Both files are saved into the batch's ``exports`` sub-directory. Each
    file is registered through ``create_artifact_for_file`` and then as an
    ``ExportedSummaryFile`` row in the ``traceability`` category.

    Returns:
        ``[excel_export, json_export]`` - the two created export records.
    """

    def _register_export(path: Path, export_type) -> ExportedSummaryFile:
        # Shared shape of the export row; only the file and its type differ.
        return ExportedSummaryFile.objects.create(
            batch=batch.source_summary_batch,
            workflow_type=WORKFLOW_TYPE,
            workflow_batch_id=batch.pk,
            export_category="traceability",
            export_type=export_type,
            file_name=path.name,
            storage_path=str(path),
        )

    export_dir = ensure_batch_subdir(batch, "exports")

    # Excel traceability list.
    book = build_traceability_workbook(batch, merged_fields, conflicts, specs, generation_results)
    xlsx_path = export_dir / f"{batch.batch_no}-字段来源追溯清单.xlsx"
    book.save(xlsx_path)
    create_artifact_for_file(
        batch,
        path=xlsx_path,
        artifact_type=ApplicationFormFillArtifact.ArtifactType.TRACEABILITY,
        file_format=ApplicationFormFillArtifact.FileFormat.EXCEL,
        name="字段来源追溯清单",
        metadata={"conflict_count": len(conflicts)},
        created_by_node="trace_export",
    )
    xlsx_record = _register_export(xlsx_path, ExportedSummaryFile.ExportType.EXCEL)

    # JSON dump of the merged fields, conflicts and generation results.
    dump_path = export_dir / "merged_fields.json"
    serialized_fields = {name: asdict(merged) for name, merged in merged_fields.items()}
    document = {
        "batch_no": batch.batch_no,
        "merged_fields": serialized_fields,
        "conflicts": conflicts,
        "generation_results": generation_results or [],
    }
    dump_path.write_text(
        json.dumps(document, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    create_artifact_for_file(
        batch,
        path=dump_path,
        artifact_type=ApplicationFormFillArtifact.ArtifactType.MERGED_FIELDS,
        file_format=ApplicationFormFillArtifact.FileFormat.JSON,
        name="merged_fields",
        metadata={"conflict_count": len(conflicts)},
        created_by_node="trace_export",
    )
    json_record = _register_export(dump_path, ExportedSummaryFile.ExportType.JSON)

    return [xlsx_record, json_record]

View File

@@ -0,0 +1,111 @@
from __future__ import annotations
import re
from pathlib import Path
from docx import Document
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import RGBColor
from review_agent.application_form_fill.constants import WORKFLOW_TYPE
from review_agent.application_form_fill.schemas import MergedField, TemplateSpec
from review_agent.application_form_fill.storage import create_artifact_for_file, ensure_batch_subdir
from review_agent.models import ApplicationFormFillArtifact, ApplicationFormFillBatch, ExportedSummaryFile
def fill_template(
    template_path: str | Path,
    output_path: str | Path,
    spec: TemplateSpec,
    fields: dict[str, MergedField],
    conflicts: list[dict] | None = None,
) -> Path:
    """Fill a Word template with merged field values and save the result.

    Only field configs whose ``target`` has ``type == "table_row"`` are
    handled; configs with no matching entry in ``fields`` are skipped. A
    value is written as conflicting when its key appears as ``field_key``
    in ``conflicts`` or the field's own ``has_conflict`` flag is set.

    Returns:
        The output path as a ``Path``; parent directories are created.
    """
    document = Document(str(template_path))
    keys_in_conflict = {entry.get("field_key") for entry in (conflicts or [])}

    for config in spec.fields:
        target = config.get("target") or {}
        if target.get("type") == "table_row":
            key = config.get("key")
            merged = fields.get(key)
            if merged:
                # Prefer the explicit row label, then fall back to the field label.
                row_label = str(target.get("row_label") or config.get("label") or "")
                cell_value = merged.value
                is_conflict = key in keys_in_conflict or merged.has_conflict
                fill_table_row(document, row_label, cell_value, conflict=is_conflict)

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    document.save(str(destination))
    return destination
def fill_table_row(document: Document, row_label: str, value: str, *, conflict: bool = False) -> bool:
    """Write ``value`` into the second cell of the first row labelled ``row_label``.

    Every table in the document is scanned; a row matches when it has at
    least two cells and its first cell's text equals ``row_label`` after
    both are passed through ``_normalize_label``. The second cell's text is
    reset and ``value`` is added as a new run. With ``conflict=True`` the
    run is coloured red and the cell is shaded yellow.

    Returns:
        ``True`` once a row has been filled, ``False`` if none matched.
    """
    wanted = _normalize_label(row_label)
    for table in document.tables:
        for row in table.rows:
            cells = row.cells
            if len(cells) >= 2 and _normalize_label(cells[0].text) == wanted:
                value_cell = cells[1]
                value_cell.text = ""
                run = value_cell.paragraphs[0].add_run(value)
                if conflict:
                    run.font.color.rgb = RGBColor(0xFF, 0x00, 0x00)
                    apply_cell_shading(value_cell, "FFFF00")
                # Only the first matching row is filled.
                return True
    return False
def apply_cell_shading(cell, fill: str) -> None:
    """Set the background fill colour of a table cell.

    An existing ``w:shd`` element in the cell properties is reused;
    otherwise one is created and placed at its schema position inside
    ``w:tcPr``.

    Args:
        cell: A python-docx table cell (accessed through its ``_tc`` element).
        fill: Fill colour as a hex string such as ``"FFFF00"``.
    """
    tc_pr = cell._tc.get_or_add_tcPr()
    shading = tc_pr.find(qn("w:shd"))
    if shading is None:
        shading = OxmlElement("w:shd")
        shading.set(qn("w:color"), "auto")
        # w:tcPr children are an ordered sequence in the WordprocessingML
        # schema: w:shd must come before these siblings, so insert it ahead
        # of the first one present rather than always appending at the end.
        later_tags = {
            qn(tag)
            for tag in (
                "w:noWrap",
                "w:tcMar",
                "w:textDirection",
                "w:tcFitText",
                "w:vAlign",
                "w:hideMark",
                "w:headers",
                "w:cellIns",
                "w:cellDel",
                "w:cellMerge",
                "w:tcPrChange",
            )
        }
        successor = next((child for child in tc_pr if child.tag in later_tags), None)
        if successor is None:
            tc_pr.append(shading)
        else:
            successor.addprevious(shading)
    if shading.get(qn("w:val")) is None:
        # w:val is a required attribute of w:shd; "clear" means no pattern,
        # so only the fill colour shows.
        shading.set(qn("w:val"), "clear")
    shading.set(qn("w:fill"), fill)
def create_word_export(
    batch: ApplicationFormFillBatch,
    spec: TemplateSpec,
    template_path: str | Path,
    fields: dict[str, MergedField],
    conflicts: list[dict] | None = None,
) -> ExportedSummaryFile:
    """Fill one template for a batch and register the resulting Word file.

    The file is written to the batch's ``filled`` sub-directory and named
    from the batch number, the product name and the template's output
    label. The product name comes from ``batch.product_name``, then the
    ``product_name`` merged field, then a fixed placeholder.

    Returns:
        The ``ExportedSummaryFile`` row created for the filled document.
    """
    filled_dir = ensure_batch_subdir(batch, "filled")

    raw_product = batch.product_name
    if not raw_product:
        # Fall back to the merged field; an empty stand-in covers a missing key.
        stand_in = MergedField("product_name", "产品名称", "", "", "", 0)
        raw_product = fields.get("product_name", stand_in).value
    product_name = _safe_filename(raw_product or "未识别产品")

    output_path = target_path = filled_dir / (
        f"{batch.batch_no}-{product_name}-{_safe_filename(spec.output_label)}.docx"
    )
    fill_template(template_path, target_path, spec, fields, conflicts)

    conflict_total = len(conflicts or [])
    create_artifact_for_file(
        batch,
        path=output_path,
        artifact_type=ApplicationFormFillArtifact.ArtifactType.FILLED_TEMPLATE,
        file_format=ApplicationFormFillArtifact.FileFormat.DOCX,
        name=spec.output_label,
        metadata={"template_code": spec.code, "conflict_count": conflict_total},
        created_by_node="word_fill",
    )

    export_record = ExportedSummaryFile.objects.create(
        batch=batch.source_summary_batch,
        workflow_type=WORKFLOW_TYPE,
        workflow_batch_id=batch.pk,
        export_category="filled_template",
        export_type=ExportedSummaryFile.ExportType.WORD,
        file_name=output_path.name,
        storage_path=str(output_path),
    )
    return export_record
def _normalize_label(value: str) -> str:
    """Return ``value`` with all whitespace and colons removed.

    Both the ASCII colon and the full-width colon (U+FF1A) are stripped, so
    a template label written with either form compares equal to the bare
    configured label. ``None`` or an empty value yields ``""``.
    """
    # \uFF1A is the full-width colon; written as an escape so the pattern
    # does not depend on the source file's encoding.
    return re.sub(r"[\s:\uFF1A]+", "", value or "")
def _safe_filename(value: str) -> str:
    """Turn arbitrary text into a file-name-safe fragment.

    Runs of characters that are invalid in file names (``\\ / : * ? " < > |``
    and ASCII control characters such as newlines or tabs) are replaced by a
    single underscore. The result is trimmed, limited to 80 characters, and
    falls back to ``"output"`` when nothing is left.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", (value or "").strip())
    # Truncation can expose interior whitespace at the end, so strip again.
    return cleaned[:80].rstrip() or "output"