Files
DEMO-AGENT/review_agent/regulatory_info_package/services/docx_document.py

71 lines
2.5 KiB
Python

from __future__ import annotations
from pathlib import Path
from docx import Document
from docx.enum.text import WD_COLOR_INDEX
from docx.shared import RGBColor
from review_agent.regulatory_info_package.schemas import MergedField
def write_docx_from_template(
    source_path: str | Path,
    output_path: str | Path,
    merged_fields: dict[str, MergedField],
) -> tuple[int, int, int]:
    """Fill a DOCX template with merged field values and append a field summary.

    Every body paragraph that contains a ``{{key}}`` placeholder (one
    placeholder per key of *merged_fields*) has it replaced by that field's
    ``value``. A page break, a bold "预生成字段" heading and a four-column
    table (label, value, source, needs-review flag) listing every field are
    then appended, and the document is saved to *output_path*.

    Args:
        source_path: Template document. If the path does not exist, a blank
            document is used instead.
        output_path: Destination file; missing parent directories are created.
        merged_fields: Mapping of placeholder key to the field to substitute.

    Returns:
        ``(highlight_count, missing_count, llm_only_count)``. A field is
        tallied once for every paragraph in which its placeholder was replaced
        and once more for its summary-table row. ``highlight_count`` counts
        tallies whose ``highlight_reason`` is not ``"none"``; the other two
        count the ``"missing"`` and ``"llm_only"`` reasons.
    """
    source = Path(source_path)
    output = Path(output_path)
    output.parent.mkdir(parents=True, exist_ok=True)

    # A missing template is tolerated: the summary table is still produced
    # on top of an empty document.
    document = Document(source) if source.exists() else Document()

    replacements = {f"{{{{{key}}}}}": field for key, field in merged_fields.items()}
    counts = {"highlight": 0, "missing": 0, "llm_only": 0}

    # Only body-level paragraphs are scanned: python-docx does not include
    # table-cell, header or footer paragraphs in ``document.paragraphs``, so
    # placeholders located there are left untouched.
    for paragraph in document.paragraphs:
        text = paragraph.text
        matched = []
        for placeholder, field in replacements.items():
            if placeholder in text:
                text = text.replace(placeholder, field.value)
                matched.append(field)
                _tally_highlight(field, counts)
        if matched:
            # Rewrite the paragraph once, styled by the most severe field.
            # Rewriting per placeholder let a later "none" field wipe the
            # highlight an earlier field had just applied.
            _replace_paragraph_text(paragraph, text, _dominant_field(matched))

    document.add_page_break()
    heading = document.add_paragraph()
    heading.add_run("预生成字段").bold = True

    table = document.add_table(rows=1, cols=4)
    # The value column previously had an empty header.
    for cell, title in zip(table.rows[0].cells, ("字段", "值", "来源", "待确认")):
        cell.text = title

    for field in merged_fields.values():
        cells = table.add_row().cells
        cells[0].text = field.label
        cells[1].text = field.value
        cells[2].text = field.source
        # Both branches used to be "", so the review flag was never shown.
        cells[3].text = "是" if field.needs_review else "否"
        # NOTE(review): fields are tallied here as well as in the paragraph
        # loop above, so a field used in the template is counted more than
        # once. Kept as-is; confirm this is what callers expect.
        _tally_highlight(field, counts)

    document.save(output)
    return counts["highlight"], counts["missing"], counts["llm_only"]


def _tally_highlight(field: MergedField, counts: dict[str, int]) -> None:
    """Add one field's ``highlight_reason`` to the running *counts*."""
    reason = field.highlight_reason
    if reason != "none":
        counts["highlight"] += 1
    if reason == "missing":
        counts["missing"] += 1
    if reason == "llm_only":
        counts["llm_only"] += 1


def _dominant_field(fields: list[MergedField]) -> MergedField:
    """Return the field whose highlight should style a multi-placeholder paragraph.

    A ``"conflict"`` field wins (it adds red text on top of the highlight),
    then any other highlighted field, otherwise the first field.
    """
    for field in fields:
        if field.highlight_reason == "conflict":
            return field
    for field in fields:
        if field.highlight_reason != "none":
            return field
    return fields[0]
def _replace_paragraph_text(paragraph, text: str, field: MergedField) -> None:
for run in paragraph.runs:
run.text = ""
run = paragraph.add_run(text)
if field.highlight_reason != "none":
run.font.highlight_color = WD_COLOR_INDEX.YELLOW
if field.highlight_reason == "conflict":
run.font.color.rgb = RGBColor(255, 0, 0)