fix(application-form-fill): 优先路由填表提示并支持rar预览
This commit is contained in:
@@ -2,16 +2,18 @@ from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import logging
|
||||
from tempfile import TemporaryDirectory
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from review_agent.models import FileAttachment
|
||||
from review_agent.file_summary.services.archive import ARCHIVE_EXTENSIONS, extract_archive
|
||||
|
||||
|
||||
# Extensions of text-like formats.
TEXT_EXTENSIONS = {"txt", "md", "csv", "json", "log"}

# Every extension the reader accepts: text-like formats, office/PDF documents,
# and archive containers (ARCHIVE_EXTENSIONS comes from the archive service).
# This is the single effective definition; an earlier assignment without
# ARCHIVE_EXTENSIONS was immediately overwritten and has been dropped.
SUPPORTED_EXTENSIONS = TEXT_EXTENSIONS | {"pdf", "docx", "xlsx", "pptx"} | ARCHIVE_EXTENSIONS

# Preview size limits. NOTE(review): the code that applies these limits is
# outside this excerpt -- confirm exact semantics against the readers.
MAX_PREVIEW_CHARS = 3000
MAX_ROWS_PER_SHEET = 20
|
||||
|
||||
@@ -72,6 +74,8 @@ def read_attachment_details(attachment: FileAttachment) -> AttachmentReadResult:
|
||||
sections = _read_pptx(file_path)
|
||||
elif file_type == "csv":
|
||||
sections = _read_csv(file_path)
|
||||
elif file_type in ARCHIVE_EXTENSIONS:
|
||||
sections = _read_archive(file_path)
|
||||
else:
|
||||
sections = _read_text(file_path)
|
||||
except Exception as exc:
|
||||
@@ -208,6 +212,44 @@ def _read_pptx(path: Path) -> list[dict[str, object]]:
|
||||
return sections
|
||||
|
||||
|
||||
def _read_archive(path: Path) -> list[dict[str, object]]:
    """Build preview sections for the files contained in an archive.

    The archive is extracted into a temporary directory that is removed when
    the function returns, so every member is read while the directory still
    exists.

    Args:
        path: Location of the archive file on disk.

    Returns:
        A list of section dicts:
        * a single ``"archive"`` section when extraction yields nothing;
        * a placeholder ``"file"`` section for each member whose extension is
          unsupported or is itself an archive extension (nested archives are
          not expanded);
        * for every other member, the sections produced by
          ``_read_supported_file``, each copied and renamed to
          ``"<member file name> / <original section name>"``.
    """
    collected: list[dict[str, object]] = []

    with TemporaryDirectory(prefix="attachment-reader-") as workdir:
        members = extract_archive(path, Path(workdir))
        if not members:
            return [{"type": "archive", "name": path.name, "text": "压缩包未解出任何可读取文件。"}]

        for member in members:
            ext = member.suffix.lower().lstrip(".")
            previewable = ext in SUPPORTED_EXTENSIONS and ext not in ARCHIVE_EXTENSIONS

            if previewable:
                for part in _read_supported_file(member, ext):
                    # Copy before relabelling so the reader's own dict is untouched.
                    labelled = dict(part)
                    labelled["name"] = f"{member.name} / {labelled.get('name', member.name)}"
                    collected.append(labelled)
            else:
                placeholder = {
                    "type": "file",
                    "name": member.name,
                    "text": f"暂不支持预览压缩包内的 .{ext or 'unknown'} 文件。",
                }
                collected.append(placeholder)

    return collected
|
||||
|
||||
|
||||
def _read_supported_file(path: Path, file_type: str) -> list[dict[str, object]]:
    """Read one file with the reader that matches its type.

    Args:
        path: Location of the file to read.
        file_type: Lower-case extension without the leading dot.

    Returns:
        The sections produced by the matching reader. Any type other than
        ``pdf``, ``docx``, ``xlsx``, ``pptx`` or ``csv`` is handed to the
        plain-text reader.
    """
    readers = {
        "pdf": _read_pdf,
        "docx": _read_docx,
        "xlsx": _read_xlsx,
        "pptx": _read_pptx,
        "csv": _read_csv,
    }
    reader = readers.get(file_type, _read_text)
    return reader(path)
|
||||
|
||||
|
||||
def _build_preview(sections: list[dict[str, object]]) -> str:
|
||||
parts: list[str] = []
|
||||
for section in sections:
|
||||
|
||||
Reference in New Issue
Block a user