feat: 支持7z资料包导入
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from pathlib import Path
|
||||
from io import BytesIO
|
||||
import re
|
||||
import tempfile
|
||||
import xml.etree.ElementTree as ET
|
||||
from zipfile import BadZipFile, ZipFile
|
||||
|
||||
@@ -220,6 +221,9 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
|
||||
if extension == ".zip":
|
||||
expanded_files.extend(_extract_zip_entries(uploaded_file))
|
||||
continue
|
||||
if extension == ".7z":
|
||||
expanded_files.extend(_extract_7z_entries(uploaded_file))
|
||||
continue
|
||||
expanded_files.append(
|
||||
{
|
||||
"relative_path": uploaded_file.name,
|
||||
@@ -255,6 +259,39 @@ def _extract_zip_entries(uploaded_file) -> list[dict]:
|
||||
return entries
|
||||
|
||||
|
||||
def _extract_7z_entries(uploaded_file) -> list[dict]:
    """Unpack an uploaded .7z archive and return its supported documents.

    The archive bytes are read into memory, extracted into a temporary
    directory, and every regular file whose extension is .txt, .md, .pdf or
    .docx (compared case-insensitively) is wrapped in a
    ``SimpleUploadedFile``.

    Args:
        uploaded_file: File-like object holding the .7z archive. It is
            consumed with ``read()`` and then rewound with ``seek(0)``.

    Returns:
        A list of dicts, in sorted order of the extracted paths, each with:
        ``relative_path`` - POSIX-style path of the file relative to the
        extraction root; ``uploaded_file`` - a ``SimpleUploadedFile`` named
        after the file's base name and carrying its bytes.

    Raises:
        RuntimeError: If the optional ``py7zr`` package is not installed.
    """
    # py7zr is an optional dependency, so it is imported lazily and a
    # missing install is reported as a RuntimeError with a user-facing hint.
    try:
        import py7zr
    except ImportError as exc:
        raise RuntimeError("处理 .7z 资料包需要安装 py7zr。") from exc

    archive_bytes = uploaded_file.read()
    # Rewind so the upload can still be read again after this call.
    uploaded_file.seek(0)

    supported_extensions = {".txt", ".md", ".pdf", ".docx"}
    entries = []
    with tempfile.TemporaryDirectory() as temp_dir:
        with py7zr.SevenZipFile(BytesIO(archive_bytes), mode="r") as archive:
            archive.extractall(path=temp_dir)

        base_path = Path(temp_dir)
        for file_path in sorted(base_path.rglob("*")):
            # Path.is_file() follows symlinks. The archive is user-supplied,
            # so a symlink entry could point at a file outside the temporary
            # directory; never read through one.
            if file_path.is_symlink() or not file_path.is_file():
                continue
            if file_path.suffix.lower() not in supported_extensions:
                continue
            relative_path = file_path.relative_to(base_path).as_posix()
            # Content must be read before the temporary directory is removed.
            extracted_file = SimpleUploadedFile(
                file_path.name,
                file_path.read_bytes(),
            )
            entries.append(
                {
                    "relative_path": relative_path,
                    "uploaded_file": extracted_file,
                }
            )
    return entries
|
||||
|
||||
|
||||
def _detect_document_role(file_name: str) -> str:
|
||||
normalized = file_name.lower()
|
||||
if "申请表" in file_name:
|
||||
|
||||
Reference in New Issue
Block a user