feat: 支持7z资料包导入

This commit is contained in:
2026-06-04 01:28:28 +08:00
parent 24446658ad
commit 73c6336600
6 changed files with 83 additions and 5 deletions

View File

@@ -5,7 +5,7 @@ from django import forms
from apps.scenarios.services import ScenarioNotFound, get_scenario
from apps.scenarios.services import list_scenarios
SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".zip"}
SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".zip", ".7z"}
class MultipleFileInput(forms.ClearableFileInput):
@@ -52,7 +52,7 @@ class DocumentUploadForm(forms.Form):
return uploaded_file
extension = Path(uploaded_file.name).suffix.lower()
if extension not in SUPPORTED_EXTENSIONS:
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx 和 .zip 文件")
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件")
return uploaded_file
def clean_files(self):
@@ -60,7 +60,7 @@ class DocumentUploadForm(forms.Form):
for uploaded_file in uploaded_files:
extension = Path(uploaded_file.name).suffix.lower()
if extension not in SUPPORTED_EXTENSIONS:
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx 和 .zip 文件")
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件")
return uploaded_files
def clean(self):

View File

@@ -1,6 +1,7 @@
from pathlib import Path
from io import BytesIO
import re
import tempfile
import xml.etree.ElementTree as ET
from zipfile import BadZipFile, ZipFile
@@ -220,6 +221,9 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
if extension == ".zip":
expanded_files.extend(_extract_zip_entries(uploaded_file))
continue
if extension == ".7z":
expanded_files.extend(_extract_7z_entries(uploaded_file))
continue
expanded_files.append(
{
"relative_path": uploaded_file.name,
@@ -255,6 +259,39 @@ def _extract_zip_entries(uploaded_file) -> list[dict]:
return entries
def _extract_7z_entries(uploaded_file) -> list[dict]:
    """Expand an uploaded .7z archive into its supported document entries.

    The archive is unpacked into a temporary directory; every regular file
    whose extension is .txt, .md, .pdf or .docx is copied into an in-memory
    ``SimpleUploadedFile`` so that it outlives the temporary directory.

    Args:
        uploaded_file: File-like upload containing the .7z archive. It must
            support ``read()`` and ``seek()``; it is rewound to the start
            before returning so callers can read it again.

    Returns:
        A list of dicts, one per extracted document, with the keys
        ``"relative_path"`` (POSIX-style path relative to the archive root)
        and ``"uploaded_file"`` (the in-memory copy named after the file's
        base name). Entries follow the sorted order of the extracted paths.

    Raises:
        RuntimeError: If the optional ``py7zr`` dependency is not installed.
    """
    # py7zr is an optional dependency, so it is imported lazily and a missing
    # install is reported with an actionable message.
    try:
        import py7zr
    except ImportError as exc:
        raise RuntimeError("处理 .7z 资料包需要安装 py7zr。") from exc

    supported_extensions = {".txt", ".md", ".pdf", ".docx"}

    # Rewind first: the stream may already have been consumed by an earlier
    # reader, in which case read() would return an empty archive.
    uploaded_file.seek(0)
    archive_bytes = uploaded_file.read()
    uploaded_file.seek(0)

    entries = []
    with tempfile.TemporaryDirectory() as temp_dir:
        with py7zr.SevenZipFile(BytesIO(archive_bytes), mode="r") as archive:
            archive.extractall(path=temp_dir)

        base_path = Path(temp_dir)
        for file_path in sorted(base_path.rglob("*")):
            # Skip symlinks explicitly: is_file() follows links, so a crafted
            # archive member could otherwise expose files outside temp_dir.
            if file_path.is_symlink() or not file_path.is_file():
                continue
            if file_path.suffix.lower() not in supported_extensions:
                continue
            relative_path = file_path.relative_to(base_path).as_posix()
            # Read the bytes now; the temporary directory is deleted when the
            # enclosing ``with`` block exits.
            extracted_file = SimpleUploadedFile(
                file_path.name,
                file_path.read_bytes(),
            )
            entries.append(
                {
                    "relative_path": relative_path,
                    "uploaded_file": extracted_file,
                }
            )
    return entries
def _detect_document_role(file_name: str) -> str:
normalized = file_name.lower()
if "申请表" in file_name:

View File

@@ -76,7 +76,7 @@ def upload(request):
"form": form,
"scenarios": list_scenarios(),
"upload_checks": [
"文件格式支持 PDF、DOCX、MD、TXT 与 ZIP 资料包",
"文件格式支持 PDF、DOCX、MD、TXT、ZIP 与 7Z 资料包",
"业务资料与法规依据资料需分开归属",
"支持一次上传多份文件并归并到同一个资料包",
"目录类文件会优先参与完整性校验",