feat: 支持7z资料包导入

This commit is contained in:
2026-06-04 01:28:28 +08:00
parent 24446658ad
commit 73c6336600
6 changed files with 83 additions and 5 deletions

View File

@@ -5,7 +5,7 @@ from django import forms
from apps.scenarios.services import ScenarioNotFound, get_scenario from apps.scenarios.services import ScenarioNotFound, get_scenario
from apps.scenarios.services import list_scenarios from apps.scenarios.services import list_scenarios
SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".zip"} SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".zip", ".7z"}
class MultipleFileInput(forms.ClearableFileInput): class MultipleFileInput(forms.ClearableFileInput):
@@ -52,7 +52,7 @@ class DocumentUploadForm(forms.Form):
return uploaded_file return uploaded_file
extension = Path(uploaded_file.name).suffix.lower() extension = Path(uploaded_file.name).suffix.lower()
if extension not in SUPPORTED_EXTENSIONS: if extension not in SUPPORTED_EXTENSIONS:
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx 和 .zip 文件") raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件")
return uploaded_file return uploaded_file
def clean_files(self): def clean_files(self):
@@ -60,7 +60,7 @@ class DocumentUploadForm(forms.Form):
for uploaded_file in uploaded_files: for uploaded_file in uploaded_files:
extension = Path(uploaded_file.name).suffix.lower() extension = Path(uploaded_file.name).suffix.lower()
if extension not in SUPPORTED_EXTENSIONS: if extension not in SUPPORTED_EXTENSIONS:
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx 和 .zip 文件") raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件")
return uploaded_files return uploaded_files
def clean(self): def clean(self):

View File

@@ -1,6 +1,7 @@
from pathlib import Path from pathlib import Path
from io import BytesIO from io import BytesIO
import re import re
import tempfile
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from zipfile import BadZipFile, ZipFile from zipfile import BadZipFile, ZipFile
@@ -220,6 +221,9 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
if extension == ".zip": if extension == ".zip":
expanded_files.extend(_extract_zip_entries(uploaded_file)) expanded_files.extend(_extract_zip_entries(uploaded_file))
continue continue
if extension == ".7z":
expanded_files.extend(_extract_7z_entries(uploaded_file))
continue
expanded_files.append( expanded_files.append(
{ {
"relative_path": uploaded_file.name, "relative_path": uploaded_file.name,
@@ -255,6 +259,39 @@ def _extract_zip_entries(uploaded_file) -> list[dict]:
return entries return entries
def _extract_7z_entries(uploaded_file) -> list[dict]:
    """Expand an uploaded ``.7z`` archive into per-document upload entries.

    The archive is unpacked into a temporary directory, which is removed
    before returning; every supported document found there is loaded into
    memory and wrapped in a ``SimpleUploadedFile``.

    Args:
        uploaded_file: Readable, seekable upload holding the archive bytes.
            It is read in full and then rewound to position 0.

    Returns:
        One dict per extracted ``.txt``/``.md``/``.pdf``/``.docx`` file,
        ordered by extracted path, with the keys ``relative_path`` (POSIX
        path relative to the archive root) and ``uploaded_file`` (named
        after the file's base name only).

    Raises:
        RuntimeError: If ``py7zr`` is not installed.

    Errors raised by ``py7zr`` while opening or extracting the archive are
    not handled here and propagate to the caller.
    """
    # Imported lazily so this module still loads when py7zr is absent.
    try:
        import py7zr
    except ImportError as exc:
        raise RuntimeError("处理 .7z 资料包需要安装 py7zr。") from exc

    supported_extensions = {".txt", ".md", ".pdf", ".docx"}

    # py7zr is handed an in-memory copy; the upload itself is rewound so
    # it can still be read again afterwards.
    archive_bytes = uploaded_file.read()
    uploaded_file.seek(0)

    entries = []
    with tempfile.TemporaryDirectory() as temp_dir:
        with py7zr.SevenZipFile(BytesIO(archive_bytes), mode="r") as archive:
            archive.extractall(path=temp_dir)

        base_path = Path(temp_dir)
        for file_path in sorted(base_path.rglob("*")):
            # The archive is untrusted input. is_file() follows symlinks,
            # so a link entry could expose a file outside the extraction
            # directory; only real regular files are imported.
            if file_path.is_symlink() or not file_path.is_file():
                continue
            if file_path.suffix.lower() not in supported_extensions:
                continue

            relative_path = file_path.relative_to(base_path).as_posix()
            # Contents are read eagerly because the temporary directory
            # is deleted as soon as the ``with`` block exits.
            extracted_file = SimpleUploadedFile(
                file_path.name,
                file_path.read_bytes(),
            )
            entries.append(
                {
                    "relative_path": relative_path,
                    "uploaded_file": extracted_file,
                }
            )
    return entries
def _detect_document_role(file_name: str) -> str: def _detect_document_role(file_name: str) -> str:
normalized = file_name.lower() normalized = file_name.lower()
if "申请表" in file_name: if "申请表" in file_name:

View File

@@ -76,7 +76,7 @@ def upload(request):
"form": form, "form": form,
"scenarios": list_scenarios(), "scenarios": list_scenarios(),
"upload_checks": [ "upload_checks": [
"文件格式支持 PDF、DOCX、MD、TXT 与 ZIP 资料包", "文件格式支持 PDF、DOCX、MD、TXT、ZIP 与 7Z 资料包",
"业务资料与法规依据资料需分开归属", "业务资料与法规依据资料需分开归属",
"支持一次上传多份文件并归并到同一个资料包", "支持一次上传多份文件并归并到同一个资料包",
"目录类文件会优先参与完整性校验", "目录类文件会优先参与完整性校验",

View File

@@ -3,3 +3,4 @@ PyYAML>=6.0,<7.0
chromadb>=0.5,<1.0 chromadb>=0.5,<1.0
pytest>=8.0,<9.0 pytest>=8.0,<9.0
pytest-django>=4.9,<5.0 pytest-django>=4.9,<5.0
py7zr>=0.20,<1.0

View File

@@ -14,7 +14,7 @@
<div class="section-heading"> <div class="section-heading">
<div> <div>
<h2 class="section-title">资料导入向导</h2> <h2 class="section-title">资料导入向导</h2>
<p class="section-copy">当前支持多文件上传,以及 `.txt`、`.md`、`.pdf`、`.docx` 与 `.zip` 资料包。上传成功后会直接形成一个资料包并绑定会话。</p> <p class="section-copy">当前支持多文件上传,以及 `.txt`、`.md`、`.pdf`、`.docx`、`.zip` 与 `.7z` 资料包。上传成功后会直接形成一个资料包并绑定会话。</p>
</div> </div>
</div> </div>
<form method="post" enctype="multipart/form-data" class="stack"> <form method="post" enctype="multipart/form-data" class="stack">

View File

@@ -1,6 +1,9 @@
from django.core.files.uploadedfile import SimpleUploadedFile from django.core.files.uploadedfile import SimpleUploadedFile
from django.urls import reverse from django.urls import reverse
from io import BytesIO from io import BytesIO
from pathlib import Path
import sys
import types
from zipfile import ZipFile from zipfile import ZipFile
from apps.documents.forms import DocumentUploadForm from apps.documents.forms import DocumentUploadForm
@@ -294,3 +297,40 @@ def test_import_submission_batch_supports_zip_package_and_preserves_relative_pat
"CH1/注册申请表.txt", "CH1/注册申请表.txt",
"CH1/目标产品说明书.txt", "CH1/目标产品说明书.txt",
] ]
def test_import_submission_batch_supports_7z_package_and_preserves_relative_paths(db, monkeypatch, tmp_path):
    """A ``.7z`` upload is expanded and each document keeps its in-archive path."""

    class FakeSevenZipFile:
        """Stand-in for ``py7zr.SevenZipFile`` that "extracts" two fixed files."""

        def __init__(self, _file_obj, mode="r"):
            self.mode = mode

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def extractall(self, path):
            # Ignore the fake archive bytes and write a known tree instead.
            chapter_dir = Path(path) / "CH1"
            chapter_dir.mkdir(parents=True, exist_ok=True)
            for file_name in ("注册申请表.txt", "目标产品说明书.txt"):
                (chapter_dir / file_name).write_text("产品名称产品A", encoding="utf-8")

    # Install the fake under the real module name so the importer's lazy
    # ``import py7zr`` resolves to it.
    monkeypatch.setitem(
        sys.modules,
        "py7zr",
        types.SimpleNamespace(SevenZipFile=FakeSevenZipFile),
    )

    archive_upload = SimpleUploadedFile(
        "registration-package.7z",
        b"fake-7z-bytes",
        content_type="application/x-7z-compressed",
    )

    outcome = import_submission_batch("document_review", [archive_upload])

    stored_batch = SubmissionBatch.objects.get(batch_id=outcome["batch_id"])
    stored_paths = [
        document.relative_path
        for document in UploadedDocument.objects.filter(batch=stored_batch).order_by("relative_path")
    ]

    assert stored_batch.file_count == 2
    assert stored_paths == [
        "CH1/注册申请表.txt",
        "CH1/目标产品说明书.txt",
    ]