From 73c6336600e783642c4c16b1cb6059ece75437ea Mon Sep 17 00:00:00 2001 From: bruce Date: Thu, 4 Jun 2026 01:28:28 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=817z=E8=B5=84=E6=96=99?= =?UTF-8?q?=E5=8C=85=E5=AF=BC=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/documents/forms.py | 6 ++--- apps/documents/services.py | 37 ++++++++++++++++++++++++++++++ apps/documents/views.py | 2 +- requirements.txt | 1 + templates/documents/upload.html | 2 +- tests/test_documents.py | 40 +++++++++++++++++++++++++++++++++ 6 files changed, 83 insertions(+), 5 deletions(-) diff --git a/apps/documents/forms.py b/apps/documents/forms.py index 0f351b7..8687ae1 100644 --- a/apps/documents/forms.py +++ b/apps/documents/forms.py @@ -5,7 +5,7 @@ from django import forms from apps.scenarios.services import ScenarioNotFound, get_scenario from apps.scenarios.services import list_scenarios -SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".zip"} +SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".zip", ".7z"} class MultipleFileInput(forms.ClearableFileInput): @@ -52,7 +52,7 @@ class DocumentUploadForm(forms.Form): return uploaded_file extension = Path(uploaded_file.name).suffix.lower() if extension not in SUPPORTED_EXTENSIONS: - raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx 和 .zip 文件") + raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件") return uploaded_file def clean_files(self): @@ -60,7 +60,7 @@ class DocumentUploadForm(forms.Form): for uploaded_file in uploaded_files: extension = Path(uploaded_file.name).suffix.lower() if extension not in SUPPORTED_EXTENSIONS: - raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx 和 .zip 文件") + raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件") return uploaded_files def clean(self): diff --git a/apps/documents/services.py b/apps/documents/services.py index b69543a..8cd6207 100644 --- a/apps/documents/services.py +++ b/apps/documents/services.py @@ -1,6 +1,7 @@ from pathlib import Path from io import BytesIO import re +import tempfile import xml.etree.ElementTree as ET from zipfile import BadZipFile, ZipFile @@ -220,6 +221,9 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]: if extension == ".zip": expanded_files.extend(_extract_zip_entries(uploaded_file)) continue + if extension == ".7z": + expanded_files.extend(_extract_7z_entries(uploaded_file)) + continue expanded_files.append( { "relative_path": uploaded_file.name, @@ -255,6 +259,39 @@ def _extract_zip_entries(uploaded_file) -> list[dict]: return entries +def _extract_7z_entries(uploaded_file) -> list[dict]: + try: + import py7zr + except ImportError as exc: + raise RuntimeError("处理 .7z 资料包需要安装 py7zr。") from exc + + archive_bytes = uploaded_file.read() + uploaded_file.seek(0) + entries = [] + with tempfile.TemporaryDirectory() as temp_dir: + with py7zr.SevenZipFile(BytesIO(archive_bytes), mode="r") as archive: + archive.extractall(path=temp_dir) + base_path = Path(temp_dir) + for file_path in sorted(base_path.rglob("*")): + if not file_path.is_file(): + continue + relative_path = file_path.relative_to(base_path).as_posix() + extension = Path(relative_path).suffix.lower() + if extension not in {".txt", ".md", ".pdf", ".docx"}: + continue + extracted_file = SimpleUploadedFile( + file_path.name, + file_path.read_bytes(), + ) + entries.append( + { + "relative_path": relative_path, + "uploaded_file": extracted_file, + } + ) + return entries + + def _detect_document_role(file_name: str) -> str: normalized = file_name.lower() if "申请表" in file_name: diff --git a/apps/documents/views.py b/apps/documents/views.py index afa817d..37276ea 100644 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -76,7 +76,7 @@ def upload(request): "form": form, "scenarios": list_scenarios(), "upload_checks": [ - "文件格式支持 PDF、DOCX、MD、TXT 与 ZIP 资料包", + "文件格式支持 PDF、DOCX、MD、TXT、ZIP 与 7Z 资料包", "业务资料与法规依据资料需分开归属", "支持一次上传多份文件并归并到同一个资料包", "目录类文件会优先参与完整性校验", diff --git a/requirements.txt b/requirements.txt index dc850f3..cdc8028 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ PyYAML>=6.0,<7.0 chromadb>=0.5,<1.0 pytest>=8.0,<9.0 pytest-django>=4.9,<5.0 +py7zr>=0.20,<1.0 diff --git a/templates/documents/upload.html b/templates/documents/upload.html index 586f9c5..bd1b5ca 100644 --- a/templates/documents/upload.html +++ b/templates/documents/upload.html @@ -14,7 +14,7 @@

资料导入向导

-

当前支持多文件上传,以及 `.txt`、`.md`、`.pdf`、`.docx` 与 `.zip` 资料包。上传成功后会直接形成一个资料包并绑定会话。

+

当前支持多文件上传,以及 `.txt`、`.md`、`.pdf`、`.docx`、`.zip` 与 `.7z` 资料包。上传成功后会直接形成一个资料包并绑定会话。

diff --git a/tests/test_documents.py b/tests/test_documents.py index d49022a..565ca63 100644 --- a/tests/test_documents.py +++ b/tests/test_documents.py @@ -1,6 +1,9 @@ from django.core.files.uploadedfile import SimpleUploadedFile from django.urls import reverse from io import BytesIO +from pathlib import Path +import sys +import types from zipfile import ZipFile from apps.documents.forms import DocumentUploadForm @@ -294,3 +297,40 @@ def test_import_submission_batch_supports_zip_package_and_preserves_relative_pat "CH1/注册申请表.txt", "CH1/目标产品说明书.txt", ] + + +def test_import_submission_batch_supports_7z_package_and_preserves_relative_paths(db, monkeypatch, tmp_path): + package = SimpleUploadedFile( + "registration-package.7z", + b"fake-7z-bytes", + content_type="application/x-7z-compressed", + ) + + class FakeSevenZipFile: + def __init__(self, _file_obj, mode="r"): + self.mode = mode + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def extractall(self, path): + target = Path(path) + (target / "CH1").mkdir(parents=True, exist_ok=True) + (target / "CH1" / "注册申请表.txt").write_text("产品名称:产品A", encoding="utf-8") + (target / "CH1" / "目标产品说明书.txt").write_text("产品名称:产品A", encoding="utf-8") + + fake_module = types.SimpleNamespace(SevenZipFile=FakeSevenZipFile) + monkeypatch.setitem(sys.modules, "py7zr", fake_module) + + result = import_submission_batch("document_review", [package]) + + batch = SubmissionBatch.objects.get(batch_id=result["batch_id"]) + documents = list(UploadedDocument.objects.filter(batch=batch).order_by("relative_path")) + assert batch.file_count == 2 + assert [document.relative_path for document in documents] == [ + "CH1/注册申请表.txt", + "CH1/目标产品说明书.txt", + ]