feat: 支持7z资料包导入
This commit is contained in:
@@ -5,7 +5,7 @@ from django import forms
|
|||||||
from apps.scenarios.services import ScenarioNotFound, get_scenario
|
from apps.scenarios.services import ScenarioNotFound, get_scenario
|
||||||
from apps.scenarios.services import list_scenarios
|
from apps.scenarios.services import list_scenarios
|
||||||
|
|
||||||
SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".zip"}
|
SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".zip", ".7z"}
|
||||||
|
|
||||||
|
|
||||||
class MultipleFileInput(forms.ClearableFileInput):
|
class MultipleFileInput(forms.ClearableFileInput):
|
||||||
@@ -52,7 +52,7 @@ class DocumentUploadForm(forms.Form):
|
|||||||
return uploaded_file
|
return uploaded_file
|
||||||
extension = Path(uploaded_file.name).suffix.lower()
|
extension = Path(uploaded_file.name).suffix.lower()
|
||||||
if extension not in SUPPORTED_EXTENSIONS:
|
if extension not in SUPPORTED_EXTENSIONS:
|
||||||
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx 和 .zip 文件")
|
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件")
|
||||||
return uploaded_file
|
return uploaded_file
|
||||||
|
|
||||||
def clean_files(self):
|
def clean_files(self):
|
||||||
@@ -60,7 +60,7 @@ class DocumentUploadForm(forms.Form):
|
|||||||
for uploaded_file in uploaded_files:
|
for uploaded_file in uploaded_files:
|
||||||
extension = Path(uploaded_file.name).suffix.lower()
|
extension = Path(uploaded_file.name).suffix.lower()
|
||||||
if extension not in SUPPORTED_EXTENSIONS:
|
if extension not in SUPPORTED_EXTENSIONS:
|
||||||
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx 和 .zip 文件")
|
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件")
|
||||||
return uploaded_files
|
return uploaded_files
|
||||||
|
|
||||||
def clean(self):
|
def clean(self):
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import re
|
import re
|
||||||
|
import tempfile
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from zipfile import BadZipFile, ZipFile
|
from zipfile import BadZipFile, ZipFile
|
||||||
|
|
||||||
@@ -220,6 +221,9 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
|
|||||||
if extension == ".zip":
|
if extension == ".zip":
|
||||||
expanded_files.extend(_extract_zip_entries(uploaded_file))
|
expanded_files.extend(_extract_zip_entries(uploaded_file))
|
||||||
continue
|
continue
|
||||||
|
if extension == ".7z":
|
||||||
|
expanded_files.extend(_extract_7z_entries(uploaded_file))
|
||||||
|
continue
|
||||||
expanded_files.append(
|
expanded_files.append(
|
||||||
{
|
{
|
||||||
"relative_path": uploaded_file.name,
|
"relative_path": uploaded_file.name,
|
||||||
@@ -255,6 +259,39 @@ def _extract_zip_entries(uploaded_file) -> list[dict]:
|
|||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_7z_entries(uploaded_file) -> list[dict]:
    """Expand an uploaded ``.7z`` archive into individual document entries.

    The archive is read fully into memory, unpacked into a temporary
    directory, and every regular file with a supported extension
    (``.txt``, ``.md``, ``.pdf``, ``.docx``) is wrapped in a
    ``SimpleUploadedFile``. Other files are ignored.

    Args:
        uploaded_file: File-like upload holding the archive bytes. It is
            read to the end and then rewound to position 0.

    Returns:
        A list of dicts, ordered by extracted path, each with:
        ``"relative_path"`` - POSIX-style path of the entry relative to the
        archive root; ``"uploaded_file"`` - a ``SimpleUploadedFile`` named
        after the entry's base name and holding its bytes.

    Raises:
        RuntimeError: If the ``py7zr`` package is not installed.
    """
    try:
        # Imported lazily so the module still loads when py7zr is absent.
        import py7zr
    except ImportError as exc:
        raise RuntimeError("处理 .7z 资料包需要安装 py7zr。") from exc

    supported_extensions = {".txt", ".md", ".pdf", ".docx"}

    archive_bytes = uploaded_file.read()
    # Rewind so the upload can still be read again after this function returns.
    uploaded_file.seek(0)

    entries = []
    with tempfile.TemporaryDirectory() as temp_dir:
        with py7zr.SevenZipFile(BytesIO(archive_bytes), mode="r") as archive:
            archive.extractall(path=temp_dir)

        base_path = Path(temp_dir)
        resolved_base = base_path.resolve()
        for file_path in sorted(base_path.rglob("*")):
            # The archive is untrusted input: ``is_file()`` follows symlinks,
            # so a link entry could expose a file outside the extraction
            # directory. Skip links outright.
            if file_path.is_symlink() or not file_path.is_file():
                continue
            # Also reject anything reached through a symlinked directory,
            # i.e. whose real location is not under the extraction directory.
            try:
                file_path.resolve().relative_to(resolved_base)
            except ValueError:
                continue

            relative_path = file_path.relative_to(base_path).as_posix()
            if Path(relative_path).suffix.lower() not in supported_extensions:
                continue

            # Bytes are read while the temporary directory still exists.
            extracted_file = SimpleUploadedFile(
                file_path.name,
                file_path.read_bytes(),
            )
            entries.append(
                {
                    "relative_path": relative_path,
                    "uploaded_file": extracted_file,
                }
            )
    return entries
|
||||||
|
|
||||||
|
|
||||||
def _detect_document_role(file_name: str) -> str:
|
def _detect_document_role(file_name: str) -> str:
|
||||||
normalized = file_name.lower()
|
normalized = file_name.lower()
|
||||||
if "申请表" in file_name:
|
if "申请表" in file_name:
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ def upload(request):
|
|||||||
"form": form,
|
"form": form,
|
||||||
"scenarios": list_scenarios(),
|
"scenarios": list_scenarios(),
|
||||||
"upload_checks": [
|
"upload_checks": [
|
||||||
"文件格式支持 PDF、DOCX、MD、TXT 与 ZIP 资料包",
|
"文件格式支持 PDF、DOCX、MD、TXT、ZIP 与 7Z 资料包",
|
||||||
"业务资料与法规依据资料需分开归属",
|
"业务资料与法规依据资料需分开归属",
|
||||||
"支持一次上传多份文件并归并到同一个资料包",
|
"支持一次上传多份文件并归并到同一个资料包",
|
||||||
"目录类文件会优先参与完整性校验",
|
"目录类文件会优先参与完整性校验",
|
||||||
|
|||||||
@@ -3,3 +3,4 @@ PyYAML>=6.0,<7.0
|
|||||||
chromadb>=0.5,<1.0
|
chromadb>=0.5,<1.0
|
||||||
pytest>=8.0,<9.0
|
pytest>=8.0,<9.0
|
||||||
pytest-django>=4.9,<5.0
|
pytest-django>=4.9,<5.0
|
||||||
|
py7zr>=0.20,<1.0
|
||||||
|
|||||||
@@ -14,7 +14,7 @@
|
|||||||
<div class="section-heading">
|
<div class="section-heading">
|
||||||
<div>
|
<div>
|
||||||
<h2 class="section-title">资料导入向导</h2>
|
<h2 class="section-title">资料导入向导</h2>
|
||||||
<p class="section-copy">当前支持多文件上传,以及 `.txt`、`.md`、`.pdf`、`.docx` 与 `.zip` 资料包。上传成功后会直接形成一个资料包并绑定会话。</p>
|
<p class="section-copy">当前支持多文件上传,以及 `.txt`、`.md`、`.pdf`、`.docx`、`.zip` 与 `.7z` 资料包。上传成功后会直接形成一个资料包并绑定会话。</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<form method="post" enctype="multipart/form-data" class="stack">
|
<form method="post" enctype="multipart/form-data" class="stack">
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
|
|
||||||
from apps.documents.forms import DocumentUploadForm
|
from apps.documents.forms import DocumentUploadForm
|
||||||
@@ -294,3 +297,40 @@ def test_import_submission_batch_supports_zip_package_and_preserves_relative_pat
|
|||||||
"CH1/注册申请表.txt",
|
"CH1/注册申请表.txt",
|
||||||
"CH1/目标产品说明书.txt",
|
"CH1/目标产品说明书.txt",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_import_submission_batch_supports_7z_package_and_preserves_relative_paths(db, monkeypatch, tmp_path):
    """Importing a ``.7z`` upload stores one document per extracted file.

    ``py7zr`` is replaced in ``sys.modules`` by a stub whose archive class
    writes two fixed text files under ``CH1/``. The test then checks the
    batch's file count and the stored relative paths.
    """

    class _StubSevenZipFile:
        """Minimal stand-in for ``py7zr.SevenZipFile`` used as a context manager."""

        def __init__(self, _stream, mode="r"):
            self.mode = mode

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            # Returning False lets any exception from the ``with`` body propagate.
            return False

        def extractall(self, path):
            # Ignore the (fake) archive bytes and write a fixed directory layout.
            chapter_dir = Path(path) / "CH1"
            chapter_dir.mkdir(parents=True, exist_ok=True)
            for file_name in ("注册申请表.txt", "目标产品说明书.txt"):
                (chapter_dir / file_name).write_text("产品名称:产品A", encoding="utf-8")

    # Any ``import py7zr`` executed during the test resolves to the stub module.
    monkeypatch.setitem(
        sys.modules,
        "py7zr",
        types.SimpleNamespace(SevenZipFile=_StubSevenZipFile),
    )

    archive_upload = SimpleUploadedFile(
        "registration-package.7z",
        b"fake-7z-bytes",
        content_type="application/x-7z-compressed",
    )

    outcome = import_submission_batch("document_review", [archive_upload])

    stored_batch = SubmissionBatch.objects.get(batch_id=outcome["batch_id"])
    stored_documents = UploadedDocument.objects.filter(batch=stored_batch).order_by("relative_path")
    stored_paths = [document.relative_path for document in stored_documents]

    assert stored_batch.file_count == 2
    assert stored_paths == [
        "CH1/注册申请表.txt",
        "CH1/目标产品说明书.txt",
    ]
|
||||||
|
|||||||
Reference in New Issue
Block a user