feat: 支持rar资料包导入
This commit is contained in:
@@ -5,7 +5,7 @@ from django import forms
|
||||
from apps.scenarios.services import ScenarioNotFound, get_scenario
|
||||
from apps.scenarios.services import list_scenarios
|
||||
|
||||
SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".zip", ".7z"}
|
||||
SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".zip", ".7z", ".rar"}
|
||||
|
||||
|
||||
class MultipleFileInput(forms.ClearableFileInput):
|
||||
@@ -52,7 +52,7 @@ class DocumentUploadForm(forms.Form):
|
||||
return uploaded_file
|
||||
extension = Path(uploaded_file.name).suffix.lower()
|
||||
if extension not in SUPPORTED_EXTENSIONS:
|
||||
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件")
|
||||
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip、.7z 和 .rar 文件")
|
||||
return uploaded_file
|
||||
|
||||
def clean_files(self):
|
||||
@@ -60,7 +60,7 @@ class DocumentUploadForm(forms.Form):
|
||||
for uploaded_file in uploaded_files:
|
||||
extension = Path(uploaded_file.name).suffix.lower()
|
||||
if extension not in SUPPORTED_EXTENSIONS:
|
||||
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件")
|
||||
raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip、.7z 和 .rar 文件")
|
||||
return uploaded_files
|
||||
|
||||
def clean(self):
|
||||
|
||||
@@ -282,6 +282,11 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
|
||||
expanded_files.extend(extraction["files"])
|
||||
warnings.extend(extraction["warnings"])
|
||||
continue
|
||||
if extension == ".rar":
|
||||
extraction = _extract_rar_entries(uploaded_file)
|
||||
expanded_files.extend(extraction["files"])
|
||||
warnings.extend(extraction["warnings"])
|
||||
continue
|
||||
expanded_files.append(
|
||||
{
|
||||
"relative_path": uploaded_file.name,
|
||||
@@ -435,6 +440,39 @@ def _extract_7z_entries(uploaded_file) -> dict:
|
||||
return {"files": entries, "warnings": warnings}
|
||||
|
||||
|
||||
def _extract_rar_entries(uploaded_file) -> dict:
    """Expand an uploaded ``.rar`` package into individual document uploads.

    The upload is read fully into memory and its read position is rewound
    afterwards. Every non-directory entry whose suffix is ``.txt``, ``.md``,
    ``.pdf`` or ``.docx`` is wrapped in a ``SimpleUploadedFile``; any other
    entry is skipped and reported through a warning message.

    Args:
        uploaded_file: File-like upload exposing ``read()`` and ``seek()``.

    Returns:
        A dict with two keys: ``"files"``, a list of
        ``{"relative_path": str, "uploaded_file": SimpleUploadedFile}`` dicts
        whose ``relative_path`` uses forward slashes, and ``"warnings"``, a
        list of messages for the skipped entries.

    Raises:
        RuntimeError: If ``rarfile`` is not installed, or if the archive
            cannot be parsed or one of its entries cannot be extracted.
    """
    try:
        import rarfile
    except ImportError as exc:
        raise RuntimeError("处理 .rar 资料包需要安装 rarfile。") from exc

    supported_suffixes = {".txt", ".md", ".pdf", ".docx"}

    archive_bytes = uploaded_file.read()
    # Rewind so the caller can still consume the original upload afterwards.
    uploaded_file.seek(0)

    entries = []
    warnings = []
    try:
        with rarfile.RarFile(BytesIO(archive_bytes)) as archive:
            for info in archive.infolist():
                if info.is_dir():
                    continue
                # RAR entries may use Windows separators; normalise them.
                # NOTE(review): the path comes from the archive verbatim and may
                # contain ".." segments — confirm it is never used as a real
                # filesystem path downstream.
                relative_path = info.filename.replace("\\", "/")
                extension = Path(relative_path).suffix.lower()
                if extension not in supported_suffixes:
                    warnings.append(f"跳过不支持的文件:{relative_path}")
                    continue
                # Read by the archive's own name, not the normalised path.
                file_data = archive.read(info.filename)
                extracted_file = SimpleUploadedFile(
                    Path(relative_path).name,
                    file_data,
                )
                entries.append(
                    {
                        "relative_path": relative_path,
                        "uploaded_file": extracted_file,
                    }
                )
    except rarfile.Error as exc:
        # Corrupt, non-RAR or password-protected uploads and a missing
        # extraction backend all surface as rarfile.Error subclasses; report
        # them with the same exception type as the missing-dependency case.
        raise RuntimeError(
            "无法解析 .rar 资料包,请确认文件未损坏、未加密且服务器已安装解压工具。"
        ) from exc

    return {"files": entries, "warnings": warnings}
|
||||
|
||||
|
||||
def _detect_document_role(file_name: str) -> str:
|
||||
normalized = file_name.lower()
|
||||
if "申请表" in file_name:
|
||||
|
||||
@@ -76,7 +76,7 @@ def upload(request):
|
||||
"form": form,
|
||||
"scenarios": list_scenarios(),
|
||||
"upload_checks": [
|
||||
"文件格式支持 PDF、DOCX、MD、TXT、ZIP 与 7Z 资料包",
|
||||
"文件格式支持 PDF、DOCX、MD、TXT、ZIP、7Z 与 RAR 资料包",
|
||||
"业务资料与法规依据资料需分开归属",
|
||||
"支持一次上传多份文件并归并到同一个资料包",
|
||||
"目录类文件会优先参与完整性校验",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
<div class="section-heading">
|
||||
<div>
|
||||
<h2 class="section-title">资料导入向导</h2>
|
||||
<p class="section-copy">当前支持多文件上传,以及 `.txt`、`.md`、`.pdf`、`.docx`、`.zip` 与 `.7z` 资料包。上传成功后会直接形成一个资料包并绑定会话。</p>
|
||||
<p class="section-copy">当前支持多文件上传,以及 `.txt`、`.md`、`.pdf`、`.docx`、`.zip`、`.7z` 与 `.rar` 资料包。上传成功后会直接形成一个资料包并绑定会话。</p>
|
||||
</div>
|
||||
</div>
|
||||
<form method="post" enctype="multipart/form-data" class="stack">
|
||||
@@ -32,7 +32,7 @@
|
||||
{% if form.files.errors %}
|
||||
<p class="notice notice-error">{{ form.files.errors|join:" " }}</p>
|
||||
{% endif %}
|
||||
<p class="help-text">可一次选择多份文件,或上传一个 zip 资料包。</p>
|
||||
<p class="help-text">可一次选择多份文件,或上传一个 zip / 7z / rar 资料包。</p>
|
||||
</div>
|
||||
{% if form.errors %}
|
||||
<div class="notice notice-error">{{ form.errors }}</div>
|
||||
|
||||
@@ -111,6 +111,21 @@ def test_document_upload_form_builds_scenario_choices():
|
||||
assert "quality_analysis" in choice_values
|
||||
|
||||
|
||||
def test_document_upload_form_accepts_rar_package():
    """A file named ``*.rar`` passes ``DocumentUploadForm`` validation.

    The payload is placeholder bytes rather than a real archive.
    """
    rar_upload = SimpleUploadedFile(
        "registration-package.rar",
        b"fake-rar-bytes",
        content_type="application/vnd.rar",
    )
    payload = {"scenario_id": "knowledge_qa"}

    form = DocumentUploadForm(data=payload, files={"file": rar_upload})

    assert form.is_valid()
|
||||
|
||||
|
||||
def test_index_failure_message_is_visible_on_document_list(client, db, monkeypatch):
|
||||
document = UploadedDocument.objects.create(
|
||||
scenario_id="knowledge_qa",
|
||||
@@ -336,6 +351,54 @@ def test_import_submission_batch_supports_7z_package_and_preserves_relative_path
|
||||
]
|
||||
|
||||
|
||||
def test_import_submission_batch_supports_rar_package_and_preserves_relative_paths(db, monkeypatch):
    """Importing a ``.rar`` package stores one document per archive entry.

    ``rarfile`` is swapped for an in-memory stand-in registered in
    ``sys.modules``, so the upload bytes never have to be a real archive.
    """
    # Entry name -> entry bytes served by the stand-in archive.
    archive_contents = {
        "CH1/注册申请表.txt": "产品名称:产品A".encode("utf-8"),
        "CH1/目标产品说明书.txt": "产品名称:产品A".encode("utf-8"),
    }

    class FakeRarInfo:
        """Minimal archive-entry record: a filename plus a directory flag."""

        def __init__(self, filename, is_dir=False):
            self.filename = filename
            self._is_dir = is_dir

        def is_dir(self):
            return self._is_dir

    class FakeRarFile:
        """Context-manager stand-in for ``rarfile.RarFile``."""

        def __init__(self, _file_obj):
            # Each instance gets its own copy, ignoring the bytes passed in.
            self.entries = dict(archive_contents)

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            # Returning False lets any exception propagate.
            return False

        def infolist(self):
            return list(map(FakeRarInfo, self.entries))

        def read(self, name):
            return self.entries[name]

    monkeypatch.setitem(
        sys.modules,
        "rarfile",
        types.SimpleNamespace(RarFile=FakeRarFile),
    )

    package = SimpleUploadedFile(
        "registration-package.rar",
        b"fake-rar-bytes",
        content_type="application/vnd.rar",
    )

    result = import_submission_batch("document_review", [package])

    batch = SubmissionBatch.objects.get(batch_id=result["batch_id"])
    stored_documents = UploadedDocument.objects.filter(batch=batch).order_by("relative_path")
    documents = list(stored_documents)

    assert batch.file_count == 2
    stored_paths = [document.relative_path for document in documents]
    assert stored_paths == [
        "CH1/注册申请表.txt",
        "CH1/目标产品说明书.txt",
    ]
|
||||
|
||||
|
||||
def test_import_submission_batch_records_warnings_for_unsupported_zip_entries(db):
|
||||
archive = BytesIO()
|
||||
with ZipFile(archive, "w") as zip_file:
|
||||
|
||||
Reference in New Issue
Block a user