diff --git a/apps/documents/forms.py b/apps/documents/forms.py
index 8687ae1..7d1bc34 100644
--- a/apps/documents/forms.py
+++ b/apps/documents/forms.py
@@ -5,7 +5,7 @@ from django import forms
 from apps.scenarios.services import ScenarioNotFound, get_scenario
 from apps.scenarios.services import list_scenarios
 
-SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".zip", ".7z"}
+SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".zip", ".7z", ".rar"}
 
 
 class MultipleFileInput(forms.ClearableFileInput):
@@ -52,7 +52,7 @@ class DocumentUploadForm(forms.Form):
             return uploaded_file
         extension = Path(uploaded_file.name).suffix.lower()
         if extension not in SUPPORTED_EXTENSIONS:
-            raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件")
+            raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip、.7z 和 .rar 文件")
         return uploaded_file
 
     def clean_files(self):
@@ -60,7 +60,7 @@ class DocumentUploadForm(forms.Form):
         for uploaded_file in uploaded_files:
             extension = Path(uploaded_file.name).suffix.lower()
             if extension not in SUPPORTED_EXTENSIONS:
-                raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件")
+                raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip、.7z 和 .rar 文件")
         return uploaded_files
 
     def clean(self):
diff --git a/apps/documents/services.py b/apps/documents/services.py
index 429d581..ddbcab7 100644
--- a/apps/documents/services.py
+++ b/apps/documents/services.py
@@ -282,6 +282,11 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
             expanded_files.extend(extraction["files"])
             warnings.extend(extraction["warnings"])
             continue
+        if extension == ".rar":
+            extraction = _extract_rar_entries(uploaded_file)
+            expanded_files.extend(extraction["files"])
+            warnings.extend(extraction["warnings"])
+            continue
         expanded_files.append(
             {
                 "relative_path": uploaded_file.name,
@@ -435,6 +440,67 @@ def _extract_7z_entries(uploaded_file) -> dict:
     return {"files": entries, "warnings": warnings}
 
 
+def _extract_rar_entries(uploaded_file) -> dict:
+    """Expand an uploaded .rar archive into in-memory document entries.
+
+    Directory members are ignored. Members whose extension is not .txt, .md,
+    .pdf or .docx are skipped and reported in ``warnings``.
+
+    Args:
+        uploaded_file: Upload object exposing ``read()`` and ``seek()``.
+
+    Returns:
+        ``{"files": [...], "warnings": [...]}``. Each file entry holds the
+        member's ``relative_path`` and a ``SimpleUploadedFile`` with its
+        content.
+
+    Raises:
+        RuntimeError: If ``rarfile`` is not installed, or if ``rarfile``
+            fails to open or read the archive.
+    """
+    try:
+        import rarfile
+    except ImportError as exc:
+        raise RuntimeError("处理 .rar 资料包需要安装 rarfile。") from exc
+
+    # Rewind before reading in case an earlier consumer moved the pointer,
+    # and again afterwards so later consumers see the whole upload.
+    uploaded_file.seek(0)
+    archive_bytes = uploaded_file.read()
+    uploaded_file.seek(0)
+    extractable_extensions = {".txt", ".md", ".pdf", ".docx"}
+    entries = []
+    warnings = []
+    try:
+        with rarfile.RarFile(BytesIO(archive_bytes)) as archive:
+            for info in archive.infolist():
+                if info.is_dir():
+                    continue
+                # Normalize backslash separators to forward slashes.
+                # NOTE(review): the path is otherwise stored as-is; confirm
+                # callers never use it to build filesystem paths.
+                relative_path = info.filename.replace("\\", "/")
+                extension = Path(relative_path).suffix.lower()
+                if extension not in extractable_extensions:
+                    warnings.append(f"跳过不支持的文件:{relative_path}")
+                    continue
+                entries.append(
+                    {
+                        "relative_path": relative_path,
+                        "uploaded_file": SimpleUploadedFile(
+                            Path(relative_path).name,
+                            archive.read(info.filename),
+                        ),
+                    }
+                )
+    except rarfile.Error as exc:
+        # Uploads are accepted by extension, so corrupt or non-RAR payloads
+        # can reach this point; surface them as RuntimeError like the
+        # missing-dependency case instead of leaking a rarfile exception.
+        raise RuntimeError("无法读取 .rar 资料包,文件可能已损坏。") from exc
+    return {"files": entries, "warnings": warnings}
+
+
 def _detect_document_role(file_name: str) -> str:
     normalized = file_name.lower()
     if "申请表" in file_name:
diff --git a/apps/documents/views.py b/apps/documents/views.py
index 37276ea..8056a9f 100644
--- a/apps/documents/views.py
+++ b/apps/documents/views.py
@@ -76,7 +76,7 @@ def upload(request):
             "form": form,
             "scenarios": list_scenarios(),
             "upload_checks": [
-                "文件格式支持 PDF、DOCX、MD、TXT、ZIP 与 7Z 资料包",
+                "文件格式支持 PDF、DOCX、MD、TXT、ZIP、7Z 与 RAR 资料包",
                 "业务资料与法规依据资料需分开归属",
                 "支持一次上传多份文件并归并到同一个资料包",
                 "目录类文件会优先参与完整性校验",
diff --git a/templates/documents/upload.html b/templates/documents/upload.html
index bd1b5ca..c696a85 100644
--- a/templates/documents/upload.html
+++ b/templates/documents/upload.html
@@ -14,7 +14,7 @@

资料导入向导

-

当前支持多文件上传,以及 `.txt`、`.md`、`.pdf`、`.docx`、`.zip` 与 `.7z` 资料包。上传成功后会直接形成一个资料包并绑定会话。

+

当前支持多文件上传,以及 `.txt`、`.md`、`.pdf`、`.docx`、`.zip`、`.7z` 与 `.rar` 资料包。上传成功后会直接形成一个资料包并绑定会话。

@@ -32,7 +32,7 @@ {% if form.files.errors %}

{{ form.files.errors|join:" " }}

{% endif %} -

可一次选择多份文件,或上传一个 zip 资料包。

+

可一次选择多份文件,或上传一个 zip / 7z / rar 资料包。

{% if form.errors %}
{{ form.errors }}
diff --git a/tests/test_documents.py b/tests/test_documents.py
index d12e8fd..7b4bd9f 100644
--- a/tests/test_documents.py
+++ b/tests/test_documents.py
@@ -111,6 +111,21 @@ def test_document_upload_form_builds_scenario_choices():
     assert "quality_analysis" in choice_values
 
 
+def test_document_upload_form_accepts_rar_package():  # a .rar upload must pass form validation
+    form = DocumentUploadForm(
+        data={"scenario_id": "knowledge_qa"},
+        files={
+            "file": SimpleUploadedFile(
+                "registration-package.rar",
+                b"fake-rar-bytes",  # placeholder payload, not a real RAR archive
+                content_type="application/vnd.rar",
+            )
+        },
+    )
+
+    assert form.is_valid()
+
+
 def test_index_failure_message_is_visible_on_document_list(client, db, monkeypatch):
     document = UploadedDocument.objects.create(
         scenario_id="knowledge_qa",
@@ -336,6 +351,54 @@ def test_import_submission_batch_supports_7z_package_and_preserves_relative_path
     ]
 
 
+def test_import_submission_batch_supports_rar_package_and_preserves_relative_paths(db, monkeypatch):  # import via a stubbed rarfile module
+    package = SimpleUploadedFile(
+        "registration-package.rar",
+        b"fake-rar-bytes",  # never parsed: FakeRarFile below ignores its file argument
+        content_type="application/vnd.rar",
+    )
+
+    class FakeRarInfo:  # stand-in member record exposing filename and is_dir()
+        def __init__(self, filename, is_dir=False):
+            self.filename = filename
+            self._is_dir = is_dir
+
+        def is_dir(self):
+            return self._is_dir
+
+    class FakeRarFile:  # stand-in archive: context manager with infolist() and read()
+        def __init__(self, _file_obj):
+            self.entries = {
+                "CH1/注册申请表.txt": "产品名称:产品A".encode("utf-8"),
+                "CH1/目标产品说明书.txt": "产品名称:产品A".encode("utf-8"),
+            }
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc, tb):
+            return False  # do not suppress exceptions raised inside the with-block
+
+        def infolist(self):
+            return [FakeRarInfo(name) for name in self.entries]
+
+        def read(self, name):
+            return self.entries[name]  # lookup is by member name
+
+    fake_module = types.SimpleNamespace(RarFile=FakeRarFile)
+    monkeypatch.setitem(sys.modules, "rarfile", fake_module)  # picked up by a later "import rarfile"
+
+    result = import_submission_batch("document_review", [package])
+
+    batch = SubmissionBatch.objects.get(batch_id=result["batch_id"])
+    documents = list(UploadedDocument.objects.filter(batch=batch).order_by("relative_path"))
+    assert batch.file_count == 2  # one document per fake archive member
+    assert [document.relative_path for document in documents] == [
+        "CH1/注册申请表.txt",
+        "CH1/目标产品说明书.txt",
+    ]
+
+
 def test_import_submission_batch_records_warnings_for_unsupported_zip_entries(db):
     archive = BytesIO()
     with ZipFile(archive, "w") as zip_file: