From 0b7322aa65401f75c2ea791bd19bd7d48b1b391a Mon Sep 17 00:00:00 2001
From: bruce
Date: Thu, 4 Jun 2026 01:37:11 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A2=9E=E5=BC=BA=E8=B5=84=E6=96=99?=
 =?UTF-8?q?=E5=8C=85=E5=8E=8B=E7=BC=A9=E5=AF=BC=E5=85=A5=E5=BC=82=E5=B8=B8?=
 =?UTF-8?q?=E6=8F=90=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note (not part of the commit message): _expand_uploaded_files now
returns {"files": [...], "warnings": [...]}, so its return annotation is
changed from list[dict] to dict, matching _extract_zip_entries and
_extract_7z_entries, which this patch already re-annotates.

 apps/documents/services.py | 45 ++++++++++++++++------
 tests/test_documents.py    | 78 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+), 13 deletions(-)

diff --git a/apps/documents/services.py b/apps/documents/services.py
index 8cd6207..29862f6 100644
--- a/apps/documents/services.py
+++ b/apps/documents/services.py
@@ -57,8 +57,11 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
     candidates = []
     chapter_summary = {}
     total_pages = 0
+    warnings = []
 
-    expanded_files = _expand_uploaded_files(uploaded_files)
+    expanded_result = _expand_uploaded_files(uploaded_files)
+    expanded_files = expanded_result["files"]
+    warnings.extend(expanded_result["warnings"])
     for uploaded_item in expanded_files:
         uploaded_file = uploaded_item["uploaded_file"]
         relative_path = uploaded_item["relative_path"]
@@ -93,9 +96,13 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
         chapter_summary[chapter_key] = chapter_summary.get(chapter_key, 0) + 1
         candidates.extend(_extract_product_candidates(document.relative_path, text))
 
-    product_name, warnings = _select_product_name(candidates)
+    product_name, product_warnings = _select_product_name(candidates)
+    warnings.extend(product_warnings)
     conversation = create_conversation_for_batch(batch.batch_id, product_name)
 
+    if not documents:
+        warnings.append("未发现可导入的支持文件,请检查资料包格式或补充 PDF/DOCX/MD/TXT 文件。")
+
     batch.product_name = product_name
     batch.conversation_id = conversation.conversation_id
     batch.file_count = len(documents)
@@ -105,9 +112,12 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
         for chapter_code, count in sorted(chapter_summary.items())
     ]
     batch.exception_count = len(warnings)
-    batch.import_status = (
-        SubmissionBatch.STATUS_REVIEW_REQUIRED if warnings else SubmissionBatch.STATUS_COMPLETED
-    )
+    if not documents:
+        batch.import_status = SubmissionBatch.STATUS_FAILED
+    elif warnings:
+        batch.import_status = SubmissionBatch.STATUS_REVIEW_REQUIRED
+    else:
+        batch.import_status = SubmissionBatch.STATUS_COMPLETED
     batch.save(
         update_fields=[
             "product_name",
@@ -216,13 +226,18 @@ def _estimate_page_count(text: str) -> int:
 
 
-def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
+def _expand_uploaded_files(uploaded_files: list) -> dict:
     expanded_files = []
+    warnings = []
     for uploaded_file in uploaded_files:
         extension = Path(uploaded_file.name).suffix.lower()
         if extension == ".zip":
-            expanded_files.extend(_extract_zip_entries(uploaded_file))
+            extraction = _extract_zip_entries(uploaded_file)
+            expanded_files.extend(extraction["files"])
+            warnings.extend(extraction["warnings"])
             continue
         if extension == ".7z":
-            expanded_files.extend(_extract_7z_entries(uploaded_file))
+            extraction = _extract_7z_entries(uploaded_file)
+            expanded_files.extend(extraction["files"])
+            warnings.extend(extraction["warnings"])
             continue
         expanded_files.append(
             {
@@ -230,13 +245,14 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
                 "uploaded_file": uploaded_file,
             }
         )
-    return expanded_files
+    return {"files": expanded_files, "warnings": warnings}
 
 
-def _extract_zip_entries(uploaded_file) -> list[dict]:
+def _extract_zip_entries(uploaded_file) -> dict:
     archive_bytes = uploaded_file.read()
     uploaded_file.seek(0)
     entries = []
+    warnings = []
     with ZipFile(BytesIO(archive_bytes)) as archive:
         for info in archive.infolist():
             if info.is_dir():
@@ -244,6 +260,7 @@ def _extract_zip_entries(uploaded_file) -> list[dict]:
             relative_path = info.filename.replace("\\", "/")
             extension = Path(relative_path).suffix.lower()
             if extension not in {".txt", ".md", ".pdf", ".docx"}:
+                warnings.append(f"跳过不支持的文件:{relative_path}")
                 continue
             file_data = archive.read(info.filename)
             extracted_file = SimpleUploadedFile(
@@ -256,10 +273,10 @@ def _extract_zip_entries(uploaded_file) -> list[dict]:
                     "uploaded_file": extracted_file,
                 }
             )
-    return entries
+    return {"files": entries, "warnings": warnings}
 
 
-def _extract_7z_entries(uploaded_file) -> list[dict]:
+def _extract_7z_entries(uploaded_file) -> dict:
     try:
         import py7zr
     except ImportError as exc:
@@ -268,6 +285,7 @@ def _extract_7z_entries(uploaded_file) -> list[dict]:
     archive_bytes = uploaded_file.read()
     uploaded_file.seek(0)
     entries = []
+    warnings = []
     with tempfile.TemporaryDirectory() as temp_dir:
         with py7zr.SevenZipFile(BytesIO(archive_bytes), mode="r") as archive:
             archive.extractall(path=temp_dir)
@@ -278,6 +296,7 @@ def _extract_7z_entries(uploaded_file) -> list[dict]:
             relative_path = file_path.relative_to(base_path).as_posix()
             extension = Path(relative_path).suffix.lower()
             if extension not in {".txt", ".md", ".pdf", ".docx"}:
+                warnings.append(f"跳过不支持的文件:{relative_path}")
                 continue
             extracted_file = SimpleUploadedFile(
                 file_path.name,
@@ -289,7 +308,7 @@ def _extract_7z_entries(uploaded_file) -> list[dict]:
                     "uploaded_file": extracted_file,
                 }
             )
-    return entries
+    return {"files": entries, "warnings": warnings}
 
 
 def _detect_document_role(file_name: str) -> str:
diff --git a/tests/test_documents.py b/tests/test_documents.py
index 565ca63..d12e8fd 100644
--- a/tests/test_documents.py
+++ b/tests/test_documents.py
@@ -334,3 +334,81 @@ def test_import_submission_batch_supports_7z_package_and_preserves_relative_path
         "CH1/注册申请表.txt",
         "CH1/目标产品说明书.txt",
     ]
+
+
+def test_import_submission_batch_records_warnings_for_unsupported_zip_entries(db):
+    archive = BytesIO()
+    with ZipFile(archive, "w") as zip_file:
+        zip_file.writestr("CH1/注册申请表.txt", "产品名称:产品A")
+        zip_file.writestr("CH1/忽略图片.png", b"binary-image-data")
+    archive.seek(0)
+    package = SimpleUploadedFile(
+        "registration-package.zip",
+        archive.read(),
+        content_type="application/zip",
+    )
+
+    result = import_submission_batch("document_review", [package])
+
+    batch = SubmissionBatch.objects.get(batch_id=result["batch_id"])
+    warnings = result["registration_overview_report"]["warnings"]
+    assert batch.file_count == 1
+    assert batch.exception_count == 1
+    assert "跳过不支持的文件" in warnings[0]
+    assert "CH1/忽略图片.png" in warnings[0]
+
+
+def test_import_submission_batch_marks_failed_when_zip_has_no_supported_files(db):
+    archive = BytesIO()
+    with ZipFile(archive, "w") as zip_file:
+        zip_file.writestr("assets/readme.png", b"binary-image-data")
+    archive.seek(0)
+    package = SimpleUploadedFile(
+        "empty-registration-package.zip",
+        archive.read(),
+        content_type="application/zip",
+    )
+
+    result = import_submission_batch("document_review", [package])
+
+    batch = SubmissionBatch.objects.get(batch_id=result["batch_id"])
+    warnings = result["registration_overview_report"]["warnings"]
+    assert batch.file_count == 0
+    assert batch.import_status == SubmissionBatch.STATUS_FAILED
+    assert batch.exception_count == len(warnings)
+    assert any("未发现可导入的支持文件" in warning for warning in warnings)
+
+
+def test_import_submission_batch_records_warnings_for_unsupported_7z_entries(db, monkeypatch):
+    package = SimpleUploadedFile(
+        "registration-package.7z",
+        b"fake-7z-bytes",
+        content_type="application/x-7z-compressed",
+    )
+
+    class FakeSevenZipFile:
+        def __init__(self, _file_obj, mode="r"):
+            self.mode = mode
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc, tb):
+            return False
+
+        def extractall(self, path):
+            target = Path(path)
+            (target / "CH1").mkdir(parents=True, exist_ok=True)
+            (target / "CH1" / "注册申请表.txt").write_text("产品名称:产品A", encoding="utf-8")
+            (target / "CH1" / "忽略图片.png").write_bytes(b"binary-image-data")
+
+    fake_module = types.SimpleNamespace(SevenZipFile=FakeSevenZipFile)
+    monkeypatch.setitem(sys.modules, "py7zr", fake_module)
+
+    result = import_submission_batch("document_review", [package])
+
+    batch = SubmissionBatch.objects.get(batch_id=result["batch_id"])
+    warnings = result["registration_overview_report"]["warnings"]
+    assert batch.file_count == 1
+    assert batch.exception_count == 1
+    assert any("CH1/忽略图片.png" in warning for warning in warnings)