From 0b7322aa65401f75c2ea791bd19bd7d48b1b391a Mon Sep 17 00:00:00 2001
From: bruce <sunzhiye01@outlook.com>
Date: Thu, 4 Jun 2026 01:37:11 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A2=9E=E5=BC=BA=E8=B5=84=E6=96=99?=
 =?UTF-8?q?=E5=8C=85=E5=8E=8B=E7=BC=A9=E5=AF=BC=E5=85=A5=E5=BC=82=E5=B8=B8?=
 =?UTF-8?q?=E6=8F=90=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 apps/documents/services.py | 43 +++++++++++++++------
 tests/test_documents.py    | 78 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+), 12 deletions(-)

diff --git a/apps/documents/services.py b/apps/documents/services.py
index 8cd6207..29862f6 100644
--- a/apps/documents/services.py
+++ b/apps/documents/services.py
@@ -57,8 +57,11 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
     candidates = []
     chapter_summary = {}
     total_pages = 0
+    warnings = []
 
-    expanded_files = _expand_uploaded_files(uploaded_files)
+    expanded_result = _expand_uploaded_files(uploaded_files)
+    expanded_files = expanded_result["files"]
+    warnings.extend(expanded_result["warnings"])
     for uploaded_item in expanded_files:
         uploaded_file = uploaded_item["uploaded_file"]
         relative_path = uploaded_item["relative_path"]
@@ -93,9 +96,13 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
         chapter_summary[chapter_key] = chapter_summary.get(chapter_key, 0) + 1
         candidates.extend(_extract_product_candidates(document.relative_path, text))
 
-    product_name, warnings = _select_product_name(candidates)
+    product_name, product_warnings = _select_product_name(candidates)
+    warnings.extend(product_warnings)
     conversation = create_conversation_for_batch(batch.batch_id, product_name)
 
+    if not documents:
+        warnings.append("未发现可导入的支持文件，请检查资料包格式或补充 PDF/DOCX/MD/TXT 文件。")
+
     batch.product_name = product_name
     batch.conversation_id = conversation.conversation_id
     batch.file_count = len(documents)
@@ -105,9 +112,12 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
         for chapter_code, count in sorted(chapter_summary.items())
     ]
     batch.exception_count = len(warnings)
-    batch.import_status = (
-        SubmissionBatch.STATUS_REVIEW_REQUIRED if warnings else SubmissionBatch.STATUS_COMPLETED
-    )
+    if not documents:
+        batch.import_status = SubmissionBatch.STATUS_FAILED
+    elif warnings:
+        batch.import_status = SubmissionBatch.STATUS_REVIEW_REQUIRED
+    else:
+        batch.import_status = SubmissionBatch.STATUS_COMPLETED
     batch.save(
         update_fields=[
             "product_name",
@@ -216,13 +226,23 @@ def _estimate_page_count(text: str) -> int:
 
-def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
+def _expand_uploaded_files(uploaded_files: list) -> dict:
+    """Flatten uploads into importable entries, unpacking .zip/.7z archives.
+
+    Returns a dict with "files" (the expanded entries) and "warnings"
+    (messages collected from the archive extractors).
+    """
     expanded_files = []
+    warnings = []
     for uploaded_file in uploaded_files:
         extension = Path(uploaded_file.name).suffix.lower()
         if extension == ".zip":
-            expanded_files.extend(_extract_zip_entries(uploaded_file))
+            extraction = _extract_zip_entries(uploaded_file)
+            expanded_files.extend(extraction["files"])
+            warnings.extend(extraction["warnings"])
             continue
         if extension == ".7z":
-            expanded_files.extend(_extract_7z_entries(uploaded_file))
+            extraction = _extract_7z_entries(uploaded_file)
+            expanded_files.extend(extraction["files"])
+            warnings.extend(extraction["warnings"])
             continue
         expanded_files.append(
             {
@@ -230,13 +245,14 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
                 "uploaded_file": uploaded_file,
             }
         )
-    return expanded_files
+    return {"files": expanded_files, "warnings": warnings}
 
 
-def _extract_zip_entries(uploaded_file) -> list[dict]:
+def _extract_zip_entries(uploaded_file) -> dict:
     archive_bytes = uploaded_file.read()
     uploaded_file.seek(0)
     entries = []
+    warnings = []
     with ZipFile(BytesIO(archive_bytes)) as archive:
         for info in archive.infolist():
             if info.is_dir():
@@ -244,6 +260,7 @@ def _extract_zip_entries(uploaded_file) -> list[dict]:
             relative_path = info.filename.replace("\\", "/")
             extension = Path(relative_path).suffix.lower()
             if extension not in {".txt", ".md", ".pdf", ".docx"}:
+                warnings.append(f"跳过不支持的文件：{relative_path}")
                 continue
             file_data = archive.read(info.filename)
             extracted_file = SimpleUploadedFile(
@@ -256,10 +273,10 @@ def _extract_zip_entries(uploaded_file) -> list[dict]:
                     "uploaded_file": extracted_file,
                 }
             )
-    return entries
+    return {"files": entries, "warnings": warnings}
 
 
-def _extract_7z_entries(uploaded_file) -> list[dict]:
+def _extract_7z_entries(uploaded_file) -> dict:
     try:
         import py7zr
     except ImportError as exc:
@@ -268,6 +285,7 @@ def _extract_7z_entries(uploaded_file) -> list[dict]:
     archive_bytes = uploaded_file.read()
     uploaded_file.seek(0)
     entries = []
+    warnings = []
     with tempfile.TemporaryDirectory() as temp_dir:
         with py7zr.SevenZipFile(BytesIO(archive_bytes), mode="r") as archive:
             archive.extractall(path=temp_dir)
@@ -278,6 +296,7 @@ def _extract_7z_entries(uploaded_file) -> list[dict]:
             relative_path = file_path.relative_to(base_path).as_posix()
             extension = Path(relative_path).suffix.lower()
             if extension not in {".txt", ".md", ".pdf", ".docx"}:
+                warnings.append(f"跳过不支持的文件：{relative_path}")
                 continue
             extracted_file = SimpleUploadedFile(
                 file_path.name,
@@ -289,7 +308,7 @@ def _extract_7z_entries(uploaded_file) -> list[dict]:
                     "uploaded_file": extracted_file,
                 }
             )
-    return entries
+    return {"files": entries, "warnings": warnings}
 
 
 def _detect_document_role(file_name: str) -> str:
diff --git a/tests/test_documents.py b/tests/test_documents.py
index 565ca63..d12e8fd 100644
--- a/tests/test_documents.py
+++ b/tests/test_documents.py
@@ -334,3 +334,86 @@ def test_import_submission_batch_supports_7z_package_and_preserves_relative_path
         "CH1/注册申请表.txt",
         "CH1/目标产品说明书.txt",
     ]
+
+
+def test_import_submission_batch_records_warnings_for_unsupported_zip_entries(db):
+    """Unsupported ZIP entries are skipped and reported as a warning."""
+    archive = BytesIO()
+    with ZipFile(archive, "w") as zip_file:
+        zip_file.writestr("CH1/注册申请表.txt", "产品名称：产品A")
+        zip_file.writestr("CH1/忽略图片.png", b"binary-image-data")
+    archive.seek(0)
+    package = SimpleUploadedFile(
+        "registration-package.zip",
+        archive.read(),
+        content_type="application/zip",
+    )
+
+    result = import_submission_batch("document_review", [package])
+
+    batch = SubmissionBatch.objects.get(batch_id=result["batch_id"])
+    warnings = result["registration_overview_report"]["warnings"]
+    assert batch.file_count == 1
+    assert batch.exception_count == 1
+    assert "跳过不支持的文件" in warnings[0]
+    assert "CH1/忽略图片.png" in warnings[0]
+
+
+def test_import_submission_batch_marks_failed_when_zip_has_no_supported_files(db):
+    """A ZIP with no supported files marks the batch FAILED and adds a warning."""
+    archive = BytesIO()
+    with ZipFile(archive, "w") as zip_file:
+        zip_file.writestr("assets/readme.png", b"binary-image-data")
+    archive.seek(0)
+    package = SimpleUploadedFile(
+        "empty-registration-package.zip",
+        archive.read(),
+        content_type="application/zip",
+    )
+
+    result = import_submission_batch("document_review", [package])
+
+    batch = SubmissionBatch.objects.get(batch_id=result["batch_id"])
+    warnings = result["registration_overview_report"]["warnings"]
+    assert batch.file_count == 0
+    assert batch.import_status == SubmissionBatch.STATUS_FAILED
+    assert batch.exception_count == len(warnings)
+    assert any("未发现可导入的支持文件" in warning for warning in warnings)
+
+
+def test_import_submission_batch_records_warnings_for_unsupported_7z_entries(db, monkeypatch):
+    """Unsupported 7z entries are reported as warnings (py7zr is faked)."""
+    package = SimpleUploadedFile(
+        "registration-package.7z",
+        b"fake-7z-bytes",
+        content_type="application/x-7z-compressed",
+    )
+
+    # Stand-in for py7zr.SevenZipFile whose extractall() writes one .txt and one .png file.
+    class FakeSevenZipFile:
+        def __init__(self, _file_obj, mode="r"):
+            self.mode = mode
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc, tb):
+            return False
+
+        def extractall(self, path):
+            target = Path(path)
+            (target / "CH1").mkdir(parents=True, exist_ok=True)
+            (target / "CH1" / "注册申请表.txt").write_text("产品名称：产品A", encoding="utf-8")
+            (target / "CH1" / "忽略图片.png").write_bytes(b"binary-image-data")
+
+    fake_module = types.SimpleNamespace(SevenZipFile=FakeSevenZipFile)
+    # Register the fake module under the name "py7zr" in sys.modules.
+    monkeypatch.setitem(sys.modules, "py7zr", fake_module)
+
+    result = import_submission_batch("document_review", [package])
+
+    batch = SubmissionBatch.objects.get(batch_id=result["batch_id"])
+    warnings = result["registration_overview_report"]["warnings"]
+    assert batch.file_count == 1
+    assert batch.exception_count == 1
+    assert any("CH1/忽略图片.png" in warning for warning in warnings)