feat: 增强资料包压缩导入异常提示

2026-06-04 01:37:11 +08:00
parent e7e3202714
commit 0b7322aa65
2 changed files with 109 additions and 12 deletions
--- a/apps/documents/services.py
+++ b/apps/documents/services.py
@@ -57,8 +57,11 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
    candidates = []
    chapter_summary = {}
    total_pages = 0
+    warnings = []

-    expanded_files = _expand_uploaded_files(uploaded_files)
+    expanded_result = _expand_uploaded_files(uploaded_files)
+    expanded_files = expanded_result["files"]
+    warnings.extend(expanded_result["warnings"])
    for uploaded_item in expanded_files:
        uploaded_file = uploaded_item["uploaded_file"]
        relative_path = uploaded_item["relative_path"]
@@ -93,9 +96,13 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
        chapter_summary[chapter_key] = chapter_summary.get(chapter_key, 0) + 1
        candidates.extend(_extract_product_candidates(document.relative_path, text))

-    product_name, warnings = _select_product_name(candidates)
+    product_name, product_warnings = _select_product_name(candidates)
+    warnings.extend(product_warnings)
    conversation = create_conversation_for_batch(batch.batch_id, product_name)

+    if not documents:
+        warnings.append("未发现可导入的支持文件，请检查资料包格式或补充 PDF/DOCX/MD/TXT 文件。")
+
    batch.product_name = product_name
    batch.conversation_id = conversation.conversation_id
    batch.file_count = len(documents)
@@ -105,9 +112,12 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
        for chapter_code, count in sorted(chapter_summary.items())
    ]
    batch.exception_count = len(warnings)
-    batch.import_status = (
-        SubmissionBatch.STATUS_REVIEW_REQUIRED if warnings else SubmissionBatch.STATUS_COMPLETED
-    )
+    if not documents:
+        batch.import_status = SubmissionBatch.STATUS_FAILED
+    elif warnings:
+        batch.import_status = SubmissionBatch.STATUS_REVIEW_REQUIRED
+    else:
+        batch.import_status = SubmissionBatch.STATUS_COMPLETED
    batch.save(
        update_fields=[
            "product_name",
@@ -216,13 +226,18 @@ def _estimate_page_count(text: str) -> int:

 def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
    expanded_files = []
+    warnings = []
    for uploaded_file in uploaded_files:
        extension = Path(uploaded_file.name).suffix.lower()
        if extension == ".zip":
-            expanded_files.extend(_extract_zip_entries(uploaded_file))
+            extraction = _extract_zip_entries(uploaded_file)
+            expanded_files.extend(extraction["files"])
+            warnings.extend(extraction["warnings"])
            continue
        if extension == ".7z":
-            expanded_files.extend(_extract_7z_entries(uploaded_file))
+            extraction = _extract_7z_entries(uploaded_file)
+            expanded_files.extend(extraction["files"])
+            warnings.extend(extraction["warnings"])
            continue
        expanded_files.append(
            {
@@ -230,13 +245,14 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
                "uploaded_file": uploaded_file,
            }
        )
-    return expanded_files
+    return {"files": expanded_files, "warnings": warnings}


-def _extract_zip_entries(uploaded_file) -> list[dict]:
+def _extract_zip_entries(uploaded_file) -> dict:
    archive_bytes = uploaded_file.read()
    uploaded_file.seek(0)
    entries = []
+    warnings = []
    with ZipFile(BytesIO(archive_bytes)) as archive:
        for info in archive.infolist():
            if info.is_dir():
@@ -244,6 +260,7 @@ def _extract_zip_entries(uploaded_file) -> list[dict]:
            relative_path = info.filename.replace("\\", "/")
            extension = Path(relative_path).suffix.lower()
            if extension not in {".txt", ".md", ".pdf", ".docx"}:
+                warnings.append(f"跳过不支持的文件：{relative_path}")
                continue
            file_data = archive.read(info.filename)
            extracted_file = SimpleUploadedFile(
@@ -256,10 +273,10 @@ def _extract_zip_entries(uploaded_file) -> list[dict]:
                    "uploaded_file": extracted_file,
                }
            )
-    return entries
+    return {"files": entries, "warnings": warnings}


-def _extract_7z_entries(uploaded_file) -> list[dict]:
+def _extract_7z_entries(uploaded_file) -> dict:
    try:
        import py7zr
    except ImportError as exc:
@@ -268,6 +285,7 @@ def _extract_7z_entries(uploaded_file) -> list[dict]:
    archive_bytes = uploaded_file.read()
    uploaded_file.seek(0)
    entries = []
+    warnings = []
    with tempfile.TemporaryDirectory() as temp_dir:
        with py7zr.SevenZipFile(BytesIO(archive_bytes), mode="r") as archive:
            archive.extractall(path=temp_dir)
@@ -278,6 +296,7 @@ def _extract_7z_entries(uploaded_file) -> list[dict]:
            relative_path = file_path.relative_to(base_path).as_posix()
            extension = Path(relative_path).suffix.lower()
            if extension not in {".txt", ".md", ".pdf", ".docx"}:
+                warnings.append(f"跳过不支持的文件：{relative_path}")
                continue
            extracted_file = SimpleUploadedFile(
                file_path.name,
@@ -289,7 +308,7 @@ def _extract_7z_entries(uploaded_file) -> list[dict]:
                    "uploaded_file": extracted_file,
                }
            )
-    return entries
+    return {"files": entries, "warnings": warnings}


 def _detect_document_role(file_name: str) -> str: