feat: 增强资料包压缩导入异常提示
This commit is contained in:
@@ -57,8 +57,11 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
|
|||||||
candidates = []
|
candidates = []
|
||||||
chapter_summary = {}
|
chapter_summary = {}
|
||||||
total_pages = 0
|
total_pages = 0
|
||||||
|
warnings = []
|
||||||
|
|
||||||
expanded_files = _expand_uploaded_files(uploaded_files)
|
expanded_result = _expand_uploaded_files(uploaded_files)
|
||||||
|
expanded_files = expanded_result["files"]
|
||||||
|
warnings.extend(expanded_result["warnings"])
|
||||||
for uploaded_item in expanded_files:
|
for uploaded_item in expanded_files:
|
||||||
uploaded_file = uploaded_item["uploaded_file"]
|
uploaded_file = uploaded_item["uploaded_file"]
|
||||||
relative_path = uploaded_item["relative_path"]
|
relative_path = uploaded_item["relative_path"]
|
||||||
@@ -93,9 +96,13 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
|
|||||||
chapter_summary[chapter_key] = chapter_summary.get(chapter_key, 0) + 1
|
chapter_summary[chapter_key] = chapter_summary.get(chapter_key, 0) + 1
|
||||||
candidates.extend(_extract_product_candidates(document.relative_path, text))
|
candidates.extend(_extract_product_candidates(document.relative_path, text))
|
||||||
|
|
||||||
product_name, warnings = _select_product_name(candidates)
|
product_name, product_warnings = _select_product_name(candidates)
|
||||||
|
warnings.extend(product_warnings)
|
||||||
conversation = create_conversation_for_batch(batch.batch_id, product_name)
|
conversation = create_conversation_for_batch(batch.batch_id, product_name)
|
||||||
|
|
||||||
|
if not documents:
|
||||||
|
warnings.append("未发现可导入的支持文件,请检查资料包格式或补充 PDF/DOCX/MD/TXT 文件。")
|
||||||
|
|
||||||
batch.product_name = product_name
|
batch.product_name = product_name
|
||||||
batch.conversation_id = conversation.conversation_id
|
batch.conversation_id = conversation.conversation_id
|
||||||
batch.file_count = len(documents)
|
batch.file_count = len(documents)
|
||||||
@@ -105,9 +112,12 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
|
|||||||
for chapter_code, count in sorted(chapter_summary.items())
|
for chapter_code, count in sorted(chapter_summary.items())
|
||||||
]
|
]
|
||||||
batch.exception_count = len(warnings)
|
batch.exception_count = len(warnings)
|
||||||
batch.import_status = (
|
if not documents:
|
||||||
SubmissionBatch.STATUS_REVIEW_REQUIRED if warnings else SubmissionBatch.STATUS_COMPLETED
|
batch.import_status = SubmissionBatch.STATUS_FAILED
|
||||||
)
|
elif warnings:
|
||||||
|
batch.import_status = SubmissionBatch.STATUS_REVIEW_REQUIRED
|
||||||
|
else:
|
||||||
|
batch.import_status = SubmissionBatch.STATUS_COMPLETED
|
||||||
batch.save(
|
batch.save(
|
||||||
update_fields=[
|
update_fields=[
|
||||||
"product_name",
|
"product_name",
|
||||||
@@ -216,13 +226,18 @@ def _estimate_page_count(text: str) -> int:
|
|||||||
|
|
||||||
def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
|
def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
|
||||||
expanded_files = []
|
expanded_files = []
|
||||||
|
warnings = []
|
||||||
for uploaded_file in uploaded_files:
|
for uploaded_file in uploaded_files:
|
||||||
extension = Path(uploaded_file.name).suffix.lower()
|
extension = Path(uploaded_file.name).suffix.lower()
|
||||||
if extension == ".zip":
|
if extension == ".zip":
|
||||||
expanded_files.extend(_extract_zip_entries(uploaded_file))
|
extraction = _extract_zip_entries(uploaded_file)
|
||||||
|
expanded_files.extend(extraction["files"])
|
||||||
|
warnings.extend(extraction["warnings"])
|
||||||
continue
|
continue
|
||||||
if extension == ".7z":
|
if extension == ".7z":
|
||||||
expanded_files.extend(_extract_7z_entries(uploaded_file))
|
extraction = _extract_7z_entries(uploaded_file)
|
||||||
|
expanded_files.extend(extraction["files"])
|
||||||
|
warnings.extend(extraction["warnings"])
|
||||||
continue
|
continue
|
||||||
expanded_files.append(
|
expanded_files.append(
|
||||||
{
|
{
|
||||||
@@ -230,13 +245,14 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
|
|||||||
"uploaded_file": uploaded_file,
|
"uploaded_file": uploaded_file,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return expanded_files
|
return {"files": expanded_files, "warnings": warnings}
|
||||||
|
|
||||||
|
|
||||||
def _extract_zip_entries(uploaded_file) -> list[dict]:
|
def _extract_zip_entries(uploaded_file) -> dict:
|
||||||
archive_bytes = uploaded_file.read()
|
archive_bytes = uploaded_file.read()
|
||||||
uploaded_file.seek(0)
|
uploaded_file.seek(0)
|
||||||
entries = []
|
entries = []
|
||||||
|
warnings = []
|
||||||
with ZipFile(BytesIO(archive_bytes)) as archive:
|
with ZipFile(BytesIO(archive_bytes)) as archive:
|
||||||
for info in archive.infolist():
|
for info in archive.infolist():
|
||||||
if info.is_dir():
|
if info.is_dir():
|
||||||
@@ -244,6 +260,7 @@ def _extract_zip_entries(uploaded_file) -> list[dict]:
|
|||||||
relative_path = info.filename.replace("\\", "/")
|
relative_path = info.filename.replace("\\", "/")
|
||||||
extension = Path(relative_path).suffix.lower()
|
extension = Path(relative_path).suffix.lower()
|
||||||
if extension not in {".txt", ".md", ".pdf", ".docx"}:
|
if extension not in {".txt", ".md", ".pdf", ".docx"}:
|
||||||
|
warnings.append(f"跳过不支持的文件:{relative_path}")
|
||||||
continue
|
continue
|
||||||
file_data = archive.read(info.filename)
|
file_data = archive.read(info.filename)
|
||||||
extracted_file = SimpleUploadedFile(
|
extracted_file = SimpleUploadedFile(
|
||||||
@@ -256,10 +273,10 @@ def _extract_zip_entries(uploaded_file) -> list[dict]:
|
|||||||
"uploaded_file": extracted_file,
|
"uploaded_file": extracted_file,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return entries
|
return {"files": entries, "warnings": warnings}
|
||||||
|
|
||||||
|
|
||||||
def _extract_7z_entries(uploaded_file) -> list[dict]:
|
def _extract_7z_entries(uploaded_file) -> dict:
|
||||||
try:
|
try:
|
||||||
import py7zr
|
import py7zr
|
||||||
except ImportError as exc:
|
except ImportError as exc:
|
||||||
@@ -268,6 +285,7 @@ def _extract_7z_entries(uploaded_file) -> list[dict]:
|
|||||||
archive_bytes = uploaded_file.read()
|
archive_bytes = uploaded_file.read()
|
||||||
uploaded_file.seek(0)
|
uploaded_file.seek(0)
|
||||||
entries = []
|
entries = []
|
||||||
|
warnings = []
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
with py7zr.SevenZipFile(BytesIO(archive_bytes), mode="r") as archive:
|
with py7zr.SevenZipFile(BytesIO(archive_bytes), mode="r") as archive:
|
||||||
archive.extractall(path=temp_dir)
|
archive.extractall(path=temp_dir)
|
||||||
@@ -278,6 +296,7 @@ def _extract_7z_entries(uploaded_file) -> list[dict]:
|
|||||||
relative_path = file_path.relative_to(base_path).as_posix()
|
relative_path = file_path.relative_to(base_path).as_posix()
|
||||||
extension = Path(relative_path).suffix.lower()
|
extension = Path(relative_path).suffix.lower()
|
||||||
if extension not in {".txt", ".md", ".pdf", ".docx"}:
|
if extension not in {".txt", ".md", ".pdf", ".docx"}:
|
||||||
|
warnings.append(f"跳过不支持的文件:{relative_path}")
|
||||||
continue
|
continue
|
||||||
extracted_file = SimpleUploadedFile(
|
extracted_file = SimpleUploadedFile(
|
||||||
file_path.name,
|
file_path.name,
|
||||||
@@ -289,7 +308,7 @@ def _extract_7z_entries(uploaded_file) -> list[dict]:
|
|||||||
"uploaded_file": extracted_file,
|
"uploaded_file": extracted_file,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return entries
|
return {"files": entries, "warnings": warnings}
|
||||||
|
|
||||||
|
|
||||||
def _detect_document_role(file_name: str) -> str:
|
def _detect_document_role(file_name: str) -> str:
|
||||||
|
|||||||
@@ -334,3 +334,81 @@ def test_import_submission_batch_supports_7z_package_and_preserves_relative_path
|
|||||||
"CH1/注册申请表.txt",
|
"CH1/注册申请表.txt",
|
||||||
"CH1/目标产品说明书.txt",
|
"CH1/目标产品说明书.txt",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_import_submission_batch_records_warnings_for_unsupported_zip_entries(db):
    """A ZIP with one supported and one unsupported entry imports one file and records one warning."""
    # Archive members, written in this order: a .txt document followed by a
    # .png whose extension the importer is expected to skip.
    zip_members = (
        ("CH1/注册申请表.txt", "产品名称:产品A"),
        ("CH1/忽略图片.png", b"binary-image-data"),
    )
    buffer = BytesIO()
    with ZipFile(buffer, "w") as bundle:
        for member_name, payload in zip_members:
            bundle.writestr(member_name, payload)
    upload = SimpleUploadedFile(
        "registration-package.zip",
        buffer.getvalue(),
        content_type="application/zip",
    )

    outcome = import_submission_batch("document_review", [upload])

    stored_batch = SubmissionBatch.objects.get(batch_id=outcome["batch_id"])
    reported = outcome["registration_overview_report"]["warnings"]
    # Counts are checked first so an empty warning list fails on the count,
    # not on the index lookup below.
    assert stored_batch.file_count == 1
    assert stored_batch.exception_count == 1
    skip_notice = reported[0]
    assert "跳过不支持的文件" in skip_notice
    assert "CH1/忽略图片.png" in skip_notice
|
||||||
|
|
||||||
|
|
||||||
|
def test_import_submission_batch_marks_failed_when_zip_has_no_supported_files(db):
    """A ZIP holding only an unsupported entry produces a failed batch with a 'no supported files' warning."""
    buffer = BytesIO()
    with ZipFile(buffer, "w") as bundle:
        # The only member is a .png, so nothing in the package is importable.
        bundle.writestr("assets/readme.png", b"binary-image-data")
    upload = SimpleUploadedFile(
        "empty-registration-package.zip",
        buffer.getvalue(),
        content_type="application/zip",
    )

    outcome = import_submission_batch("document_review", [upload])

    stored_batch = SubmissionBatch.objects.get(batch_id=outcome["batch_id"])
    reported = outcome["registration_overview_report"]["warnings"]
    assert stored_batch.file_count == 0
    assert stored_batch.import_status == SubmissionBatch.STATUS_FAILED
    # The persisted exception count must match the warnings returned to the caller.
    assert stored_batch.exception_count == len(reported)
    assert any("未发现可导入的支持文件" in message for message in reported)
|
||||||
|
|
||||||
|
|
||||||
|
def test_import_submission_batch_records_warnings_for_unsupported_7z_entries(db, monkeypatch):
    """A .7z upload, extracted via a stubbed py7zr, imports one file and warns about the skipped .png."""
    upload = SimpleUploadedFile(
        "registration-package.7z",
        b"fake-7z-bytes",
        content_type="application/x-7z-compressed",
    )

    class StubSevenZipFile:
        """Stand-in for py7zr.SevenZipFile that writes fixed files instead of decoding an archive."""

        def __init__(self, _stream, mode="r"):
            self.mode = mode

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            # Returning False lets any exception raised inside the with-block propagate.
            return False

        def extractall(self, path):
            chapter_dir = Path(path) / "CH1"
            chapter_dir.mkdir(parents=True, exist_ok=True)
            (chapter_dir / "注册申请表.txt").write_text("产品名称:产品A", encoding="utf-8")
            (chapter_dir / "忽略图片.png").write_bytes(b"binary-image-data")

    # Register the stub under the module name "py7zr" so an `import py7zr`
    # executed during the import call resolves to it.
    monkeypatch.setitem(
        sys.modules,
        "py7zr",
        types.SimpleNamespace(SevenZipFile=StubSevenZipFile),
    )

    outcome = import_submission_batch("document_review", [upload])

    stored_batch = SubmissionBatch.objects.get(batch_id=outcome["batch_id"])
    reported = outcome["registration_overview_report"]["warnings"]
    assert stored_batch.file_count == 1
    assert stored_batch.exception_count == 1
    assert any("CH1/忽略图片.png" in message for message in reported)
|
||||||
|
|||||||
Reference in New Issue
Block a user