def build_document_list_context(*, keyword: str = "") -> dict:
    """
    Assemble the filtered batches and display context for the document package list page.

    The view is only responsible for reading query params; batch search,
    status statistics and exception aggregation are centralised here in the
    service layer.

    Args:
        keyword: Optional search term. When non-empty, only batches whose
            ``product_name`` or ``batch_id`` matches it through Django's
            ``icontains`` lookup are kept. Uploaded documents are never
            filtered by the keyword.

    Returns:
        A dict with the keys ``documents``, ``batches``, ``batch_rows``,
        ``keyword``, ``status_counts``, ``processing_pipeline`` and
        ``exception_items``.
    """
    # Function-scope import so the block does not depend on the module's
    # import section, which is not visible from here.
    from collections import Counter

    batches = SubmissionBatch.objects.all()
    if keyword:
        # OR-combine two single-field lookups so a hit on either the product
        # name or the batch id keeps the batch.
        batches = batches.filter(product_name__icontains=keyword) | batches.filter(
            batch_id__icontains=keyword
        )
    # Materialise once: the same list feeds the counts, the rows and the
    # exception aggregation below.
    batches = list(batches)
    documents = list(UploadedDocument.objects.all())

    # Single pass over the batches instead of one full scan per status.
    # Counter returns 0 for a status that never occurs.
    status_tally = Counter(batch.import_status for batch in batches)
    status_counts = {
        "pending": status_tally[SubmissionBatch.STATUS_PENDING],
        "completed": status_tally[SubmissionBatch.STATUS_COMPLETED],
        "review_required": status_tally[SubmissionBatch.STATUS_REVIEW_REQUIRED],
        "total": len(batches),
    }

    return {
        "documents": documents,
        "batches": batches,
        "batch_rows": build_batch_rows(batches),
        "keyword": keyword,
        "status_counts": status_counts,
        # Static description of the import pipeline; built fresh on every call
        # so callers never share a mutable list.
        "processing_pipeline": [
            {"title": "原始文件接收", "detail": "校验格式、大小和场景归属后保存原件。"},
            {"title": "文本与表格抽取", "detail": "按 PDF / DOCX / MD / TXT 使用不同解析策略。"},
            {"title": "页数统计与可信度评估", "detail": "对 Word 页数采用估算与可信度标记。"},
            {"title": "章节点归类", "detail": "基于文件名、标题和正文线索识别 CH 节点。"},
            {"title": "切片与索引入库", "detail": "生成知识切片,供 RAG、规则定位和审计引用使用。"},
        ],
        "exception_items": build_exception_items(batches, documents),
    }
def document_list(request):
    """
    Render the document package list page.

    The page shows batches, conversation bindings and key exceptions, while
    keeping document-level details available for demos. The view only reads
    the ``keyword`` query parameter; the context itself is built by
    ``build_document_list_context``.
    """
    # A missing or empty parameter is normalised to "" before trimming.
    raw_keyword = request.GET.get("keyword") or ""
    page_context = build_document_list_context(keyword=raw_keyword.strip())
    return render(
        request=request,
        template_name="documents/document_list.html",
        context=page_context,
    )
def test_build_document_list_context_filters_batches_by_keyword(db):
    """Only the batch whose product name matches the keyword is returned and counted."""
    # Batch expected to match the keyword.
    matching_batch = SubmissionBatch.objects.create(
        batch_id="SUB-20260604-101",
        product_name="产品A",
        workflow_type="registration",
        conversation_id="conv-101",
        file_count=2,
        page_count=12,
        import_status="completed",
    )
    # Second batch that must be filtered out by the keyword.
    SubmissionBatch.objects.create(
        batch_id="SUB-20260604-102",
        product_name="产品B",
        workflow_type="registration",
        conversation_id="conv-102",
        file_count=3,
        page_count=20,
        import_status="review_required",
    )

    result = build_document_list_context(keyword="产品A")
    filtered_batches = result["batches"]

    assert result["keyword"] == "产品A"
    assert len(filtered_batches) == 1
    assert filtered_batches[0].id == matching_batch.id
    assert result["status_counts"]["total"] == 1