refactor: 下沉资料包列表上下文到 documents 服务层

This commit is contained in:
2026-06-04 04:36:41 +08:00
parent a49524fd93
commit 80dc10ce6d
3 changed files with 82 additions and 47 deletions

View File

@@ -247,6 +247,50 @@ def build_batch_rows(batches) -> list[dict]:
return rows return rows
def build_document_list_context(*, keyword: str = "") -> dict:
    """
    Assemble the filtered results and display context for the batch list page.

    The view only reads query params; batch search, status statistics and
    exception aggregation are all handled here in the service layer.

    Args:
        keyword: Search text. When non-empty, only batches whose
            ``product_name`` or ``batch_id`` matches it via ``icontains``
            are kept. The document list is not filtered by it.

    Returns:
        The template context dict with the keys ``documents``, ``batches``,
        ``batch_rows``, ``keyword``, ``status_counts``,
        ``processing_pipeline`` and ``exception_items``.
    """
    batch_queryset = SubmissionBatch.objects.all()
    if keyword:
        # Union of the two single-field matches: a hit on either field keeps the batch.
        by_product_name = batch_queryset.filter(product_name__icontains=keyword)
        by_batch_id = batch_queryset.filter(batch_id__icontains=keyword)
        batch_queryset = by_product_name | by_batch_id

    # Materialize both querysets once so the counting and the helpers below
    # work on plain lists.
    batches = list(batch_queryset)
    documents = list(UploadedDocument.objects.all())

    # Context key -> import status it counts; insertion order fixes the key order.
    status_by_label = {
        "pending": SubmissionBatch.STATUS_PENDING,
        "completed": SubmissionBatch.STATUS_COMPLETED,
        "review_required": SubmissionBatch.STATUS_REVIEW_REQUIRED,
    }
    status_counts = {
        label: sum(1 for batch in batches if batch.import_status == status)
        for label, status in status_by_label.items()
    }
    status_counts["total"] = len(batches)

    # Static description of the import pipeline shown on the page.
    processing_pipeline = [
        {"title": "原始文件接收", "detail": "校验格式、大小和场景归属后保存原件。"},
        {"title": "文本与表格抽取", "detail": "按 PDF / DOCX / MD / TXT 使用不同解析策略。"},
        {"title": "页数统计与可信度评估", "detail": "对 Word 页数采用估算与可信度标记。"},
        {"title": "章节点归类", "detail": "基于文件名、标题和正文线索识别 CH 节点。"},
        {"title": "切片与索引入库", "detail": "生成知识切片,供 RAG、规则定位和审计引用使用。"},
    ]

    batch_rows = build_batch_rows(batches)
    exception_items = build_exception_items(batches, documents)

    return {
        "documents": documents,
        "batches": batches,
        "batch_rows": batch_rows,
        "keyword": keyword,
        "status_counts": status_counts,
        "processing_pipeline": processing_pipeline,
        "exception_items": exception_items,
    }
def build_exception_items(batches, documents) -> list[dict]: def build_exception_items(batches, documents) -> list[dict]:
""" """
聚合资料包页需要关注的异常提示。 聚合资料包页需要关注的异常提示。

View File

@@ -1,15 +1,13 @@
from django.contrib import messages from django.contrib import messages
from django.db.models import Q
from django.shortcuts import get_object_or_404, redirect, render from django.shortcuts import get_object_or_404, redirect, render
from django.views.decorators.http import require_POST from django.views.decorators.http import require_POST
from apps.scenarios.services import list_scenarios from apps.scenarios.services import list_scenarios
from .forms import DocumentUploadForm from .forms import DocumentUploadForm
from .models import SubmissionBatch, UploadedDocument from .models import UploadedDocument
from .services import ( from .services import (
build_batch_rows, build_document_list_context,
build_exception_items,
import_submission_batch, import_submission_batch,
index_document, index_document,
) )
@@ -17,48 +15,8 @@ from .services import (
def document_list(request): def document_list(request):
# 资料包页展示批次、会话绑定和关键异常,同时保留文档级明细便于演示。 # 资料包页展示批次、会话绑定和关键异常,同时保留文档级明细便于演示。
keyword = (request.GET.get("keyword") or "").strip() context = build_document_list_context(keyword=(request.GET.get("keyword") or "").strip())
batches = SubmissionBatch.objects.all() return render(request, "documents/document_list.html", context)
if keyword:
batches = batches.filter(
Q(product_name__icontains=keyword) | Q(batch_id__icontains=keyword)
)
batches = list(batches)
documents = list(UploadedDocument.objects.all())
status_counts = {
"pending": sum(
1 for batch in batches if batch.import_status == SubmissionBatch.STATUS_PENDING
),
"completed": sum(
1 for batch in batches if batch.import_status == SubmissionBatch.STATUS_COMPLETED
),
"review_required": sum(
1
for batch in batches
if batch.import_status == SubmissionBatch.STATUS_REVIEW_REQUIRED
),
"total": len(batches),
}
processing_pipeline = [
{"title": "原始文件接收", "detail": "校验格式、大小和场景归属后保存原件。"},
{"title": "文本与表格抽取", "detail": "按 PDF / DOCX / MD / TXT 使用不同解析策略。"},
{"title": "页数统计与可信度评估", "detail": "对 Word 页数采用估算与可信度标记。"},
{"title": "章节点归类", "detail": "基于文件名、标题和正文线索识别 CH 节点。"},
{"title": "切片与索引入库", "detail": "生成知识切片,供 RAG、规则定位和审计引用使用。"},
]
return render(
request,
"documents/document_list.html",
{
"documents": documents,
"batches": batches,
"batch_rows": build_batch_rows(batches),
"keyword": keyword,
"status_counts": status_counts,
"processing_pipeline": processing_pipeline,
"exception_items": build_exception_items(batches, documents),
},
)
def upload(request): def upload(request):

View File

@@ -9,7 +9,12 @@ from zipfile import ZipFile
from apps.documents.forms import DocumentUploadForm from apps.documents.forms import DocumentUploadForm
from apps.documents.models import ExportedDocument, SubmissionBatch, UploadedDocument from apps.documents.models import ExportedDocument, SubmissionBatch, UploadedDocument
from apps.documents.services import extract_text, import_submission_batch, index_document from apps.documents.services import (
build_document_list_context,
extract_text,
import_submission_batch,
index_document,
)
from apps.chat.models import Conversation from apps.chat.models import Conversation
@@ -246,6 +251,34 @@ def test_document_list_supports_batch_id_search(client, db):
assert "SUB-20260604-001" not in content assert "SUB-20260604-001" not in content
def test_build_document_list_context_filters_batches_by_keyword(db):
    """Only the batch matching the keyword should appear in the service context."""
    # Fields that are identical for both fixture batches.
    shared_fields = {"workflow_type": "registration"}

    matching_batch = SubmissionBatch.objects.create(
        batch_id="SUB-20260604-101",
        product_name="产品A",
        conversation_id="conv-101",
        file_count=2,
        page_count=12,
        import_status="completed",
        **shared_fields,
    )
    # Second batch exists only to prove the keyword filter excludes it.
    SubmissionBatch.objects.create(
        batch_id="SUB-20260604-102",
        product_name="产品B",
        conversation_id="conv-102",
        file_count=3,
        page_count=20,
        import_status="review_required",
        **shared_fields,
    )

    search_term = "产品A"
    context = build_document_list_context(keyword=search_term)

    assert context["keyword"] == search_term
    # Exactly one batch is returned, and it is the matching one.
    assert [batch.id for batch in context["batches"]] == [matching_batch.id]
    assert context["status_counts"]["total"] == 1
def test_import_submission_batch_marks_manual_review_when_product_names_conflict(db): def test_import_submission_batch_marks_manual_review_when_product_names_conflict(db):
files = [ files = [
SimpleUploadedFile( SimpleUploadedFile(