from django.contrib import messages
from django.db.models import Q
from django.shortcuts import get_object_or_404, redirect, render
from django.views.decorators.http import require_POST

from apps.scenarios.services import list_scenarios

from .forms import DocumentUploadForm
from .models import SubmissionBatch, UploadedDocument
from .services import (
    build_batch_rows,
    build_exception_items,
    import_submission_batch,
    index_document,
)


def document_list(request):
    """Render the submission-batch overview page.

    Reads the optional ``keyword`` query parameter and, when it is non-empty
    after stripping, narrows the batches to those whose product name or batch
    id contains it (case-insensitive). Uploaded documents are always loaded
    unfiltered.
    """
    # The batch page shows batches, conversation bindings and key exceptions,
    # while keeping document-level detail available for demo purposes.
    keyword = (request.GET.get("keyword") or "").strip()

    batch_queryset = SubmissionBatch.objects.all()
    if keyword:
        matches_keyword = Q(product_name__icontains=keyword) | Q(
            batch_id__icontains=keyword
        )
        batch_queryset = batch_queryset.filter(matches_keyword)

    # Materialise both querysets once; everything below works on plain lists.
    batches = list(batch_queryset)
    documents = list(UploadedDocument.objects.all())

    def count_with_status(status):
        # Number of loaded batches whose import_status equals ``status``.
        return sum(1 for item in batches if item.import_status == status)

    status_counts = {
        "pending": count_with_status(SubmissionBatch.STATUS_PENDING),
        "completed": count_with_status(SubmissionBatch.STATUS_COMPLETED),
        "review_required": count_with_status(
            SubmissionBatch.STATUS_REVIEW_REQUIRED
        ),
        "total": len(batches),
    }

    # (title, detail) pairs describing the processing stages shown on the page.
    pipeline_steps = (
        ("原始文件接收", "校验格式、大小和场景归属后保存原件。"),
        ("文本与表格抽取", "按 PDF / DOCX / MD / TXT 使用不同解析策略。"),
        ("页数统计与可信度评估", "对 Word 页数采用估算与可信度标记。"),
        ("章节点归类", "基于文件名、标题和正文线索识别 CH 节点。"),
        ("切片与索引入库", "生成知识切片,供 RAG、规则定位和审计引用使用。"),
    )
    processing_pipeline = [
        {"title": step_title, "detail": step_detail}
        for step_title, step_detail in pipeline_steps
    ]

    context = {
        "documents": documents,
        "batches": batches,
        "batch_rows": build_batch_rows(batches),
        "keyword": keyword,
        "status_counts": status_counts,
        "processing_pipeline": processing_pipeline,
        "exception_items": build_exception_items(batches, documents),
    }
    return render(request, "documents/document_list.html", context)


def upload(request):
    """Show the upload form and, on a valid POST, import the submission batch.

    A valid POST calls ``import_submission_batch`` with the cleaned
    ``scenario_id`` and ``uploaded_files``, flashes a success message that
    includes the returned ``conversation_id``, and redirects to the list page.
    Any other request, or an invalid form, renders the upload page.
    """
    # On a successful upload, create the submission batch right away and bind
    # it to the main conversation.
    if request.method != "POST":
        form = DocumentUploadForm()
    else:
        form = DocumentUploadForm(request.POST, request.FILES)
        if form.is_valid():
            cleaned = form.cleaned_data
            result = import_submission_batch(
                cleaned["scenario_id"],
                cleaned["uploaded_files"],
            )
            messages.success(
                request,
                f"资料包已导入,已绑定会话 {result['conversation_id']}。",
            )
            return redirect("documents:list")

    context = {
        "form": form,
        "scenarios": list_scenarios(),
        "upload_checks": [
            "文件格式支持 PDF、DOCX、MD、TXT、ZIP、7Z 与 RAR 资料包",
            "业务资料与法规依据资料需分开归属",
            "支持一次上传多份文件并归并到同一个资料包",
            "目录类文件会优先参与完整性校验",
            "上传完成后建议立即进入解析与入库流程",
        ],
    }
    return render(request, "documents/upload.html", context)


@require_POST
def index(request, document_id: int):
    """Index one uploaded document and report the outcome via a flash message.

    Responds with 404 when no ``UploadedDocument`` has the given primary key.
    Always redirects to the document list afterwards.
    """
    document = index_document(
        get_object_or_404(UploadedDocument, pk=document_id)
    )

    if document.status != UploadedDocument.STATUS_INDEXED:
        messages.error(request, "文档入库失败,请检查错误原因后重试。")
    else:
        messages.success(request, "文档入库成功,当前文档已可参与检索。")

    return redirect("documents:list")