106 lines
4.0 KiB
Python
106 lines
4.0 KiB
Python
from django.contrib import messages
|
|
from django.db.models import Q
|
|
from django.shortcuts import get_object_or_404, redirect, render
|
|
from django.views.decorators.http import require_POST
|
|
|
|
from apps.scenarios.services import list_scenarios
|
|
|
|
from .forms import DocumentUploadForm
|
|
from .models import SubmissionBatch, UploadedDocument
|
|
from .services import (
|
|
build_batch_rows,
|
|
build_exception_items,
|
|
import_submission_batch,
|
|
index_document,
|
|
)
|
|
|
|
|
|
def document_list(request):
    """Render the submission-package overview page.

    The page shows batches (optionally narrowed by the ``keyword`` query
    parameter), per-status batch counts, the static processing pipeline
    description and the exception items derived from batches and documents.
    The document-level list is kept in the context as well so the detail
    view remains available for demos.
    """
    keyword = (request.GET.get("keyword") or "").strip()

    batch_queryset = SubmissionBatch.objects.all()
    if keyword:
        # Match the keyword against either the product name or the batch id.
        name_match = Q(product_name__icontains=keyword)
        id_match = Q(batch_id__icontains=keyword)
        batch_queryset = batch_queryset.filter(name_match | id_match)
    batches = list(batch_queryset)

    # Documents are not narrowed by the keyword; every document is listed.
    documents = list(UploadedDocument.objects.all())

    def count_by_status(status):
        """Return how many of the loaded batches have the given import status."""
        return len([batch for batch in batches if batch.import_status == status])

    status_counts = {
        "pending": count_by_status(SubmissionBatch.STATUS_PENDING),
        "completed": count_by_status(SubmissionBatch.STATUS_COMPLETED),
        "review_required": count_by_status(SubmissionBatch.STATUS_REVIEW_REQUIRED),
        "total": len(batches),
    }

    # Static description of the processing stages shown on the page.
    pipeline_stages = (
        ("原始文件接收", "校验格式、大小和场景归属后保存原件。"),
        ("文本与表格抽取", "按 PDF / DOCX / MD / TXT 使用不同解析策略。"),
        ("页数统计与可信度评估", "对 Word 页数采用估算与可信度标记。"),
        ("章节点归类", "基于文件名、标题和正文线索识别 CH 节点。"),
        ("切片与索引入库", "生成知识切片,供 RAG、规则定位和审计引用使用。"),
    )
    processing_pipeline = [
        {"title": title, "detail": detail} for title, detail in pipeline_stages
    ]

    context = {
        "documents": documents,
        "batches": batches,
        "batch_rows": build_batch_rows(batches),
        "keyword": keyword,
        "status_counts": status_counts,
        "processing_pipeline": processing_pipeline,
        "exception_items": build_exception_items(batches, documents),
    }
    return render(request, "documents/document_list.html", context)
|
|
|
|
|
|
def upload(request):
    """Handle the package upload form.

    On a valid POST the uploaded files are imported as a submission batch,
    a success message naming the bound conversation id is queued, and the
    user is redirected to the document list. On GET, or when the submitted
    form is invalid, the upload page is rendered with the form, the
    available scenarios and the upload checklist.
    """
    upload_checks = [
        "文件格式支持 PDF、DOCX、MD、TXT、ZIP、7Z 与 RAR 资料包",
        "业务资料与法规依据资料需分开归属",
        "支持一次上传多份文件并归并到同一个资料包",
        "目录类文件会优先参与完整性校验",
        "上传完成后建议立即进入解析与入库流程",
    ]

    def render_form(bound_form):
        """Render the upload page around the given form instance."""
        page_context = {
            "form": bound_form,
            "scenarios": list_scenarios(),
            "upload_checks": upload_checks,
        }
        return render(request, "documents/upload.html", page_context)

    if request.method != "POST":
        # Plain page visit: show an empty, unbound form.
        return render_form(DocumentUploadForm())

    form = DocumentUploadForm(request.POST, request.FILES)
    if not form.is_valid():
        # Re-render with the bound form so validation errors are displayed.
        return render_form(form)

    # A successful upload creates the batch and binds it to a conversation.
    cleaned = form.cleaned_data
    result = import_submission_batch(cleaned["scenario_id"], cleaned["uploaded_files"])
    messages.success(
        request,
        f"资料包已导入,已绑定会话 {result['conversation_id']}。",
    )
    return redirect("documents:list")
|
|
|
|
|
|
@require_POST
def index(request, document_id: int):
    """Index a single uploaded document and report the outcome.

    Looks the document up by primary key (404 when it does not exist),
    passes it to ``index_document`` and queues a success or error message
    depending on the resulting status, then redirects to the document list.
    """
    target = get_object_or_404(UploadedDocument, pk=document_id)
    indexed = index_document(target)

    list_page = "documents:list"
    if indexed.status == UploadedDocument.STATUS_INDEXED:
        messages.success(request, "文档入库成功,当前文档已可参与检索。")
        return redirect(list_page)

    # Any status other than "indexed" is reported to the user as a failure.
    messages.error(request, "文档入库失败,请检查错误原因后重试。")
    return redirect(list_page)
|