feat: 支持会话内补传资料并保持绑定
This commit is contained in:
@@ -53,51 +53,14 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
|
||||
workflow_type="registration",
|
||||
import_status=SubmissionBatch.STATUS_PROCESSING,
|
||||
)
|
||||
documents = []
|
||||
candidates = []
|
||||
chapter_summary = {}
|
||||
total_pages = 0
|
||||
warnings = []
|
||||
|
||||
expanded_result = _expand_uploaded_files(uploaded_files)
|
||||
expanded_files = expanded_result["files"]
|
||||
warnings.extend(expanded_result["warnings"])
|
||||
for uploaded_item in expanded_files:
|
||||
uploaded_file = uploaded_item["uploaded_file"]
|
||||
relative_path = uploaded_item["relative_path"]
|
||||
document = create_uploaded_document(
|
||||
scenario_id,
|
||||
uploaded_file,
|
||||
batch=batch,
|
||||
relative_path=relative_path,
|
||||
)
|
||||
text = extract_text(document)
|
||||
page_count = _estimate_page_count(text)
|
||||
document.page_count = page_count
|
||||
document.page_count_confidence = "estimated"
|
||||
document.document_role = _detect_document_role(document.relative_path)
|
||||
document.chapter_code = _detect_chapter_code(document.relative_path, text)
|
||||
document.chapter_match_status = "matched" if document.chapter_code else "unknown"
|
||||
document.needs_manual_review = not bool(document.chapter_code)
|
||||
document.save(
|
||||
update_fields=[
|
||||
"page_count",
|
||||
"page_count_confidence",
|
||||
"document_role",
|
||||
"chapter_code",
|
||||
"chapter_match_status",
|
||||
"needs_manual_review",
|
||||
"updated_at",
|
||||
]
|
||||
)
|
||||
documents.append(document)
|
||||
total_pages += page_count
|
||||
chapter_key = document.chapter_code or "UNCLASSIFIED"
|
||||
chapter_summary[chapter_key] = chapter_summary.get(chapter_key, 0) + 1
|
||||
candidates.extend(_extract_product_candidates(document.relative_path, text))
|
||||
|
||||
product_name, product_warnings = _select_product_name(candidates)
|
||||
warnings.extend(product_warnings)
|
||||
ingest_result = _ingest_files_into_batch(
|
||||
batch=batch,
|
||||
scenario_id=scenario_id,
|
||||
uploaded_files=uploaded_files,
|
||||
)
|
||||
documents = ingest_result["documents"]
|
||||
warnings = ingest_result["warnings"]
|
||||
product_name = ingest_result["product_name"]
|
||||
conversation = create_conversation_for_batch(batch.batch_id, product_name)
|
||||
|
||||
if not documents:
|
||||
@@ -106,11 +69,8 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
|
||||
batch.product_name = product_name
|
||||
batch.conversation_id = conversation.conversation_id
|
||||
batch.file_count = len(documents)
|
||||
batch.page_count = total_pages
|
||||
batch.chapter_summary = [
|
||||
{"chapter_code": chapter_code, "document_count": count}
|
||||
for chapter_code, count in sorted(chapter_summary.items())
|
||||
]
|
||||
batch.page_count = ingest_result["page_count"]
|
||||
batch.chapter_summary = ingest_result["chapter_summary"]
|
||||
batch.exception_count = len(warnings)
|
||||
if not documents:
|
||||
batch.import_status = SubmissionBatch.STATUS_FAILED
|
||||
@@ -155,6 +115,89 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
|
||||
}
|
||||
|
||||
|
||||
def append_documents_to_batch(
    scenario_id: str,
    batch: SubmissionBatch,
    uploaded_files: list,
) -> dict:
    """
    Append more uploaded files to an existing submission batch, keeping its conversation binding.

    This service only updates the Documents side of the data:
    - new files are attached to the original batch
    - ``conversation_id`` is left unchanged
    - if the batch had no product name yet, the new files may supply one
    - if the new files name a different product than the batch, the batch is
      moved to "review required"

    Args:
        scenario_id: Passed through to ``_ingest_files_into_batch``.
        batch: The existing batch that receives the additional documents.
        uploaded_files: The newly uploaded files.

    Returns:
        A dict with ``batch_id``, ``conversation_id``, ``product_name`` and a
        ``registration_overview_report`` describing every document of the batch
        together with the warnings produced by this upload.
    """
    ingest_result = _ingest_files_into_batch(
        batch=batch,
        scenario_id=scenario_id,
        uploaded_files=uploaded_files,
        keep_existing_product_name=True,
    )
    warnings = list(ingest_result["warnings"])
    # With keep_existing_product_name=True the helper returns every document of the
    # batch (ordered by id) under "documents", so the batch is not queried a second time.
    all_documents = list(ingest_result["documents"])

    # NOTE(review): status and exception_count below are derived only from the warnings
    # of this upload; warnings recorded by earlier imports of the same batch are not
    # carried over. Confirm that a clean follow-up upload is meant to reset them.
    if not all_documents:
        warnings.append("未发现可导入的支持文件,请检查资料包格式或补充 PDF/DOCX/MD/TXT 文件。")
        batch.import_status = SubmissionBatch.STATUS_FAILED
    elif warnings:
        batch.import_status = SubmissionBatch.STATUS_REVIEW_REQUIRED
    else:
        batch.import_status = SubmissionBatch.STATUS_COMPLETED

    batch.product_name = ingest_result["product_name"]
    batch.file_count = len(all_documents)
    batch.page_count = ingest_result["page_count"]
    batch.chapter_summary = ingest_result["chapter_summary"]
    batch.exception_count = len(warnings)
    batch.save(
        update_fields=[
            "product_name",
            "file_count",
            "page_count",
            "chapter_summary",
            "exception_count",
            "import_status",
            "updated_at",
        ]
    )

    if batch.conversation_id:
        from apps.chat.models import Conversation

        # Mirror the (possibly newly resolved) product name onto the bound conversation.
        conversation = Conversation.objects.filter(conversation_id=batch.conversation_id).first()
        if conversation:
            conversation.product_name = batch.product_name
            # Only retitle when there is a product name; otherwise keep the current title.
            if batch.product_name:
                conversation.title = batch.product_name
            conversation.save(update_fields=["product_name", "title", "updated_at"])

    return {
        "batch_id": batch.batch_id,
        "conversation_id": batch.conversation_id,
        "product_name": batch.product_name,
        "registration_overview_report": {
            "batch_id": batch.batch_id,
            "product_name": batch.product_name,
            "file_count": batch.file_count,
            "total_page_count": batch.page_count,
            "chapter_summary": batch.chapter_summary,
            "documents": [
                {
                    "document_id": document.id,
                    "original_name": document.original_name,
                    "chapter_code": document.chapter_code,
                    "page_count": document.page_count,
                    "document_role": document.document_role,
                }
                for document in all_documents
            ],
            "warnings": warnings,
        },
    }
|
||||
|
||||
|
||||
def extract_text(document: UploadedDocument) -> str:
|
||||
"""
|
||||
根据文档类型选择合适的文本抽取策略。
|
||||
@@ -248,6 +291,87 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
|
||||
return {"files": expanded_files, "warnings": warnings}
|
||||
|
||||
|
||||
def _ingest_files_into_batch(
    *,
    batch: SubmissionBatch,
    scenario_id: str,
    uploaded_files: list,
    keep_existing_product_name: bool = False,
) -> dict:
    """
    Store uploaded files as documents of ``batch`` and recompute batch-level statistics.

    Every expanded upload is persisted through ``create_uploaded_document``; its text is
    extracted and the derived page count, document role and chapter code are saved on
    the document. The page total and chapter summary are then computed over all
    documents attached to the batch, not only the ones added by this call.

    Args:
        batch: Batch the new documents are attached to.
        scenario_id: Passed through to ``create_uploaded_document``.
        uploaded_files: Raw uploads; expanded with ``_expand_uploaded_files`` first.
        keep_existing_product_name: When true and the batch already has a product
            name, that name is kept and differing names found in the new files only
            produce a warning. Otherwise the name is selected from the new files.

    Returns:
        A dict with the keys ``documents`` (all batch documents when
        ``keep_existing_product_name`` is true, else only the new ones),
        ``new_documents``, ``warnings``, ``product_name``, ``page_count`` and
        ``chapter_summary`` (list of ``{"chapter_code", "document_count"}`` dicts
        sorted by chapter code).
    """
    expanded_result = _expand_uploaded_files(uploaded_files)
    expanded_files = expanded_result["files"]
    warnings = list(expanded_result["warnings"])
    new_documents = []
    new_candidates = []

    for uploaded_item in expanded_files:
        uploaded_file = uploaded_item["uploaded_file"]
        relative_path = uploaded_item["relative_path"]
        document = create_uploaded_document(
            scenario_id,
            uploaded_file,
            batch=batch,
            relative_path=relative_path,
        )
        text = extract_text(document)
        document.page_count = _estimate_page_count(text)
        document.page_count_confidence = "estimated"
        document.document_role = _detect_document_role(document.relative_path)
        document.chapter_code = _detect_chapter_code(document.relative_path, text)
        document.chapter_match_status = "matched" if document.chapter_code else "unknown"
        # Documents without a detected chapter are flagged for manual review.
        document.needs_manual_review = not bool(document.chapter_code)
        document.save(
            update_fields=[
                "page_count",
                "page_count_confidence",
                "document_role",
                "chapter_code",
                "chapter_match_status",
                "needs_manual_review",
                "updated_at",
            ]
        )
        new_documents.append(document)
        new_candidates.extend(_extract_product_candidates(document.relative_path, text))

    # Re-read the batch so the statistics cover previously stored documents as well.
    all_documents = list(batch.documents.order_by("id"))
    chapter_summary = {}
    total_pages = 0
    for document in all_documents:
        # NOTE(review): presumably older documents can lack a page count; treat that
        # as 0 pages instead of failing the whole upload with a TypeError.
        total_pages += document.page_count or 0
        chapter_key = document.chapter_code or "UNCLASSIFIED"
        chapter_summary[chapter_key] = chapter_summary.get(chapter_key, 0) + 1

    product_name = batch.product_name
    if keep_existing_product_name and batch.product_name:
        # Keep the established name; differing names in the new files are only reported.
        conflict_names = {
            item["product_name"]
            for item in new_candidates
            if item["product_name"] != batch.product_name
        }
        if conflict_names:
            warnings.append(
                "新增文件与当前资料包产品名称不一致:"
                + " / ".join([batch.product_name, *sorted(conflict_names)])
            )
    else:
        product_name, product_warnings = _select_product_name(new_candidates)
        warnings.extend(product_warnings)
        if keep_existing_product_name and not product_name:
            # Nothing usable in the new files: fall back to the batch's stored value.
            product_name = batch.product_name

    return {
        "documents": all_documents if keep_existing_product_name else new_documents,
        "new_documents": new_documents,
        "warnings": warnings,
        "product_name": product_name,
        # Always the total over every document attached to the batch.
        "page_count": total_pages,
        "chapter_summary": [
            {"chapter_code": chapter_code, "document_count": count}
            for chapter_code, count in sorted(chapter_summary.items())
        ],
    }
|
||||
|
||||
|
||||
def _extract_zip_entries(uploaded_file) -> dict:
|
||||
archive_bytes = uploaded_file.read()
|
||||
uploaded_file.seek(0)
|
||||
|
||||
Reference in New Issue
Block a user