fix(knowledge-base): 停用文档时同步清理索引

This commit is contained in:
2026-06-09 08:22:57 +08:00
parent 18548eb78f
commit 42187bf8e9
2 changed files with 86 additions and 3 deletions

View File

@@ -153,6 +153,7 @@ def create_document_from_upload(
def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) -> KnowledgeBaseDocument:
update_fields = []
active_changed = False
if "display_name" in payload:
document.display_name = str(payload.get("display_name") or "").strip() or document.original_name
update_fields.append("display_name")
@@ -160,12 +161,21 @@ def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) ->
document.description = str(payload.get("description") or "").strip()
update_fields.append("description")
if "is_active" in payload:
document.is_active = bool(payload.get("is_active"))
document.status = KnowledgeBaseDocument.Status.ACTIVE if document.is_active else KnowledgeBaseDocument.Status.DISABLED
next_is_active = bool(payload.get("is_active"))
active_changed = document.is_active != next_is_active
document.is_active = next_is_active
document.status = KnowledgeBaseDocument.Status.ACTIVE if next_is_active else KnowledgeBaseDocument.Status.DISABLED
update_fields.extend(["is_active", "status"])
if not next_is_active:
remove_managed_document_from_index(document)
document.indexed_chunk_count = 0
document.metadata = {**(document.metadata or {}), "index_status": "disabled", "index_error": ""}
update_fields.extend(["indexed_chunk_count", "metadata"])
if update_fields:
update_fields.append("updated_at")
document.save(update_fields=update_fields)
if active_changed and document.is_active:
index_managed_document(document)
return document
@@ -198,6 +208,12 @@ def serialize_document(document: KnowledgeBaseDocument) -> dict[str, Any]:
def index_managed_document(document: KnowledgeBaseDocument) -> int:
if document.status != KnowledgeBaseDocument.Status.ACTIVE or not document.is_active:
remove_managed_document_from_index(document)
document.indexed_chunk_count = 0
document.metadata = {**(document.metadata or {}), "index_status": "disabled", "index_error": ""}
document.save(update_fields=["indexed_chunk_count", "metadata", "updated_at"])
return 0
path = Path(document.storage_path)
if not path.is_absolute():
path = Path(settings.MEDIA_ROOT) / document.storage_path

View File

@@ -2,7 +2,13 @@ import pytest
from django.core.files.uploadedfile import SimpleUploadedFile
from django.urls import reverse
from review_agent.knowledge_base import build_knowledge_base_context, delete_document, search_knowledge_base
from review_agent.knowledge_base import (
build_knowledge_base_context,
delete_document,
index_managed_document,
search_knowledge_base,
update_document,
)
from review_agent.views import rebuild_knowledge_base_index
from review_agent.models import KnowledgeBaseDocument
@@ -232,6 +238,67 @@ def test_delete_document_removes_managed_chunks_from_index(monkeypatch, django_u
assert deleted_filters == [{"document_id": document.pk}]
def test_disabling_document_removes_managed_chunks_from_index(monkeypatch, django_user_model):
    """Disabling a document through ``update_document`` also clears its index entries.

    Starts from an active document that reports seven indexed chunks, swaps the
    Chroma collection loader for a recording fake, then disables the document
    and checks both the persisted row and the delete filter sent to the fake.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    document_fields = {
        "user": owner,
        "display_name": "孙之烨简历",
        "original_name": "孙之烨-260510.pdf",
        "storage_path": "knowledge_base/resume.pdf",
        "file_size": 1,
        "status": KnowledgeBaseDocument.Status.ACTIVE,
        "is_active": True,
        "indexed_chunk_count": 7,
        "metadata": {"index_status": "indexed", "index_error": ""},
    }
    document = KnowledgeBaseDocument.objects.create(**document_fields)

    # Every ``where`` filter handed to the fake collection is captured here.
    recorded_filters = []

    class RecordingCollection:
        def delete(self, where):
            recorded_filters.append(where)

    def load_fake_collection():
        return RecordingCollection()

    monkeypatch.setattr("review_agent.knowledge_base._load_chroma_collection", load_fake_collection)

    update_document(document, {"is_active": False})

    # Reload so the assertions look at what was actually saved.
    document.refresh_from_db()
    assert document.status == KnowledgeBaseDocument.Status.DISABLED
    assert document.is_active is False
    assert document.indexed_chunk_count == 0
    assert document.metadata["index_status"] == "disabled"
    assert recorded_filters == [{"document_id": document.pk}]
def test_inactive_document_manual_index_clears_existing_chunks(monkeypatch, django_user_model):
    """Indexing an already-disabled document removes its chunks instead of indexing.

    The document is stored as disabled but still reports seven indexed chunks.
    Calling ``index_managed_document`` on it is expected to return 0, reset the
    stored chunk count and index status, and send one delete filter to the
    (fake) Chroma collection.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    document_fields = {
        "user": owner,
        "display_name": "孙之烨简历",
        "original_name": "孙之烨-260510.pdf",
        "storage_path": "knowledge_base/resume.pdf",
        "file_size": 1,
        "status": KnowledgeBaseDocument.Status.DISABLED,
        "is_active": False,
        "indexed_chunk_count": 7,
        "metadata": {"index_status": "indexed", "index_error": ""},
    }
    document = KnowledgeBaseDocument.objects.create(**document_fields)

    # Every ``where`` filter handed to the fake collection is captured here.
    recorded_filters = []

    class RecordingCollection:
        def delete(self, where):
            recorded_filters.append(where)

    def load_fake_collection():
        return RecordingCollection()

    monkeypatch.setattr("review_agent.knowledge_base._load_chroma_collection", load_fake_collection)

    returned_chunk_count = index_managed_document(document)

    # Reload so the assertions look at what was actually saved.
    document.refresh_from_db()
    assert returned_chunk_count == 0
    assert document.indexed_chunk_count == 0
    assert document.metadata["index_status"] == "disabled"
    assert recorded_filters == [{"document_id": document.pk}]
def test_knowledge_base_document_api_is_scoped_to_owner(client, django_user_model):
owner = django_user_model.objects.create_user(username="owner", password="pass")
other = django_user_model.objects.create_user(username="other", password="pass")