fix(knowledge-base): 停用文档时同步清理索引
This commit is contained in:
@@ -153,6 +153,7 @@ def create_document_from_upload(
|
||||
|
||||
def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) -> KnowledgeBaseDocument:
|
||||
update_fields = []
|
||||
active_changed = False
|
||||
if "display_name" in payload:
|
||||
document.display_name = str(payload.get("display_name") or "").strip() or document.original_name
|
||||
update_fields.append("display_name")
|
||||
@@ -160,12 +161,21 @@ def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) ->
|
||||
document.description = str(payload.get("description") or "").strip()
|
||||
update_fields.append("description")
|
||||
if "is_active" in payload:
|
||||
document.is_active = bool(payload.get("is_active"))
|
||||
document.status = KnowledgeBaseDocument.Status.ACTIVE if document.is_active else KnowledgeBaseDocument.Status.DISABLED
|
||||
next_is_active = bool(payload.get("is_active"))
|
||||
active_changed = document.is_active != next_is_active
|
||||
document.is_active = next_is_active
|
||||
document.status = KnowledgeBaseDocument.Status.ACTIVE if next_is_active else KnowledgeBaseDocument.Status.DISABLED
|
||||
update_fields.extend(["is_active", "status"])
|
||||
if not next_is_active:
|
||||
remove_managed_document_from_index(document)
|
||||
document.indexed_chunk_count = 0
|
||||
document.metadata = {**(document.metadata or {}), "index_status": "disabled", "index_error": ""}
|
||||
update_fields.extend(["indexed_chunk_count", "metadata"])
|
||||
if update_fields:
|
||||
update_fields.append("updated_at")
|
||||
document.save(update_fields=update_fields)
|
||||
if active_changed and document.is_active:
|
||||
index_managed_document(document)
|
||||
return document
|
||||
|
||||
|
||||
@@ -198,6 +208,12 @@ def serialize_document(document: KnowledgeBaseDocument) -> dict[str, Any]:
|
||||
|
||||
|
||||
def index_managed_document(document: KnowledgeBaseDocument) -> int:
|
||||
if document.status != KnowledgeBaseDocument.Status.ACTIVE or not document.is_active:
|
||||
remove_managed_document_from_index(document)
|
||||
document.indexed_chunk_count = 0
|
||||
document.metadata = {**(document.metadata or {}), "index_status": "disabled", "index_error": ""}
|
||||
document.save(update_fields=["indexed_chunk_count", "metadata", "updated_at"])
|
||||
return 0
|
||||
path = Path(document.storage_path)
|
||||
if not path.is_absolute():
|
||||
path = Path(settings.MEDIA_ROOT) / document.storage_path
|
||||
|
||||
@@ -2,7 +2,13 @@ import pytest
|
||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||
from django.urls import reverse
|
||||
|
||||
from review_agent.knowledge_base import build_knowledge_base_context, delete_document, search_knowledge_base
|
||||
from review_agent.knowledge_base import (
|
||||
build_knowledge_base_context,
|
||||
delete_document,
|
||||
index_managed_document,
|
||||
search_knowledge_base,
|
||||
update_document,
|
||||
)
|
||||
from review_agent.views import rebuild_knowledge_base_index
|
||||
from review_agent.models import KnowledgeBaseDocument
|
||||
|
||||
@@ -232,6 +238,67 @@ def test_delete_document_removes_managed_chunks_from_index(monkeypatch, django_u
|
||||
assert deleted_filters == [{"document_id": document.pk}]
|
||||
|
||||
|
||||
def test_disabling_document_removes_managed_chunks_from_index(monkeypatch, django_user_model):
    """Disabling an active document through ``update_document`` clears its index state.

    Starts from an active document that reports seven indexed chunks, switches
    it off with ``{"is_active": False}``, and checks that the persisted row is
    marked disabled with a zero chunk count and that exactly one delete,
    filtered by the document's primary key, reached the (fake) collection.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    document = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="孙之烨简历",
        original_name="孙之烨-260510.pdf",
        storage_path="knowledge_base/resume.pdf",
        file_size=1,
        status=KnowledgeBaseDocument.Status.ACTIVE,
        is_active=True,
        indexed_chunk_count=7,
        metadata={"index_status": "indexed", "index_error": ""},
    )
    recorded_filters = []

    class FakeCollection:
        """Stand-in collection that records every filter passed to ``delete``."""

        def delete(self, where):
            recorded_filters.append(where)

    # The class itself serves as the zero-argument loader: calling it yields a
    # fresh fake whose deletes all land in ``recorded_filters``.
    monkeypatch.setattr("review_agent.knowledge_base._load_chroma_collection", FakeCollection)

    update_document(document, {"is_active": False})

    # Re-read the row so the assertions check what was actually saved.
    document.refresh_from_db()
    assert document.status == KnowledgeBaseDocument.Status.DISABLED
    assert document.is_active is False
    assert document.indexed_chunk_count == 0
    assert document.metadata["index_status"] == "disabled"
    assert recorded_filters == [{"document_id": document.pk}]
|
||||
|
||||
|
||||
def test_inactive_document_manual_index_clears_existing_chunks(monkeypatch, django_user_model):
    """Indexing an already-disabled document removes its chunks instead of indexing.

    The document is created disabled but still claiming seven indexed chunks.
    Calling ``index_managed_document`` directly must return 0, persist a zero
    chunk count with ``index_status`` set to ``"disabled"``, and issue exactly
    one delete filtered by the document's primary key against the (fake)
    collection.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    document = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="孙之烨简历",
        original_name="孙之烨-260510.pdf",
        storage_path="knowledge_base/resume.pdf",
        file_size=1,
        status=KnowledgeBaseDocument.Status.DISABLED,
        is_active=False,
        indexed_chunk_count=7,
        metadata={"index_status": "indexed", "index_error": ""},
    )
    recorded_filters = []

    class FakeCollection:
        """Stand-in collection that records every filter passed to ``delete``."""

        def delete(self, where):
            recorded_filters.append(where)

    # The class itself serves as the zero-argument loader: calling it yields a
    # fresh fake whose deletes all land in ``recorded_filters``.
    monkeypatch.setattr("review_agent.knowledge_base._load_chroma_collection", FakeCollection)

    returned_chunks = index_managed_document(document)

    # Re-read the row so the assertions check what was actually saved.
    document.refresh_from_db()
    assert returned_chunks == 0
    assert document.indexed_chunk_count == 0
    assert document.metadata["index_status"] == "disabled"
    assert recorded_filters == [{"document_id": document.pk}]
|
||||
|
||||
|
||||
def test_knowledge_base_document_api_is_scoped_to_owner(client, django_user_model):
|
||||
owner = django_user_model.objects.create_user(username="owner", password="pass")
|
||||
other = django_user_model.objects.create_user(username="other", password="pass")
|
||||
|
||||
Reference in New Issue
Block a user