fix(knowledge-base): 停用文档时同步清理索引
This commit is contained in:
@@ -153,6 +153,7 @@ def create_document_from_upload(
|
|||||||
|
|
||||||
def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) -> KnowledgeBaseDocument:
|
def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) -> KnowledgeBaseDocument:
|
||||||
update_fields = []
|
update_fields = []
|
||||||
|
active_changed = False
|
||||||
if "display_name" in payload:
|
if "display_name" in payload:
|
||||||
document.display_name = str(payload.get("display_name") or "").strip() or document.original_name
|
document.display_name = str(payload.get("display_name") or "").strip() or document.original_name
|
||||||
update_fields.append("display_name")
|
update_fields.append("display_name")
|
||||||
@@ -160,12 +161,21 @@ def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) ->
|
|||||||
document.description = str(payload.get("description") or "").strip()
|
document.description = str(payload.get("description") or "").strip()
|
||||||
update_fields.append("description")
|
update_fields.append("description")
|
||||||
if "is_active" in payload:
|
if "is_active" in payload:
|
||||||
document.is_active = bool(payload.get("is_active"))
|
next_is_active = bool(payload.get("is_active"))
|
||||||
document.status = KnowledgeBaseDocument.Status.ACTIVE if document.is_active else KnowledgeBaseDocument.Status.DISABLED
|
active_changed = document.is_active != next_is_active
|
||||||
|
document.is_active = next_is_active
|
||||||
|
document.status = KnowledgeBaseDocument.Status.ACTIVE if next_is_active else KnowledgeBaseDocument.Status.DISABLED
|
||||||
update_fields.extend(["is_active", "status"])
|
update_fields.extend(["is_active", "status"])
|
||||||
|
if not next_is_active:
|
||||||
|
remove_managed_document_from_index(document)
|
||||||
|
document.indexed_chunk_count = 0
|
||||||
|
document.metadata = {**(document.metadata or {}), "index_status": "disabled", "index_error": ""}
|
||||||
|
update_fields.extend(["indexed_chunk_count", "metadata"])
|
||||||
if update_fields:
|
if update_fields:
|
||||||
update_fields.append("updated_at")
|
update_fields.append("updated_at")
|
||||||
document.save(update_fields=update_fields)
|
document.save(update_fields=update_fields)
|
||||||
|
if active_changed and document.is_active:
|
||||||
|
index_managed_document(document)
|
||||||
return document
|
return document
|
||||||
|
|
||||||
|
|
||||||
@@ -198,6 +208,12 @@ def serialize_document(document: KnowledgeBaseDocument) -> dict[str, Any]:
|
|||||||
|
|
||||||
|
|
||||||
def index_managed_document(document: KnowledgeBaseDocument) -> int:
|
def index_managed_document(document: KnowledgeBaseDocument) -> int:
|
||||||
|
if document.status != KnowledgeBaseDocument.Status.ACTIVE or not document.is_active:
|
||||||
|
remove_managed_document_from_index(document)
|
||||||
|
document.indexed_chunk_count = 0
|
||||||
|
document.metadata = {**(document.metadata or {}), "index_status": "disabled", "index_error": ""}
|
||||||
|
document.save(update_fields=["indexed_chunk_count", "metadata", "updated_at"])
|
||||||
|
return 0
|
||||||
path = Path(document.storage_path)
|
path = Path(document.storage_path)
|
||||||
if not path.is_absolute():
|
if not path.is_absolute():
|
||||||
path = Path(settings.MEDIA_ROOT) / document.storage_path
|
path = Path(settings.MEDIA_ROOT) / document.storage_path
|
||||||
|
|||||||
@@ -2,7 +2,13 @@ import pytest
|
|||||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
|
|
||||||
from review_agent.knowledge_base import build_knowledge_base_context, delete_document, search_knowledge_base
|
from review_agent.knowledge_base import (
|
||||||
|
build_knowledge_base_context,
|
||||||
|
delete_document,
|
||||||
|
index_managed_document,
|
||||||
|
search_knowledge_base,
|
||||||
|
update_document,
|
||||||
|
)
|
||||||
from review_agent.views import rebuild_knowledge_base_index
|
from review_agent.views import rebuild_knowledge_base_index
|
||||||
from review_agent.models import KnowledgeBaseDocument
|
from review_agent.models import KnowledgeBaseDocument
|
||||||
|
|
||||||
@@ -232,6 +238,67 @@ def test_delete_document_removes_managed_chunks_from_index(monkeypatch, django_u
|
|||||||
assert deleted_filters == [{"document_id": document.pk}]
|
assert deleted_filters == [{"document_id": document.pk}]
|
||||||
|
|
||||||
|
|
||||||
|
def test_disabling_document_removes_managed_chunks_from_index(monkeypatch, django_user_model):
    """Disabling an active document through ``update_document`` clears its index state.

    An active document with seven recorded chunks is stored, the collection
    loader is patched with a recorder, and ``update_document`` is called with
    ``is_active`` set to ``False``. The reloaded row must be disabled, report
    zero indexed chunks, carry the ``"disabled"`` index status, and exactly one
    delete filter keyed on the document's primary key must have been recorded.
    """
    recorded_wheres = []

    class RecordingCollection:
        """Replacement collection that only remembers the ``where`` filters it receives."""

        def delete(self, where):
            recorded_wheres.append(where)

    def load_recording_collection():
        return RecordingCollection()

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    initial_fields = dict(
        user=owner,
        display_name="孙之烨简历",
        original_name="孙之烨-260510.pdf",
        storage_path="knowledge_base/resume.pdf",
        file_size=1,
        status=KnowledgeBaseDocument.Status.ACTIVE,
        is_active=True,
        indexed_chunk_count=7,
        metadata={"index_status": "indexed", "index_error": ""},
    )
    doc = KnowledgeBaseDocument.objects.create(**initial_fields)

    # Patch only after the row exists, so creation runs against the unpatched loader.
    monkeypatch.setattr(
        "review_agent.knowledge_base._load_chroma_collection",
        load_recording_collection,
    )

    update_document(doc, {"is_active": False})

    # Reload so the assertions check what was persisted, not the in-memory instance.
    doc.refresh_from_db()
    assert doc.status == KnowledgeBaseDocument.Status.DISABLED
    assert doc.is_active is False
    assert doc.indexed_chunk_count == 0
    assert doc.metadata["index_status"] == "disabled"
    assert recorded_wheres == [{"document_id": doc.pk}]
|
||||||
|
|
||||||
|
|
||||||
|
def test_inactive_document_manual_index_clears_existing_chunks(monkeypatch, django_user_model):
    """Calling ``index_managed_document`` on a disabled document clears its chunks.

    A disabled document that still reports seven indexed chunks is stored, the
    collection loader is patched with a recorder, and ``index_managed_document``
    is called directly. The call must return ``0``, the reloaded row must report
    zero indexed chunks with the ``"disabled"`` index status, and exactly one
    delete filter keyed on the document's primary key must have been recorded.
    """
    recorded_wheres = []

    class RecordingCollection:
        """Replacement collection that only remembers the ``where`` filters it receives."""

        def delete(self, where):
            recorded_wheres.append(where)

    def load_recording_collection():
        return RecordingCollection()

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    initial_fields = dict(
        user=owner,
        display_name="孙之烨简历",
        original_name="孙之烨-260510.pdf",
        storage_path="knowledge_base/resume.pdf",
        file_size=1,
        status=KnowledgeBaseDocument.Status.DISABLED,
        is_active=False,
        indexed_chunk_count=7,
        metadata={"index_status": "indexed", "index_error": ""},
    )
    doc = KnowledgeBaseDocument.objects.create(**initial_fields)

    # Patch only after the row exists, so creation runs against the unpatched loader.
    monkeypatch.setattr(
        "review_agent.knowledge_base._load_chroma_collection",
        load_recording_collection,
    )

    returned_count = index_managed_document(doc)

    # Reload so the assertions check what was persisted, not the in-memory instance.
    doc.refresh_from_db()
    assert returned_count == 0
    assert doc.indexed_chunk_count == 0
    assert doc.metadata["index_status"] == "disabled"
    assert recorded_wheres == [{"document_id": doc.pk}]
|
||||||
|
|
||||||
|
|
||||||
def test_knowledge_base_document_api_is_scoped_to_owner(client, django_user_model):
|
def test_knowledge_base_document_api_is_scoped_to_owner(client, django_user_model):
|
||||||
owner = django_user_model.objects.create_user(username="owner", password="pass")
|
owner = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
other = django_user_model.objects.create_user(username="other", password="pass")
|
other = django_user_model.objects.create_user(username="other", password="pass")
|
||||||
|
|||||||
Reference in New Issue
Block a user