From 42187bf8e96c0621991f3e58da0c920c69d56611 Mon Sep 17 00:00:00 2001 From: bruce Date: Tue, 9 Jun 2026 08:22:57 +0800 Subject: [PATCH] =?UTF-8?q?fix(knowledge-base):=20=E5=81=9C=E7=94=A8?= =?UTF-8?q?=E6=96=87=E6=A1=A3=E6=97=B6=E5=90=8C=E6=AD=A5=E6=B8=85=E7=90=86?= =?UTF-8?q?=E7=B4=A2=E5=BC=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- review_agent/knowledge_base.py | 20 +++++++++- tests/test_knowledge_base.py | 69 +++++++++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/review_agent/knowledge_base.py b/review_agent/knowledge_base.py index 79f3aba..de2714b 100644 --- a/review_agent/knowledge_base.py +++ b/review_agent/knowledge_base.py @@ -153,6 +153,7 @@ def create_document_from_upload( def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) -> KnowledgeBaseDocument: update_fields = [] + active_changed = False if "display_name" in payload: document.display_name = str(payload.get("display_name") or "").strip() or document.original_name update_fields.append("display_name") @@ -160,12 +161,21 @@ def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) -> document.description = str(payload.get("description") or "").strip() update_fields.append("description") if "is_active" in payload: - document.is_active = bool(payload.get("is_active")) - document.status = KnowledgeBaseDocument.Status.ACTIVE if document.is_active else KnowledgeBaseDocument.Status.DISABLED + next_is_active = bool(payload.get("is_active")) + active_changed = document.is_active != next_is_active + document.is_active = next_is_active + document.status = KnowledgeBaseDocument.Status.ACTIVE if next_is_active else KnowledgeBaseDocument.Status.DISABLED update_fields.extend(["is_active", "status"]) + if not next_is_active: + remove_managed_document_from_index(document) + document.indexed_chunk_count = 0 + document.metadata = {**(document.metadata or {}), "index_status": "disabled", "index_error": ""} + update_fields.extend(["indexed_chunk_count", "metadata"]) if update_fields: update_fields.append("updated_at") document.save(update_fields=update_fields) + if active_changed and document.is_active: + index_managed_document(document) return document @@ -198,6 +208,12 @@ def serialize_document(document: KnowledgeBaseDocument) -> dict[str, Any]: def index_managed_document(document: KnowledgeBaseDocument) -> int: + if document.status != KnowledgeBaseDocument.Status.ACTIVE or not document.is_active: + remove_managed_document_from_index(document) + document.indexed_chunk_count = 0 + document.metadata = {**(document.metadata or {}), "index_status": "disabled", "index_error": ""} + document.save(update_fields=["indexed_chunk_count", "metadata", "updated_at"]) + return 0 path = Path(document.storage_path) if not path.is_absolute(): path = Path(settings.MEDIA_ROOT) / document.storage_path diff --git a/tests/test_knowledge_base.py b/tests/test_knowledge_base.py index 21df46f..dec1515 100644 --- a/tests/test_knowledge_base.py +++ b/tests/test_knowledge_base.py @@ -2,7 +2,13 @@ import pytest from django.core.files.uploadedfile import SimpleUploadedFile from django.urls import reverse -from review_agent.knowledge_base import build_knowledge_base_context, delete_document, search_knowledge_base +from review_agent.knowledge_base import ( + build_knowledge_base_context, + delete_document, + index_managed_document, + search_knowledge_base, + update_document, +) from review_agent.views import rebuild_knowledge_base_index from review_agent.models import KnowledgeBaseDocument @@ -232,6 +238,67 @@ def test_delete_document_removes_managed_chunks_from_index(monkeypatch, django_u assert deleted_filters == [{"document_id": document.pk}] +def test_disabling_document_removes_managed_chunks_from_index(monkeypatch, django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + document = KnowledgeBaseDocument.objects.create( + user=user, + display_name="孙之烨简历", + original_name="孙之烨-260510.pdf", + storage_path="knowledge_base/resume.pdf", + file_size=1, + status=KnowledgeBaseDocument.Status.ACTIVE, + is_active=True, + indexed_chunk_count=7, + metadata={"index_status": "indexed", "index_error": ""}, + ) + deleted_filters = [] + + class FakeCollection: + def delete(self, where): + deleted_filters.append(where) + + monkeypatch.setattr("review_agent.knowledge_base._load_chroma_collection", lambda: FakeCollection()) + + update_document(document, {"is_active": False}) + + document.refresh_from_db() + assert document.status == KnowledgeBaseDocument.Status.DISABLED + assert document.is_active is False + assert document.indexed_chunk_count == 0 + assert document.metadata["index_status"] == "disabled" + assert deleted_filters == [{"document_id": document.pk}] + + +def test_inactive_document_manual_index_clears_existing_chunks(monkeypatch, django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + document = KnowledgeBaseDocument.objects.create( + user=user, + display_name="孙之烨简历", + original_name="孙之烨-260510.pdf", + storage_path="knowledge_base/resume.pdf", + file_size=1, + status=KnowledgeBaseDocument.Status.DISABLED, + is_active=False, + indexed_chunk_count=7, + metadata={"index_status": "indexed", "index_error": ""}, + ) + deleted_filters = [] + + class FakeCollection: + def delete(self, where): + deleted_filters.append(where) + + monkeypatch.setattr("review_agent.knowledge_base._load_chroma_collection", lambda: FakeCollection()) + + chunk_count = index_managed_document(document) + + document.refresh_from_db() + assert chunk_count == 0 + assert document.indexed_chunk_count == 0 + assert document.metadata["index_status"] == "disabled" + assert deleted_filters == [{"document_id": document.pk}] + + def test_knowledge_base_document_api_is_scoped_to_owner(client, django_user_model): owner = django_user_model.objects.create_user(username="owner", password="pass") other = django_user_model.objects.create_user(username="other", password="pass")