346 lines
13 KiB
Python
346 lines
13 KiB
Python
import pytest
|
|
from django.core.files.uploadedfile import SimpleUploadedFile
|
|
from django.urls import reverse
|
|
|
|
from review_agent.knowledge_base import (
|
|
build_knowledge_base_context,
|
|
delete_document,
|
|
index_managed_document,
|
|
search_knowledge_base,
|
|
update_document,
|
|
)
|
|
from review_agent.views import rebuild_knowledge_base_index
|
|
from review_agent.models import KnowledgeBaseDocument
|
|
|
|
|
|
# Module-level pytest mark: applies the ``django_db`` marker to every test in this file.
pytestmark = pytest.mark.django_db
|
|
|
|
|
|
def test_knowledge_base_context_reports_rule_and_sources():
    """The knowledge-base context reports the rule, a non-empty source list and the collection name."""
    kb_context = build_knowledge_base_context()
    rule_info = kb_context["rule"]

    assert rule_info["code"] == "nmpa_ivd_registration_v1"
    assert rule_info["requirement_count"] > 0
    assert kb_context["source_count"] > 0
    assert kb_context["collection_name"] == "nmpa_ivd_registration_v1"
    # None of the listed sources may have "模拟题二" in its relative path.
    assert all("模拟题二" not in entry["relative_path"] for entry in kb_context["sources"])
|
|
|
|
|
|
def test_knowledge_base_page_requires_login(client):
    """An unauthenticated GET of the knowledge-base manager page is answered with a 302."""
    manager_url = reverse("knowledge_base_manager")

    anonymous_response = client.get(manager_url)

    assert anonymous_response.status_code == 302
|
|
|
|
|
|
def test_knowledge_base_page_renders_for_user(client, django_user_model):
    """A logged-in user receives the manager page with its tab bar and index controls."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    client.force_login(owner)

    page = client.get(reverse("knowledge_base_manager"))

    assert page.status_code == 200
    html = page.content.decode("utf-8")
    assert "知识库管理" in html
    assert "RAG 检索测试" in html

    # Cut out the tab bar: from its opening tag up to the first "</div>" that follows.
    tabbar_start = html.index('<div class="tabbar"')
    tabbar_end = html.index("</div>", tabbar_start)
    tabbar_html = html[tabbar_start:tabbar_end]
    # The three tab labels must appear in this left-to-right order.
    assert tabbar_html.index("审核智能体") < tabbar_html.index("知识库管理") < tabbar_html.index("附件管理")

    # Markers for the rebuild-index button and the per-source manual index action.
    expected_markers = (
        "data-rebuild-url=",
        'id="knowledgeRebuildIndexButton"',
        "重建索引",
        'data-source-action="index"',
        "手动入库",
    )
    for marker in expected_markers:
        assert marker in html
|
|
|
|
|
|
def test_knowledge_base_status_api(client, django_user_model):
    """The status endpoint answers a logged-in user with 200 and the expected rule code."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    client.force_login(owner)
    status_url = reverse("knowledge_base_status")

    status_response = client.get(status_url)

    assert status_response.status_code == 200
    rule_code = status_response.json()["rule"]["code"]
    assert rule_code == "nmpa_ivd_registration_v1"
|
|
|
|
|
|
def test_knowledge_base_rebuild_index_api(client, django_user_model, monkeypatch):
    """POSTing to the rebuild endpoint calls the rebuild function once and returns its chunk count."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    client.force_login(owner)
    invocations = []

    def fake_rebuild():
        # Record the call and hand back a canned payload instead of touching a real index.
        invocations.append("rebuild")
        return {"chunk_count": 12}

    monkeypatch.setattr("review_agent.views.rebuild_knowledge_base_index", fake_rebuild)

    rebuild_response = client.post(reverse("knowledge_base_rebuild_index"))

    assert rebuild_response.status_code == 200
    body = rebuild_response.json()
    assert body["chunk_count"] == 12
    assert body["knowledge_base"]["collection"]["count"] >= 0
    assert invocations == ["rebuild"]
|
|
|
|
|
|
def test_rebuild_knowledge_base_index_requests_reset(settings, tmp_path, monkeypatch):
    """rebuild_knowledge_base_index passes reset=True and the configured provider to the index builder."""
    chroma_dir = tmp_path / "chroma"
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_RAG_CHROMA_PATH = chroma_dir
    chroma_dir.mkdir()
    # Pre-populate the Chroma directory with a leftover file before the rebuild runs.
    (chroma_dir / "chroma.sqlite3").write_text("stale", encoding="utf-8")
    build_calls = []

    def fake_load_rule_file():
        return {"source_material_dir": "docs/0.原始材料"}

    def fake_get_embedding_provider():
        return "provider"

    def fake_build_chroma_index(source_dir, embedding_provider, reset=False):
        # Capture the arguments the view passes and report a fixed chunk count.
        build_calls.append(
            {
                "source_dir": source_dir,
                "embedding_provider": embedding_provider,
                "reset": reset,
            }
        )
        return 8

    monkeypatch.setattr("review_agent.views.load_rule_file", fake_load_rule_file)
    monkeypatch.setattr("review_agent.views.get_embedding_provider", fake_get_embedding_provider)
    monkeypatch.setattr("review_agent.views.build_chroma_index", fake_build_chroma_index)

    result = rebuild_knowledge_base_index()

    assert result["chunk_count"] == 8
    first_call = build_calls[0]
    assert first_call["embedding_provider"] == "provider"
    assert first_call["reset"] is True
|
|
|
|
|
|
def test_knowledge_base_search_rejects_blank_query():
    """An empty query produces no results and an error message containing "请输入"."""
    outcome = search_knowledge_base("")

    assert outcome["results"] == []
    assert "请输入" in outcome["error_message"]
|
|
|
|
|
|
def test_knowledge_base_search_filters_deleted_managed_documents(monkeypatch, django_user_model):
    """A citation that points at a deleted managed document is left out of the search results."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    removed_doc = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="孙之烨简历",
        original_name="孙之烨-260510.pdf",
        storage_path="knowledge_base/resume.pdf",
        file_size=1,
        status=KnowledgeBaseDocument.Status.DELETED,
        is_active=False,
        indexed_chunk_count=7,
    )

    def fake_retrieve_citations(*args, **kwargs):
        # One hit from the deleted managed document, one from a regulatory document.
        managed_hit = {
            "source": "用户知识库/1/1/孙之烨-260510.pdf",
            "text": "孙之烨负责审核智能体项目。",
            "score": 0.2,
            "metadata": {"source_type": "managed_document", "document_id": removed_doc.pk},
        }
        regulatory_hit = {
            "source": "法规材料.doc",
            "text": "注册检验报告要求。",
            "score": 0.3,
            "metadata": {"source_type": "regulatory_document"},
        }
        return [managed_hit, regulatory_hit]

    monkeypatch.setattr("review_agent.knowledge_base.retrieve_citations", fake_retrieve_citations)

    outcome = search_knowledge_base("孙之烨是谁")

    returned_sources = [hit["source"] for hit in outcome["results"]]
    assert returned_sources == ["法规材料.doc"]
|
|
|
|
|
|
def test_knowledge_base_search_api_returns_payload(client, django_user_model):
    """The search endpoint returns 200 with exactly the keys query, results and error_message."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    client.force_login(owner)
    form_data = {"query": "注册检验报告要求"}

    search_response = client.post(reverse("knowledge_base_search"), form_data)

    assert search_response.status_code == 200
    payload_keys = set(search_response.json())
    assert payload_keys == {"query", "results", "error_message"}
|
|
|
|
|
|
def test_knowledge_base_document_crud_api(client, settings, tmp_path, django_user_model):
    """Walk one document through upload, list, detail, PATCH and DELETE via the HTTP API."""
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "chroma"
    settings.REGULATORY_RAG_PROVIDER = "deterministic"
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    client.force_login(owner)
    list_url = reverse("knowledge_base_document_list")

    # --- upload -----------------------------------------------------------
    upload_form = {
        "display_name": "注册检验报告要求",
        "description": "用于法规依据检索",
        "is_active": "true",
        "file": SimpleUploadedFile("report.md", b"# report", content_type="text/markdown"),
    }
    upload_response = client.post(list_url, upload_form)

    assert upload_response.status_code == 200
    document_id = upload_response.json()["document"]["id"]
    stored = KnowledgeBaseDocument.objects.get(pk=document_id)
    assert stored.display_name == "注册检验报告要求"
    assert stored.indexed_chunk_count > 0

    # --- list -------------------------------------------------------------
    list_response = client.get(list_url)
    assert list_response.status_code == 200
    first_listed = list_response.json()["documents"][0]
    assert first_listed["display_name"] == "注册检验报告要求"

    # --- detail -----------------------------------------------------------
    detail_url = reverse("knowledge_base_document_detail", args=[document_id])
    detail_response = client.get(detail_url)
    assert detail_response.status_code == 200
    detail_doc = detail_response.json()["document"]
    assert detail_doc["original_name"] == "report.md"
    assert "已入库" in detail_doc["indexed_label"]

    # --- update (JSON body sent as a raw string) ---------------------------
    patch_response = client.patch(
        detail_url,
        data='{"display_name": "更新后的法规材料", "is_active": false}',
        content_type="application/json",
    )

    assert patch_response.status_code == 200
    patched_doc = patch_response.json()["document"]
    assert patched_doc["display_name"] == "更新后的法规材料"
    assert patched_doc["is_active"] is False

    # --- delete: the row stays in the database with status DELETED ----------
    delete_response = client.delete(detail_url)

    assert delete_response.status_code == 200
    after_delete = KnowledgeBaseDocument.objects.get(pk=document_id)
    assert after_delete.status == KnowledgeBaseDocument.Status.DELETED
|
|
|
|
|
|
def test_delete_document_removes_managed_chunks_from_index(monkeypatch, django_user_model):
    """delete_document marks the row deleted and issues one index delete filtered by document id."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    doc = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="孙之烨简历",
        original_name="孙之烨-260510.pdf",
        storage_path="knowledge_base/resume.pdf",
        file_size=1,
        indexed_chunk_count=7,
        metadata={"index_status": "indexed", "index_error": ""},
    )
    recorded_filters = []

    class RecordingCollection:
        """Stand-in for the Chroma collection that only records delete filters."""

        def delete(self, where):
            recorded_filters.append(where)

    # The class itself serves as the zero-argument factory for the patched loader.
    monkeypatch.setattr("review_agent.knowledge_base._load_chroma_collection", RecordingCollection)

    delete_document(doc)

    doc.refresh_from_db()
    assert doc.status == KnowledgeBaseDocument.Status.DELETED
    assert doc.is_active is False
    assert doc.indexed_chunk_count == 0
    assert doc.metadata["index_status"] == "deleted"
    assert recorded_filters == [{"document_id": doc.pk}]
|
|
|
|
|
|
def test_disabling_document_removes_managed_chunks_from_index(monkeypatch, django_user_model):
    """Setting is_active to False disables the row and issues one index delete for that document."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    doc = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="孙之烨简历",
        original_name="孙之烨-260510.pdf",
        storage_path="knowledge_base/resume.pdf",
        file_size=1,
        status=KnowledgeBaseDocument.Status.ACTIVE,
        is_active=True,
        indexed_chunk_count=7,
        metadata={"index_status": "indexed", "index_error": ""},
    )
    recorded_filters = []

    class RecordingCollection:
        """Stand-in for the Chroma collection that only records delete filters."""

        def delete(self, where):
            recorded_filters.append(where)

    def load_fake_collection():
        return RecordingCollection()

    monkeypatch.setattr("review_agent.knowledge_base._load_chroma_collection", load_fake_collection)

    changes = {"is_active": False}
    update_document(doc, changes)

    doc.refresh_from_db()
    assert doc.status == KnowledgeBaseDocument.Status.DISABLED
    assert doc.is_active is False
    assert doc.indexed_chunk_count == 0
    assert doc.metadata["index_status"] == "disabled"
    assert recorded_filters == [{"document_id": doc.pk}]
|
|
|
|
|
|
def test_inactive_document_manual_index_clears_existing_chunks(monkeypatch, django_user_model):
    """Indexing a disabled document returns 0 chunks and issues one index delete for that document."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    doc = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="孙之烨简历",
        original_name="孙之烨-260510.pdf",
        storage_path="knowledge_base/resume.pdf",
        file_size=1,
        status=KnowledgeBaseDocument.Status.DISABLED,
        is_active=False,
        indexed_chunk_count=7,
        metadata={"index_status": "indexed", "index_error": ""},
    )
    recorded_filters = []

    class RecordingCollection:
        """Stand-in for the Chroma collection that only records delete filters."""

        def delete(self, where):
            recorded_filters.append(where)

    def load_fake_collection():
        return RecordingCollection()

    monkeypatch.setattr("review_agent.knowledge_base._load_chroma_collection", load_fake_collection)

    returned_count = index_managed_document(doc)

    doc.refresh_from_db()
    assert returned_count == 0
    assert doc.indexed_chunk_count == 0
    assert doc.metadata["index_status"] == "disabled"
    assert recorded_filters == [{"document_id": doc.pk}]
|
|
|
|
|
|
def test_knowledge_base_document_api_is_scoped_to_owner(client, django_user_model):
    """A user who does not own a document cannot modify it through the detail API.

    The PATCH sent by the non-owner must be answered with 404, and the stored
    document must still carry its original display name afterwards.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    other = django_user_model.objects.create_user(username="other", password="pass")
    document = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="法规材料",
        original_name="a.md",
        storage_path="knowledge_base/a.md",
        file_size=1,
    )
    # Act as the user who does NOT own the document.
    client.force_login(other)

    response = client.patch(
        reverse("knowledge_base_document_detail", args=[document.pk]),
        data='{"display_name": "越权修改"}',
        content_type="application/json",
    )

    assert response.status_code == 404
    # A 404 alone does not prove the write was rejected; re-read the row to make
    # sure the foreign user's payload was not applied.
    document.refresh_from_db()
    assert document.display_name == "法规材料"
|
|
|
|
|
|
def test_knowledge_base_document_manual_index_api(client, settings, tmp_path, django_user_model):
    """POSTing to the manual-index endpoint indexes an un-indexed document and labels it as indexed."""
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "chroma"
    settings.REGULATORY_RAG_PROVIDER = "deterministic"
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    client.force_login(owner)

    # Write a small markdown file to disk and register it with zero indexed chunks.
    markdown_file = tmp_path / "manual.md"
    markdown_file.write_text("# manual\n注册检验报告要求", encoding="utf-8")
    doc = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="manual.md",
        original_name="manual.md",
        storage_path=str(markdown_file),
        file_size=markdown_file.stat().st_size,
        indexed_chunk_count=0,
    )
    index_url = reverse("knowledge_base_document_index", args=[doc.pk])

    index_response = client.post(index_url)

    assert index_response.status_code == 200
    doc.refresh_from_db()
    assert doc.indexed_chunk_count > 0
    indexed_label = index_response.json()["document"]["indexed_label"]
    assert "已入库" in indexed_label
|