feat(knowledge-base): 增加全局知识库管理
This commit is contained in:
@@ -2,10 +2,11 @@ from django.contrib import admin
|
|||||||
from django.contrib.auth.views import LoginView, LogoutView, PasswordChangeView
|
from django.contrib.auth.views import LoginView, LogoutView, PasswordChangeView
|
||||||
from django.urls import include, path
|
from django.urls import include, path
|
||||||
|
|
||||||
from review_agent.views import attachment_manager, stream_chat, workspace
|
from review_agent.views import attachment_manager, knowledge_base_manager, stream_chat, workspace
|
||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
path("", workspace, name="home"),
|
path("", workspace, name="home"),
|
||||||
|
path("knowledge-base/", knowledge_base_manager, name="knowledge_base_manager"),
|
||||||
path("attachments/", attachment_manager, name="attachment_manager"),
|
path("attachments/", attachment_manager, name="attachment_manager"),
|
||||||
path("", include("review_agent.urls")),
|
path("", include("review_agent.urls")),
|
||||||
path("chat/stream/", stream_chat, name="chat_stream"),
|
path("chat/stream/", stream_chat, name="chat_stream"),
|
||||||
|
|||||||
397
review_agent/knowledge_base.py
Normal file
397
review_agent/knowledge_base.py
Normal file
@@ -0,0 +1,397 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.core.files.uploadedfile import UploadedFile
|
||||||
|
|
||||||
|
from review_agent.models import KnowledgeBaseDocument
|
||||||
|
from review_agent.regulatory_review.services.rag_citation import RagIndexUnavailable, retrieve_citations
|
||||||
|
from review_agent.regulatory_review.services.rag_embedding import DeterministicEmbeddingProvider
|
||||||
|
from review_agent.regulatory_review.services.rag_index import chunk_text, extract_text_from_path
|
||||||
|
from review_agent.regulatory_review.services.rule_loader import DEFAULT_RULE_PATH, compute_file_sha256, load_rule_file
|
||||||
|
|
||||||
|
|
||||||
|
SUPPORTED_SOURCE_SUFFIXES = {".doc", ".docx", ".pdf", ".txt", ".md", ".pptx", ".xlsx"}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ChromaCollectionState:
    """Immutable snapshot of the regulatory RAG Chroma collection.

    Produced by ``get_chroma_collection_state`` and consumed when building the
    knowledge-base overview context.
    """

    # True only when the collection was opened and inspected without error.
    exists: bool
    # Record count reported by the collection; stays 0 when unavailable.
    count: int = 0
    # Reason the collection could not be used; empty string on success.
    error_message: str = ""
    # At most the first ten metadata dicts read from the collection.
    sample_metadatas: list[dict[str, Any]] | None = None
    # Number of chunks per ``source`` metadata value.
    source_chunk_counts: dict[str, int] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def build_knowledge_base_context() -> dict[str, Any]:
    """Assemble the knowledge-base overview used by the page and the status API.

    Returns:
        A dict describing the rule file, the source-material directory and its
        files, the Chroma collection state, a status label and the CLI build
        commands. ``managed_documents`` is an empty list here; the per-user
        variant fills it in.
    """
    rule_info = _rule_info()
    source_dir = Path(settings.BASE_DIR) / str(rule_info.get("source_material_dir") or "docs/0.原始材料")
    # NOTE(review): list_source_documents() loads the collection state itself,
    # so the collection is inspected twice per call.
    sources = list_source_documents(source_dir)
    collection = get_chroma_collection_state()
    return {
        "name": "NMPA IVD 注册资料法规库",
        "description": "用于体外诊断试剂注册资料法规核查的结构化规则和 RAG 依据检索。",
        "provider": settings.REGULATORY_RAG_PROVIDER,
        "collection_name": settings.REGULATORY_RAG_COLLECTION,
        # Stringified like ``source_dir`` so the payload stays JSON-serializable
        # even when the setting is a pathlib.Path.
        "chroma_path": str(settings.REGULATORY_RAG_CHROMA_PATH),
        "rule": rule_info,
        "source_dir": str(source_dir),
        "sources": sources,
        "source_count": len(sources),
        "supported_source_count": sum(1 for item in sources if item["supported"]),
        "collection": {
            "exists": collection.exists,
            "count": collection.count,
            "error_message": collection.error_message,
            "sample_metadatas": collection.sample_metadatas or [],
        },
        "status": _status_label(collection),
        "build_commands": [
            "python manage.py regulatory_rag_build --provider deterministic",
            "python manage.py regulatory_rag_build --provider siliconflow",
        ],
        "managed_documents": [],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_knowledge_base_context_for_user(user) -> dict[str, Any]:
    """Return the shared overview context extended with *user*'s managed documents.

    Adds the serialized document list, its length, and the number of entries
    whose ``is_active`` flag is truthy.
    """
    context = build_knowledge_base_context()
    managed = list_documents_for_user(user)
    active_total = len([entry for entry in managed if entry["is_active"]])
    context.update(
        managed_documents=managed,
        managed_document_count=len(managed),
        active_managed_document_count=active_total,
    )
    return context
|
||||||
|
|
||||||
|
|
||||||
|
def list_source_documents(source_dir: Path) -> list[dict[str, Any]]:
    """Describe every file under *source_dir*, recursively, in sorted path order.

    Returns an empty list when the directory does not exist. Each entry reports
    the file name, path relative to *source_dir*, suffix, size, whether the
    suffix is supported, and how many indexed chunks carry that relative path
    as their ``source``.
    """
    if not source_dir.exists():
        return []

    chunk_counts = get_chroma_collection_state().source_chunk_counts or {}

    def describe(file_path: Path) -> dict[str, Any]:
        extension = file_path.suffix.lower()
        # NOTE(review): uses OS-native separators; confirm the indexer stores
        # ``source`` in the same form.
        relative = str(file_path.relative_to(source_dir))
        chunk_total = chunk_counts.get(relative, 0)
        return {
            "name": file_path.name,
            "relative_path": relative,
            "suffix": extension.lstrip(".") or "unknown",
            "size": file_path.stat().st_size,
            "supported": extension in SUPPORTED_SOURCE_SUFFIXES,
            "indexed": chunk_total > 0,
            "indexed_chunk_count": chunk_total,
            "indexed_label": f"已入库 {chunk_total} 片" if chunk_total else "未入库",
        }

    return [describe(entry) for entry in sorted(source_dir.rglob("*")) if entry.is_file()]
|
||||||
|
|
||||||
|
|
||||||
|
def search_knowledge_base(query: str, *, n_results: int = 3) -> dict[str, Any]:
    """Run a citation search and report the outcome as a plain dict.

    The query is stripped first; a blank query short-circuits with a prompt
    message. Retrieval errors are turned into ``error_message`` instead of
    propagating. Successful hits are passed through
    ``filter_active_knowledge_results`` before being returned.
    """
    normalized = (query or "").strip()

    def reply(results: list[Any], error_message: str) -> dict[str, Any]:
        return {"query": normalized, "results": results, "error_message": error_message}

    if not normalized:
        return reply([], "请输入检索问题。")

    try:
        hits = retrieve_citations(
            normalized,
            embedding_provider=DeterministicEmbeddingProvider(),
            n_results=n_results,
        )
    except RagIndexUnavailable as exc:
        return reply([], str(exc))
    except Exception as exc:
        return reply([], f"检索失败:{exc}")

    return reply(filter_active_knowledge_results(hits), "")
|
||||||
|
|
||||||
|
|
||||||
|
def list_documents_for_user(user) -> list[dict[str, Any]]:
    """Serialize every document owned by *user* whose status is not DELETED."""
    visible = KnowledgeBaseDocument.objects.filter(user=user).exclude(
        status=KnowledgeBaseDocument.Status.DELETED
    )
    return list(map(serialize_document, visible))
|
||||||
|
|
||||||
|
|
||||||
|
def create_document_from_upload(
    *,
    user,
    uploaded_file: UploadedFile,
    display_name: str = "",
    description: str = "",
    is_active: bool = True,
) -> KnowledgeBaseDocument:
    """Persist an uploaded file and register it as a managed knowledge-base document.

    The file is written under ``MEDIA_ROOT/knowledge_base/users/<user pk>/``
    using a non-colliding name, then a ``KnowledgeBaseDocument`` row is created.
    Active documents are indexed immediately; ``index_managed_document`` records
    indexing failures in the document metadata rather than raising.

    Args:
        user: Owner of the document.
        uploaded_file: The upload to store.
        display_name: Optional label; blank or whitespace-only values fall back
            to the uploaded file name.
        description: Optional free-text description.
        is_active: Whether the document starts enabled (and is indexed now).

    Returns:
        The newly created document.

    Raises:
        Exception: Whatever the file write or the database insert raises; the
            stored file is removed before the error propagates.
    """
    # An upload may carry no name; use the same placeholder as _unique_target_path.
    original_name = uploaded_file.name or "document"
    root = Path(settings.MEDIA_ROOT) / "knowledge_base" / "users" / str(user.pk)
    root.mkdir(parents=True, exist_ok=True)
    target = _unique_target_path(root, original_name)
    status = KnowledgeBaseDocument.Status.ACTIVE if is_active else KnowledgeBaseDocument.Status.DISABLED
    try:
        with target.open("wb") as handle:
            for chunk in uploaded_file.chunks():
                handle.write(chunk)
        document = KnowledgeBaseDocument.objects.create(
            user=user,
            # Strip before falling back so "   " does not become an empty label
            # (same rule update_document applies).
            display_name=(display_name or "").strip() or original_name,
            original_name=original_name,
            storage_path=str(target),
            file_size=target.stat().st_size,
            content_type=getattr(uploaded_file, "content_type", "") or "",
            description=(description or "").strip(),
            status=status,
            is_active=is_active,
        )
    except Exception:
        # Do not leave an orphaned (possibly partial) file without a DB row.
        target.unlink(missing_ok=True)
        raise
    if is_active:
        index_managed_document(document)
    return document
|
||||||
|
|
||||||
|
|
||||||
|
def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) -> KnowledgeBaseDocument:
    """Apply the editable fields present in *payload* and save only those columns.

    Recognised keys are ``display_name`` (blank falls back to the original file
    name), ``description`` and ``is_active`` (which also drives ``status``).
    Nothing is saved when none of these keys is present.
    """
    changed: list[str] = []

    if "display_name" in payload:
        cleaned_name = str(payload.get("display_name") or "").strip()
        document.display_name = cleaned_name if cleaned_name else document.original_name
        changed.append("display_name")

    if "description" in payload:
        document.description = str(payload.get("description") or "").strip()
        changed.append("description")

    if "is_active" in payload:
        document.is_active = bool(payload.get("is_active"))
        if document.is_active:
            document.status = KnowledgeBaseDocument.Status.ACTIVE
        else:
            document.status = KnowledgeBaseDocument.Status.DISABLED
        changed += ["is_active", "status"]

    if not changed:
        return document

    document.save(update_fields=[*changed, "updated_at"])
    return document
|
||||||
|
|
||||||
|
|
||||||
|
def delete_document(document: KnowledgeBaseDocument) -> KnowledgeBaseDocument:
    """Soft-delete *document*: drop its index entries and mark the row DELETED.

    Index removal is attempted first; the row is then flagged inactive with a
    zero chunk count and ``index_status`` set to ``"deleted"``.
    """
    remove_managed_document_from_index(document)

    merged_metadata = dict(document.metadata or {})
    merged_metadata.update(index_status="deleted", index_error="")

    document.status = KnowledgeBaseDocument.Status.DELETED
    document.is_active = False
    document.indexed_chunk_count = 0
    document.metadata = merged_metadata
    document.save(
        update_fields=["status", "is_active", "indexed_chunk_count", "metadata", "updated_at"]
    )
    return document
|
||||||
|
|
||||||
|
|
||||||
|
def serialize_document(document: KnowledgeBaseDocument) -> dict[str, Any]:
    """Convert a managed document into a JSON-friendly dict.

    Timestamps are rendered with ``isoformat()`` (empty string when unset) and
    ``indexed_label`` summarises the chunk count for display.
    """
    chunk_total = document.indexed_chunk_count
    if chunk_total:
        indexed_label = f"已入库 {chunk_total} 片"
    else:
        indexed_label = "未入库"

    def iso(moment) -> str:
        return moment.isoformat() if moment else ""

    return {
        "id": document.pk,
        "display_name": document.display_name,
        "original_name": document.original_name,
        "description": document.description,
        "file_size": document.file_size,
        "content_type": document.content_type,
        "status": document.status,
        "is_active": document.is_active,
        "indexed_chunk_count": chunk_total,
        "indexed_label": indexed_label,
        "created_at": iso(document.created_at),
        "updated_at": iso(document.updated_at),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def index_managed_document(document: KnowledgeBaseDocument) -> int:
    """Extract, chunk, embed and upsert *document* into the Chroma collection.

    The outcome is written to the document itself: ``indexed_chunk_count`` plus
    ``index_status`` (``"indexed"``, ``"empty"`` or ``"failed"``) and
    ``index_error`` in its metadata. Any exception raised along the way is
    recorded as a failure instead of propagating.

    Returns:
        The number of chunks written, or 0 when nothing was indexed.
    """

    def record(chunk_total: int, index_status: str, index_error: str = "") -> int:
        # Persist the outcome on the row and hand the count back to the caller.
        document.indexed_chunk_count = chunk_total
        document.metadata = {
            **(document.metadata or {}),
            "index_status": index_status,
            "index_error": index_error,
        }
        document.save(update_fields=["indexed_chunk_count", "metadata", "updated_at"])
        return chunk_total

    file_path = Path(document.storage_path)
    if not file_path.is_absolute():
        # Relative storage paths are resolved against MEDIA_ROOT.
        file_path = Path(settings.MEDIA_ROOT) / document.storage_path

    try:
        content = extract_text_from_path(file_path)
        source_label = f"用户知识库/{document.user_id}/{document.pk}/{document.original_name}"
        pieces = chunk_text(content, source=source_label)
        if not pieces:
            return record(0, "empty")

        collection = _load_chroma_collection()
        texts = [piece.text for piece in pieces]
        embeddings = DeterministicEmbeddingProvider()(texts)
        # Ids derive from the document pk and chunk index, so re-indexing
        # overwrites the same records.
        ids = [
            hashlib.sha256(
                f"managed:{document.pk}:{piece.metadata['chunk_index']}".encode("utf-8")
            ).hexdigest()
            for piece in pieces
        ]
        metadatas = [
            {
                **piece.metadata,
                "source_type": "managed_document",
                "document_id": document.pk,
                "user_id": document.user_id,
                "original_name": document.original_name,
            }
            for piece in pieces
        ]
        collection.upsert(ids=ids, documents=texts, metadatas=metadatas, embeddings=embeddings)
        return record(len(pieces), "indexed")
    except Exception as exc:
        return record(0, "failed", str(exc))
|
||||||
|
|
||||||
|
|
||||||
|
def remove_managed_document_from_index(document: KnowledgeBaseDocument) -> None:
    """Delete the document's records from the Chroma collection, best effort.

    On failure the error text is stored under ``index_delete_error`` in the
    in-memory metadata; this function does not save the document.
    """
    try:
        _load_chroma_collection().delete(where={"document_id": document.pk})
    except Exception as exc:
        previous = document.metadata or {}
        document.metadata = {**previous, "index_delete_error": str(exc)}
|
||||||
|
|
||||||
|
|
||||||
|
def _managed_document_id(item: dict[str, Any]) -> tuple[bool, int | None]:
    """Return ``(is_managed, document_id)`` for one search hit; id is None when unusable."""
    metadata = item.get("metadata") or {}
    if metadata.get("source_type") != "managed_document":
        return False, None
    try:
        return True, int(metadata.get("document_id"))
    except (TypeError, ValueError):
        # Missing or non-numeric id: managed, but cannot be verified.
        return True, None


def filter_active_knowledge_results(results: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Drop hits that come from managed documents which are not currently active.

    Hits whose metadata ``source_type`` is not ``"managed_document"`` always
    pass through. Managed hits are kept only when their ``document_id`` refers
    to a ``KnowledgeBaseDocument`` with ACTIVE status and ``is_active`` set.
    Managed hits with a missing or non-numeric id cannot be verified and are
    always dropped; a malformed id never raises.

    Args:
        results: Citation dicts, each optionally carrying a ``metadata`` dict.

    Returns:
        The same list object when it contains no managed hits, otherwise a new
        filtered list in the original order.
    """
    classified = [(item, *_managed_document_id(item)) for item in results]
    if not any(is_managed for _, is_managed, _ in classified):
        return results

    managed_ids = {doc_id for _, is_managed, doc_id in classified if is_managed and doc_id is not None}
    active_ids: set[int] = set()
    if managed_ids:
        # Query only when there is at least one id that could be active.
        active_ids = set(
            KnowledgeBaseDocument.objects.filter(
                pk__in=managed_ids,
                status=KnowledgeBaseDocument.Status.ACTIVE,
                is_active=True,
            ).values_list("pk", flat=True)
        )
    return [
        item
        for item, is_managed, doc_id in classified
        if not is_managed or doc_id in active_ids
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def _load_chroma_collection():
    """Open the configured Chroma collection, creating it and its directory if needed.

    Raises:
        RuntimeError: When the ``chromadb`` package cannot be imported.
    """
    try:
        import chromadb
    except ImportError as exc:
        raise RuntimeError("chromadb 未安装。") from exc

    storage_dir = Path(settings.REGULATORY_RAG_CHROMA_PATH)
    storage_dir.mkdir(parents=True, exist_ok=True)
    client = chromadb.PersistentClient(path=str(storage_dir))
    return client.get_or_create_collection(settings.REGULATORY_RAG_COLLECTION)
|
||||||
|
|
||||||
|
|
||||||
|
def get_chroma_collection_state() -> ChromaCollectionState:
    """Inspect the configured Chroma collection without creating anything.

    Returns an ``exists=False`` state with an explanatory message when the
    index directory is missing, ``chromadb`` is not installed, or the
    collection cannot be read. Otherwise returns the record count, the first
    ten metadata entries and the per-source chunk counts.
    """

    def unavailable(reason: str) -> ChromaCollectionState:
        return ChromaCollectionState(exists=False, error_message=reason)

    index_dir = Path(settings.REGULATORY_RAG_CHROMA_PATH)
    if not index_dir.exists():
        return unavailable("法规 RAG 索引目录不存在。")

    try:
        import chromadb
    except ImportError:
        return unavailable("chromadb 未安装。")

    try:
        client = chromadb.PersistentClient(path=str(index_dir))
        collection = client.get_collection(settings.REGULATORY_RAG_COLLECTION)
        total = collection.count()
        all_metadatas = _load_collection_metadatas(collection, total)
        return ChromaCollectionState(
            exists=True,
            count=total,
            sample_metadatas=all_metadatas[:10],
            source_chunk_counts=_count_chunks_by_source(all_metadatas),
        )
    except Exception as exc:
        return unavailable(f"法规 RAG collection 不可用:{exc}")
|
||||||
|
|
||||||
|
|
||||||
|
def _load_collection_metadatas(collection, count: int) -> list[dict[str, Any]]:
|
||||||
|
metadatas: list[dict[str, Any]] = []
|
||||||
|
if count <= 0:
|
||||||
|
return metadatas
|
||||||
|
page_size = 500
|
||||||
|
for offset in range(0, count, page_size):
|
||||||
|
payload = collection.get(
|
||||||
|
include=["metadatas"],
|
||||||
|
limit=min(page_size, count - offset),
|
||||||
|
offset=offset,
|
||||||
|
)
|
||||||
|
metadatas.extend(payload.get("metadatas") or [])
|
||||||
|
return metadatas
|
||||||
|
|
||||||
|
|
||||||
|
def _count_chunks_by_source(metadatas: list[dict[str, Any]]) -> dict[str, int]:
|
||||||
|
counts: dict[str, int] = {}
|
||||||
|
for metadata in metadatas:
|
||||||
|
source = str((metadata or {}).get("source") or "")
|
||||||
|
if source:
|
||||||
|
counts[source] = counts.get(source, 0) + 1
|
||||||
|
return counts
|
||||||
|
|
||||||
|
|
||||||
|
def _rule_info() -> dict[str, Any]:
    """Summarise the default rule file for display.

    On success the summary has ``status`` ``"ok"`` and includes the rule code
    and name, the file path and SHA-256, requirement and chapter counts, and a
    per-severity tally. Any exception yields a ``"failed"`` summary with zeroed
    counts and the error text under ``error_message``.
    """
    try:
        rule_payload = load_rule_file()
        rule_items = rule_payload.get("requirements") or []
        by_severity: dict[str, int] = {}
        chapters = set()
        for rule in rule_items:
            level = str(rule.get("severity") or "unknown")
            by_severity[level] = by_severity.get(level, 0) + 1
            code = str(rule.get("attachment4_code") or "")
            if code:
                # The chapter is the part of the code before the first dot.
                chapters.add(code.partition(".")[0])
        return dict(
            status="ok",
            code=rule_payload.get("code", ""),
            name=rule_payload.get("name", ""),
            path=str(DEFAULT_RULE_PATH),
            hash=compute_file_sha256(DEFAULT_RULE_PATH),
            rag_collection=rule_payload.get("rag_collection", ""),
            source_material_dir=rule_payload.get("source_material_dir", "docs/0.原始材料"),
            requirement_count=len(rule_items),
            chapter_count=len(chapters),
            severity_counts=by_severity,
        )
    except Exception as exc:
        return dict(
            status="failed",
            code="",
            name="",
            path=str(DEFAULT_RULE_PATH),
            hash="",
            rag_collection="",
            source_material_dir="docs/0.原始材料",
            requirement_count=0,
            chapter_count=0,
            severity_counts={},
            error_message=str(exc),
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _status_label(collection: ChromaCollectionState) -> dict[str, str]:
|
||||||
|
if not collection.exists:
|
||||||
|
return {"code": "missing", "label": "未构建", "message": collection.error_message}
|
||||||
|
if collection.count < 20:
|
||||||
|
return {"code": "thin", "label": "索引过少", "message": "RAG 能力已打通,但当前索引内容较少,建议补齐材料后重建。"}
|
||||||
|
return {"code": "ready", "label": "可用", "message": "RAG 索引已构建,可用于法规依据辅助检索。"}
|
||||||
|
|
||||||
|
|
||||||
|
def _unique_target_path(root: Path, original_name: str) -> Path:
|
||||||
|
safe_name = Path(original_name).name or "document"
|
||||||
|
target = root / safe_name
|
||||||
|
if not target.exists():
|
||||||
|
return target
|
||||||
|
stem = target.stem
|
||||||
|
suffix = target.suffix
|
||||||
|
index = 2
|
||||||
|
while True:
|
||||||
|
candidate = root / f"{stem}-{index}{suffix}"
|
||||||
|
if not candidate.exists():
|
||||||
|
return candidate
|
||||||
|
index += 1
|
||||||
80
review_agent/migrations/0008_knowledgebasedocument.py
Normal file
80
review_agent/migrations/0008_knowledgebasedocument.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
# Generated by Django 5.2.14 on 2026-06-08 11:58
|
||||||
|
|
||||||
|
import django.db.models.deletion
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    # Creates the table backing user-managed knowledge-base documents.

    dependencies = [
        ("review_agent", "0007_feishuaccesstokencache_feishuusermapping_and_more"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.CreateModel(
            name="KnowledgeBaseDocument",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("display_name", models.CharField(max_length=255)),
                ("original_name", models.CharField(max_length=255)),
                ("storage_path", models.CharField(max_length=500)),
                ("file_size", models.BigIntegerField(default=0)),
                (
                    "content_type",
                    models.CharField(blank=True, default="", max_length=120),
                ),
                ("description", models.TextField(blank=True, default="")),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("active", "启用"),
                            ("disabled", "停用"),
                            ("deleted", "已删除"),
                        ],
                        default="active",
                        max_length=20,
                    ),
                ),
                ("is_active", models.BooleanField(default=True)),
                ("indexed_chunk_count", models.PositiveIntegerField(default=0)),
                ("metadata", models.JSONField(blank=True, default=dict)),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "user",
                    # Documents are removed together with their owning user.
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="knowledge_base_documents",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                "db_table": "ra_knowledge_base_document",
                # Most recently updated first, id as tie-breaker.
                "ordering": ["-updated_at", "-id"],
                "indexes": [
                    models.Index(
                        fields=["user", "status"], name="idx_ra_kb_doc_user_status"
                    ),
                    models.Index(
                        fields=["user", "created_at"], name="idx_ra_kb_doc_user_created"
                    ),
                    models.Index(
                        fields=["status", "updated_at"],
                        name="idx_ra_kb_doc_status_updated",
                    ),
                ],
            },
        ),
    ]
|
||||||
@@ -399,6 +399,45 @@ class RegulatoryRuleVersion(models.Model):
|
|||||||
return self.code
|
return self.code
|
||||||
|
|
||||||
|
|
||||||
|
class KnowledgeBaseDocument(models.Model):
    """Stores user-managed knowledge-base source documents."""

    class Status(models.TextChoices):
        # Lifecycle states; DELETED rows are kept in the table (soft delete).
        ACTIVE = "active", "启用"
        DISABLED = "disabled", "停用"
        DELETED = "deleted", "已删除"

    # Owner; deleting the user cascades to their documents.
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="knowledge_base_documents",
    )
    # Label shown to the user.
    display_name = models.CharField(max_length=255)
    # File name as supplied at upload time.
    original_name = models.CharField(max_length=255)
    # Location of the stored file on disk.
    storage_path = models.CharField(max_length=500)
    # Size of the stored file in bytes.
    file_size = models.BigIntegerField(default=0)
    content_type = models.CharField(max_length=120, blank=True, default="")
    description = models.TextField(blank=True, default="")
    status = models.CharField(max_length=20, choices=Status.choices, default=Status.ACTIVE)
    # NOTE(review): overlaps with ``status``; presumably callers keep the two in sync — confirm.
    is_active = models.BooleanField(default=True)
    # Number of chunks currently recorded as indexed for this document.
    indexed_chunk_count = models.PositiveIntegerField(default=0)
    # Free-form JSON bookkeeping.
    metadata = models.JSONField(default=dict, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "ra_knowledge_base_document"
        # Most recently updated first, id as tie-breaker.
        ordering = ["-updated_at", "-id"]
        indexes = [
            models.Index(fields=["user", "status"], name="idx_ra_kb_doc_user_status"),
            models.Index(fields=["user", "created_at"], name="idx_ra_kb_doc_user_created"),
            models.Index(fields=["status", "updated_at"], name="idx_ra_kb_doc_status_updated"),
        ]

    def __str__(self) -> str:
        return self.display_name
|
||||||
|
|
||||||
|
|
||||||
class ApplicationFormFillBatch(models.Model):
|
class ApplicationFormFillBatch(models.Model):
|
||||||
"""Tracks one application-form auto-fill workflow run."""
|
"""Tracks one application-form auto-fill workflow run."""
|
||||||
|
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ def retrieve_citations(
|
|||||||
"source": metadata.get("source", "法规材料"),
|
"source": metadata.get("source", "法规材料"),
|
||||||
"text": document,
|
"text": document,
|
||||||
"score": distance,
|
"score": distance,
|
||||||
|
"metadata": metadata,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return citations
|
return citations
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@@ -102,6 +103,33 @@ def _iter_docx_blocks(document):
|
|||||||
|
|
||||||
|
|
||||||
def _extract_legacy_doc_with_libreoffice(path: Path) -> str:
    """Extract text from a legacy ``.doc`` file, trying several converters in turn.

    A previously cached ``.docx`` (see ``_cached_docx_path``) is used when it
    exists. Otherwise LibreOffice, Word COM and PowerShell-driven Word COM are
    attempted in that order; the first one that succeeds wins.

    Args:
        path: Location of the ``.doc`` file.

    Returns:
        The extracted text.

    Raises:
        RuntimeError: When all three converters fail; the message lists each
            converter's error and the PowerShell error is the cause.
    """
    cached = _cached_docx_path(path)
    if cached.exists():
        return extract_text_from_path(cached)

    # Ordered fallbacks; only RuntimeError moves on to the next converter.
    converters = (
        _extract_legacy_doc_with_libreoffice_convert,
        _extract_legacy_doc_with_word_com,
        _extract_legacy_doc_with_powershell_word_com,
    )
    failures: list[RuntimeError] = []
    for convert in converters:
        try:
            return convert(path)
        except RuntimeError as exc:
            failures.append(exc)

    libreoffice_error, word_error, powershell_error = failures
    raise RuntimeError(
        f"无法转换法规 .doc 材料:{path.name};"
        f"LibreOffice 错误:{libreoffice_error};"
        f"Word COM 错误:{word_error};"
        f"PowerShell Word COM 错误:{powershell_error}"
    ) from powershell_error
|
||||||
|
|
||||||
|
|
||||||
|
def _cached_docx_path(path: Path) -> Path:
    """Return the cache location for the ``.docx`` conversion of *path*.

    The file name combines the source stem with the first 12 hex digits of the
    SHA-256 of the resolved source path.
    """
    absolute = str(path.resolve())
    fingerprint = hashlib.sha256(absolute.encode("utf-8")).hexdigest()[:12]
    return Path(settings.MEDIA_ROOT) / "regulatory_review" / "docx_cache" / f"{path.stem}-{fingerprint}.docx"
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_legacy_doc_with_libreoffice_convert(path: Path) -> str:
|
||||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
target_dir = Path(tmp_dir)
|
target_dir = Path(tmp_dir)
|
||||||
try:
|
try:
|
||||||
@@ -128,6 +156,72 @@ def _extract_legacy_doc_with_libreoffice(path: Path) -> str:
|
|||||||
return extract_text_from_path(converted)
|
return extract_text_from_path(converted)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_legacy_doc_with_word_com(path: Path) -> str:
|
||||||
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
|
target_dir = Path(tmp_dir)
|
||||||
|
converted = target_dir / f"{path.stem}.docx"
|
||||||
|
word = None
|
||||||
|
try:
|
||||||
|
import pythoncom
|
||||||
|
import win32com.client
|
||||||
|
|
||||||
|
pythoncom.CoInitialize()
|
||||||
|
word = win32com.client.DispatchEx("Word.Application")
|
||||||
|
word.Visible = False
|
||||||
|
document = word.Documents.Open(str(path.resolve()), ReadOnly=True)
|
||||||
|
document.SaveAs(str(converted.resolve()), FileFormat=16)
|
||||||
|
document.Close(False)
|
||||||
|
except Exception as exc:
|
||||||
|
raise RuntimeError(f"无法通过 Word COM 转换法规 .doc 材料:{path.name}") from exc
|
||||||
|
finally:
|
||||||
|
if word is not None:
|
||||||
|
try:
|
||||||
|
word.Quit()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
pythoncom.CoUninitialize()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
if not converted.exists():
|
||||||
|
raise RuntimeError(f"Word COM 未生成 docx:{path.name}")
|
||||||
|
return extract_text_from_path(converted)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_legacy_doc_with_powershell_word_com(path: Path) -> str:
    """Convert a ``.doc`` file to ``.docx`` by driving Word COM from PowerShell.

    Runs ``powershell`` (or ``pwsh``) with a 90-second timeout, then extracts
    text from the converted file in a temporary directory.

    Raises:
        RuntimeError: When no PowerShell executable is found, the script fails
            or times out, or no ``.docx`` file is produced.
    """

    def ps_literal(location: Path) -> str:
        # Double single quotes so the path is safe inside a '...' PowerShell string.
        return str(location.resolve()).replace("'", "''")

    with tempfile.TemporaryDirectory() as tmp_dir:
        converted = Path(tmp_dir) / f"{path.stem}.docx"
        source_path = ps_literal(path)
        target_path = ps_literal(converted)
        statements = [
            "$ErrorActionPreference = 'Stop';",
            "$word = New-Object -ComObject Word.Application;",
            "$word.Visible = $false;",
            "try {",
            f"$doc = $word.Documents.Open('{source_path}', $false, $true);",
            f"$doc.SaveAs([ref]'{target_path}', [ref]16);",
            "$doc.Close([ref]$false);",
            "} finally { $word.Quit() }",
        ]
        script = "".join(statements)

        executable = shutil.which("powershell") or shutil.which("pwsh")
        if not executable:
            raise RuntimeError("PowerShell 不可用,无法调用 Word COM。")

        command = [executable, "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", script]
        try:
            subprocess.run(command, check=True, capture_output=True, text=True, timeout=90)
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as exc:
            raise RuntimeError(f"无法通过 PowerShell Word COM 转换法规 .doc 材料:{path.name}") from exc

        if not converted.exists():
            raise RuntimeError(f"PowerShell Word COM 未生成 docx:{path.name}")
        return extract_text_from_path(converted)
|
||||||
|
|
||||||
|
|
||||||
def collect_source_chunks(source_dir: Path) -> list[TextChunk]:
|
def collect_source_chunks(source_dir: Path) -> list[TextChunk]:
|
||||||
chunks: list[TextChunk] = []
|
chunks: list[TextChunk] = []
|
||||||
for path in sorted(source_dir.rglob("*")):
|
for path in sorted(source_dir.rglob("*")):
|
||||||
|
|||||||
@@ -20,6 +20,13 @@ from .application_form_fill.views import (
|
|||||||
batch_status as application_form_fill_batch_status,
|
batch_status as application_form_fill_batch_status,
|
||||||
start as application_form_fill_start,
|
start as application_form_fill_start,
|
||||||
)
|
)
|
||||||
|
from .views import (
|
||||||
|
knowledge_base_document_detail,
|
||||||
|
knowledge_base_document_index,
|
||||||
|
knowledge_base_documents,
|
||||||
|
knowledge_base_search,
|
||||||
|
knowledge_base_status,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
@@ -98,4 +105,29 @@ urlpatterns = [
|
|||||||
application_form_fill_batch_status,
|
application_form_fill_batch_status,
|
||||||
name="application_form_fill_batch_status",
|
name="application_form_fill_batch_status",
|
||||||
),
|
),
|
||||||
|
path(
|
||||||
|
"api/review-agent/knowledge-base/status/",
|
||||||
|
knowledge_base_status,
|
||||||
|
name="knowledge_base_status",
|
||||||
|
),
|
||||||
|
path(
|
||||||
|
"api/review-agent/knowledge-base/search/",
|
||||||
|
knowledge_base_search,
|
||||||
|
name="knowledge_base_search",
|
||||||
|
),
|
||||||
|
path(
|
||||||
|
"api/review-agent/knowledge-base/documents/",
|
||||||
|
knowledge_base_documents,
|
||||||
|
name="knowledge_base_document_list",
|
||||||
|
),
|
||||||
|
path(
|
||||||
|
"api/review-agent/knowledge-base/documents/<int:document_id>/",
|
||||||
|
knowledge_base_document_detail,
|
||||||
|
name="knowledge_base_document_detail",
|
||||||
|
),
|
||||||
|
path(
|
||||||
|
"api/review-agent/knowledge-base/documents/<int:document_id>/index/",
|
||||||
|
knowledge_base_document_index,
|
||||||
|
name="knowledge_base_document_index",
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
from django.contrib.auth.decorators import login_required
|
from django.contrib.auth.decorators import login_required
|
||||||
from django.db.models import Count, Q
|
from django.db.models import Count, Q
|
||||||
|
import json
|
||||||
|
|
||||||
from django.http import HttpRequest, HttpResponse, JsonResponse, StreamingHttpResponse
|
from django.http import HttpRequest, HttpResponse, JsonResponse, StreamingHttpResponse
|
||||||
from django.shortcuts import redirect, render
|
from django.shortcuts import redirect, render
|
||||||
from django.views.decorators.http import require_http_methods
|
from django.views.decorators.http import require_http_methods
|
||||||
@@ -12,6 +14,17 @@ from .services import (
|
|||||||
stream_message,
|
stream_message,
|
||||||
)
|
)
|
||||||
from .models import ApplicationFormFillBatch, Conversation, FileAttachment, FileSummaryBatch, RegulatoryReviewBatch, WorkflowNodeRun
|
from .models import ApplicationFormFillBatch, Conversation, FileAttachment, FileSummaryBatch, RegulatoryReviewBatch, WorkflowNodeRun
|
||||||
|
from .knowledge_base import build_knowledge_base_context, search_knowledge_base
|
||||||
|
from .knowledge_base import (
|
||||||
|
build_knowledge_base_context_for_user,
|
||||||
|
create_document_from_upload,
|
||||||
|
delete_document,
|
||||||
|
index_managed_document,
|
||||||
|
list_documents_for_user,
|
||||||
|
serialize_document,
|
||||||
|
update_document,
|
||||||
|
)
|
||||||
|
from .models import KnowledgeBaseDocument
|
||||||
from .regulatory_review.services.info_extract import ensure_regulatory_condition_candidates
|
from .regulatory_review.services.info_extract import ensure_regulatory_condition_candidates
|
||||||
|
|
||||||
|
|
||||||
@@ -94,6 +107,101 @@ def attachment_manager(request: HttpRequest) -> HttpResponse:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@login_required
@require_http_methods(["GET"])
def knowledge_base_manager(request: HttpRequest) -> HttpResponse:
    """Render the knowledge-base management page for the logged-in user.

    The template receives the page title and the context built by
    ``build_knowledge_base_context_for_user`` under the ``knowledge_base`` key.
    """
    knowledge_base = build_knowledge_base_context_for_user(request.user)
    template_context = {
        "page_title": "知识库管理",
        "knowledge_base": knowledge_base,
    }
    return render(request, "knowledge_base.html", template_context)
|
||||||
|
|
||||||
|
|
||||||
|
@login_required
@require_http_methods(["GET"])
def knowledge_base_status(request: HttpRequest) -> JsonResponse:
    """Return the current user's knowledge-base context as a JSON document."""
    status_payload = build_knowledge_base_context_for_user(request.user)
    return JsonResponse(status_payload)
|
||||||
|
|
||||||
|
|
||||||
|
@login_required
@require_http_methods(["POST"])
def knowledge_base_search(request: HttpRequest) -> JsonResponse:
    """Run a retrieval query against the knowledge base and return the result as JSON.

    The query is read from the ``query`` key of a JSON body when the request
    declares ``application/json``; otherwise it is read from the form field
    ``query``. A body that cannot be decoded as UTF-8, is not valid JSON, or
    is not a JSON object is treated as carrying no query, so the request is
    answered by ``search_knowledge_base("")`` instead of failing with a 500.
    """
    if request.content_type == "application/json":
        try:
            payload = json.loads(request.body.decode("utf-8") or "{}")
        except ValueError:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            payload = {}
        if not isinstance(payload, dict):
            # Valid JSON that is not an object (list, string, number, null)
            # has no "query" key to read.
            payload = {}
        query = payload.get("query", "")
    else:
        query = request.POST.get("query", "")
    # A JSON null must not be searched as the literal text "None".
    query_text = "" if query is None else str(query)
    return JsonResponse(search_knowledge_base(query_text))
|
||||||
|
|
||||||
|
|
||||||
|
@login_required
@require_http_methods(["GET", "POST"])
def knowledge_base_documents(request: HttpRequest) -> JsonResponse:
    """List the user's knowledge-base documents (GET) or upload a new one (POST).

    A POST must carry the uploaded file under ``file``; otherwise a 400 JSON
    error is returned. The optional form fields ``display_name``,
    ``description`` and ``is_active`` are forwarded to
    ``create_document_from_upload``.
    """
    if request.method == "GET":
        listing = list_documents_for_user(request.user)
        return JsonResponse({"documents": listing})

    upload = request.FILES.get("file")
    if upload is None:
        return JsonResponse({"error": "请上传知识库材料。"}, status=400)

    form = request.POST
    # Anything other than an explicit "off" spelling keeps the document active.
    disabled_spellings = {"0", "false", "off"}
    raw_flag = str(form.get("is_active", "true")).lower()
    enabled = raw_flag not in disabled_spellings

    created = create_document_from_upload(
        user=request.user,
        uploaded_file=upload,
        display_name=form.get("display_name", ""),
        description=form.get("description", ""),
        is_active=enabled,
    )
    return JsonResponse({"document": serialize_document(created)})
|
||||||
|
|
||||||
|
|
||||||
|
@login_required
@require_http_methods(["GET", "PATCH", "DELETE"])
def knowledge_base_document_detail(request: HttpRequest, document_id: int) -> JsonResponse:
    """Read, update or delete one knowledge-base document owned by the current user.

    The lookup is restricted to ``request.user``; a document that belongs to
    another user, does not exist, or is already marked as deleted yields a
    404 JSON error. GET returns the serialized document, DELETE calls
    ``delete_document`` and PATCH passes the JSON body to ``update_document``.

    A PATCH body that is not valid UTF-8, not valid JSON, or not a JSON
    object is treated as an empty update (the existing behaviour for
    malformed JSON) rather than reaching ``update_document`` in a shape it
    was not written for.
    """
    try:
        document = KnowledgeBaseDocument.objects.get(
            pk=document_id,
            user=request.user,
        )
    except KnowledgeBaseDocument.DoesNotExist:
        return JsonResponse({"error": "知识库材料不存在。"}, status=404)
    if document.status == KnowledgeBaseDocument.Status.DELETED:
        return JsonResponse({"error": "知识库材料不存在。"}, status=404)

    if request.method == "GET":
        return JsonResponse({"document": serialize_document(document)})
    if request.method == "DELETE":
        delete_document(document)
        return JsonResponse({"document": serialize_document(document)})

    # PATCH: only a JSON object is a meaningful set of field updates.
    try:
        payload = json.loads(request.body.decode("utf-8") or "{}")
    except ValueError:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        payload = {}
    if not isinstance(payload, dict):
        payload = {}
    update_document(document, payload)
    return JsonResponse({"document": serialize_document(document)})
|
||||||
|
|
||||||
|
|
||||||
|
@login_required
@require_http_methods(["POST"])
def knowledge_base_document_index(request: HttpRequest, document_id: int) -> JsonResponse:
    """Index one of the current user's documents and report the chunk count.

    Responds with a 404 JSON error when the document is missing, belongs to
    another user, or is marked as deleted. Otherwise the document is passed
    to ``index_managed_document``, reloaded from the database and returned
    together with the value that call produced.
    """
    owned_documents = KnowledgeBaseDocument.objects
    try:
        document = owned_documents.get(pk=document_id, user=request.user)
    except KnowledgeBaseDocument.DoesNotExist:
        return JsonResponse({"error": "知识库材料不存在。"}, status=404)

    already_deleted = document.status == KnowledgeBaseDocument.Status.DELETED
    if already_deleted:
        return JsonResponse({"error": "知识库材料不存在。"}, status=404)

    chunk_count = index_managed_document(document)
    # Reload so the serialized document reflects whatever indexing persisted.
    document.refresh_from_db()
    response_body = {
        "document": serialize_document(document),
        "chunk_count": chunk_count,
    }
    return JsonResponse(response_body)
|
||||||
|
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
@require_http_methods(["POST"])
|
@require_http_methods(["POST"])
|
||||||
def stream_chat(request: HttpRequest) -> HttpResponse:
|
def stream_chat(request: HttpRequest) -> HttpResponse:
|
||||||
|
|||||||
238
static/js/knowledge_base.js
Normal file
238
static/js/knowledge_base.js
Normal file
@@ -0,0 +1,238 @@
|
|||||||
|
(function () {
|
||||||
|
var page = document.querySelector(".knowledge-page");
|
||||||
|
if (!page) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
var documentForm = document.getElementById("knowledgeDocumentForm");
|
||||||
|
var documentStatus = document.getElementById("knowledgeDocumentStatus");
|
||||||
|
var documentTable = document.getElementById("knowledgeDocumentTable");
|
||||||
|
var documentSearch = document.getElementById("knowledgeDocumentSearch");
|
||||||
|
var searchForm = document.getElementById("knowledgeSearchForm");
|
||||||
|
var queryInput = document.getElementById("knowledgeSearchQuery");
|
||||||
|
var results = document.getElementById("knowledgeSearchResults");
|
||||||
|
var sourceSearch = document.getElementById("knowledgeSourceSearch");
|
||||||
|
var sourceTable = document.getElementById("knowledgeSourceTable");
|
||||||
|
var documentFileInput = document.getElementById("knowledgeDocumentFile");
|
||||||
|
var uploadDropzone = document.getElementById("knowledgeUploadDropzone");
|
||||||
|
|
||||||
|
// Read the "csrftoken" cookie; returns "" when no such cookie is present.
function csrfToken() {
  var prefix = "csrftoken=";
  var entries = document.cookie.split("; ");
  for (var i = 0; i < entries.length; i += 1) {
    if (entries[i].indexOf(prefix) === 0) {
      return decodeURIComponent(entries[i].split("=")[1]);
    }
  }
  return "";
}
|
||||||
|
|
||||||
|
// Escape a value for safe interpolation into HTML text or attribute values.
// null/undefined become ""; every other value is stringified first, so a
// numeric 0 is rendered as "0" instead of being dropped.
function escapeHtml(value) {
  var text = value === null || value === undefined ? "" : String(value);
  // "&" must be replaced first so the entities produced below are not
  // escaped a second time.
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
||||||
|
|
||||||
|
// Send a JSON PATCH with `payload` to the row's detail URL and resolve with
// the parsed response body; rejects when the response status is not OK.
async function patchDocument(row, payload) {
  var detailUrl = row.getAttribute("data-detail-url");
  var requestHeaders = {
    "Content-Type": "application/json",
    "X-CSRFToken": csrfToken(),
  };
  var response = await fetch(detailUrl, {
    method: "PATCH",
    headers: requestHeaders,
    body: JSON.stringify(payload),
  });
  if (response.ok) {
    return response.json();
  }
  throw new Error("知识库材料更新失败。");
}
|
||||||
|
|
||||||
|
// Issue a DELETE against the row's detail URL; rejects when the response
// status is not OK. Resolves with no value.
async function deleteDocument(row) {
  var detailUrl = row.getAttribute("data-detail-url");
  var requestOptions = {
    method: "DELETE",
    headers: { "X-CSRFToken": csrfToken() },
  };
  var response = await fetch(detailUrl, requestOptions);
  if (response.ok) {
    return;
  }
  throw new Error("知识库材料删除失败。");
}
|
||||||
|
|
||||||
|
// POST to the row's index URL and resolve with the parsed response body;
// rejects when the response status is not OK.
async function indexDocument(row) {
  var indexUrl = row.getAttribute("data-index-url");
  var requestOptions = {
    method: "POST",
    headers: { "X-CSRFToken": csrfToken() },
  };
  var response = await fetch(indexUrl, requestOptions);
  if (response.ok) {
    return response.json();
  }
  throw new Error("知识库材料解析入库失败。");
}
|
||||||
|
|
||||||
|
// Render a search payload ({ error_message, results: [{ source, text, score }] })
// into the results panel. Does nothing when the panel element is absent.
function renderResults(payload) {
  if (!results) {
    return;
  }
  // A null/undefined payload is rendered as "no results".
  var data = payload || {};
  if (data.error_message) {
    results.innerHTML = '<p class="knowledge-search-error">' + escapeHtml(data.error_message) + "</p>";
    return;
  }
  var items = data.results;
  if (!items || !items.length) {
    results.innerHTML = '<p class="panel-empty">未检索到依据片段。</p>';
    return;
  }
  results.innerHTML = items
    .map(function (item, index) {
      // Truncate the raw text BEFORE escaping: cutting the escaped string
      // could split an entity such as "&amp;" and emit broken markup.
      var snippet = String(item.text || "").slice(0, 600);
      var hasScore = item.score !== null && item.score !== undefined;
      return [
        '<article class="knowledge-result">',
        "<header><strong>结果 " + (index + 1) + "</strong><span>" + escapeHtml(item.source || "法规材料") + "</span></header>",
        "<p>" + escapeHtml(snippet) + "</p>",
        // Stringify first so a score of exactly 0 is still displayed.
        hasScore ? "<em>score: " + escapeHtml(String(item.score)) + "</em>" : "",
        "</article>",
      ].join("");
    })
    .join("");
}
|
||||||
|
|
||||||
|
// Upload form: submit the selected file via fetch and reload on success.
if (documentForm) {
  documentForm.addEventListener("submit", async function (event) {
    event.preventDefault();
    var formData = new FormData(documentForm);
    // An unchecked checkbox is omitted from FormData, and the upload view
    // treats a missing "is_active" as "true". Send the state explicitly so
    // unchecking the box really uploads the document as inactive.
    var activeToggle = documentForm.querySelector('input[name="is_active"]');
    if (activeToggle) {
      formData.set("is_active", activeToggle.checked ? "true" : "false");
    }
    if (documentStatus) {
      documentStatus.textContent = "上传并解析入库中...";
    }
    try {
      var response = await fetch(page.getAttribute("data-document-url"), {
        method: "POST",
        headers: { "X-CSRFToken": csrfToken() },
        body: formData,
      });
      if (!response.ok) {
        var message = "新增材料失败。";
        try {
          // The view answers validation failures with { "error": "..." }.
          var failure = await response.json();
          if (failure && failure.error) {
            message = failure.error;
          }
        } catch (parseError) {
          // Non-JSON error body (e.g. an HTML error page): keep the generic message.
        }
        throw new Error(message);
      }
      window.location.reload();
    } catch (error) {
      if (documentStatus) {
        documentStatus.textContent = error.message || "新增材料失败。";
      }
    }
  });
}
|
||||||
|
|
||||||
|
// Show the chosen file name (or the default hint) in the upload status line.
if (documentFileInput && documentStatus) {
  documentFileInput.addEventListener("change", function () {
    var selected = documentFileInput.files && documentFileInput.files[0];
    var statusText;
    if (selected) {
      statusText = "已选择:" + selected.name;
    } else {
      statusText = "上传后会进入当前账号的全局知识库。";
    }
    documentStatus.textContent = statusText;
  });
}
|
||||||
|
|
||||||
|
// Dropzone: click / Enter / Space open the file picker; drag events toggle
// the "dragging" class; a drop hands the dropped files to the file input.
if (uploadDropzone && documentFileInput) {
  var openFilePicker = function () {
    documentFileInput.click();
  };
  var startDragging = function (event) {
    event.preventDefault();
    uploadDropzone.classList.add("dragging");
  };
  var stopDragging = function (event) {
    event.preventDefault();
    uploadDropzone.classList.remove("dragging");
  };

  uploadDropzone.addEventListener("click", function () {
    openFilePicker();
  });
  uploadDropzone.addEventListener("keydown", function (event) {
    var activates = event.key === "Enter" || event.key === " ";
    if (!activates) {
      return;
    }
    event.preventDefault();
    openFilePicker();
  });

  uploadDropzone.addEventListener("dragenter", startDragging);
  uploadDropzone.addEventListener("dragover", startDragging);
  uploadDropzone.addEventListener("dragleave", stopDragging);
  uploadDropzone.addEventListener("drop", stopDragging);

  uploadDropzone.addEventListener("drop", function (event) {
    var dropped = event.dataTransfer && event.dataTransfer.files;
    if (dropped && dropped.length) {
      documentFileInput.files = dropped;
      // Fire "change" so the status line picks up the new file name.
      documentFileInput.dispatchEvent(new Event("change", { bubbles: true }));
    }
  });
}
|
||||||
|
|
||||||
|
// Row actions (edit / toggle / index / delete), delegated from the table.
if (documentTable) {
  documentTable.addEventListener("click", async function (event) {
    var button = event.target.closest("[data-kb-action]");
    if (!button) {
      return;
    }
    var row = button.closest("tr[data-document-id]");
    if (!row) {
      return;
    }
    var action = button.getAttribute("data-kb-action");
    // Remembered so the "index" button can be restored if the request fails.
    var originalLabel = button.textContent;
    try {
      if (action === "edit") {
        var nameCell = row.querySelector(".attachment-name");
        var nextName = window.prompt("请输入新的材料名称", nameCell ? nameCell.textContent.trim() : "");
        if (nextName) {
          await patchDocument(row, { display_name: nextName });
          window.location.reload();
        }
      } else if (action === "toggle") {
        // The button label names the target state: "启用" means "enable".
        await patchDocument(row, { is_active: button.textContent.trim() === "启用" });
        window.location.reload();
      } else if (action === "index") {
        button.disabled = true;
        button.textContent = "解析中";
        await indexDocument(row);
        window.location.reload();
      } else if (action === "delete" && window.confirm("确认删除该知识库材料?")) {
        await deleteDocument(row);
        window.location.reload();
      }
    } catch (error) {
      if (action === "index") {
        // Without this the button stays disabled and labelled "解析中"
        // after a failure, so the user cannot retry without a reload.
        button.disabled = false;
        button.textContent = originalLabel;
      }
      window.alert(error.message || "知识库材料操作失败。");
    }
  });
}
|
||||||
|
|
||||||
|
// Retrieval test form: POST the query as JSON and render the response.
if (searchForm && queryInput) {
  searchForm.addEventListener("submit", async function (event) {
    event.preventDefault();
    var query = queryInput.value.trim();
    if (!query) {
      renderResults({ error_message: "请输入检索问题。" });
      return;
    }
    // The results panel is optional (renderResults guards for it as well);
    // writing to it unguarded would throw when the element is missing.
    if (results) {
      results.innerHTML = '<p class="panel-empty">检索中...</p>';
    }
    try {
      var response = await fetch(page.getAttribute("data-search-url"), {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "X-CSRFToken": csrfToken(),
        },
        body: JSON.stringify({ query: query }),
      });
      renderResults(await response.json());
    } catch (error) {
      // Network failure or a non-JSON response body.
      renderResults({ error_message: "检索失败,请稍后重试。" });
    }
  });
}
|
||||||
|
|
||||||
|
// Filter the rows of `table` matching `selector` as the user types in
// `input`: a row is hidden when the trimmed, lower-cased keyword is
// non-empty and does not occur in the row's text. No-op when either
// element is missing.
function bindTableSearch(input, table, selector) {
  if (input && table) {
    input.addEventListener("input", function () {
      var needle = input.value.trim().toLowerCase();
      var rows = table.querySelectorAll(selector);
      for (var i = 0; i < rows.length; i += 1) {
        var current = rows[i];
        current.hidden = needle && current.textContent.toLowerCase().indexOf(needle) === -1;
      }
    });
  }
}
|
||||||
|
|
||||||
|
bindTableSearch(documentSearch, documentTable, "tbody tr[data-document-id]");
|
||||||
|
bindTableSearch(sourceSearch, sourceTable, "tbody tr[data-source-name]");
|
||||||
|
})();
|
||||||
@@ -10,8 +10,8 @@
|
|||||||
<div class="topbar-left">
|
<div class="topbar-left">
|
||||||
<div class="tabbar" role="tablist" aria-label="页面切换">
|
<div class="tabbar" role="tablist" aria-label="页面切换">
|
||||||
<a class="tab" href="/" role="tab" aria-selected="false">首页</a>
|
<a class="tab" href="/" role="tab" aria-selected="false">首页</a>
|
||||||
<button class="tab" type="button" role="tab" aria-selected="false">知识库管理</button>
|
|
||||||
<a class="tab" href="/" role="tab" aria-selected="false">审核智能体</a>
|
<a class="tab" href="/" role="tab" aria-selected="false">审核智能体</a>
|
||||||
|
<a class="tab" href="{% url 'knowledge_base_manager' %}" role="tab" aria-selected="false">知识库管理</a>
|
||||||
<a class="tab active" href="{% url 'attachment_manager' %}" role="tab" aria-selected="true">附件管理</a>
|
<a class="tab active" href="{% url 'attachment_manager' %}" role="tab" aria-selected="true">附件管理</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
213
templates/knowledge_base.html
Normal file
213
templates/knowledge_base.html
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% load static %}
|
||||||
|
|
||||||
|
{% block title %}知识库管理 - DEMO-AGENT V2{% endblock %}
|
||||||
|
{% block body_class %}app-body{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<main class="app-shell">
|
||||||
|
<header class="topbar">
|
||||||
|
<div class="topbar-left">
|
||||||
|
<div class="tabbar" role="tablist" aria-label="页面切换">
|
||||||
|
<a class="tab" href="/" role="tab" aria-selected="false">首页</a>
|
||||||
|
<a class="tab" href="/" role="tab" aria-selected="false">审核智能体</a>
|
||||||
|
<a class="tab active" href="{% url 'knowledge_base_manager' %}" role="tab" aria-selected="true">知识库管理</a>
|
||||||
|
<a class="tab" href="{% url 'attachment_manager' %}" role="tab" aria-selected="false">附件管理</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="topbar-right">
|
||||||
|
<div class="user-menu">
|
||||||
|
<button class="user-menu-trigger" type="button">
|
||||||
|
<span class="avatar large">{{ request.user.username|slice:":1"|upper }}</span>
|
||||||
|
<div class="user-copy">
|
||||||
|
<strong>{{ request.user.username }}</strong>
|
||||||
|
<span>当前登录用户</span>
|
||||||
|
</div>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<section
|
||||||
|
class="knowledge-page"
|
||||||
|
data-document-url="{% url 'knowledge_base_document_list' %}"
|
||||||
|
data-search-url="{% url 'knowledge_base_search' %}"
|
||||||
|
>
|
||||||
|
<header class="attachment-manager-hero attachment-manager-toolbar">
|
||||||
|
<div>
|
||||||
|
<p class="eyebrow">知识库管理</p>
|
||||||
|
<h1>知识库管理</h1>
|
||||||
|
<p>管理当前账号所有对话可调用的法规、制度、模板和审查依据。</p>
|
||||||
|
</div>
|
||||||
|
<div class="knowledge-hero-actions">
|
||||||
|
<span class="knowledge-status status-{{ knowledge_base.status.code }}">{{ knowledge_base.status.label }}</span>
|
||||||
|
<a class="return-chat-link" href="{% url 'home' %}">返回对话</a>
|
||||||
|
</div>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<div class="attachment-manager-content attachment-manager-split knowledge-workbench">
|
||||||
|
<aside class="knowledge-left-rail">
|
||||||
|
<section class="attachment-manager-panel knowledge-panel knowledge-upload-panel">
|
||||||
|
<div class="summary-subheading">
|
||||||
|
<h3>上传知识</h3>
|
||||||
|
<span>所有对话可调用</span>
|
||||||
|
</div>
|
||||||
|
<form class="knowledge-document-form" id="knowledgeDocumentForm">
|
||||||
|
{% csrf_token %}
|
||||||
|
<div
|
||||||
|
class="upload-dropzone manager-upload-dropzone knowledge-upload-dropzone"
|
||||||
|
id="knowledgeUploadDropzone"
|
||||||
|
tabindex="0"
|
||||||
|
role="button"
|
||||||
|
aria-controls="knowledgeDocumentFile"
|
||||||
|
>
|
||||||
|
<input id="knowledgeDocumentFile" name="file" type="file" required hidden>
|
||||||
|
<strong>点击选择文件,或拖拽到这里</strong>
|
||||||
|
<span>支持 doc、docx、xls、xlsx、ppt、pptx、pdf、txt、md</span>
|
||||||
|
</div>
|
||||||
|
<div class="knowledge-inline-actions">
|
||||||
|
<label class="knowledge-checkbox">
|
||||||
|
<input name="is_active" type="checkbox" checked>
|
||||||
|
<span>上传后启用</span>
|
||||||
|
</label>
|
||||||
|
<button type="submit">上传并解析</button>
|
||||||
|
</div>
|
||||||
|
<p class="upload-status" id="knowledgeDocumentStatus">上传后会进入当前账号的全局知识库。</p>
|
||||||
|
</form>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="attachment-manager-panel knowledge-panel knowledge-parse-panel">
|
||||||
|
<div class="summary-subheading">
|
||||||
|
<h3>解析与索引</h3>
|
||||||
|
<span class="knowledge-status status-{{ knowledge_base.status.code }}">{{ knowledge_base.status.label }}</span>
|
||||||
|
</div>
|
||||||
|
<dl class="knowledge-compact-stats">
|
||||||
|
<div>
|
||||||
|
<dt>向量片段</dt>
|
||||||
|
<dd>{{ knowledge_base.collection.count }}</dd>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<dt>用户材料</dt>
|
||||||
|
<dd>{{ knowledge_base.managed_document_count|default:0 }}</dd>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<dt>内置法规</dt>
|
||||||
|
<dd>{{ knowledge_base.source_count }}</dd>
|
||||||
|
</div>
|
||||||
|
</dl>
|
||||||
|
<p class="knowledge-panel-note">{{ knowledge_base.status.message }}</p>
|
||||||
|
<div class="knowledge-form-actions">
|
||||||
|
<button type="button" onclick="window.location.reload()">刷新状态</button>
|
||||||
|
<button type="button" disabled>重建索引</button>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="attachment-manager-panel knowledge-panel knowledge-search-panel">
|
||||||
|
<div class="summary-subheading">
|
||||||
|
<h3>RAG 检索测试</h3>
|
||||||
|
<span>Top 3</span>
|
||||||
|
</div>
|
||||||
|
<form class="knowledge-search-form" id="knowledgeSearchForm">
|
||||||
|
{% csrf_token %}
|
||||||
|
<label class="sr-only" for="knowledgeSearchQuery">检索问题</label>
|
||||||
|
<input id="knowledgeSearchQuery" name="query" type="search" placeholder="输入审查问题或关键词">
|
||||||
|
<button type="submit">测试检索</button>
|
||||||
|
</form>
|
||||||
|
<div class="knowledge-search-results" id="knowledgeSearchResults">
|
||||||
|
<p class="panel-empty">输入问题后查看命中材料、依据片段和相似度。</p>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</aside>
|
||||||
|
|
||||||
|
<section class="knowledge-right-display">
|
||||||
|
<section class="attachment-manager-panel knowledge-panel knowledge-document-list-panel">
|
||||||
|
<div class="summary-subheading">
|
||||||
|
<h3>知识库材料列表</h3>
|
||||||
|
<input class="attachment-search" id="knowledgeDocumentSearch" type="search" placeholder="搜索文件名">
|
||||||
|
</div>
|
||||||
|
<div class="attachment-table-wrap">
|
||||||
|
<table class="attachment-table knowledge-document-table" id="knowledgeDocumentTable">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>状态</th>
|
||||||
|
<th>材料名称</th>
|
||||||
|
<th>文件名</th>
|
||||||
|
<th>大小</th>
|
||||||
|
<th>入库状态</th>
|
||||||
|
<th>更新时间</th>
|
||||||
|
<th>操作</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for document in knowledge_base.managed_documents %}
|
||||||
|
{# One managed document per row; the action URLs are reversed from the named routes so they follow any change to the URL configuration. #}
<tr
  data-document-id="{{ document.id }}"
  data-detail-url="{% url 'knowledge_base_document_detail' document.id %}"
  data-index-url="{% url 'knowledge_base_document_index' document.id %}"
>
  <td>{% if document.is_active %}启用{% else %}停用{% endif %}</td>
  <td class="attachment-name">{{ document.display_name }}</td>
  <td>{{ document.original_name }}</td>
  <td>{{ document.file_size }} bytes</td>
  <td>{{ document.indexed_label }}</td>
  <td>{{ document.updated_at|slice:":19" }}</td>
  <td class="attachment-actions">
    <button type="button" data-kb-action="index">解析入库</button>
    <button type="button" data-kb-action="edit">编辑</button>
    {# The toggle label names the target state; the script reads it to decide the new is_active value. #}
    <button type="button" data-kb-action="toggle">{% if document.is_active %}停用{% else %}启用{% endif %}</button>
    <button type="button" data-kb-action="delete">删除</button>
  </td>
</tr>
|
||||||
|
{% empty %}
|
||||||
|
<tr>
|
||||||
|
<td colspan="7" class="table-empty">当前知识库暂无材料</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="attachment-manager-panel knowledge-panel knowledge-source-panel">
|
||||||
|
<div class="summary-subheading">
|
||||||
|
<h3>内置法规材料</h3>
|
||||||
|
<input class="attachment-search" id="knowledgeSourceSearch" type="search" placeholder="搜索内置材料">
|
||||||
|
</div>
|
||||||
|
<div class="attachment-table-wrap">
|
||||||
|
<table class="attachment-table knowledge-source-table" id="knowledgeSourceTable">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>状态</th>
|
||||||
|
<th>文件</th>
|
||||||
|
<th>类型</th>
|
||||||
|
<th>大小</th>
|
||||||
|
<th>索引</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for source in knowledge_base.sources %}
|
||||||
|
<tr data-source-name="{{ source.name }}">
|
||||||
|
<td>{% if source.supported %}可解析{% else %}暂不支持{% endif %}</td>
|
||||||
|
<td class="attachment-name">{{ source.relative_path }}</td>
|
||||||
|
<td>{{ source.suffix }}</td>
|
||||||
|
<td>{{ source.size }} bytes</td>
|
||||||
|
<td>{{ source.indexed_label }}</td>
|
||||||
|
</tr>
|
||||||
|
{% empty %}
|
||||||
|
<tr>
|
||||||
|
<td colspan="5" class="table-empty">暂无法规材料</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</main>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block scripts %}
|
||||||
|
<script src="{% static 'js/knowledge_base.js' %}?v=20260608-kb5"></script>
|
||||||
|
{% endblock %}
|
||||||
220
tests/test_knowledge_base.py
Normal file
220
tests/test_knowledge_base.py
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
import pytest
|
||||||
|
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||||
|
from django.urls import reverse
|
||||||
|
|
||||||
|
from review_agent.knowledge_base import build_knowledge_base_context, delete_document, search_knowledge_base
|
||||||
|
from review_agent.models import KnowledgeBaseDocument
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_base_context_reports_rule_and_sources():
    """The global context exposes the rule set, its sources and the collection name."""
    expected_code = "nmpa_ivd_registration_v1"

    context = build_knowledge_base_context()
    rule = context["rule"]

    assert rule["code"] == expected_code
    assert rule["requirement_count"] > 0
    assert context["source_count"] > 0
    assert context["collection_name"] == expected_code
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_base_page_requires_login(client):
    """An anonymous request to the management page is redirected."""
    page_url = reverse("knowledge_base_manager")

    anonymous_response = client.get(page_url)

    assert anonymous_response.status_code == 302
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_base_page_renders_for_user(client, django_user_model):
    """A logged-in user gets the page, with the tabs in the expected order."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    client.force_login(owner)

    page = client.get(reverse("knowledge_base_manager"))

    assert page.status_code == 200
    html = page.content.decode("utf-8")
    assert "知识库管理" in html
    assert "RAG 检索测试" in html

    # Isolate the tab bar markup: from its opening tag to the first closing div.
    tabbar_start = html.index('<div class="tabbar"')
    tabbar_end = html.index("</div>", tabbar_start)
    tabbar = html[tabbar_start:tabbar_end]
    assert tabbar.index("审核智能体") < tabbar.index("知识库管理") < tabbar.index("附件管理")
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_base_status_api(client, django_user_model):
    """The status endpoint returns the knowledge-base context as JSON."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    client.force_login(owner)
    status_url = reverse("knowledge_base_status")

    status_response = client.get(status_url)

    assert status_response.status_code == 200
    body = status_response.json()
    assert body["rule"]["code"] == "nmpa_ivd_registration_v1"
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_base_search_rejects_blank_query():
    """A blank query yields no results and an error message asking for input."""
    blank_result = search_knowledge_base("")

    assert blank_result["results"] == []
    assert "请输入" in blank_result["error_message"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_base_search_filters_deleted_managed_documents(monkeypatch, django_user_model):
    """Citations that point at a deleted managed document are dropped from search results."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    deleted_document = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="孙之烨简历",
        original_name="孙之烨-260510.pdf",
        storage_path="knowledge_base/resume.pdf",
        file_size=1,
        status=KnowledgeBaseDocument.Status.DELETED,
        is_active=False,
        indexed_chunk_count=7,
    )

    stale_citation = {
        "source": "用户知识库/1/1/孙之烨-260510.pdf",
        "text": "孙之烨负责审核智能体项目。",
        "score": 0.2,
        "metadata": {"source_type": "managed_document", "document_id": deleted_document.pk},
    }
    regulatory_citation = {
        "source": "法规材料.doc",
        "text": "注册检验报告要求。",
        "score": 0.3,
        "metadata": {"source_type": "regulatory_document"},
    }

    def fake_retrieve(*args, **kwargs):
        # Stand-in for the retrieval backend: always returns both citations.
        return [stale_citation, regulatory_citation]

    monkeypatch.setattr("review_agent.knowledge_base.retrieve_citations", fake_retrieve)

    payload = search_knowledge_base("孙之烨是谁")

    returned_sources = [hit["source"] for hit in payload["results"]]
    assert returned_sources == ["法规材料.doc"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_base_search_api_returns_payload(client, django_user_model):
    """The search endpoint answers a form POST with the three-key payload."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    client.force_login(owner)
    form_data = {"query": "注册检验报告要求"}

    search_response = client.post(reverse("knowledge_base_search"), form_data)

    assert search_response.status_code == 200
    returned_keys = set(search_response.json())
    assert returned_keys == {"query", "results", "error_message"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_base_document_crud_api(client, settings, tmp_path, django_user_model):
    """Walk one document through upload, list, detail, patch and delete via the API."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    client.force_login(owner)
    list_url = reverse("knowledge_base_document_list")

    # Upload
    markdown_file = SimpleUploadedFile("report.md", b"# report", content_type="text/markdown")
    upload_form = {
        "display_name": "注册检验报告要求",
        "description": "用于法规依据检索",
        "is_active": "true",
        "file": markdown_file,
    }
    upload_response = client.post(list_url, upload_form)

    assert upload_response.status_code == 200
    document_id = upload_response.json()["document"]["id"]
    stored = KnowledgeBaseDocument.objects.get(pk=document_id)
    assert stored.display_name == "注册检验报告要求"
    assert stored.indexed_chunk_count > 0

    # List
    list_response = client.get(list_url)
    assert list_response.status_code == 200
    first_listed = list_response.json()["documents"][0]
    assert first_listed["display_name"] == "注册检验报告要求"

    # Detail
    detail_url = reverse("knowledge_base_document_detail", args=[document_id])
    detail_response = client.get(detail_url)
    assert detail_response.status_code == 200
    detail_document = detail_response.json()["document"]
    assert detail_document["original_name"] == "report.md"
    assert "已入库" in detail_document["indexed_label"]

    # Patch
    patch_response = client.patch(
        detail_url,
        data='{"display_name": "更新后的法规材料", "is_active": false}',
        content_type="application/json",
    )
    assert patch_response.status_code == 200
    patched_document = patch_response.json()["document"]
    assert patched_document["display_name"] == "更新后的法规材料"
    assert patched_document["is_active"] is False

    # Delete: the row is kept and its status is set to DELETED.
    delete_response = client.delete(detail_url)
    assert delete_response.status_code == 200
    remaining = KnowledgeBaseDocument.objects.get(pk=document_id)
    assert remaining.status == KnowledgeBaseDocument.Status.DELETED
|
||||||
|
|
||||||
|
|
||||||
|
def test_delete_document_removes_managed_chunks_from_index(monkeypatch, django_user_model):
    """Deleting a document resets its index bookkeeping and deletes its chunks by document id."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    document = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="孙之烨简历",
        original_name="孙之烨-260510.pdf",
        storage_path="knowledge_base/resume.pdf",
        file_size=1,
        indexed_chunk_count=7,
        metadata={"index_status": "indexed", "index_error": ""},
    )
    recorded_filters = []

    class RecordingCollection:
        # Minimal collection stand-in that records every delete filter it receives.
        def delete(self, where):
            recorded_filters.append(where)

    def load_fake_collection():
        return RecordingCollection()

    monkeypatch.setattr("review_agent.knowledge_base._load_chroma_collection", load_fake_collection)

    delete_document(document)
    document.refresh_from_db()

    assert document.status == KnowledgeBaseDocument.Status.DELETED
    assert document.is_active is False
    assert document.indexed_chunk_count == 0
    assert document.metadata["index_status"] == "deleted"
    assert recorded_filters == [{"document_id": document.pk}]
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_base_document_api_is_scoped_to_owner(client, django_user_model):
    """A user who does not own a document gets a 404 when trying to PATCH it."""
    create_user = django_user_model.objects.create_user
    owner = create_user(username="owner", password="pass")
    other = create_user(username="other", password="pass")

    document = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="法规材料",
        original_name="a.md",
        storage_path="knowledge_base/a.md",
        file_size=1,
    )

    # Authenticate as the non-owner before touching the owner's document.
    client.force_login(other)

    detail_url = reverse("knowledge_base_document_detail", args=[document.pk])
    rename_payload = '{"display_name": "越权修改"}'
    response = client.patch(
        detail_url,
        data=rename_payload,
        content_type="application/json",
    )

    assert response.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_base_document_manual_index_api(client, settings, tmp_path, django_user_model):
    """POSTing to the index endpoint indexes a document that has no chunks yet.

    The document row starts with ``indexed_chunk_count=0`` and points at a real
    markdown file under ``tmp_path``. After the call the count must be positive
    and the serialized ``indexed_label`` must contain "已入库".
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    client.force_login(owner)

    # Real file on disk for the indexing endpoint to read.
    markdown_file = tmp_path / "manual.md"
    markdown_file.write_text("# manual\n注册检验报告要求", encoding="utf-8")
    size_on_disk = markdown_file.stat().st_size

    document = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="manual.md",
        original_name="manual.md",
        storage_path=str(markdown_file),
        file_size=size_on_disk,
        indexed_chunk_count=0,
    )

    index_url = reverse("knowledge_base_document_index", args=[document.pk])
    response = client.post(index_url)

    assert response.status_code == 200
    # Pull the persisted state; the view updates the row, not this instance.
    document.refresh_from_db()
    assert document.indexed_chunk_count > 0
    assert "已入库" in response.json()["document"]["indexed_label"]
|
||||||
Reference in New Issue
Block a user