From 37589ebdfb2b85e4f5dc2181e8a061bc7598cbdb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=A2=A8=E6=A2=93=E6=9F=92?= <1787882683@qq.com>
Date: Tue, 13 Jan 2026 00:42:49 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E6=AE=B5=E8=90=BD?=
 =?UTF-8?q?=E5=86=85=E5=AE=B9=E5=8A=A0=E8=BD=BD=E5=8A=9F=E8=83=BD=E5=8F=8A?=
 =?UTF-8?q?=E7=9B=B8=E5=85=B3=E9=85=8D=E7=BD=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text between the "---" line and the diffstat is not part of
the commit message):
* _get_paragraph_store() is called once per graph node. In the submitted
  version a failed load was not remembered, so every paragraph node repeated
  the load attempt and its warning. A failed attempt now starts a 60 s
  cool-down before the next try.
* The docstring of _get_paragraph_content() now states that the returned
  flag is also False when the store failed to load.
* Comments and docstrings in the added Python code are in English; log
  messages, the config docstring and the TOML comment are unchanged.
* This copy was rebuilt from a whitespace-collapsed patch. Indentation and
  blank-line placement of context lines are inferred, and the commit/index
  hashes are those of the original submission. Check against the tree before
  applying (git apply --ignore-whitespace may be needed).

 src/config/official_configs.py    |   3 +
 src/webui/knowledge_routes.py     | 110 ++++++++++++++++++++++++++++++-
 template/bot_config_template.toml |   3 +-
 3 files changed, 112 insertions(+), 4 deletions(-)

diff --git a/src/config/official_configs.py b/src/config/official_configs.py
index a6652e0e..24878f7f 100644
--- a/src/config/official_configs.py
+++ b/src/config/official_configs.py
@@ -697,6 +697,9 @@ class WebUIConfig(ConfigBase):
     secure_cookie: bool = False
     """是否启用安全Cookie(仅通过HTTPS传输,默认false)"""
 
+    enable_paragraph_content: bool = False
+    """是否在知识图谱中加载段落完整内容(需要加载embedding store,会占用额外内存)"""
+
 
 @dataclass
 class DebugConfig(ConfigBase):
diff --git a/src/webui/knowledge_routes.py b/src/webui/knowledge_routes.py
index 87b2e7b5..fb540105 100644
--- a/src/webui/knowledge_routes.py
+++ b/src/webui/knowledge_routes.py
@@ -5,11 +5,95 @@ from fastapi import APIRouter, Query, Depends, Cookie, Header
 from pydantic import BaseModel
 import logging
+import time
 from src.webui.auth import verify_auth_token_from_cookie_or_header
+from src.config.config import global_config
 
 logger = logging.getLogger(__name__)
 
 router = APIRouter(prefix="/api/webui/knowledge", tags=["knowledge"])
 
+# Lazily initialised, read-only paragraph embedding store (only used to fetch full paragraph text).
+_paragraph_store_cache = None
+# After a failed load, no new attempt is made before this time.monotonic() value.
+_paragraph_store_retry_at = 0.0
+# Seconds to wait between load attempts after a failure.
+_PARAGRAPH_STORE_RETRY_SECONDS = 60.0
+
+
+def _get_paragraph_store():
+    """Return the lazily loaded paragraph embedding store.
+
+    A failed load is remembered for a cool-down period: callers invoke this
+    once per graph node, so retrying on every call would repeat the load
+    attempt (and its warning) for every paragraph node.
+
+    Returns:
+        EmbeddingStore | None: the store, or None when the feature is disabled
+        in the config or the store could not be loaded.
+    """
+    if not global_config.webui.enable_paragraph_content:
+        return None
+
+    global _paragraph_store_cache, _paragraph_store_retry_at
+    if _paragraph_store_cache is not None:
+        return _paragraph_store_cache
+    if time.monotonic() < _paragraph_store_retry_at:
+        return None
+
+    try:
+        from src.chat.knowledge.embedding_store import EmbeddingStore
+        import os
+
+        # data/embedding is resolved two levels above this file's directory.
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        root_path = os.path.abspath(os.path.join(current_dir, "..", ".."))
+        embedding_dir = os.path.join(root_path, "data/embedding")
+
+        # Only the paragraph namespace is loaded.
+        paragraph_store = EmbeddingStore(
+            namespace="paragraph",
+            dir_path=embedding_dir,
+            max_workers=1,  # read-only use, no need for multiple workers
+            chunk_size=100,
+        )
+        paragraph_store.load_from_file()
+
+        _paragraph_store_cache = paragraph_store
+        logger.info(f"成功加载段落 embedding store,包含 {len(paragraph_store.store)} 个段落")
+        return paragraph_store
+    except Exception as e:
+        _paragraph_store_retry_at = time.monotonic() + _PARAGRAPH_STORE_RETRY_SECONDS
+        logger.warning(f"加载段落 embedding store 失败: {e}")
+        return None
+
+
+def _get_paragraph_content(node_id: str) -> tuple[Optional[str], bool]:
+    """Look up a paragraph's full text in the embedding store.
+
+    Args:
+        node_id: paragraph node ID, in the form 'paragraph-{hash}'.
+
+    Returns:
+        tuple[str | None, bool]: (full text or None, whether a store was
+        available). The flag is False when the feature is disabled or the
+        store failed to load.
+    """
+    try:
+        paragraph_store = _get_paragraph_store()
+        if paragraph_store is None:
+            return None, False
+
+        paragraph_item = paragraph_store.store.get(node_id)
+        if paragraph_item is not None:
+            # The item's `str` attribute is expected to hold the full paragraph text.
+            content: str = getattr(paragraph_item, 'str', '')
+            if content:
+                return content, True
+        return None, True
+    except Exception as e:
+        logger.debug(f"获取段落内容失败: {e}")
+        return None, True
+
 
 def require_auth(
     maibot_session: Optional[str] = Cookie(None),
@@ -84,7 +168,14 @@ def _convert_graph_to_json(kg_manager) -> KnowledgeGraph:
         node_data = graph[node_id]
         # 节点类型: "ent" -> "entity", "pg" -> "paragraph"
         node_type = "entity" if ("type" in node_data and node_data["type"] == "ent") else "paragraph"
-        content = node_data["content"] if "content" in node_data else node_id
+
+        # For paragraph nodes, prefer the full text from the embedding store.
+        if node_type == "paragraph":
+            full_content, _ = _get_paragraph_content(node_id)
+            content = full_content if full_content is not None else (node_data["content"] if "content" in node_data else node_id)
+        else:
+            content = node_data["content"] if "content" in node_data else node_id
+
         create_time = node_data["create_time"] if "create_time" in node_data else None
 
         nodes.append(KnowledgeNode(id=node_id, type=node_type, content=content, create_time=create_time))
@@ -166,7 +257,14 @@ async def get_knowledge_graph(
             try:
                 node_data = graph[node_id]
                 node_type_val = "entity" if ("type" in node_data and node_data["type"] == "ent") else "paragraph"
-                content = node_data["content"] if "content" in node_data else node_id
+
+                # For paragraph nodes, prefer the full text from the embedding store.
+                if node_type_val == "paragraph":
+                    full_content, _ = _get_paragraph_content(node_id)
+                    content = full_content if full_content is not None else (node_data["content"] if "content" in node_data else node_id)
+                else:
+                    content = node_data["content"] if "content" in node_data else node_id
+
                 create_time = node_data["create_time"] if "create_time" in node_data else None
 
                 nodes.append(KnowledgeNode(id=node_id, type=node_type_val, content=content, create_time=create_time))
@@ -281,8 +379,14 @@ async def search_knowledge_node(query: str = Query(..., min_length=1), _auth: bo
         for node_id in node_list:
             try:
                 node_data = graph[node_id]
-                content = node_data["content"] if "content" in node_data else node_id
                 node_type = "entity" if ("type" in node_data and node_data["type"] == "ent") else "paragraph"
+
+                # For paragraph nodes, prefer the full text from the embedding store.
+                if node_type == "paragraph":
+                    full_content, _ = _get_paragraph_content(node_id)
+                    content = full_content if full_content is not None else (node_data["content"] if "content" in node_data else node_id)
+                else:
+                    content = node_data["content"] if "content" in node_data else node_id
 
                 if query_lower in content.lower() or query_lower in node_id.lower():
                     create_time = node_data["create_time"] if "create_time" in node_data else None
diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml
index 0de42f50..8e4567de 100644
--- a/template/bot_config_template.toml
+++ b/template/bot_config_template.toml
@@ -1,5 +1,5 @@
 [inner]
-version = "7.4.0"
+version = "7.4.1"
 
 #----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
 # 如果你想要修改配置文件,请递增version的值
@@ -294,6 +294,7 @@ trusted_proxies = "" # 信任的代理IP列表(逗号分隔),只有来自
 trust_xff = false # 是否启用X-Forwarded-For代理解析(默认false)
  # 启用后,仍要求直连IP在trusted_proxies中才会信任XFF头
 secure_cookie = false # 是否启用安全Cookie(仅通过HTTPS传输,默认false)
+enable_paragraph_content = false # 是否在知识图谱中加载段落完整内容(需要加载embedding store,会占用额外内存)
 
 [experimental] #实验性功能
 # 麦麦私聊的说话规则,行为风格(实验性功能)