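修改了知识库检索逻辑以增强检索能力：get_prompt_info 不再直接对整条消息取 embedding，而是先识别消息中的主题，再对每个主题分别获取 embedding 并查询知识库（每个主题 limit=2），最后用换行合并所有主题的查询结果；同时将 get_info_from_db 的默认 limit 由 1 调整为 5。修改内容在 prompt_builder.py 最下方。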

2025-03-30 18:54:29 +08:00 · 2025-03-30 18:54:29 +08:00 · df53b5394c
parent 8ce971f230
commit df53b5394c
1 changed files with 45 additions and 6 deletions
--- a/src/plugins/chat/prompt_builder.py
+++ b/src/plugins/chat/prompt_builder.py
@@ -239,14 +239,49 @@ class PromptBuilder:
    async def get_prompt_info(self, message: str, threshold: float):
        related_info = ""
        logger.debug(f"获取知识库内容，元消息：{message[:30]}...，消息长度: {len(message)}")
-        embedding = await get_embedding(message)
+        
-        related_info += self.get_info_from_db(embedding, threshold=threshold)
+        # 识别主题
        topics = await hippocampus._identify_topics(message)
        logger.info(f"[知识库查询] 识别出的主题：{topics}")
        logger.info(f"[知识库查询] 主题数量：{len(topics)}")
        # 对每个主题进行知识库查询
        all_related_info = []
        for i, topic in enumerate(topics, 1):
            logger.info(f"[知识库查询] 正在处理第 {i}/{len(topics)} 个主题：{topic}")
            try:
                embedding = await get_embedding(topic)
                if embedding:
                    logger.debug(f"[知识库查询] 主题「{topic}」成功获取embedding")
                    topic_info = self.get_info_from_db(embedding, limit=2, threshold=threshold)
                    if topic_info:
                        logger.info(f"[知识库查询] 主题「{topic}」找到相关知识：\n{topic_info}")
                        all_related_info.append(topic_info)
                    else:
                        logger.debug(f"[知识库查询] 主题「{topic}」未找到相关知识")
                else:
                    logger.warning(f"[知识库查询] 主题「{topic}」获取embedding失败")
            except Exception as e:
                logger.error(f"[知识库查询] 处理主题「{topic}」时发生错误：{str(e)}")
                continue
        # 合并所有相关主题的知识
        if all_related_info:
            related_info = "\n".join(all_related_info)
            logger.info(f"[知识库查询] 最终合并的知识内容：\n{related_info}")
            logger.info(f"[知识库查询] 成功处理的主题数量：{len(all_related_info)}/{len(topics)}")
        else:
            logger.debug("[知识库查询] 未找到任何相关知识")
        return related_info
-    def get_info_from_db(self, query_embedding: list, limit: int = 1, threshold: float = 0.5) -> str:
+    def get_info_from_db(self, query_embedding: list, limit: int = 5, threshold: float = 0.5) -> str:
        if not query_embedding:
            logger.warning("[知识库查询] 查询向量为空")
            return ""
        logger.debug(f"[知识库查询] 开始查询，相似度阈值：{threshold}，返回数量限制：{limit}")
        # 使用余弦相似度计算
        pipeline = [
            {
@@ -300,11 +335,15 @@ class PromptBuilder:
        ]
        results = list(db.knowledges.aggregate(pipeline))
        # print(f"\033[1;34m[调试]\033[0m获取知识库内容结果: {results}")
        if not results:
            logger.debug(f"[知识库查询] 未找到相似度大于 {threshold} 的结果")
            return ""
        # 记录每个结果的相似度
        for result in results:
            logger.debug(f"[知识库查询] 找到相似内容 [相似度: {result['similarity']:.3f}]：\n{result['content']}")
        # 返回所有找到的内容，用换行分隔
        return "\n".join(str(result["content"]) for result in results)