diff --git a/review_agent/services.py b/review_agent/services.py index 45b1d74..0bd9c7e 100644 --- a/review_agent/services.py +++ b/review_agent/services.py @@ -108,10 +108,13 @@ def send_message(conversation: Conversation, content: str) -> tuple[Message, Mes user_message = append_user_message(conversation, content) knowledge_context = build_knowledge_context(content) - try: - reply_content = generate_reply(conversation, content, knowledge_context=knowledge_context) - except (LLMConfigurationError, LLMRequestError) as exc: - reply_content = f"模型调用失败:{exc}" + if should_refuse_ungrounded_chat(conversation, content, knowledge_context): + reply_content = out_of_scope_reply() + else: + try: + reply_content = generate_reply(conversation, content, knowledge_context=knowledge_context) + except (LLMConfigurationError, LLMRequestError) as exc: + reply_content = f"模型调用失败:{exc}" assistant_message = append_assistant_message(conversation, reply_content) @@ -127,6 +130,31 @@ def stream_message(conversation: Conversation, content: str): user_message = append_user_message(conversation, content) assistant_parts: list[str] = [] + knowledge_context = build_knowledge_context(content) + + if should_refuse_ungrounded_chat(conversation, content, knowledge_context): + reply_content = out_of_scope_reply() + assistant_message = append_assistant_message(conversation, reply_content) + yield sse_event( + "meta", + { + "conversation_id": conversation.pk, + "title": conversation.title or build_conversation_title(content), + "user_message_id": user_message.pk, + "user_message": user_message.content, + }, + ) + yield sse_event("chunk", {"delta": reply_content}) + yield sse_event( + "done", + { + "assistant_message_id": assistant_message.pk, + "conversation_id": conversation.pk, + "title": conversation.title, + }, + ) + return + route = route_message_intent(conversation, content) logger.info( "Stream message started", @@ -395,7 +423,6 @@ def stream_message(conversation: Conversation, content: str): stream_failed = False stream_error = "" - knowledge_context = build_knowledge_context(content) try: for chunk in stream_reply(conversation, content, knowledge_context=knowledge_context): assistant_parts.append(chunk) @@ -497,6 +524,76 @@ def build_knowledge_context(content: str, *, n_results: int = 5) -> str: return "\n\n".join(lines) +def should_refuse_ungrounded_chat( + conversation: Conversation, + content: str, + knowledge_context: str = "", +) -> bool: + if (knowledge_context or "").strip(): + return False + if _is_business_related_question(content): + return False + if _has_active_attachments(conversation): + return False + return True + + +def out_of_scope_reply() -> str: + return ( + "没有在当前启用的知识库材料中找到可依据的内容,且这个问题与当前主营业务无关。" + "为避免编造,我不能直接回答。请先上传或启用相关知识库材料,或改问体外诊断试剂注册资料审核、" + "文件汇总、法规核查、申报填表等业务范围内的问题。" + ) + + +def _is_business_related_question(content: str) -> bool: + normalized = (content or "").lower() + compact = "".join(normalized.split()) + if not compact: + return True + business_keywords = [ + "审核智能体", + "体外诊断", + "ivd", + "nmpa", + "cmde", + "医疗器械", + "注册资料", + "注册申报", + "注册检验", + "注册证", + "申报资料", + "申报文件", + "法规", + "核查", + "审评", + "审核", + "整改", + "风险", + "说明书", + "临床", + "性能", + "安全", + "适用范围", + "预期用途", + "附件", + "文件", + "压缩包", + "目录", + "页数", + "清单", + "汇总", + "模板", + "填表", + "知识库", + "检索", + "报告", + "材料", + "资料", + ] + return any(keyword in compact for keyword in business_keywords) + + def build_filename_matched_document_context(query: str, *, max_chars: int = 12000) -> str: terms = _knowledge_query_terms(query) if not terms: diff --git a/tests/test_chat_knowledge_context.py b/tests/test_chat_knowledge_context.py index a31a0f3..af12f02 100644 --- a/tests/test_chat_knowledge_context.py +++ b/tests/test_chat_knowledge_context.py @@ -1,7 +1,7 @@ import pytest from review_agent.models import KnowledgeBaseDocument -from review_agent.services import build_knowledge_context +from review_agent.services import build_knowledge_context, send_message, stream_message pytestmark = pytest.mark.django_db @@ -57,3 +57,67 @@ def test_build_knowledge_context_uses_full_document_when_name_matches(settings, assert "全文材料" in context assert "来源:用户知识库/孙之烨-260510.txt" in context assert "完整经历:曾组织技术分享并带队参加竞赛" in context + + +def test_send_message_refuses_out_of_scope_answer_without_knowledge_context(monkeypatch, django_user_model): + from review_agent.models import Conversation + + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + monkeypatch.setattr( + "review_agent.services.search_knowledge_base", + lambda query, n_results=5: {"query": query, "results": [], "error_message": ""}, + ) + monkeypatch.setattr( + "review_agent.services.generate_reply", + lambda *args, **kwargs: pytest.fail("out-of-scope answer without knowledge context must not call LLM"), + ) + + _, assistant_message = send_message(conversation, "孙之烨是谁") + + assert "没有在当前启用的知识库材料中找到" in assistant_message.content + assert "与当前主营业务无关" in assistant_message.content + + +def test_stream_message_refuses_out_of_scope_answer_without_knowledge_context(monkeypatch, django_user_model): + from review_agent.models import Conversation + + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + monkeypatch.setattr( + "review_agent.services.search_knowledge_base", + lambda query, n_results=5: {"query": query, "results": [], "error_message": ""}, + ) + monkeypatch.setattr( + "review_agent.services.stream_reply", + lambda *args, **kwargs: pytest.fail("out-of-scope answer without knowledge context must not call streaming LLM"), + ) + monkeypatch.setattr( + "review_agent.services.generate_reply", + lambda *args, **kwargs: pytest.fail("out-of-scope answer without knowledge context must not call fallback LLM"), + ) + + frames = list(stream_message(conversation, "给我一份红烧肉菜谱")) + + assert any("没有在当前启用的知识库材料中找到" in frame for frame in frames) + assert any("与当前主营业务无关" in frame for frame in frames) + assert any("done" in frame for frame in frames) + + +def test_business_question_without_knowledge_context_can_use_llm(monkeypatch, django_user_model): + from review_agent.models import Conversation + + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + monkeypatch.setattr( + "review_agent.services.search_knowledge_base", + lambda query, n_results=5: {"query": query, "results": [], "error_message": ""}, + ) + monkeypatch.setattr( + "review_agent.services.generate_reply", + lambda *args, **kwargs: "注册检验报告通常用于证明产品性能符合要求。", + ) + + _, assistant_message = send_message(conversation, "注册检验报告有什么作用") + + assert "注册检验报告" in assistant_message.content diff --git a/tests/test_file_summary_workflow.py b/tests/test_file_summary_workflow.py index 9822751..8db3ac1 100644 --- a/tests/test_file_summary_workflow.py +++ b/tests/test_file_summary_workflow.py @@ -286,7 +286,7 @@ def test_stream_message_falls_back_to_non_stream_reply_when_stream_breaks(monkey lambda conversation, content, knowledge_context="": "非流式完整回复", ) - frames = list(stream_message(conversation, "普通问题")) + frames = list(stream_message(conversation, "注册检验报告审核要点有哪些")) joined = "".join(frames) assert "已生成部分内容" in joined