# DEMO-AGENT/review_agent/llm.py

import json
import logging
from urllib import error, request

from django.conf import settings


class LLMConfigurationError(RuntimeError):
    """Signals that a required LLM setting (API key or model name) is missing.

    Raised before any network request is attempted.
    """


class LLMRequestError(RuntimeError):
    """Signals that a call to the remote LLM provider failed.

    Covers both transport/HTTP failures and responses whose structure is not
    the expected chat-completion shape.
    """


# Module-level logger named after this module; used to report malformed
# stream chunks without interrupting the stream.
logger = logging.getLogger(__name__)


def generate_reply(conversation, user_message: str) -> str:
    """Return the assistant's reply to ``user_message`` within ``conversation``.

    The prompt is assembled by ``build_messages`` and sent through
    ``generate_completion``, so request construction, error translation and
    response parsing live in a single place instead of being duplicated here.

    Args:
        conversation: Conversation object handed to ``build_messages``.
        user_message: The latest user input.

    Returns:
        The assistant text with surrounding whitespace stripped.

    Raises:
        LLMConfigurationError: If ``LLM_API_KEY`` or ``LLM_MODEL`` is not set
            (validated inside ``generate_completion``).
        LLMRequestError: If the provider call fails or the response does not
            have the expected shape.
    """

    # Same sampling temperature and timeout this function has always used.
    return generate_completion(
        build_messages(conversation, user_message),
        temperature=0.3,
        timeout=60,
    )


def generate_completion(messages: list[dict[str, str]], *, temperature: float = 0.0, timeout: float = 60) -> str:
    """Call the configured chat endpoint with explicit messages and return assistant text.

    Args:
        messages: Chat messages (``role``/``content`` dicts) sent as-is.
        temperature: Sampling temperature forwarded to the provider.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        The content of the first choice's message, stripped of surrounding whitespace.

    Raises:
        LLMConfigurationError: If ``LLM_API_KEY`` or ``LLM_MODEL`` is not set.
        LLMRequestError: If the HTTP call fails (HTTP error status, connection
            failure, timeout) or the response body is not the expected JSON shape.
    """

    if not settings.LLM_API_KEY:
        raise LLMConfigurationError("缺少 LLM_API_KEY 配置。")
    if not settings.LLM_MODEL:
        raise LLMConfigurationError("缺少 LLM_MODEL 配置。")

    payload = {
        "model": settings.LLM_MODEL,
        "messages": messages,
        "temperature": temperature,
    }
    body = json.dumps(payload).encode("utf-8")
    endpoint = f"{settings.LLM_BASE_URL.rstrip('/')}/chat/completions"

    http_request = request.Request(
        endpoint,
        data=body,
        headers={
            "Authorization": f"Bearer {settings.LLM_API_KEY}",
            "Content-Type": "application/json",
        },
        method="POST",
    )

    try:
        with request.urlopen(http_request, timeout=timeout) as response:
            raw_body = response.read()
    except error.HTTPError as exc:
        details = exc.read().decode("utf-8", errors="ignore")
        raise LLMRequestError(f"模型接口调用失败：HTTP {exc.code} {details}") from exc
    except error.URLError as exc:
        raise LLMRequestError(f"模型接口调用失败：{exc.reason}") from exc
    except OSError as exc:
        # Timeouts or resets while waiting for / reading the response are
        # raised as plain OSError subclasses (e.g. TimeoutError), not URLError.
        # Must come after the URLError handlers because URLError is an OSError.
        raise LLMRequestError(f"模型接口调用失败：{exc}") from exc

    try:
        # ValueError covers both invalid UTF-8 and invalid JSON.
        data = json.loads(raw_body.decode("utf-8"))
        content = data["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        raise LLMRequestError("模型接口返回格式不符合预期。") from exc

    # A null or non-text content would otherwise fail on .strip() with AttributeError.
    if not isinstance(content, str):
        raise LLMRequestError("模型接口返回格式不符合预期。")
    return content.strip()


def _stream_delta_text(chunk) -> str:
    """Return the incremental text carried by one decoded stream chunk, or ``""``."""

    if not isinstance(chunk, dict):
        return ""
    choices = chunk.get("choices")
    # A missing or empty ``choices`` list carries no text; indexing [0] on an
    # empty list would raise IndexError and abort the whole stream.
    if not isinstance(choices, list) or not choices:
        return ""
    first_choice = choices[0]
    delta = first_choice.get("delta") if isinstance(first_choice, dict) else None
    content = delta.get("content") if isinstance(delta, dict) else None
    return content if isinstance(content, str) else ""


def stream_reply(conversation, user_message: str):
    """Stream incremental assistant text from the configured chat endpoint.

    This is a generator: nothing (including configuration validation) runs
    until the caller starts iterating.

    Args:
        conversation: Conversation object handed to ``build_messages``.
        user_message: The latest user input.

    Yields:
        Non-empty text fragments in the order the provider sends them.

    Raises:
        LLMConfigurationError: If ``LLM_API_KEY`` or ``LLM_MODEL`` is not set.
        LLMRequestError: If the HTTP call fails or the connection times out or
            breaks while the stream is being read.
    """

    if not settings.LLM_API_KEY:
        raise LLMConfigurationError("缺少 LLM_API_KEY 配置。")
    if not settings.LLM_MODEL:
        raise LLMConfigurationError("缺少 LLM_MODEL 配置。")

    request_payload = {
        "model": settings.LLM_MODEL,
        "messages": build_messages(conversation, user_message),
        "temperature": 0.3,
        "stream": True,
    }
    body = json.dumps(request_payload).encode("utf-8")
    endpoint = f"{settings.LLM_BASE_URL.rstrip('/')}/chat/completions"

    http_request = request.Request(
        endpoint,
        data=body,
        headers={
            "Authorization": f"Bearer {settings.LLM_API_KEY}",
            "Content-Type": "application/json",
        },
        method="POST",
    )

    try:
        with request.urlopen(http_request, timeout=300) as response:
            for raw_line in response:
                line = raw_line.decode("utf-8", errors="ignore").strip()
                # Only "data:" lines carry payloads; blank lines and other
                # fields are skipped.
                if not line.startswith("data:"):
                    continue
                data = line[5:].strip()
                if data == "[DONE]":
                    break
                try:
                    chunk = json.loads(data)
                except json.JSONDecodeError:
                    logger.warning("Skipping malformed LLM stream data", extra={"data": data[:200]})
                    continue
                text = _stream_delta_text(chunk)
                if text:
                    yield text
    except error.HTTPError as exc:
        details = exc.read().decode("utf-8", errors="ignore")
        raise LLMRequestError(f"模型接口调用失败：HTTP {exc.code} {details}") from exc
    except error.URLError as exc:
        raise LLMRequestError(f"模型接口调用失败：{exc.reason}") from exc
    except OSError as exc:
        # Read timeouts / connection resets mid-stream are plain OSError
        # subclasses rather than URLError; keep this handler last because
        # URLError itself derives from OSError.
        raise LLMRequestError(f"模型接口调用失败：{exc}") from exc


def build_messages(conversation, latest_user_message: str) -> list[dict[str, str]]:
    """Assemble the message list sent to the provider.

    The list starts with the system prompt, continues with every message
    returned by ``conversation.messages.all()`` and ends with the stripped
    latest user message unless an identical user message is already stored.
    """

    prompt = [{"role": "system", "content": system_prompt()}]
    prompt.extend(
        {"role": stored.role, "content": stored.content}
        for stored in conversation.messages.all()
    )

    latest_text = latest_user_message.strip()
    # NOTE(review): this matches ANY stored user message with the same text,
    # not only the most recent one - confirm callers persist the message first.
    already_stored = conversation.messages.filter(role="user", content=latest_text).exists()
    if already_stored:
        return prompt

    prompt.append({"role": "user", "content": latest_text})
    return prompt


def system_prompt() -> str:
    """Return the fixed system instruction sent at the start of every prompt.

    The text sets up a "review agent" persona for preparing and reviewing
    clinical registration documents for in-vitro diagnostic reagents, and asks
    for Chinese answers unless the user explicitly requests English.
    """

    fragments = (
        "你是“审核智能体”，服务于体外诊断试剂临床注册文件准备与审核场景。",
        "你的回答要专业、简洁、结构清楚，优先帮助用户完成法规核查、说明书审核、",
        "风险识别、资料补充建议和审评思路梳理。",
        "当信息不足时，明确指出缺失信息，并给出下一步建议。",
        "除非用户明确要求英文，否则始终使用中文回答。",
    )
    return "".join(fragments)