180 lines
6.5 KiB
Python
180 lines
6.5 KiB
Python
import json
|
||
import logging
|
||
from urllib import error, request
|
||
|
||
from django.conf import settings
|
||
|
||
|
||
class LLMConfigurationError(RuntimeError):
    """Signal that a required LLM setting is missing.

    The request helpers in this module raise it when ``settings.LLM_API_KEY``
    or ``settings.LLM_MODEL`` is empty, before any network call is attempted.
    """
|
||
|
||
|
||
class LLMRequestError(RuntimeError):
    """Signal that a call to the remote LLM provider did not succeed.

    The request helpers in this module raise it for HTTP error statuses,
    connection-level failures, and responses whose structure is not the
    expected chat-completion shape.
    """
|
||
|
||
|
||
# Module-level logger named after this module; stream_reply uses it to report
# stream lines that cannot be decoded as JSON.
logger = logging.getLogger(__name__)
|
||
|
||
|
||
def generate_reply(conversation, user_message: str) -> str:
    """Return the assistant's reply to ``user_message`` within ``conversation``.

    The prompt is assembled by ``build_messages`` (system prompt, stored
    history, and the latest user message) and sent through
    ``generate_completion`` with a temperature of 0.3 and a 60 second timeout,
    so the request and response handling live in one place.

    Args:
        conversation: Object whose ``messages`` manager holds the stored
            history; passed straight to ``build_messages``.
        user_message: The newest user input.

    Returns:
        The assistant text with surrounding whitespace stripped.

    Raises:
        LLMConfigurationError: If ``LLM_API_KEY`` or ``LLM_MODEL`` is not set.
        LLMRequestError: If the provider call fails or returns an unexpected
            payload (propagated from ``generate_completion``).
    """
    # Check configuration up front so a misconfigured project fails before
    # build_messages queries the conversation history.
    if not settings.LLM_API_KEY:
        raise LLMConfigurationError("缺少 LLM_API_KEY 配置。")
    if not settings.LLM_MODEL:
        raise LLMConfigurationError("缺少 LLM_MODEL 配置。")

    return generate_completion(
        build_messages(conversation, user_message),
        temperature=0.3,
        timeout=60,
    )
|
||
|
||
|
||
def generate_completion(messages: list[dict[str, str]], *, temperature: float = 0.0, timeout: float = 60) -> str:
    """Send explicit chat messages to the configured endpoint and return the reply text.

    A JSON body containing the configured model, ``messages`` and
    ``temperature`` is POSTed to ``{LLM_BASE_URL}/chat/completions`` with a
    Bearer ``Authorization`` header.

    Args:
        messages: Chat messages (``role`` / ``content`` dicts) sent as-is.
        temperature: Sampling temperature forwarded to the provider.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        ``choices[0].message.content`` from the response, stripped of
        surrounding whitespace.

    Raises:
        LLMConfigurationError: If ``LLM_API_KEY`` or ``LLM_MODEL`` is not set.
        LLMRequestError: If the HTTP call fails (error status, connection
            failure, timeout) or the response is not the expected JSON shape.
    """
    if not settings.LLM_API_KEY:
        raise LLMConfigurationError("缺少 LLM_API_KEY 配置。")
    if not settings.LLM_MODEL:
        raise LLMConfigurationError("缺少 LLM_MODEL 配置。")

    payload = {
        "model": settings.LLM_MODEL,
        "messages": messages,
        "temperature": temperature,
    }
    body = json.dumps(payload).encode("utf-8")
    endpoint = f"{settings.LLM_BASE_URL.rstrip('/')}/chat/completions"

    http_request = request.Request(
        endpoint,
        data=body,
        headers={
            "Authorization": f"Bearer {settings.LLM_API_KEY}",
            "Content-Type": "application/json",
        },
        method="POST",
    )

    try:
        with request.urlopen(http_request, timeout=timeout) as response:
            raw_body = response.read()
    except error.HTTPError as exc:
        # HTTPError must be handled before URLError because it is a subclass.
        details = exc.read().decode("utf-8", errors="ignore")
        raise LLMRequestError(f"模型接口调用失败:HTTP {exc.code} {details}") from exc
    except error.URLError as exc:
        raise LLMRequestError(f"模型接口调用失败:{exc.reason}") from exc
    except OSError as exc:
        # Timeouts and connection resets that happen while reading the body
        # are raised as plain OSError subclasses, not wrapped in URLError.
        raise LLMRequestError(f"模型接口调用失败:{exc}") from exc

    try:
        # ValueError covers both invalid UTF-8 and invalid JSON.
        data = json.loads(raw_body.decode("utf-8"))
        content = data["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        raise LLMRequestError("模型接口返回格式不符合预期。") from exc

    # The provider may return a null or non-text content; report that as a
    # malformed response instead of failing on .strip().
    if not isinstance(content, str):
        raise LLMRequestError("模型接口返回格式不符合预期。")
    return content.strip()
|
||
|
||
|
||
def stream_reply(conversation, user_message: str):
    """Yield incremental assistant text for ``user_message`` as it is streamed.

    The prompt is built with ``build_messages`` and POSTed to
    ``{LLM_BASE_URL}/chat/completions`` with ``"stream": True`` and a
    300 second timeout. The response is read line by line: only lines starting
    with ``data:`` are considered, ``[DONE]`` ends the stream, and lines that
    are not valid JSON are logged and skipped.

    Because this is a generator, nothing (including the configuration checks)
    runs until the first item is requested.

    Args:
        conversation: Object whose ``messages`` manager holds the stored
            history; passed straight to ``build_messages``.
        user_message: The newest user input.

    Yields:
        Non-empty text fragments taken from ``choices[0].delta.content``.

    Raises:
        LLMConfigurationError: If ``LLM_API_KEY`` or ``LLM_MODEL`` is not set.
        LLMRequestError: If the HTTP call fails (error status, connection
            failure, or a timeout while reading the stream).
    """
    if not settings.LLM_API_KEY:
        raise LLMConfigurationError("缺少 LLM_API_KEY 配置。")
    if not settings.LLM_MODEL:
        raise LLMConfigurationError("缺少 LLM_MODEL 配置。")

    request_payload = {
        "model": settings.LLM_MODEL,
        "messages": build_messages(conversation, user_message),
        "temperature": 0.3,
        "stream": True,
    }
    body = json.dumps(request_payload).encode("utf-8")
    endpoint = f"{settings.LLM_BASE_URL.rstrip('/')}/chat/completions"

    http_request = request.Request(
        endpoint,
        data=body,
        headers={
            "Authorization": f"Bearer {settings.LLM_API_KEY}",
            "Content-Type": "application/json",
        },
        method="POST",
    )

    try:
        with request.urlopen(http_request, timeout=300) as response:
            for raw_line in response:
                line = raw_line.decode("utf-8", errors="ignore").strip()
                # Blank keep-alive lines and non-data fields are ignored.
                if not line.startswith("data:"):
                    continue
                data = line[5:].strip()
                if data == "[DONE]":
                    break
                try:
                    chunk = json.loads(data)
                except json.JSONDecodeError:
                    logger.warning("Skipping malformed LLM stream data", extra={"data": data[:200]})
                    continue
                delta = _extract_stream_delta(chunk)
                if delta:
                    yield delta
    except error.HTTPError as exc:
        # HTTPError must be handled before URLError because it is a subclass.
        details = exc.read().decode("utf-8", errors="ignore")
        raise LLMRequestError(f"模型接口调用失败:HTTP {exc.code} {details}") from exc
    except error.URLError as exc:
        raise LLMRequestError(f"模型接口调用失败:{exc.reason}") from exc
    except OSError as exc:
        # Timeouts and connection resets while reading the stream are raised
        # as plain OSError subclasses, not wrapped in URLError.
        raise LLMRequestError(f"模型接口调用失败:{exc}") from exc


def _extract_stream_delta(chunk) -> str:
    """Return the text in ``choices[0].delta.content`` of a stream chunk, or ``""``.

    Chunks with an empty ``choices`` list, a null ``delta`` or non-text
    content yield ``""`` instead of raising, so one odd chunk cannot abort
    the whole stream.
    """
    if not isinstance(chunk, dict):
        return ""
    choices = chunk.get("choices")
    if not isinstance(choices, list) or not choices:
        return ""
    first_choice = choices[0]
    if not isinstance(first_choice, dict):
        return ""
    delta = first_choice.get("delta")
    if not isinstance(delta, dict):
        return ""
    content = delta.get("content")
    return content if isinstance(content, str) else ""
|
||
|
||
|
||
def build_messages(conversation, latest_user_message: str) -> list[dict[str, str]]:
    """Assemble the message list sent to the provider.

    The list starts with the system prompt, followed by every message returned
    by ``conversation.messages.all()`` as ``role`` / ``content`` dicts. The
    stripped ``latest_user_message`` is appended as a final user message only
    when the conversation has no stored user message with that exact content.

    Args:
        conversation: Object exposing a ``messages`` manager with ``all()``
            and ``filter()``.
        latest_user_message: The newest user input.

    Returns:
        The ordered list of chat message dicts.
    """
    prompt_messages = [{"role": "system", "content": system_prompt()}]
    prompt_messages.extend(
        {"role": stored.role, "content": stored.content}
        for stored in conversation.messages.all()
    )

    latest_text = latest_user_message.strip()
    # NOTE(review): this matches any earlier user message with the same text,
    # not just the most recent one - confirm callers always persist the
    # message before calling, otherwise a repeated question is not appended.
    already_stored = conversation.messages.filter(role="user", content=latest_text).exists()
    if not already_stored:
        prompt_messages.append({"role": "user", "content": latest_text})

    return prompt_messages
|
||
|
||
|
||
def system_prompt() -> str:
    """Return the fixed system prompt used as the first chat message.

    The Chinese text names the assistant "审核智能体", places it in the context
    of preparing and reviewing clinical registration documents for in-vitro
    diagnostic reagents, and asks for professional, concise, well-structured
    answers in Chinese unless the user explicitly requests English. The
    fragments are concatenated with no separator.
    """
    fragments = (
        "你是“审核智能体”,服务于体外诊断试剂临床注册文件准备与审核场景。",
        "你的回答要专业、简洁、结构清楚,优先帮助用户完成法规核查、说明书审核、",
        "风险识别、资料补充建议和审评思路梳理。",
        "当信息不足时,明确指出缺失信息,并给出下一步建议。",
        "除非用户明确要求英文,否则始终使用中文回答。",
    )
    return "".join(fragments)
|