去除缓存区
This commit is contained in:
parent
5350860f78
commit
ccf4e4cedc
196
main.py
196
main.py
@ -56,7 +56,7 @@ class DifyModelManager:
|
|||||||
headers=headers,
|
headers=headers,
|
||||||
params={"user": "default_user"}
|
params={"user": "default_user"}
|
||||||
)
|
)
|
||||||
|
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
app_info = response.json()
|
app_info = response.json()
|
||||||
return app_info.get("name", "Unknown App")
|
return app_info.get("name", "Unknown App")
|
||||||
@ -71,7 +71,7 @@ class DifyModelManager:
|
|||||||
"""刷新所有应用信息"""
|
"""刷新所有应用信息"""
|
||||||
self.name_to_api_key.clear()
|
self.name_to_api_key.clear()
|
||||||
self.api_key_to_name.clear()
|
self.api_key_to_name.clear()
|
||||||
|
|
||||||
for api_key in self.api_keys:
|
for api_key in self.api_keys:
|
||||||
app_name = await self.fetch_app_info(api_key)
|
app_name = await self.fetch_app_info(api_key)
|
||||||
if app_name:
|
if app_name:
|
||||||
@ -112,14 +112,14 @@ def get_api_key(model_name):
|
|||||||
|
|
||||||
def transform_openai_to_dify(openai_request, endpoint):
|
def transform_openai_to_dify(openai_request, endpoint):
|
||||||
"""将OpenAI格式的请求转换为Dify格式"""
|
"""将OpenAI格式的请求转换为Dify格式"""
|
||||||
|
|
||||||
if endpoint == "/chat/completions":
|
if endpoint == "/chat/completions":
|
||||||
messages = openai_request.get("messages", [])
|
messages = openai_request.get("messages", [])
|
||||||
stream = openai_request.get("stream", False)
|
stream = openai_request.get("stream", False)
|
||||||
|
|
||||||
# 尝试从历史消息中提取conversation_id
|
# 尝试从历史消息中提取conversation_id
|
||||||
conversation_id = None
|
conversation_id = None
|
||||||
|
|
||||||
# 提取system消息内容
|
# 提取system消息内容
|
||||||
system_content = ""
|
system_content = ""
|
||||||
system_messages = [msg for msg in messages if msg.get("role") == "system"]
|
system_messages = [msg for msg in messages if msg.get("role") == "system"]
|
||||||
@ -127,7 +127,7 @@ def transform_openai_to_dify(openai_request, endpoint):
|
|||||||
system_content = system_messages[0].get("content", "")
|
system_content = system_messages[0].get("content", "")
|
||||||
# 记录找到的system消息
|
# 记录找到的system消息
|
||||||
logger.info(f"Found system message: {system_content[:100]}{'...' if len(system_content) > 100 else ''}")
|
logger.info(f"Found system message: {system_content[:100]}{'...' if len(system_content) > 100 else ''}")
|
||||||
|
|
||||||
if CONVERSATION_MEMORY_MODE == 2: # 零宽字符模式
|
if CONVERSATION_MEMORY_MODE == 2: # 零宽字符模式
|
||||||
if len(messages) > 1:
|
if len(messages) > 1:
|
||||||
# 遍历历史消息,找到最近的assistant消息
|
# 遍历历史消息,找到最近的assistant消息
|
||||||
@ -138,15 +138,15 @@ def transform_openai_to_dify(openai_request, endpoint):
|
|||||||
conversation_id = decode_conversation_id(content)
|
conversation_id = decode_conversation_id(content)
|
||||||
if conversation_id:
|
if conversation_id:
|
||||||
break
|
break
|
||||||
|
|
||||||
# 获取最后一条用户消息
|
# 获取最后一条用户消息
|
||||||
user_query = messages[-1]["content"] if messages and messages[-1].get("role") != "system" else ""
|
user_query = messages[-1]["content"] if messages and messages[-1].get("role") != "system" else ""
|
||||||
|
|
||||||
# 如果有system消息且是首次对话(没有conversation_id),则将system内容添加到用户查询前
|
# 如果有system消息且是首次对话(没有conversation_id),则将system内容添加到用户查询前
|
||||||
if system_content and not conversation_id:
|
if system_content and not conversation_id:
|
||||||
user_query = f"系统指令: {system_content}\n\n用户问题: {user_query}"
|
user_query = f"系统指令: {system_content}\n\n用户问题: {user_query}"
|
||||||
logger.info(f"[零宽字符模式] 首次对话,添加system内容到查询前")
|
logger.info(f"[零宽字符模式] 首次对话,添加system内容到查询前")
|
||||||
|
|
||||||
dify_request = {
|
dify_request = {
|
||||||
"inputs": {},
|
"inputs": {},
|
||||||
"query": user_query,
|
"query": user_query,
|
||||||
@ -157,12 +157,12 @@ def transform_openai_to_dify(openai_request, endpoint):
|
|||||||
else: # history_message模式(默认)
|
else: # history_message模式(默认)
|
||||||
# 获取最后一条用户消息
|
# 获取最后一条用户消息
|
||||||
user_query = messages[-1]["content"] if messages and messages[-1].get("role") != "system" else ""
|
user_query = messages[-1]["content"] if messages and messages[-1].get("role") != "system" else ""
|
||||||
|
|
||||||
# 构造历史消息
|
# 构造历史消息
|
||||||
if len(messages) > 1:
|
if len(messages) > 1:
|
||||||
history_messages = []
|
history_messages = []
|
||||||
has_system_in_history = False
|
has_system_in_history = False
|
||||||
|
|
||||||
# 检查历史消息中是否已经包含system消息
|
# 检查历史消息中是否已经包含system消息
|
||||||
for msg in messages[:-1]: # 除了最后一条消息
|
for msg in messages[:-1]: # 除了最后一条消息
|
||||||
role = msg.get("role", "")
|
role = msg.get("role", "")
|
||||||
@ -171,12 +171,12 @@ def transform_openai_to_dify(openai_request, endpoint):
|
|||||||
if role == "system":
|
if role == "system":
|
||||||
has_system_in_history = True
|
has_system_in_history = True
|
||||||
history_messages.append(f"{role}: {content}")
|
history_messages.append(f"{role}: {content}")
|
||||||
|
|
||||||
# 如果历史中没有system消息但现在有system消息,则添加到历史的最前面
|
# 如果历史中没有system消息但现在有system消息,则添加到历史的最前面
|
||||||
if system_content and not has_system_in_history:
|
if system_content and not has_system_in_history:
|
||||||
history_messages.insert(0, f"system: {system_content}")
|
history_messages.insert(0, f"system: {system_content}")
|
||||||
logger.info(f"[history_message模式] 添加system内容到历史消息前")
|
logger.info(f"[history_message模式] 添加system内容到历史消息前")
|
||||||
|
|
||||||
# 将历史消息添加到查询中
|
# 将历史消息添加到查询中
|
||||||
if history_messages:
|
if history_messages:
|
||||||
history_context = "\n\n".join(history_messages)
|
history_context = "\n\n".join(history_messages)
|
||||||
@ -184,7 +184,7 @@ def transform_openai_to_dify(openai_request, endpoint):
|
|||||||
elif system_content: # 没有历史消息但有system消息
|
elif system_content: # 没有历史消息但有system消息
|
||||||
user_query = f"系统指令: {system_content}\n\n用户问题: {user_query}"
|
user_query = f"系统指令: {system_content}\n\n用户问题: {user_query}"
|
||||||
logger.info(f"[history_message模式] 首次对话,添加system内容到查询前")
|
logger.info(f"[history_message模式] 首次对话,添加system内容到查询前")
|
||||||
|
|
||||||
dify_request = {
|
dify_request = {
|
||||||
"inputs": {},
|
"inputs": {},
|
||||||
"query": user_query,
|
"query": user_query,
|
||||||
@ -193,21 +193,21 @@ def transform_openai_to_dify(openai_request, endpoint):
|
|||||||
}
|
}
|
||||||
|
|
||||||
return dify_request
|
return dify_request
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def transform_dify_to_openai(dify_response, model="claude-3-5-sonnet-v2", stream=False):
|
def transform_dify_to_openai(dify_response, model="claude-3-5-sonnet-v2", stream=False):
|
||||||
"""将Dify格式的响应转换为OpenAI格式"""
|
"""将Dify格式的响应转换为OpenAI格式"""
|
||||||
|
|
||||||
if not stream:
|
if not stream:
|
||||||
# 首先获取回答内容,支持不同的响应模式
|
# 首先获取回答内容,支持不同的响应模式
|
||||||
answer = ""
|
answer = ""
|
||||||
mode = dify_response.get("mode", "")
|
mode = dify_response.get("mode", "")
|
||||||
|
|
||||||
# 普通聊天模式
|
# 普通聊天模式
|
||||||
if "answer" in dify_response:
|
if "answer" in dify_response:
|
||||||
answer = dify_response.get("answer", "")
|
answer = dify_response.get("answer", "")
|
||||||
|
|
||||||
# 如果是Agent模式,需要从agent_thoughts中提取回答
|
# 如果是Agent模式,需要从agent_thoughts中提取回答
|
||||||
elif "agent_thoughts" in dify_response:
|
elif "agent_thoughts" in dify_response:
|
||||||
# Agent模式下通常最后一个thought包含最终答案
|
# Agent模式下通常最后一个thought包含最终答案
|
||||||
@ -216,12 +216,12 @@ def transform_dify_to_openai(dify_response, model="claude-3-5-sonnet-v2", stream
|
|||||||
for thought in agent_thoughts:
|
for thought in agent_thoughts:
|
||||||
if thought.get("thought"):
|
if thought.get("thought"):
|
||||||
answer = thought.get("thought", "")
|
answer = thought.get("thought", "")
|
||||||
|
|
||||||
# 只在零宽字符会话记忆模式时处理conversation_id
|
# 只在零宽字符会话记忆模式时处理conversation_id
|
||||||
if CONVERSATION_MEMORY_MODE == 2:
|
if CONVERSATION_MEMORY_MODE == 2:
|
||||||
conversation_id = dify_response.get("conversation_id", "")
|
conversation_id = dify_response.get("conversation_id", "")
|
||||||
history = dify_response.get("conversation_history", [])
|
history = dify_response.get("conversation_history", [])
|
||||||
|
|
||||||
# 检查历史消息中是否已经有会话ID
|
# 检查历史消息中是否已经有会话ID
|
||||||
has_conversation_id = False
|
has_conversation_id = False
|
||||||
if history:
|
if history:
|
||||||
@ -231,14 +231,14 @@ def transform_dify_to_openai(dify_response, model="claude-3-5-sonnet-v2", stream
|
|||||||
if decode_conversation_id(content) is not None:
|
if decode_conversation_id(content) is not None:
|
||||||
has_conversation_id = True
|
has_conversation_id = True
|
||||||
break
|
break
|
||||||
|
|
||||||
# 只在新会话且历史消息中没有会话ID时插入
|
# 只在新会话且历史消息中没有会话ID时插入
|
||||||
if conversation_id and not has_conversation_id:
|
if conversation_id and not has_conversation_id:
|
||||||
logger.info(f"[Debug] Inserting conversation_id: {conversation_id}, history_length: {len(history)}")
|
logger.info(f"[Debug] Inserting conversation_id: {conversation_id}, history_length: {len(history)}")
|
||||||
encoded = encode_conversation_id(conversation_id)
|
encoded = encode_conversation_id(conversation_id)
|
||||||
answer = answer + encoded
|
answer = answer + encoded
|
||||||
logger.info(f"[Debug] Response content after insertion: {repr(answer)}")
|
logger.info(f"[Debug] Response content after insertion: {repr(answer)}")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"id": dify_response.get("message_id", ""),
|
"id": dify_response.get("message_id", ""),
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
@ -277,11 +277,11 @@ def encode_conversation_id(conversation_id):
|
|||||||
"""将conversation_id编码为不可见的字符序列"""
|
"""将conversation_id编码为不可见的字符序列"""
|
||||||
if not conversation_id:
|
if not conversation_id:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
# 使用Base64编码减少长度
|
# 使用Base64编码减少长度
|
||||||
import base64
|
import base64
|
||||||
encoded = base64.b64encode(conversation_id.encode()).decode()
|
encoded = base64.b64encode(conversation_id.encode()).decode()
|
||||||
|
|
||||||
# 使用8种不同的零宽字符表示3位数字
|
# 使用8种不同的零宽字符表示3位数字
|
||||||
# 这样可以将编码长度进一步减少
|
# 这样可以将编码长度进一步减少
|
||||||
char_map = {
|
char_map = {
|
||||||
@ -294,7 +294,7 @@ def encode_conversation_id(conversation_id):
|
|||||||
'6': '\u2061', # 函数应用
|
'6': '\u2061', # 函数应用
|
||||||
'7': '\u2062', # 不可见乘号
|
'7': '\u2062', # 不可见乘号
|
||||||
}
|
}
|
||||||
|
|
||||||
# 将Base64字符串转换为八进制数字
|
# 将Base64字符串转换为八进制数字
|
||||||
result = []
|
result = []
|
||||||
for c in encoded:
|
for c in encoded:
|
||||||
@ -312,14 +312,14 @@ def encode_conversation_id(conversation_id):
|
|||||||
val = 63
|
val = 63
|
||||||
else: # '='
|
else: # '='
|
||||||
val = 0
|
val = 0
|
||||||
|
|
||||||
# 每个Base64字符可以产生2个3位数字
|
# 每个Base64字符可以产生2个3位数字
|
||||||
first = (val >> 3) & 0x7
|
first = (val >> 3) & 0x7
|
||||||
second = val & 0x7
|
second = val & 0x7
|
||||||
result.append(char_map[str(first)])
|
result.append(char_map[str(first)])
|
||||||
if c != '=': # 不编码填充字符的后半部分
|
if c != '=': # 不编码填充字符的后半部分
|
||||||
result.append(char_map[str(second)])
|
result.append(char_map[str(second)])
|
||||||
|
|
||||||
return ''.join(result)
|
return ''.join(result)
|
||||||
|
|
||||||
def decode_conversation_id(content):
|
def decode_conversation_id(content):
|
||||||
@ -336,17 +336,17 @@ def decode_conversation_id(content):
|
|||||||
'\u2061': '6', # 函数应用
|
'\u2061': '6', # 函数应用
|
||||||
'\u2062': '7', # 不可见乘号
|
'\u2062': '7', # 不可见乘号
|
||||||
}
|
}
|
||||||
|
|
||||||
# 提取最后一段零宽字符序列
|
# 提取最后一段零宽字符序列
|
||||||
space_chars = []
|
space_chars = []
|
||||||
for c in reversed(content):
|
for c in reversed(content):
|
||||||
if c not in char_to_val:
|
if c not in char_to_val:
|
||||||
break
|
break
|
||||||
space_chars.append(c)
|
space_chars.append(c)
|
||||||
|
|
||||||
if not space_chars:
|
if not space_chars:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# 将零宽字符转换回Base64字符串
|
# 将零宽字符转换回Base64字符串
|
||||||
space_chars.reverse()
|
space_chars.reverse()
|
||||||
base64_chars = []
|
base64_chars = []
|
||||||
@ -357,7 +357,7 @@ def decode_conversation_id(content):
|
|||||||
val = (first << 3) | second
|
val = (first << 3) | second
|
||||||
else:
|
else:
|
||||||
val = first << 3
|
val = first << 3
|
||||||
|
|
||||||
# 转换回Base64字符
|
# 转换回Base64字符
|
||||||
if val < 26:
|
if val < 26:
|
||||||
base64_chars.append(chr(val + ord('A')))
|
base64_chars.append(chr(val + ord('A')))
|
||||||
@ -369,17 +369,17 @@ def decode_conversation_id(content):
|
|||||||
base64_chars.append('+')
|
base64_chars.append('+')
|
||||||
else:
|
else:
|
||||||
base64_chars.append('/')
|
base64_chars.append('/')
|
||||||
|
|
||||||
# 添加Base64填充
|
# 添加Base64填充
|
||||||
padding = len(base64_chars) % 4
|
padding = len(base64_chars) % 4
|
||||||
if padding:
|
if padding:
|
||||||
base64_chars.extend(['='] * (4 - padding))
|
base64_chars.extend(['='] * (4 - padding))
|
||||||
|
|
||||||
# 解码Base64字符串
|
# 解码Base64字符串
|
||||||
import base64
|
import base64
|
||||||
base64_str = ''.join(base64_chars)
|
base64_str = ''.join(base64_chars)
|
||||||
return base64.b64decode(base64_str).decode()
|
return base64.b64decode(base64_str).decode()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Failed to decode conversation_id: {e}")
|
logger.debug(f"Failed to decode conversation_id: {e}")
|
||||||
return None
|
return None
|
||||||
@ -424,9 +424,9 @@ def chat_completions():
|
|||||||
# 继续处理原始逻辑
|
# 继续处理原始逻辑
|
||||||
openai_request = request.get_json()
|
openai_request = request.get_json()
|
||||||
logger.info(f"Received request: {json.dumps(openai_request, ensure_ascii=False)}")
|
logger.info(f"Received request: {json.dumps(openai_request, ensure_ascii=False)}")
|
||||||
|
|
||||||
model = openai_request.get("model", "claude-3-5-sonnet-v2")
|
model = openai_request.get("model", "claude-3-5-sonnet-v2")
|
||||||
|
|
||||||
# 验证模型是否支持
|
# 验证模型是否支持
|
||||||
api_key = get_api_key(model)
|
api_key = get_api_key(model)
|
||||||
if not api_key:
|
if not api_key:
|
||||||
@ -439,9 +439,9 @@ def chat_completions():
|
|||||||
"code": "model_not_found"
|
"code": "model_not_found"
|
||||||
}
|
}
|
||||||
}, 404
|
}, 404
|
||||||
|
|
||||||
dify_request = transform_openai_to_dify(openai_request, "/chat/completions")
|
dify_request = transform_openai_to_dify(openai_request, "/chat/completions")
|
||||||
|
|
||||||
if not dify_request:
|
if not dify_request:
|
||||||
logger.error("Failed to transform request")
|
logger.error("Failed to transform request")
|
||||||
return {
|
return {
|
||||||
@ -463,11 +463,11 @@ def chat_completions():
|
|||||||
if stream:
|
if stream:
|
||||||
def generate():
|
def generate():
|
||||||
client = httpx.Client(timeout=None)
|
client = httpx.Client(timeout=None)
|
||||||
|
|
||||||
def flush_chunk(chunk_data):
|
def flush_chunk(chunk_data):
|
||||||
"""Helper function to flush chunks immediately"""
|
"""Helper function to flush chunks immediately"""
|
||||||
return chunk_data.encode('utf-8')
|
return chunk_data.encode('utf-8')
|
||||||
|
|
||||||
def calculate_delay(buffer_size):
|
def calculate_delay(buffer_size):
|
||||||
"""
|
"""
|
||||||
根据缓冲区大小动态计算延迟
|
根据缓冲区大小动态计算延迟
|
||||||
@ -481,7 +481,7 @@ def chat_completions():
|
|||||||
return 0.01 # 20ms延迟
|
return 0.01 # 20ms延迟
|
||||||
else: # 内容很少,使用较慢的速度
|
else: # 内容很少,使用较慢的速度
|
||||||
return 0.02 # 30ms延迟
|
return 0.02 # 30ms延迟
|
||||||
|
|
||||||
def send_char(char, message_id):
|
def send_char(char, message_id):
|
||||||
"""Helper function to send single character"""
|
"""Helper function to send single character"""
|
||||||
openai_chunk = {
|
openai_chunk = {
|
||||||
@ -499,10 +499,10 @@ def chat_completions():
|
|||||||
}
|
}
|
||||||
chunk_data = f"data: {json.dumps(openai_chunk)}\n\n"
|
chunk_data = f"data: {json.dumps(openai_chunk)}\n\n"
|
||||||
return flush_chunk(chunk_data)
|
return flush_chunk(chunk_data)
|
||||||
|
|
||||||
# 初始化缓冲区
|
# 初始化缓冲区
|
||||||
output_buffer = []
|
output_buffer = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with client.stream(
|
with client.stream(
|
||||||
'POST',
|
'POST',
|
||||||
@ -517,74 +517,76 @@ def chat_completions():
|
|||||||
) as response:
|
) as response:
|
||||||
generate.message_id = None
|
generate.message_id = None
|
||||||
buffer = ""
|
buffer = ""
|
||||||
|
|
||||||
for raw_bytes in response.iter_raw():
|
for raw_bytes in response.iter_raw():
|
||||||
if not raw_bytes:
|
if not raw_bytes:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
buffer += raw_bytes.decode('utf-8')
|
buffer += raw_bytes.decode('utf-8')
|
||||||
|
|
||||||
while '\n' in buffer:
|
while '\n' in buffer:
|
||||||
line, buffer = buffer.split('\n', 1)
|
line, buffer = buffer.split('\n', 1)
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
|
|
||||||
if not line or not line.startswith('data: '):
|
if not line or not line.startswith('data: '):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
json_str = line[6:]
|
json_str = line[6:]
|
||||||
dify_chunk = json.loads(json_str)
|
dify_chunk = json.loads(json_str)
|
||||||
|
|
||||||
if dify_chunk.get("event") == "message" and "answer" in dify_chunk:
|
if dify_chunk.get("event") == "message" and "answer" in dify_chunk:
|
||||||
current_answer = dify_chunk["answer"]
|
current_answer = dify_chunk["answer"]
|
||||||
if not current_answer:
|
if not current_answer:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
message_id = dify_chunk.get("message_id", "")
|
message_id = dify_chunk.get("message_id", "")
|
||||||
if not generate.message_id:
|
if not generate.message_id:
|
||||||
generate.message_id = message_id
|
generate.message_id = message_id
|
||||||
|
|
||||||
# 将当前批次的字符添加到输出缓冲区
|
# # 将当前批次的字符添加到输出缓冲区
|
||||||
for char in current_answer:
|
# for char in current_answer:
|
||||||
output_buffer.append((char, generate.message_id))
|
# output_buffer.append((char, generate.message_id))
|
||||||
|
|
||||||
# 根据缓冲区大小动态调整输出速度
|
# # 根据缓冲区大小动态调整输出速度
|
||||||
while output_buffer:
|
# while output_buffer:
|
||||||
char, msg_id = output_buffer.pop(0)
|
# char, msg_id = output_buffer.pop(0)
|
||||||
yield send_char(char, msg_id)
|
# yield send_char(char, msg_id)
|
||||||
# 根据剩余缓冲区大小计算延迟
|
# # 根据剩余缓冲区大小计算延迟
|
||||||
delay = calculate_delay(len(output_buffer))
|
# delay = calculate_delay(len(output_buffer))
|
||||||
time.sleep(delay)
|
# time.sleep(delay)
|
||||||
|
yield send_char(current_answer, message_id)
|
||||||
|
|
||||||
# 立即继续处理下一个请求
|
# 立即继续处理下一个请求
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 处理Agent模式的消息事件
|
# 处理Agent模式的消息事件
|
||||||
elif dify_chunk.get("event") == "agent_message" and "answer" in dify_chunk:
|
elif dify_chunk.get("event") == "agent_message" and "answer" in dify_chunk:
|
||||||
current_answer = dify_chunk["answer"]
|
current_answer = dify_chunk["answer"]
|
||||||
if not current_answer:
|
if not current_answer:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
message_id = dify_chunk.get("message_id", "")
|
message_id = dify_chunk.get("message_id", "")
|
||||||
if not generate.message_id:
|
if not generate.message_id:
|
||||||
generate.message_id = message_id
|
generate.message_id = message_id
|
||||||
|
|
||||||
# 将当前批次的字符添加到输出缓冲区
|
# # 将当前批次的字符添加到输出缓冲区
|
||||||
for char in current_answer:
|
# for char in current_answer:
|
||||||
output_buffer.append((char, generate.message_id))
|
# output_buffer.append((char, generate.message_id))
|
||||||
|
|
||||||
# 根据缓冲区大小动态调整输出速度
|
# # 根据缓冲区大小动态调整输出速度
|
||||||
while output_buffer:
|
# while output_buffer:
|
||||||
char, msg_id = output_buffer.pop(0)
|
# char, msg_id = output_buffer.pop(0)
|
||||||
yield send_char(char, msg_id)
|
# yield send_char(char, msg_id)
|
||||||
# 根据剩余缓冲区大小计算延迟
|
# # 根据剩余缓冲区大小计算延迟
|
||||||
delay = calculate_delay(len(output_buffer))
|
# delay = calculate_delay(len(output_buffer))
|
||||||
time.sleep(delay)
|
# time.sleep(delay)
|
||||||
|
yield send_char(current_answer, message_id)
|
||||||
|
|
||||||
# 立即继续处理下一个请求
|
# 立即继续处理下一个请求
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 处理Agent的思考过程,记录日志但不输出给用户
|
# 处理Agent的思考过程,记录日志但不输出给用户
|
||||||
elif dify_chunk.get("event") == "agent_thought":
|
elif dify_chunk.get("event") == "agent_thought":
|
||||||
thought_id = dify_chunk.get("id", "")
|
thought_id = dify_chunk.get("id", "")
|
||||||
@ -592,7 +594,7 @@ def chat_completions():
|
|||||||
tool = dify_chunk.get("tool", "")
|
tool = dify_chunk.get("tool", "")
|
||||||
tool_input = dify_chunk.get("tool_input", "")
|
tool_input = dify_chunk.get("tool_input", "")
|
||||||
observation = dify_chunk.get("observation", "")
|
observation = dify_chunk.get("observation", "")
|
||||||
|
|
||||||
logger.info(f"[Agent Thought] ID: {thought_id}, Tool: {tool}")
|
logger.info(f"[Agent Thought] ID: {thought_id}, Tool: {tool}")
|
||||||
if thought:
|
if thought:
|
||||||
logger.info(f"[Agent Thought] Thought: {thought}")
|
logger.info(f"[Agent Thought] Thought: {thought}")
|
||||||
@ -600,35 +602,35 @@ def chat_completions():
|
|||||||
logger.info(f"[Agent Thought] Tool Input: {tool_input}")
|
logger.info(f"[Agent Thought] Tool Input: {tool_input}")
|
||||||
if observation:
|
if observation:
|
||||||
logger.info(f"[Agent Thought] Observation: {observation}")
|
logger.info(f"[Agent Thought] Observation: {observation}")
|
||||||
|
|
||||||
# 获取message_id以关联思考和最终输出
|
# 获取message_id以关联思考和最终输出
|
||||||
message_id = dify_chunk.get("message_id", "")
|
message_id = dify_chunk.get("message_id", "")
|
||||||
if not generate.message_id and message_id:
|
if not generate.message_id and message_id:
|
||||||
generate.message_id = message_id
|
generate.message_id = message_id
|
||||||
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 处理消息中的文件(如图片),记录日志但不直接输出给用户
|
# 处理消息中的文件(如图片),记录日志但不直接输出给用户
|
||||||
elif dify_chunk.get("event") == "message_file":
|
elif dify_chunk.get("event") == "message_file":
|
||||||
file_id = dify_chunk.get("id", "")
|
file_id = dify_chunk.get("id", "")
|
||||||
file_type = dify_chunk.get("type", "")
|
file_type = dify_chunk.get("type", "")
|
||||||
file_url = dify_chunk.get("url", "")
|
file_url = dify_chunk.get("url", "")
|
||||||
|
|
||||||
logger.info(f"[Message File] ID: {file_id}, Type: {file_type}, URL: {file_url}")
|
logger.info(f"[Message File] ID: {file_id}, Type: {file_type}, URL: {file_url}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
elif dify_chunk.get("event") == "message_end":
|
elif dify_chunk.get("event") == "message_end":
|
||||||
# 快速输出剩余内容
|
# 快速输出剩余内容
|
||||||
while output_buffer:
|
while output_buffer:
|
||||||
char, msg_id = output_buffer.pop(0)
|
char, msg_id = output_buffer.pop(0)
|
||||||
yield send_char(char, msg_id)
|
yield send_char(char, msg_id)
|
||||||
time.sleep(0.001) # 固定使用最小延迟快速输出剩余内容
|
time.sleep(0.001) # 固定使用最小延迟快速输出剩余内容
|
||||||
|
|
||||||
# 只在零宽字符会话记忆模式时处理conversation_id
|
# 只在零宽字符会话记忆模式时处理conversation_id
|
||||||
if CONVERSATION_MEMORY_MODE == 2:
|
if CONVERSATION_MEMORY_MODE == 2:
|
||||||
conversation_id = dify_chunk.get("conversation_id")
|
conversation_id = dify_chunk.get("conversation_id")
|
||||||
history = dify_chunk.get("conversation_history", [])
|
history = dify_chunk.get("conversation_history", [])
|
||||||
|
|
||||||
has_conversation_id = False
|
has_conversation_id = False
|
||||||
if history:
|
if history:
|
||||||
for msg in history:
|
for msg in history:
|
||||||
@ -637,7 +639,7 @@ def chat_completions():
|
|||||||
if decode_conversation_id(content) is not None:
|
if decode_conversation_id(content) is not None:
|
||||||
has_conversation_id = True
|
has_conversation_id = True
|
||||||
break
|
break
|
||||||
|
|
||||||
# 只在新会话且历史消息中没有会话ID时插入
|
# 只在新会话且历史消息中没有会话ID时插入
|
||||||
if conversation_id and not has_conversation_id:
|
if conversation_id and not has_conversation_id:
|
||||||
logger.info(f"[Debug] Inserting conversation_id in stream: {conversation_id}")
|
logger.info(f"[Debug] Inserting conversation_id in stream: {conversation_id}")
|
||||||
@ -645,7 +647,7 @@ def chat_completions():
|
|||||||
logger.info(f"[Debug] Stream encoded content: {repr(encoded)}")
|
logger.info(f"[Debug] Stream encoded content: {repr(encoded)}")
|
||||||
for char in encoded:
|
for char in encoded:
|
||||||
yield send_char(char, generate.message_id)
|
yield send_char(char, generate.message_id)
|
||||||
|
|
||||||
final_chunk = {
|
final_chunk = {
|
||||||
"id": generate.message_id,
|
"id": generate.message_id,
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@ -659,11 +661,11 @@ def chat_completions():
|
|||||||
}
|
}
|
||||||
yield flush_chunk(f"data: {json.dumps(final_chunk)}\n\n")
|
yield flush_chunk(f"data: {json.dumps(final_chunk)}\n\n")
|
||||||
yield flush_chunk("data: [DONE]\n\n")
|
yield flush_chunk("data: [DONE]\n\n")
|
||||||
|
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
logger.error(f"JSON decode error: {str(e)}")
|
logger.error(f"JSON decode error: {str(e)}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error processing chunk: {str(e)}")
|
logger.error(f"Error processing chunk: {str(e)}")
|
||||||
continue
|
continue
|
||||||
@ -692,7 +694,7 @@ def chat_completions():
|
|||||||
json=dify_request,
|
json=dify_request,
|
||||||
headers=headers
|
headers=headers
|
||||||
)
|
)
|
||||||
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
error_msg = f"Dify API error: {response.text}"
|
error_msg = f"Dify API error: {response.text}"
|
||||||
logger.error(f"Request failed: {error_msg}")
|
logger.error(f"Request failed: {error_msg}")
|
||||||
@ -746,13 +748,13 @@ def chat_completions():
|
|||||||
def list_models():
|
def list_models():
|
||||||
"""返回可用的模型列表"""
|
"""返回可用的模型列表"""
|
||||||
logger.info("Listing available models")
|
logger.info("Listing available models")
|
||||||
|
|
||||||
# 刷新模型信息
|
# 刷新模型信息
|
||||||
asyncio.run(model_manager.refresh_model_info())
|
asyncio.run(model_manager.refresh_model_info())
|
||||||
|
|
||||||
# 获取可用模型列表
|
# 获取可用模型列表
|
||||||
available_models = model_manager.get_available_models()
|
available_models = model_manager.get_available_models()
|
||||||
|
|
||||||
response = {
|
response = {
|
||||||
"object": "list",
|
"object": "list",
|
||||||
"data": available_models
|
"data": available_models
|
||||||
@ -764,10 +766,10 @@ def list_models():
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
if not VALID_API_KEYS:
|
if not VALID_API_KEYS:
|
||||||
print("Warning: No API keys configured. Set the VALID_API_KEYS environment variable with comma-separated keys.")
|
print("Warning: No API keys configured. Set the VALID_API_KEYS environment variable with comma-separated keys.")
|
||||||
|
|
||||||
# 启动时初始化模型信息
|
# 启动时初始化模型信息
|
||||||
asyncio.run(model_manager.refresh_model_info())
|
asyncio.run(model_manager.refresh_model_info())
|
||||||
|
|
||||||
host = os.getenv("SERVER_HOST", "127.0.0.1")
|
host = os.getenv("SERVER_HOST", "127.0.0.1")
|
||||||
port = int(os.getenv("SERVER_PORT", 5000))
|
port = int(os.getenv("SERVER_PORT", 5000))
|
||||||
logger.info(f"Starting server on http://{host}:{port}")
|
logger.info(f"Starting server on http://{host}:{port}")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user