Compare commits

..

No commits in common. "test" and "main" have entirely different histories.
test ... main

4 changed files with 95 additions and 142 deletions

View File

@ -1,5 +0,0 @@
.env.example
.git
.gitignore
*.md
images/

View File

@ -1,25 +0,0 @@
# Start from the official Python base image.
# Pinning a specific slim variant (e.g. python:3.9-slim-buster) is recommended for a smaller, safer image.
FROM python:3.12-bookworm
# Set the working directory; all subsequent instructions run inside it.
WORKDIR /app
# Copy requirements.txt into the working directory and install the Python dependencies.
# Doing this as a separate step uses Docker's layer cache: if the dependencies are unchanged, they are not reinstalled.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy all application code into the image.
COPY . .
# Expose the port the Flask application listens on.
# Note: EXPOSE only declares the port; it does not publish it. Port mapping is still required when running the container.
# NOTE(review): SERVER_PORT is not declared via ARG or ENV in this file, so it may be empty at build time - confirm.
EXPOSE ${SERVER_PORT}
# Define the command executed when the container starts.
# The commented-out command below uses Gunicorn, a production-grade WSGI server, instead of Flask's built-in development server.
# To use it, first add gunicorn to requirements.txt.
# CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:5000", "app:app"]
# For testing or simple applications, the Flask development server can be used directly.
CMD ["python", "main.py"]

View File

@ -1,11 +0,0 @@
# Compose definition for the single "web" service, built from the Dockerfile in this directory.
services:
  web:
    build: ./  # Build context path that contains the Dockerfile
    container_name: OpenDify
    ports:
      # Port mapping in host:container form. SERVER_PORT is interpolated by Compose
      # (from the shell environment or a .env file), so it must be set before `up`.
      - "${SERVER_PORT}:${SERVER_PORT}"
    restart: always
    volumes:
      # Mount the local source tree into the container so code edits take effect
      # immediately during development.
      - ./:/app
    environment:  # Environment variables, e.g. Flask's development mode
      FLASK_ENV: development

48
main.py
View File

@ -545,23 +545,18 @@ def chat_completions():
if not generate.message_id:
generate.message_id = message_id
# # 将当前批次的字符添加到输出缓冲区
# for char in current_answer:
# output_buffer.append((char, generate.message_id))
# 将当前批次的字符添加到输出缓冲区
for char in current_answer:
output_buffer.append((char, generate.message_id))
# 根据缓冲区大小动态调整输出速度
while output_buffer:
char, msg_id = output_buffer.pop(0)
yield send_char(char, msg_id)
# 根据剩余缓冲区大小计算延迟
delay = calculate_delay(len(output_buffer))
time.sleep(delay)
# # 根据缓冲区大小动态调整输出速度
# while output_buffer:
# char, msg_id = output_buffer.pop(0)
# yield send_char(char, msg_id)
# # 根据剩余缓冲区大小计算延迟
# delay = calculate_delay(len(output_buffer))
# time.sleep(delay)
if current_answer == "<think>\n\n":
current_answer = "<think>"
yield send_char(current_answer, message_id)
current_answer = "\n"
yield send_char(current_answer, message_id)
logger.info(f"message_id: {message_id}")
# 立即继续处理下一个请求
continue
@ -575,18 +570,17 @@ def chat_completions():
if not generate.message_id:
generate.message_id = message_id
# # 将当前批次的字符添加到输出缓冲区
# for char in current_answer:
# output_buffer.append((char, generate.message_id))
# 将当前批次的字符添加到输出缓冲区
for char in current_answer:
output_buffer.append((char, generate.message_id))
# # 根据缓冲区大小动态调整输出速度
# while output_buffer:
# char, msg_id = output_buffer.pop(0)
# yield send_char(char, msg_id)
# # 根据剩余缓冲区大小计算延迟
# delay = calculate_delay(len(output_buffer))
# time.sleep(delay)
yield send_char(current_answer, message_id)
# 根据缓冲区大小动态调整输出速度
while output_buffer:
char, msg_id = output_buffer.pop(0)
yield send_char(char, msg_id)
# 根据剩余缓冲区大小计算延迟
delay = calculate_delay(len(output_buffer))
time.sleep(delay)
# 立即继续处理下一个请求
continue