⚡️ Improve performance (KenyonY#60)

- improve performance. - improve structure of the code - update documentation - beautify terminal display - fix dockerfile
0x8A63F77D · Aug 17, 2023 · dc9003f · dc9003f
1 parent 1e5ce9f
commit dc9003f
Show file tree

Hide file tree

Showing 15 changed files with 247 additions and 203 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -1,4 +1,3 @@
 **/*
 !openai_forward
-!pyproject.toml
-!README.md
+!pyproject.toml
diff --git a/.env b/.env
@@ -18,23 +18,20 @@ EXTRA_BASE_URL=
 # `EXTRA_ROUTE_PREFIX`: 与 EXTRA_BASE_URL 匹配的路由前缀
 EXTRA_ROUTE_PREFIX=
 
-# `RATE_LIMIT`: i.e. RPM 对指定路由的请求速率限制
+# `ROUTE_RATE_LIMIT`: i.e. RPM 对指定路由的请求速率限制
 # format: {route: ratelimit-string}
 # ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation
-RATE_LIMIT='{
-"/healthz": "1000/2minutes",
-"/v1/chat/completions": "300/minute"
-}'
+ROUTE_RATE_LIMIT={"/healthz": "1000/2minutes", "/v1/chat/completions": "300/minute"}
 
-#`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) ref: https://limits.readthedocs.io/en/latest/strategies.html
+#`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) :ref: https://limits.readthedocs.io/en/latest/strategies.html
 # `fixed-window`: most memory efficient strategy; `moving-window`:most effective for preventing bursts but higher memory cost.
 RATE_LIMIT_STRATEGY=moving-window
 
 # `GLOBAL_RATE_LIMIT`: 所有`ROUTE_RATE_LIMIT`没有指定的路由. 不填默认无限制
 GLOBAL_RATE_LIMIT=
 
 # TPM: 返回的token速率限制
-TOKEN_RATE_LIMIT=50/second
+TOKEN_RATE_LIMIT=40/second
 
 # 设定时区
 TZ=Asia/Shanghai
diff --git a/.env.example b/.env.example
@@ -20,13 +20,14 @@ EXTRA_BASE_URL='http://localhost:8882, http://localhost:8881'
 # EXTRA_ROUTE_PREFIX: 与 EXTRA_BASE_URL 匹配的路由前缀
 EXTRA_ROUTE_PREFIX='/tts, /translate'
 
-# RATE LIMIT: 指定路由的请求速率限制（不区分客户）
+# `ROUTE_RATE_LIMIT`: 指定路由的请求速率限制（不区分用户）
 # format: {route: ratelimit-string}
 # ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation
-RATE_LIMIT='{
+ROUTE_RATE_LIMIT='{
 "/healthz": "50/3minutes",
 "/openai/v1/chat/completions": "1/10seconds",
-"/localai/v1/chat/completions": "2/second"}'
+"/localai/v1/chat/completions": "2/second"
+}'
 
 # `GLOBAL_RATE_LIMIT`: 所有`ROUTE_RATE_LIMIT`没有指定的路由. 不填默认无限制
 GLOBAL_RATE_LIMIT=2/5seconds

diff --git a/Dockerfile b/Dockerfile
@@ -10,18 +10,9 @@ RUN apk update && \
     mkdir -p /usr/share/zoneinfo/Asia/ && \
     ln -s /etc/localtime /usr/share/zoneinfo/Asia/Shanghai
 
-RUN pip install --no-cache-dir  \
-    "loguru" \
-    "fastapi" \
-    "uvicorn" \
-    "orjson" \
-    "python-dotenv" \
-    "httpx" \
-    "fire" \
-    "rich" \
-    "pytz"
-
 COPY . /home/openai-forward
 WORKDIR /home/openai-forward
+RUN pip install -e . --no-cache-dir
+
 EXPOSE 8000
 ENTRYPOINT ["python", "-m", "openai_forward.__main__", "run"]
diff --git a/Examples/chat.py b/Examples/chat.py
@@ -11,11 +11,12 @@
 
 stream = True
 user_content = """
-用c实现快速平方根算法
+用c实现目前已知最快平方根算法
 """
 
 resp = openai.ChatCompletion.create(
     model="gpt-3.5-turbo",
+    # model="gpt-4",
     messages=[
         {"role": "user", "content": user_content},
     ],
@@ -32,3 +33,72 @@
     print()
 else:
     print(resp.choices)
+
+"""
+gpt-4:
+
+以下是用C语言实现的最快已知的一种平方根算法，也叫做 "Fast Inverse Square Root"。这种算法首次出现在雷神之锤3的源代码中，被大量的现代3D图形计算所使用。
+
+```c
+#include <stdint.h>
+
+float Q_rsqrt(float number){
+    long i;
+    float x2, y;
+    const float threehalfs = 1.5F;
+
+    x2 = number * 0.5F;
+    y  = number;
+    i  = * ( long * ) &y;
+    i  = 0x5f3759df - ( i >> 1 );
+    y  = * ( float * ) &i;
+    y  = y * ( threehalfs - ( x2 * y * y ) );
+
+    return y;
+}
+```
+这种算法的精确度并不是很高，但它的速度快到足以做实时图形计算。上述代码的主要思想是通过对IEEE 754浮点数表示法的理解和利用，借助整数和浮点数的二进制表示在略有不同的特性，实现了快速逼近求解平方根倒数的方法。
+
+注意，这个函数实际上求解的是平方根的倒数，也就是1/sqrt(x)，如果需要得到sqrt(x)的结果，只需要将函数返回值取倒数即可。这是因为在3D图形计算中，往往更频繁地需要求解平方根倒数，而直接求解平方根反而较为罕见。
+
+
+----------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+gpt-3.5-turbo:
+
+目前已经发现的最快平方根算法是牛顿迭代法，可以用C语言实现如下：
+
+```c
+#include <stdio.h>
+
+double sqrt_newton(double x) {
+    if (x == 0) {
+        return 0;
+    }
+    
+    double guess = x / 2;  // 初始猜测值为x的一半
+    
+    while (1) {
+        double new_guess = (guess + x / guess) / 2;  // 根据牛顿迭代法计算新的猜测值
+        if (new_guess == guess) {  // 如果新的猜测值与上一次的猜测值相同，迭代结束
+            break;
+        }
+        guess = new_guess;
+    }
+    
+    return guess;
+}
+
+int main() {
+    double x = 16;  // 以16为例进行测试
+    double result = sqrt_newton(x);
+    printf("The square root of %lf is %lf\n", x, result);
+    
+    return 0;
+}
+```
+
+该程序使用牛顿迭代法来计算平方根，初始猜测值为待开方数的一半。然后通过迭代计算新的猜测值，直到新的猜测值与上一次的猜测值相同，迭代结束。最后输出计算得到的平方根结果。
+
+
+"""
diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@
     <br>
 </h1>
 <p align="center">
-    <b> OpenAI API 接口转发服务 <br/>
+    <b> OpenAI API风格接口转发服务 <br/>
     The fastest way to deploy openai api forwarding </b>
 </p>
 
@@ -48,41 +48,28 @@
 
 </div>
 
-本项目用于解决一些地区无法直接访问OpenAI的问题，将该服务部署在可以正常访问OpenAI API的(云)服务器上，
-通过该服务转发OpenAI的请求。即搭建反向代理服务; 允许输入多个OpenAI API-KEY 组成轮询池; 可自定义二次分发api key.
+本项目是大模型与用户层之间的一道转发服务，可用于搭建反向代理，自定义API KEY，请求速率限制，token速率限制等.
 
 
-<picture>
-   <source type="image/webp" srcset="https://raw.githubusercontent.com/beidongjiedeguang/openai-forward/.github/images/separators/aqua.webp">
-   <img src="https://raw.githubusercontent.com/beidongjiedeguang/openai-forward/.github/images/separators/aqua.png" height=8px width="100%">
-</picture>
+<a>
+   <img src="https://raw.githubusercontent.com/beidongjiedeguang/openai-forward/main/.github/images/separators/aqua.png" height=8px width="100%">
+</a>
 
 由本项目搭建的长期代理地址：
 > https://api.openai-forward.com  
 > https://render.openai-forward.com  
 > https://cloudflare.page.openai-forward.com  
 > https://vercel.openai-forward.com  
 
-## 功能
-
-**基础功能**
-
-- [x] 支持转发OpenAI所有接口
-- [x] 支持流式响应
-- [x] 支持指定转发路由前缀
-- [x] docker部署
-- [x] pip 安装部署
-- [x] Railway 一键部署
-- [x] Render 一键部署
-- [x] cloudflare 部署
-- [x] Vercel一键部署
-
-**高级功能**
 
-- [x] 允许输入多个openai api key 组成轮询池
-- [x] 自定义 转发api key (见[高级配置](#高级配置))
-- [x] 流式响应对话日志
-- [x] 多接口转发
+### 特色
+-  万能转发: 支持转发OpenAI所有接口以及SSE流式响应
+-  支持指定转发路由前缀
+-  自定义转发api key 
+-  流式响应对话日志
+-  多接口转发
+-  请求速率限制(RPM)
+-  流式返回的token速率限制(TPM)
 
 ## 部署指南
 
@@ -93,22 +80,17 @@
 
 1. [pip 安装部署](deploy.md#pip部署)
 2. [Docker部署](deploy.md#docker部署)
-   > https://api.openai-forward.com
 
 **无vps免费部署方案**
 
 1. [Railway部署](deploy.md#Railway-一键部署)
-   > https://railway.openai-forward.com
 2. [Render一键部署](deploy.md#render-一键部署)
-   > https://render.openai-forward.com
 
 ---
 下面的部署仅提供单一转发功能
 
 3. [一键Vercel部署](deploy.md#vercel-一键部署)
-   > https://vercel.openai-forward.com
 4. [cloudflare部署](deploy.md#cloudflare-部署)
-   > https://cloudflare.page.openai-forward.com
 
 ## 应用
 
@@ -118,7 +100,7 @@
 替换docker启动命令中的 `BASE_URL`为我们自己搭建的代理服务地址
 
 
-<details >
+<details open>
    <summary> details</summary>  
 
 ```bash 
@@ -183,7 +165,7 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \
 
 </details>
 
-## 配置选项
+## 配置
 
 配置的设置方式支持两种  
 一种为在命令行中执行`aifd run` 的运行参数(如`--port=8000`)中指定;  
@@ -202,19 +184,14 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \
 |-----------------------|-----------------------|:----------------------:|
 | --port                | 服务端口号                 |          8000          |
 | --workers             | 工作进程数                 |           1            |
-| --openai_base_url     | 同 OPENAI_BASE_URL     | https://api.openai.com |
-| --openai_route_prefix | 同 OPENAI_ROUTE_PREFIX |         `None`         |
-| --api_key             | 同 OPENAI_API_KEY      |         `None`         |
-| --forward_key         | 同 FORWARD_KEY         |         `None`         |
-| --extra_base_url      | 同 EXTRA_BASE_URL      |         `None`         |
-| --extra_route_prefix  | 同 EXTRA_ROUTE_PREFIX  |         `None`         |
 | --log_chat            | 同 LOG_CHAT            |        `False`         |
 
 </details>
 
 ### 环境变量配置项
 
-支持从运行目录下的`.env`文件中读取
+支持从运行目录下的`.env`文件中读取  
+配置示例见根目录下的`.env.example`  
 
 | 环境变量                | 说明                                                                                                                                |          默认值           |
 |---------------------|-----------------------------------------------------------------------------------------------------------------------------------|:----------------------:|
@@ -226,23 +203,18 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \
 | EXTRA_ROUTE_PREFIX  | 额外转发服务路由前缀                                                                                                                        |           无            |
 | LOG_CHAT            | 是否记录聊天内容                                                                                                                          |        `false`         |
 
-## 高级配置
 
 ### 设置openai api_key为自定义的forward key
-<details markdown="1">
+<details open>
   <summary>Click for more details</summary>
 
-需要配置 OPENAI_API_KEY 和 FORWARD_KEY, 例如
+需要配置 OPENAI_API_KEY 和 FORWARD_KEY, 如
 
 ```bash
 OPENAI_API_KEY=sk-*******
 FORWARD_KEY=fk-****** # 这里fk-token由我们自己定义
 ```
 
-这里我们配置了FORWARD_KEY为`fk-******`, 那么后面客户端在调用时只需设置OPENAI_API_KEY为我们自定义的`fk-******` 即可。  
-这样的好处是在使用一些需要输入OPENAI_API_KEY的第三方应用时，我们可以使用自定义的api-key`fk-******`,
-无需担心真正的OPENAI_API_KEY被泄露。并且可以对外分发`fk-******`。
-
 **用例:**
 
 ```bash

diff --git a/deploy.md b/deploy.md
@@ -54,7 +54,9 @@ http://{ip}:{port}/v1/chat/completions
     proxy_buffering off; 
 ```
 
-
+<a>
+   <img src="https://raw.githubusercontent.com/beidongjiedeguang/openai-forward/main/.github/images/separators/aqua.png" height=8px width="100%">
+</a>
 
 ## Docker部署
 
@@ -82,8 +84,9 @@ openai-forward run # 或使用别名 aifd run
 启用SSL同上.
 
 
----
-
+<a>
+   <img src="https://raw.githubusercontent.com/beidongjiedeguang/openai-forward/main/.github/images/separators/aqua.png" height=8px width="100%">
+</a>
 
 ## Render 一键部署
 [![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/beidongjiedeguang/openai-forward)
@@ -105,7 +108,9 @@ Render的免费计划: 每月750小时免费实例时间(意味着单个实例
 > https://openai-forward.onrender.com 
 
 
----
+<a>
+   <img src="https://raw.githubusercontent.com/beidongjiedeguang/openai-forward/main/.github/images/separators/aqua.png" height=8px width="100%">
+</a>
 
 ## Railway 一键部署
 [![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/tejCum?referralCode=U0-kXv)
@@ -119,8 +124,9 @@ Render的免费计划: 每月750小时免费实例时间(意味着单个实例
 
 > https://railway.openai-forward.com
 
-
----
+<a>
+   <img src="https://raw.githubusercontent.com/beidongjiedeguang/openai-forward/main/.github/images/separators/aqua.png" height=8px width="100%">
+</a>
 
 ⚠️下面两种部署方式仅提供简单的转发服务，没有任何额外功能。
 
@@ -137,7 +143,10 @@ Render的免费计划: 每月750小时免费实例时间(意味着单个实例
 
 > https://vercel.openai-forward.com  
 
----
+
+<a>
+   <img src="https://raw.githubusercontent.com/beidongjiedeguang/openai-forward/main/.github/images/separators/aqua.png" height=8px width="100%">
+</a>
 
 ## Cloudflare 部署