update: whisper

Soulter · Soulter · commit f2d953840043 · 2025-01-11T20:58:42.000+08:00
diff --git a/.vitepress/config.mjs b/.vitepress/config.mjs
@@ -100,6 +100,10 @@ export default defineConfig({
           {
             text: '沙箱化代码执行器(beta)',
             link: '/code-interpreter'
+          },
+          {
+            text: '接入 Whisper 语音转文字',
+            link: '/whisper'
           }
         ]
       },
diff --git a/config/provider.md b/config/provider.md
@@ -137,4 +137,13 @@ AstrBot 支持加载使用 `LlamaFactory` 微调的模型。
 
 AstrBot 支持接入 Dify。
 
-请参考 [接入 Dify](/others/dify)。
+请参考 [接入 Dify](/others/dify)。
+
+
+## Whisper 语音转文字
+
+AstrBot 支持接入 OpenAI 开源的 Whisper 模型，实现语音转文字。
+
+既可以通过 API 接入，也可以在本地部署 Whisper。
+
+详见 [Whisper 语音转文字](/use/whisper)。
diff --git a/deploy/platform/aiocqhttp/napcat.md b/deploy/platform/aiocqhttp/napcat.md
@@ -10,10 +10,25 @@ NapCatQQ 的文档：[NapCatQQ 文档](https://napcat.napneko.icu/)
 > - 一个 QQ 号（最好不是新创建的 QQ 号）。
 > - 一台具有摄像功能的手机以扫码登录 QQ。
 
-NapCat 提供了大量的部署方式，包括 Docker、Windows 一键安装包等等。在本篇文章里将以 Docker 部署为例。
+NapCat 提供了大量的部署方式，包括 Docker、Windows 一键安装包等等。
+
+## 通过一键脚本部署
+
+推荐这种方式。
+
+### Windows
+
+看这篇文章：[NapCat.Shell - Win手动启动教程](https://napneko.github.io/guide/boot/Shell#napcat-shell-win%E6%89%8B%E5%8A%A8%E5%90%AF%E5%8A%A8%E6%95%99%E7%A8%8B)
+
+### Linux
+
+看这篇文章：[NapCat.Installer - Linux一键使用脚本(支持Ubuntu 20+/Debian 10+/Centos9)](https://napneko.github.io/guide/boot/Shell#napcat-installer-linux%E4%B8%80%E9%94%AE%E4%BD%BF%E7%94%A8%E8%84%9A%E6%9C%AC-%E6%94%AF%E6%8C%81ubuntu-20-debian-10-centos9)
 
 ## 通过 Docker 部署
 
+> [!TIP]
+> 如果用 Docker 部署，将无法正常接收到`语音数据`、`文件数据`。这意味着语音转文字、沙箱的文件输入功能将无法使用。可以接收到文字消息、图片消息等其他类型的消息。
+
 默认您安装了 Docker。
 
 在终端执行以下命令即可一键部署。
diff --git a/source/images/whisper/image.png b/source/images/whisper/image.png
diff --git a/use/code-interpreter.md b/use/code-interpreter.md
@@ -2,6 +2,9 @@
 
 在 `v3.4.2` 版本及之后，AstrBot 支持代码执行器以强化 LLM 的能力，并实现一些自动化的操作。
 
+> [!TIP]
+> 如果您使用 Docker 部署 AstrBot，将无法使用文件输入/输出功能，因为 Docker 无法直接访问宿主机的文件系统。
+
 ## Demo
 
 ![](../source/images/code-interpreter/a3cd3a0e-aca5-41b2-aa52-66b568bd955b.png)
@@ -61,4 +64,4 @@
 
 代码执行器除了能够识别和处理图片、文字任务，还能够识别您发送的文件，并且能够发送文件。但是，目前来说有一些环境上的限制。
 
-文件输入/输出只支持 `QQ` 平台，并且使用 `napcat`。
+文件输入/输出目前只支持 `QQ` 平台，需要使用 `napcat`，并且 napcat 不能通过 Docker 部署。
diff --git a/use/whisper.md b/use/whisper.md
@@ -0,0 +1,71 @@
+## 接入 Whisper 语音转文字
+
+> [!TIP]
+> 如果您使用 Docker 部署 AstrBot，`目前`将无法接收到 QQ 的语音消息，因为无法访问宿主机文件系统。
+
+AstrBot 支持接入 Whisper 语音转文字。
+
+有两种接入方式：一种是调用 OpenAI API 提供的 Whisper 接口，另一种是在本地部署 Whisper。
+
+### API 接入
+
+与接入支持 OpenAI API 的大语言模型提供商类似，OpenAI API 同样提供了调用 Whisper 模型的接口。
+
+配置文件类似：
+
+```json
+{
+    "id": "new_whisper(api)",
+    "type": "openai_whisper_api",
+    "enable": false,
+    "api_key": "your_openai_api_key",
+    "api_base": "your openai api base",
+    "model": "whisper-1"
+},
+```
+
+在管理面板上配置时，只需要点击此项即可进行可视化配置：
+
+![](../source/images/whisper/image.png)
+
+如果你使用 OpenAI 中转服务，请确保该中转服务商支持 Whisper 调用。
+
+### 本地部署
+
+本地运行 Whisper 模型需要 Python 库 `openai-whisper`，请先使用 pip 安装。
+
+> [!TIP]
+> 可以在管理面板 `控制台` 页快捷 pip 安装。
+> 安装此库会自动安装 PyTorch（一个深度学习库）。N 卡用户大约下载 2 GB，主要是 torch 和 CUDA；CPU 用户大约下载 1 GB。
+
+除了安装 `openai-whisper` 库，还需要你的设备上安装有 `ffmpeg`。
+
+对于 Linux，大多数包管理器都有 ffmpeg，可以直接安装。
+
+对于 Windows，可以从 [ffmpeg 官网](https://ffmpeg.org/download.html) 下载。下载完成后需要将 ffmpeg 可执行文件所在目录添加到环境变量 `PATH` 中，并建议重启电脑以使环境变量生效。
+
+```json
+{
+    "id": "new_whisper(本地加载)",
+    "type": "openai_whisper_selfhost",
+    "enable": true,
+    "model": "tiny"
+},
+```
+
+在管理面板上配置时，只需要点击此项的后面那项即可进行可视化配置：
+
+![](../source/images/whisper/image.png)
+
+Whisper 有多种模型，默认启用最小的 `tiny` 模型，如果你的设备性能较好，可以尝试使用其他模型。
+
+模型列表：
+
+|  模型名  | 参数量 | 仅英语模型 | 多语言模型 | 需要的显存 | 相对速度 |
+|:------:|:----------:|:------------------:|:------------------:|:-------------:|:--------------:|
+|  tiny  |    39 M    |     `tiny.en`      |       `tiny`       |     ~1 GB     |      ~10x      |
+|  base  |    74 M    |     `base.en`      |       `base`       |     ~1 GB     |      ~7x       |
+| small  |   244 M    |     `small.en`     |      `small`       |     ~2 GB     |      ~4x       |
+| medium |   769 M    |    `medium.en`     |      `medium`      |     ~5 GB     |      ~2x       |
+| large  |   1550 M   |        N/A         |      `large`       |    ~10 GB     |       1x       |
+| turbo  |   809 M    |        N/A         |      `turbo`       |     ~6 GB     |      ~8x       |

Original file line number	Diff line number	Diff line change
`@@ -100,6 +100,10 @@ export default defineConfig({`
`100`	`100`	`{`
`101`	`101`	`text: '沙箱化代码执行器(beta)',`
`102`	`102`	`link: '/code-interpreter'`
	`103`	`+ },`
	`104`	`+ {`
	`105`	`+ text: '接入 Whisper 语音转文字',`
	`106`	`+ link: '/whisper'`
`103`	`107`	`}`
`104`	`108`	`]`
`105`	`109`	`},`