Skip to content

Commit 1d15666

Browse files
authored
Merge pull request ChatGPTNextWeb#5919 from Yiming3/feature/flexible-visual-model
feat: runtime configuration of vision-capable models
2 parents acc2e97 + a127ae1 commit 1d15666

File tree

7 files changed

+116
-21
lines changed

7 files changed

+116
-21
lines changed

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,13 @@ For ByteDance: use `modelName@bytedance=deploymentName` to customize model name
355355

356356
Change default model
357357

358+
### `VISION_MODELS` (optional)
359+
360+
> Default: Empty
361+
> Example: `gpt-4-vision,claude-3-opus,my-custom-model` adds vision capabilities to these models, in addition to the models detected by the default pattern matching (which recognizes model names containing keywords such as "vision", "claude-3", "gemini-1.5", etc.).
362+
363+
Grants vision capabilities to additional models beyond those detected by the default pattern matching. Separate multiple model names with commas.
364+
358365
### `WHITE_WEBDAV_ENDPOINTS` (optional)
359366

360367
You can use this option if you want to increase the number of webdav service addresses you are allowed to access, as required by the format:

README_CN.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,13 @@ ChatGLM Api Url.
235235

236236
更改默认模型
237237

238+
### `VISION_MODELS` (可选)
239+
240+
> 默认值:空
241+
> 示例:`gpt-4-vision,claude-3-opus,my-custom-model` 表示为这些模型添加视觉能力,作为对默认模式匹配的补充(默认会检测包含"vision"、"claude-3"、"gemini-1.5"等关键词的模型)。
242+
243+
在默认模式匹配之外,添加更多具有视觉能力的模型。多个模型用逗号分隔。
244+
238245
### `DEFAULT_INPUT_TEMPLATE` (可选)
239246

240247
自定义默认的 template,用于初始化『设置』中的『用户输入预处理』配置项

README_JA.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,13 @@ ByteDance モードでは、`modelName@bytedance=deploymentName` 形式でモデ
217217

218218
デフォルトのモデルを変更します。
219219

220+
### `VISION_MODELS` (オプション)
221+
222+
> デフォルト:空
223+
> 例:`gpt-4-vision,claude-3-opus,my-custom-model` は、これらのモデルにビジョン機能を追加します。これはデフォルトのパターンマッチング("vision"、"claude-3"、"gemini-1.5"などのキーワードを含むモデルを検出)に加えて適用されます。
224+
225+
デフォルトのパターンマッチングに加えて、追加のモデルにビジョン機能を付与します。複数のモデルはカンマで区切ります。
226+
220227
### `DEFAULT_INPUT_TEMPLATE` (オプション)
221228

222229
『設定』の『ユーザー入力前処理』の初期設定に使用するテンプレートをカスタマイズします。

app/config/build.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ export const getBuildConfig = () => {
4040
buildMode,
4141
isApp,
4242
template: process.env.DEFAULT_INPUT_TEMPLATE ?? DEFAULT_INPUT_TEMPLATE,
43+
visionModels: process.env.VISION_MODELS || "",
4344
};
4445
};
4546

app/constant.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,22 @@ export const DEFAULT_TTS_VOICES = [
291291
"shimmer",
292292
];
293293

294+
/**
 * Patterns that identify vision-capable models by name.
 *
 * `isVisionModel` tests a model name against each entry; a single match is
 * enough, provided no pattern in `EXCLUDE_VISION_MODEL_REGEXES` matches as well.
 * Every pattern is case-sensitive, and all but the last are unanchored, so
 * they match anywhere inside the model name.
 */
export const VISION_MODEL_REGEXES = [
295+
/vision/,
296+
/gpt-4o/,
297+
/claude-3/,
298+
/gemini-1\.5/,
299+
/gemini-exp/,
300+
/gemini-2\.0/,
301+
/learnlm/,
302+
/qwen-vl/,
303+
/qwen2-vl/,
304+
/gpt-4-turbo(?!.*preview)/, // Matches "gpt-4-turbo" but not "gpt-4-turbo-preview"
305+
/^dall-e-3$/, // Matches exactly "dall-e-3"
306+
];
307+
308+
/**
 * Patterns that veto a `VISION_MODEL_REGEXES` match inside `isVisionModel`.
 * The listed haiku model would otherwise be caught by the broad `/claude-3/` pattern.
 */
export const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/];
309+
294310
const openaiModels = [
295311
"gpt-3.5-turbo",
296312
"gpt-3.5-turbo-1106",

app/utils.ts

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import { RequestMessage } from "./client/api";
55
import { ServiceProvider } from "./constant";
66
// import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http";
77
import { fetch as tauriStreamFetch } from "./utils/stream";
8+
import { VISION_MODEL_REGEXES, EXCLUDE_VISION_MODEL_REGEXES } from "./constant";
9+
import { getClientConfig } from "./config/client";
810

911
export function trimTopic(topic: string) {
1012
// Fix an issue where double quotes still show in the Indonesian language
@@ -252,28 +254,16 @@ export function getMessageImages(message: RequestMessage): string[] {
252254
}
253255

254256
/**
 * Reports whether `model` should be treated as vision-capable.
 *
 * A model qualifies when either
 *  1. its name appears in the comma-separated `visionModels` value of the
 *     client config (compared exactly, after trimming each entry), or
 *  2. it matches at least one of `VISION_MODEL_REGEXES` and none of
 *     `EXCLUDE_VISION_MODEL_REGEXES`.
 *
 * Entries from the configured list are checked first, so they take precedence
 * over the exclusion patterns.
 *
 * @param model - Model name to check.
 * @returns `true` when the model is considered vision-capable.
 */
export function isVisionModel(model: string): boolean {
  const configuredModels = (getClientConfig()?.visionModels ?? "")
    .split(",")
    .map((name) => name.trim())
    // Splitting an unset/empty setting, or one with a stray comma, produces ""
    // entries; drop them so an empty model name is never reported as
    // vision-capable.
    .filter((name) => name.length > 0);

  if (configuredModels.includes(model)) {
    return true;
  }

  return (
    !EXCLUDE_VISION_MODEL_REGEXES.some((regex) => regex.test(model)) &&
    VISION_MODEL_REGEXES.some((regex) => regex.test(model))
  );
}
279269

test/vision-model-checker.test.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import { isVisionModel } from "../app/utils";
2+
3+
// Unit tests for isVisionModel: the built-in name patterns, the exclusion
// list, and extra models supplied through the VISION_MODELS environment variable.
describe("isVisionModel", () => {
4+
const originalEnv = process.env;
5+
6+
beforeEach(() => {
7+
// NOTE(review): isVisionModel is imported statically at the top of this file,
// so resetting the module registry likely does not reload it — confirm this
// call is needed.
jest.resetModules();
8+
// Work on a shallow copy so per-test mutations never touch the original object.
process.env = { ...originalEnv };
9+
});
10+
11+
afterEach(() => {
12+
process.env = originalEnv;
13+
});
14+
15+
test("should identify vision models using regex patterns", () => {
16+
// NOTE(review): "gemini-exp-vision" and "learnlm-vision" also contain
// "vision", so they would pass even without the gemini-exp / learnlm patterns.
const visionModels = [
17+
"gpt-4-vision",
18+
"claude-3-opus",
19+
"gemini-1.5-pro",
20+
"gemini-2.0",
21+
"gemini-exp-vision",
22+
"learnlm-vision",
23+
"qwen-vl-max",
24+
"qwen2-vl-max",
25+
"gpt-4-turbo",
26+
"dall-e-3",
27+
];
28+
29+
visionModels.forEach((model) => {
30+
expect(isVisionModel(model)).toBe(true);
31+
});
32+
});
33+
34+
test("should exclude specific models", () => {
35+
// This name contains "claude-3" but is expected to be rejected.
expect(isVisionModel("claude-3-5-haiku-20241022")).toBe(false);
36+
});
37+
38+
test("should not identify non-vision models", () => {
39+
const nonVisionModels = [
40+
"gpt-3.5-turbo",
41+
"gpt-4-turbo-preview",
42+
"claude-2",
43+
"regular-model",
44+
];
45+
46+
nonVisionModels.forEach((model) => {
47+
expect(isVisionModel(model)).toBe(false);
48+
});
49+
});
50+
51+
test("should identify models from VISION_MODELS env var", () => {
52+
// Presumably the client config reads VISION_MODELS at call time, so setting it
// here is picked up by the calls below — verify against getClientConfig.
process.env.VISION_MODELS = "custom-vision-model,another-vision-model";
53+
54+
expect(isVisionModel("custom-vision-model")).toBe(true);
55+
expect(isVisionModel("another-vision-model")).toBe(true);
56+
expect(isVisionModel("unrelated-model")).toBe(false);
57+
});
58+
59+
test("should handle empty or missing VISION_MODELS", () => {
60+
// Empty value: only the built-in patterns should apply.
process.env.VISION_MODELS = "";
61+
expect(isVisionModel("unrelated-model")).toBe(false);
62+
63+
// Variable removed entirely: same expectation.
delete process.env.VISION_MODELS;
64+
expect(isVisionModel("unrelated-model")).toBe(false);
65+
expect(isVisionModel("gpt-4-vision")).toBe(true);
66+
});
67+
});

0 commit comments

Comments
 (0)