Skip to content

Commit f32a173

Browse files
authored
Merge pull request #7695 from sagemathinc/llm-gpt-4o-mini
gpt-4o-mini
2 parents ec42dcd + 0855647 commit f32a173

File tree

7 files changed

+133
-65
lines changed

7 files changed

+133
-65
lines changed

src/packages/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"undici@<5.28.3": "^5.28.4",
2121
"postcss@<8.4.31": "^8.4.31",
2222
"retry-request@<7.0.1": "^7.0.2",
23-
"@langchain/core": "^0.2.10",
23+
"@langchain/core": "^0.2.17",
2424
"katex@<0.16.9": "^0.16.10"
2525
}
2626
}

src/packages/pnpm-lock.yaml

Lines changed: 85 additions & 57 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/packages/server/llm/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,8 @@ export async function evaluateOpenAI({
331331
// convert *-preview and all *-8k to "gpt-4-turbo"
332332
if (model.startsWith("gpt-4-turbo")) {
333333
model = "gpt-4-turbo";
334+
} else if (model.startsWith("gpt-4o-mini")) {
335+
model = "gpt-4o-mini";
334336
} else if (model.startsWith("gpt-4o")) {
335337
model = "gpt-4o";
336338
}

src/packages/server/llm/test/00.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ test_llm("openai")("OpenAI", () => {
7575
test("gpt 4 omni works", async () => {
7676
llmOpenAI("gpt-4o-8k");
7777
});
78+
test("gpt 4o mini works", async () => {
79+
llmOpenAI("gpt-4o-mini-8k");
80+
});
7881
});
7982

8083
// ATTN: this does not work everywhere; availability depends on geolocation

src/packages/server/package.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,12 @@
4949
"@google-cloud/storage-transfer": "^3.3.0",
5050
"@google/generative-ai": "^0.14.0",
5151
"@isaacs/ttlcache": "^1.2.1",
52-
"@langchain/anthropic": "^0.2.2",
53-
"@langchain/community": "^0.2.14",
54-
"@langchain/core": "^0.2.10",
55-
"@langchain/google-genai": "^0.0.21",
56-
"@langchain/mistralai": "^0.0.24",
57-
"@langchain/openai": "^0.2.0",
52+
"@langchain/anthropic": "^0.2.6",
53+
"@langchain/community": "^0.2.19",
54+
"@langchain/core": "^0.2.17",
55+
"@langchain/google-genai": "^0.0.23",
56+
"@langchain/mistralai": "^0.0.26",
57+
"@langchain/openai": "^0.2.4",
5858
"@node-saml/passport-saml": "^4.0.4",
5959
"@passport-js/passport-twitter": "^1.0.8",
6060
"@passport-next/passport-google-oauth2": "^1.0.0",

src/packages/util/db-schema/llm-utils.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ export const OPENAI_PREFIX = "openai-";
105105

106106
export const MODELS_OPENAI = [
107107
"gpt-3.5-turbo",
108+
"gpt-4o-mini-8k", // context limited
109+
"gpt-4o-mini", // Released 2024-07-18
108110
"gpt-4o-8k", // context limited, similar to gpt-4-turbo-8k
109111
"gpt-4o", // Released 2024-05-13
110112
// the "preview" variants are disabled, because the preview is over
@@ -219,7 +221,11 @@ export const USER_SELECTABLE_LLMS_BY_VENDOR: {
219221
[vendor in LLMServiceName]: Readonly<LanguageModelCore[]>;
220222
} = {
221223
openai: MODELS_OPENAI.filter(
222-
(m) => m === "gpt-4" || m === "gpt-4-turbo-preview-8k" || m === "gpt-4o-8k",
224+
(m) =>
225+
m === "gpt-4" ||
226+
m === "gpt-4-turbo-preview-8k" ||
227+
m === "gpt-4o-8k" ||
228+
m === "gpt-4o-mini-8k",
223229
),
224230
google: GOOGLE_MODELS.filter(
225231
(m) =>
@@ -699,6 +705,8 @@ export const LLM_USERNAMES: LLM2String = {
699705
"gpt-4-turbo-8k": "GPT-4 Turbo 8k",
700706
"gpt-4o": "GPT-4 Omni 128k",
701707
"gpt-4o-8k": "GPT-4 Omni 8k",
708+
"gpt-4o-mini": "GPT-4o Mini 128k",
709+
"gpt-4o-mini-8k": "GPT-4o Mini 8k",
702710
"text-embedding-ada-002": "Text Embedding Ada 002", // TODO: this is for embeddings, should be moved to a different place
703711
"text-bison-001": "PaLM 2",
704712
"chat-bison-001": "PaLM 2",
@@ -742,6 +750,9 @@ export const LLM_DESCR: LLM2String = {
742750
"gpt-4o-8k":
743751
"Most powerful, fastest, and cheapest (OpenAI, 8k token context)",
744752
"gpt-4o": "Most powerful fastest, and cheapest (OpenAI, 128k token context)",
753+
"gpt-4o-mini-8k":
754+
"Most cost-efficient small model (OpenAI, 8k token context)",
755+
"gpt-4o-mini": "Most cost-efficient small model (OpenAI, 128k token context)",
745756
"text-embedding-ada-002": "Text embedding Ada 002 by OpenAI", // TODO: this is for embeddings, should be moved to a different place
746757
"text-bison-001": "",
747758
"chat-bison-001": "",
@@ -910,6 +921,18 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
910921
max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
911922
free: false,
912923
},
924+
"gpt-4o-mini-8k": {
925+
prompt_tokens: usd1Mtokens(0.15),
926+
completion_tokens: usd1Mtokens(0.6),
927+
max_tokens: 8192, // like gpt-4-turbo-8k
928+
free: true,
929+
},
930+
"gpt-4o-mini": {
931+
prompt_tokens: usd1Mtokens(0.15),
932+
completion_tokens: usd1Mtokens(0.6),
933+
max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
934+
free: true,
935+
},
913936
// also OpenAI
914937
"text-embedding-ada-002": {
915938
prompt_tokens: 0.0001 / 1000,

src/packages/util/db-schema/purchase-quotas.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,16 @@ const GPT_OMNI_8K: Spec = {
4343
display: `${GPT_OMNI.display} 8k`,
4444
} as const;
4545

46+
const GPT_OMNI_MINI: Spec = {
47+
...GPT_OMNI,
48+
display: "OpenAI GPT-4o Mini",
49+
};
50+
51+
const GPT_OMNI_MINI_8K: Spec = {
52+
...GPT_OMNI_MINI,
53+
display: `${GPT_OMNI_MINI.display} 8k`,
54+
};
55+
4656
const GOOGLE_AI_COLOR = "#ff4d4f";
4757

4858
// NOTE: all-quotas-config.tsx automatically filters out entries that are free or not selectable by the user
@@ -116,6 +126,8 @@ export const QUOTA_SPEC: QuotaSpec = {
116126
"openai-gpt-4-turbo-8k": GPT_TURBO_8K,
117127
"openai-gpt-4o": GPT_OMNI,
118128
"openai-gpt-4o-8k": GPT_OMNI_8K,
129+
"openai-gpt-4o-mini": GPT_OMNI_MINI,
130+
"openai-gpt-4o-mini-8k": GPT_OMNI_MINI_8K,
119131
"google-text-bison-001": {
120132
display: "Google Palm 2 (Text)",
121133
color: GOOGLE_AI_COLOR,

0 commit comments

Comments
 (0)