Merge pull request #7695 from sagemathinc/llm-gpt-4o-mini

williamstein · web-flow · commit f32a173cf388 · 2024-07-20T10:04:44.000-07:00
gpt-4o-mini
diff --git a/src/packages/package.json b/src/packages/package.json
@@ -20,7 +20,7 @@
       "undici@<5.28.3": "^5.28.4",
       "postcss@<8.4.31": "^8.4.31",
       "retry-request@<7.0.1": "^7.0.2",
-      "@langchain/core": "^0.2.10",
+      "@langchain/core": "^0.2.17",
       "katex@<0.16.9": "^0.16.10"
     }
   }
diff --git a/src/packages/pnpm-lock.yaml b/src/packages/pnpm-lock.yaml
diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts
@@ -331,6 +331,8 @@ export async function evaluateOpenAI({
   // convert *-preview and all *-8k to "gpt-4-turbo"
   if (model.startsWith("gpt-4-turbo")) {
     model = "gpt-4-turbo";
+  } else if (model.startsWith("gpt-4o-mini")) {
+    model = "gpt-4o-mini";
   } else if (model.startsWith("gpt-4o")) {
     model = "gpt-4o";
   }
diff --git a/src/packages/server/llm/test/00.test.ts b/src/packages/server/llm/test/00.test.ts
@@ -75,6 +75,9 @@ test_llm("openai")("OpenAI", () => {
   test("gpt 4 omni works", async () => {
     llmOpenAI("gpt-4o-8k");
   });
+  test("gpt 4o mini works", async () => {
+    llmOpenAI("gpt-4o-mini-8k");
+  });
 });
 
 // ATTN: does not work everywhere around, geolocation matters
diff --git a/src/packages/server/package.json b/src/packages/server/package.json
@@ -49,12 +49,12 @@
     "@google-cloud/storage-transfer": "^3.3.0",
     "@google/generative-ai": "^0.14.0",
     "@isaacs/ttlcache": "^1.2.1",
-    "@langchain/anthropic": "^0.2.2",
-    "@langchain/community": "^0.2.14",
-    "@langchain/core": "^0.2.10",
-    "@langchain/google-genai": "^0.0.21",
-    "@langchain/mistralai": "^0.0.24",
-    "@langchain/openai": "^0.2.0",
+    "@langchain/anthropic": "^0.2.6",
+    "@langchain/community": "^0.2.19",
+    "@langchain/core": "^0.2.17",
+    "@langchain/google-genai": "^0.0.23",
+    "@langchain/mistralai": "^0.0.26",
+    "@langchain/openai": "^0.2.4",
     "@node-saml/passport-saml": "^4.0.4",
     "@passport-js/passport-twitter": "^1.0.8",
     "@passport-next/passport-google-oauth2": "^1.0.0",
diff --git a/src/packages/util/db-schema/llm-utils.ts b/src/packages/util/db-schema/llm-utils.ts
@@ -105,6 +105,8 @@ export const OPENAI_PREFIX = "openai-";
 
 export const MODELS_OPENAI = [
   "gpt-3.5-turbo",
+  "gpt-4o-mini-8k", // context limited
+  "gpt-4o-mini", // Released 2024-07-18
   "gpt-4o-8k", // context limited, similar to gpt-4-turbo-8k
   "gpt-4o", // Released 2024-05-13
   // the "preview" variants are disabled, because the preview is over
@@ -219,7 +221,11 @@ export const USER_SELECTABLE_LLMS_BY_VENDOR: {
   [vendor in LLMServiceName]: Readonly<LanguageModelCore[]>;
 } = {
   openai: MODELS_OPENAI.filter(
-    (m) => m === "gpt-4" || m === "gpt-4-turbo-preview-8k" || m === "gpt-4o-8k",
+    (m) =>
+      m === "gpt-4" ||
+      m === "gpt-4-turbo-preview-8k" ||
+      m === "gpt-4o-8k" ||
+      m === "gpt-4o-mini-8k",
   ),
   google: GOOGLE_MODELS.filter(
     (m) =>
@@ -699,6 +705,8 @@ export const LLM_USERNAMES: LLM2String = {
   "gpt-4-turbo-8k": "GPT-4 Turbo 8k",
   "gpt-4o": "GPT-4 Omni 128k",
   "gpt-4o-8k": "GPT-4 Omni 8k",
+  "gpt-4o-mini": "GPT-4o Mini 128k",
+  "gpt-4o-mini-8k": "GPT-4o Mini 8k",
   "text-embedding-ada-002": "Text Embedding Ada 002", // TODO: this is for embeddings, should be moved to a different place
   "text-bison-001": "PaLM 2",
   "chat-bison-001": "PaLM 2",
@@ -742,6 +750,9 @@ export const LLM_DESCR: LLM2String = {
   "gpt-4o-8k":
     "Most powerful, fastest, and cheapest (OpenAI, 8k token context)",
   "gpt-4o": "Most powerful fastest, and cheapest (OpenAI, 128k token context)",
+  "gpt-4o-mini-8k":
+    "Most cost-efficient small model (OpenAI, 8k token context)",
+  "gpt-4o-mini": "Most cost-efficient small model (OpenAI, 128k token context)",
   "text-embedding-ada-002": "Text embedding Ada 002 by OpenAI", // TODO: this is for embeddings, should be moved to a different place
   "text-bison-001": "",
   "chat-bison-001": "",
@@ -910,6 +921,18 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
     max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
     free: false,
   },
+  "gpt-4o-mini-8k": {
+    prompt_tokens: usd1Mtokens(0.15),
+    completion_tokens: usd1Mtokens(0.6),
+    max_tokens: 8192, // like gpt-4-turbo-8k
+    free: true,
+  },
+  "gpt-4o-mini": {
+    prompt_tokens: usd1Mtokens(0.15),
+    completion_tokens: usd1Mtokens(0.6),
+    max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
+    free: true,
+  },
   // also OpenAI
   "text-embedding-ada-002": {
     prompt_tokens: 0.0001 / 1000,
diff --git a/src/packages/util/db-schema/purchase-quotas.ts b/src/packages/util/db-schema/purchase-quotas.ts
@@ -43,6 +43,16 @@ const GPT_OMNI_8K: Spec = {
   display: `${GPT_OMNI.display} 8k`,
 } as const;
 
+const GPT_OMNI_MINI: Spec = {
+  ...GPT_OMNI,
+  display: "OpenAI GPT-4o Mini",
+};
+
+const GPT_OMNI_MINI_8K: Spec = {
+  ...GPT_OMNI_MINI,
+  display: `${GPT_OMNI_MINI.display} 8k`,
+};
+
 const GOOGLE_AI_COLOR = "#ff4d4f";
 
 // NOTE: all-quotas-config.tsx will automatically filter out those, which are free or not selectable by the user
@@ -116,6 +126,8 @@ export const QUOTA_SPEC: QuotaSpec = {
   "openai-gpt-4-turbo-8k": GPT_TURBO_8K,
   "openai-gpt-4o": GPT_OMNI,
   "openai-gpt-4o-8k": GPT_OMNI_8K,
+  "openai-gpt-4o-mini": GPT_OMNI_MINI,
+  "openai-gpt-4o-mini-8k": GPT_OMNI_MINI_8K,
   "google-text-bison-001": {
     display: "Google Palm 2 (Text)",
     color: GOOGLE_AI_COLOR,

Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@`
`20`	`20`	`"undici@<5.28.3": "^5.28.4",`
`21`	`21`	`"postcss@<8.4.31": "^8.4.31",`
`22`	`22`	`"retry-request@<7.0.1": "^7.0.2",`
`23`		`- "@langchain/core": "^0.2.10",`
	`23`	`+ "@langchain/core": "^0.2.17",`
`24`	`24`	`"katex@<0.16.9": "^0.16.10"`
`25`	`25`	`}`
`26`	`26`	`}`
Original file line number	Diff line number	Diff line change
`@@ -331,6 +331,8 @@ export async function evaluateOpenAI({`
`331`	`331`	`// convert *-preview and all *-8k to "gpt-4-turbo"`
`332`	`332`	`if (model.startsWith("gpt-4-turbo")) {`
`333`	`333`	`model = "gpt-4-turbo";`
	`334`	`+ } else if (model.startsWith("gpt-4o-mini")) {`
	`335`	`+ model = "gpt-4o-mini";`
`334`	`336`	`} else if (model.startsWith("gpt-4o")) {`
`335`	`337`	`model = "gpt-4o";`
`336`	`338`	`}`