Skip to content

Commit a8d7590

Browse files
authored
Merge pull request #7597 from sagemathinc/llm-pricing-20240530
llm pricing: there are a couple of small changes
2 parents 6fc77ea + f761405 commit a8d7590

File tree

1 file changed

+17
-37
lines changed

1 file changed

+17
-37
lines changed

src/packages/util/db-schema/llm-utils.ts

Lines changed: 17 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ export const USER_SELECTABLE_LLMS_BY_VENDOR: {
145145
),
146146
google: GOOGLE_MODELS.filter(
147147
(m) =>
148-
// we only enable the 1.0 pro and 1.5 pro with a limited context window
148+
// we only enable 1.0 pro ("gemini-pro"), plus 1.5 pro and 1.5 flash in their limited 8k context window variants
149149
m === "gemini-pro" ||
150150
m === "gemini-1.5-pro-8k" ||
151151
m === "gemini-1.5-flash-8k",
@@ -795,43 +795,22 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
795795
max_tokens: 8191,
796796
free: false,
797797
},
798-
// https://developers.generativeai.google/models/language
799-
// "text-bison-001": {
800-
// // we assume 5 characters is 1 token on average
801-
// prompt_tokens: (5 * 0.0005) / 1000,
802-
// completion_tokens: (5 * 0.0005) / 1000,
803-
// max_tokens: 8196,
804-
// },
805-
// "chat-bison-001": {
806-
// // we assume 5 characters is 1 token on average
807-
// prompt_tokens: (5 * 0.0005) / 1000,
808-
// completion_tokens: (5 * 0.0005) / 1000,
809-
// max_tokens: 8196,
810-
// },
811-
// "embedding-gecko-001": {
812-
// prompt_tokens: (5 * 0.0001) / 1000,
813-
// completion_tokens: 0,
814-
// max_tokens: 8196, // ???
815-
// },
816-
// you can learn details about the google models via
817-
// curl -s "https://generativelanguage.googleapis.com/v1beta/models?key=$KEY"
818-
// Pricing, at least Gemini Pro: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_foundational_models
798+
// https://ai.google.dev/pricing
819799
"gemini-pro": {
820-
prompt_tokens: usd1Mtokens(0.5), // https://ai.google.dev/pricing
800+
prompt_tokens: usd1Mtokens(0.5),
821801
completion_tokens: usd1Mtokens(1.5),
822802
max_tokens: 30720,
823803
free: true,
824804
},
825805
"gemini-1.5-pro-8k": {
826-
prompt_tokens: usd1Mtokens(7), // https://ai.google.dev/pricing
827-
completion_tokens: usd1Mtokens(21),
806+
prompt_tokens: usd1Mtokens(3.5), // cheaper rate, because the 8k limit keeps us below the 128k context
807+
completion_tokens: usd1Mtokens(10.5),
828808
max_tokens: 8_000,
829-
// will change 2024-05-30
830-
free: new Date("2024-05-30") > new Date(),
809+
free: false,
831810
},
832811
"gemini-1.5-pro": {
833-
prompt_tokens: usd1Mtokens(3.5), // https://ai.google.dev/pricing (cheaper, because we're below the 128k context)
834-
completion_tokens: usd1Mtokens(10.5),
812+
prompt_tokens: usd1Mtokens(7),
813+
completion_tokens: usd1Mtokens(21),
835814
max_tokens: 1048576,
836815
free: false,
837816
},
@@ -843,35 +822,36 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
843822
},
844823
"gemini-1.5-flash-8k": {
845824
prompt_tokens: usd1Mtokens(0.35),
846-
completion_tokens: usd1Mtokens(0.53),
825+
completion_tokens: usd1Mtokens(1.05),
847826
max_tokens: 8_000,
848827
free: true,
849828
},
829+
// https://mistral.ai/technology/
850830
"mistral-small-latest": {
851-
prompt_tokens: usd1Mtokens(2), // 2$ / 1M tokens
852-
completion_tokens: usd1Mtokens(6), // 6$ / 1M tokens
831+
prompt_tokens: usd1Mtokens(1),
832+
completion_tokens: usd1Mtokens(3),
853833
max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
854834
free: true,
855835
},
856836
"mistral-medium-latest": {
857-
prompt_tokens: usd1Mtokens(2.7), // 2.7$ / 1M tokens
858-
completion_tokens: usd1Mtokens(8.1), // 8.1$ / 1M tokens
837+
prompt_tokens: usd1Mtokens(2.7),
838+
completion_tokens: usd1Mtokens(8.1),
859839
max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
860840
free: true,
861841
},
862842
"mistral-large-latest": {
863-
prompt_tokens: usd1Mtokens(8), // 8$ / 1M tokens
864-
completion_tokens: usd1Mtokens(24), // 24$ / 1M tokens
843+
prompt_tokens: usd1Mtokens(4),
844+
completion_tokens: usd1Mtokens(12),
865845
max_tokens: 4096, // TODO don't know the real value, see getMaxTokens
866846
free: false,
867847
},
848+
// Anthropic: pricing somewhere on that page: https://www.anthropic.com/api
868849
"claude-3-opus-8k": {
869850
prompt_tokens: usd1Mtokens(15),
870851
completion_tokens: usd1Mtokens(75),
871852
max_tokens: 8_000, // limited to 8k tokens, to reduce the necessary spend limit to commit to
872853
free: false,
873854
},
874-
// Anthropic: pricing somewhere on that page: https://www.anthropic.com/api
875855
"claude-3-opus": {
876856
prompt_tokens: usd1Mtokens(15),
877857
completion_tokens: usd1Mtokens(75),

0 commit comments

Comments
 (0)