@@ -145,7 +145,7 @@ export const USER_SELECTABLE_LLMS_BY_VENDOR: {
145
145
) ,
146
146
google : GOOGLE_MODELS . filter (
147
147
( m ) =>
148
- // we only enable the 1.0 pro and 1.5 pro with a limited context window
148
+ // we only enable 1.0 pro, plus 1.5 pro and 1.5 flash with a limited (8k) context window
149
149
m === "gemini-pro" ||
150
150
m === "gemini-1.5-pro-8k" ||
151
151
m === "gemini-1.5-flash-8k" ,
@@ -795,43 +795,22 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
795
795
max_tokens : 8191 ,
796
796
free : false ,
797
797
} ,
798
- // https://developers.generativeai.google/models/language
799
- // "text-bison-001": {
800
- // // we assume 5 characters is 1 token on average
801
- // prompt_tokens: (5 * 0.0005) / 1000,
802
- // completion_tokens: (5 * 0.0005) / 1000,
803
- // max_tokens: 8196,
804
- // },
805
- // "chat-bison-001": {
806
- // // we assume 5 characters is 1 token on average
807
- // prompt_tokens: (5 * 0.0005) / 1000,
808
- // completion_tokens: (5 * 0.0005) / 1000,
809
- // max_tokens: 8196,
810
- // },
811
- // "embedding-gecko-001": {
812
- // prompt_tokens: (5 * 0.0001) / 1000,
813
- // completion_tokens: 0,
814
- // max_tokens: 8196, // ???
815
- // },
816
- // you can learn details about the google models via
817
- // curl -s "https://generativelanguage.googleapis.com/v1beta/models?key=$KEY"
818
- // Pricing, at least Gemini Pro: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_foundational_models
798
+ // https://ai.google.dev/pricing
819
799
"gemini-pro" : {
820
- prompt_tokens : usd1Mtokens ( 0.5 ) , // https://ai.google.dev/pricing
800
+ prompt_tokens : usd1Mtokens ( 0.5 ) ,
821
801
completion_tokens : usd1Mtokens ( 1.5 ) ,
822
802
max_tokens : 30720 ,
823
803
free : true ,
824
804
} ,
825
805
"gemini-1.5-pro-8k" : {
826
- prompt_tokens : usd1Mtokens ( 7 ) , // https://ai.google.dev/pricing
827
- completion_tokens : usd1Mtokens ( 21 ) ,
806
+ prompt_tokens : usd1Mtokens ( 3.5 ) , // (we're below the 128k context)
807
+ completion_tokens : usd1Mtokens ( 10.5 ) ,
828
808
max_tokens : 8_000 ,
829
- // will change 2024-05-30
830
- free : new Date ( "2024-05-30" ) > new Date ( ) ,
809
+ free : false ,
831
810
} ,
832
811
"gemini-1.5-pro" : {
833
- prompt_tokens : usd1Mtokens ( 3.5 ) , // https://ai.google.dev/pricing (cheaper, because we're below the 128k context)
834
- completion_tokens : usd1Mtokens ( 10.5 ) ,
812
+ prompt_tokens : usd1Mtokens ( 7 ) ,
813
+ completion_tokens : usd1Mtokens ( 21 ) ,
835
814
max_tokens : 1048576 ,
836
815
free : false ,
837
816
} ,
@@ -843,35 +822,36 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
843
822
} ,
844
823
"gemini-1.5-flash-8k" : {
845
824
prompt_tokens : usd1Mtokens ( 0.35 ) ,
846
- completion_tokens : usd1Mtokens ( 0.53 ) ,
825
+ completion_tokens : usd1Mtokens ( 1.05 ) ,
847
826
max_tokens : 8_000 ,
848
827
free : true ,
849
828
} ,
829
+ // https://mistral.ai/technology/
850
830
"mistral-small-latest" : {
851
- prompt_tokens : usd1Mtokens ( 2 ) , // 2$ / 1M tokens
852
- completion_tokens : usd1Mtokens ( 6 ) , // 6$ / 1M tokens
831
+ prompt_tokens : usd1Mtokens ( 1 ) ,
832
+ completion_tokens : usd1Mtokens ( 3 ) ,
853
833
max_tokens : 4096 , // TODO don't know the real value, see getMaxTokens
854
834
free : true ,
855
835
} ,
856
836
"mistral-medium-latest" : {
857
- prompt_tokens : usd1Mtokens ( 2.7 ) , // 2.7$ / 1M tokens
858
- completion_tokens : usd1Mtokens ( 8.1 ) , // 8.1$ / 1M tokens
837
+ prompt_tokens : usd1Mtokens ( 2.7 ) ,
838
+ completion_tokens : usd1Mtokens ( 8.1 ) ,
859
839
max_tokens : 4096 , // TODO don't know the real value, see getMaxTokens
860
840
free : true ,
861
841
} ,
862
842
"mistral-large-latest" : {
863
- prompt_tokens : usd1Mtokens ( 8 ) , // 8$ / 1M tokens
864
- completion_tokens : usd1Mtokens ( 24 ) , // 24$ / 1M tokens
843
+ prompt_tokens : usd1Mtokens ( 4 ) ,
844
+ completion_tokens : usd1Mtokens ( 12 ) ,
865
845
max_tokens : 4096 , // TODO don't know the real value, see getMaxTokens
866
846
free : false ,
867
847
} ,
848
+ // Anthropic: pricing is listed on this page: https://www.anthropic.com/api
868
849
"claude-3-opus-8k" : {
869
850
prompt_tokens : usd1Mtokens ( 15 ) ,
870
851
completion_tokens : usd1Mtokens ( 75 ) ,
871
852
max_tokens : 8_000 , // limited to 8k tokens, to reduce the necessary spend limit to commit to
872
853
free : false ,
873
854
} ,
874
- // Anthropic: pricing somewhere on that page: https://www.anthropic.com/api
875
855
"claude-3-opus" : {
876
856
prompt_tokens : usd1Mtokens ( 15 ) ,
877
857
completion_tokens : usd1Mtokens ( 75 ) ,
0 commit comments