@@ -700,32 +700,32 @@ export const LLM_USERNAMES: LLM2String = {
700
700
"gpt-3.5-turbo" : "GPT-3.5" ,
701
701
"gpt-3.5-turbo-16k" : "GPT-3.5-16k" ,
702
702
"gpt-4-turbo-preview" : "GPT-4 Turbo 128k" ,
703
- "gpt-4-turbo-preview-8k" : "GPT-4 Turbo 8k " ,
703
+ "gpt-4-turbo-preview-8k" : "GPT-4 Turbo" ,
704
704
"gpt-4-turbo" : "GPT-4 Turbo 128k" ,
705
- "gpt-4-turbo-8k" : "GPT-4 Turbo 8k " ,
706
- "gpt-4o" : "GPT-4 Omni 128k" ,
707
- "gpt-4o-8k" : "GPT-4 Omni 8k " ,
705
+ "gpt-4-turbo-8k" : "GPT-4 Turbo" ,
706
+ "gpt-4o" : "GPT-4o 128k" ,
707
+ "gpt-4o-8k" : "GPT-4o " ,
708
708
"gpt-4o-mini" : "GPT-4o Mini 128k" ,
709
- "gpt-4o-mini-8k" : "GPT-4o Mini 8k " ,
709
+ "gpt-4o-mini-8k" : "GPT-4o Mini" ,
710
710
"text-embedding-ada-002" : "Text Embedding Ada 002" , // TODO: this is for embeddings, should be moved to a different place
711
711
"text-bison-001" : "PaLM 2" ,
712
712
"chat-bison-001" : "PaLM 2" ,
713
713
"gemini-pro" : "Gemini 1.0 Pro" ,
714
714
"gemini-1.0-ultra" : "Gemini 1.0 Ultra" ,
715
715
"gemini-1.5-pro" : "Gemini 1.5 Pro 1m" ,
716
- "gemini-1.5-pro-8k" : "Gemini 1.5 Pro 8k " ,
717
- "gemini-1.5-flash-8k" : "Gemini 1.5 Flash 8k " ,
716
+ "gemini-1.5-pro-8k" : "Gemini 1.5 Pro" ,
717
+ "gemini-1.5-flash-8k" : "Gemini 1.5 Flash" ,
718
718
"mistral-small-latest" : "Mistral AI Small" ,
719
719
"mistral-medium-latest" : "Mistral AI Medium" ,
720
720
"mistral-large-latest" : "Mistral AI Large" ,
721
- "claude-3-haiku" : "Claude 3 Haiku" ,
722
- "claude-3-haiku-8k" : "Claude 3 Haiku 8k " ,
723
- "claude-3-sonnet" : "Claude 3 Sonnet" ,
724
- "claude-3-sonnet-4k" : "Claude 3 Sonnet 4k " ,
725
- "claude-3-5-sonnet" : "Claude 3.5 Sonnet" ,
726
- "claude-3-5-sonnet-4k" : "Claude 3.5 Sonnet 4k " ,
721
+ "claude-3-haiku" : "Claude 3 Haiku 200k " ,
722
+ "claude-3-haiku-8k" : "Claude 3 Haiku" ,
723
+ "claude-3-sonnet" : "Claude 3 Sonnet 200k " ,
724
+ "claude-3-sonnet-4k" : "Claude 3 Sonnet" ,
725
+ "claude-3-5-sonnet" : "Claude 3.5 Sonnet 200k " ,
726
+ "claude-3-5-sonnet-4k" : "Claude 3.5 Sonnet" ,
727
727
"claude-3-opus" : "Claude 3 Opus 200k" ,
728
- "claude-3-opus-8k" : "Claude 3 Opus 8k " ,
728
+ "claude-3-opus-8k" : "Claude 3 Opus" ,
729
729
} as const ;
730
730
731
731
// similar to the above, we map to short user-visible description texts
@@ -736,17 +736,17 @@ export const LLM_DESCR: LLM2String = {
736
736
chatgpt4 :
737
737
"Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)" ,
738
738
"gpt-4" :
739
- "Most powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)" ,
739
+ "Powerful OpenAI model. Can follow complex instructions and solve difficult problems. (OpenAI, 8k token context)" ,
740
740
"gpt-4-32k" : "" ,
741
741
"gpt-3.5-turbo" : "Fast, great for everyday tasks. (OpenAI, 4k token context)" ,
742
742
"gpt-3.5-turbo-16k" : `Same as ${ LLM_USERNAMES [ "gpt-3.5-turbo" ] } but with larger 16k token context` ,
743
743
"gpt-4-turbo-preview-8k" :
744
744
"More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)" ,
745
745
"gpt-4-turbo-preview" :
746
- "Like GPT-4 Turbo 8k , but with up to 128k token context" ,
746
+ "Like GPT-4 Turbo, but with up to 128k token context" ,
747
747
"gpt-4-turbo-8k" :
748
748
"Faster, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)" ,
749
- "gpt-4-turbo" : "Like GPT-4 Turbo 8k , but with up to 128k token context" ,
749
+ "gpt-4-turbo" : "Like GPT-4 Turbo, but with up to 128k token context" ,
750
750
"gpt-4o-8k" :
751
751
"Most powerful, fastest, and cheapest (OpenAI, 8k token context)" ,
752
752
"gpt-4o" : "Most powerful fastest, and cheapest (OpenAI, 128k token context)" ,
@@ -873,14 +873,14 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
873
873
free : false ,
874
874
} ,
875
875
"gpt-3.5-turbo" : {
876
- prompt_tokens : usd1Mtokens ( 1.5 ) ,
877
- completion_tokens : usd1Mtokens ( 2 ) ,
876
+ prompt_tokens : usd1Mtokens ( 3 ) ,
877
+ completion_tokens : usd1Mtokens ( 6 ) ,
878
878
max_tokens : 4096 ,
879
879
free : true ,
880
880
} ,
881
881
"gpt-3.5-turbo-16k" : {
882
- prompt_tokens : usd1Mtokens ( 0.5 ) ,
883
- completion_tokens : usd1Mtokens ( 1.5 ) ,
882
+ prompt_tokens : usd1Mtokens ( 3 ) ,
883
+ completion_tokens : usd1Mtokens ( 6 ) ,
884
884
max_tokens : 16384 ,
885
885
free : false ,
886
886
} ,
@@ -910,14 +910,14 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
910
910
free : false ,
911
911
} ,
912
912
"gpt-4o-8k" : {
913
- prompt_tokens : usd1Mtokens ( 5 ) ,
914
- completion_tokens : usd1Mtokens ( 15 ) ,
913
+ prompt_tokens : usd1Mtokens ( 2.5 ) ,
914
+ completion_tokens : usd1Mtokens ( 10 ) ,
915
915
max_tokens : 8192 , // like gpt-4-turbo-8k
916
916
free : false ,
917
917
} ,
918
918
"gpt-4o" : {
919
- prompt_tokens : usd1Mtokens ( 5 ) ,
920
- completion_tokens : usd1Mtokens ( 15 ) ,
919
+ prompt_tokens : usd1Mtokens ( 2.5 ) ,
920
+ completion_tokens : usd1Mtokens ( 10 ) ,
921
921
max_tokens : 128000 , // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
922
922
free : false ,
923
923
} ,
@@ -966,15 +966,15 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
966
966
free : true ,
967
967
} ,
968
968
"gemini-1.5-flash-8k" : {
969
- prompt_tokens : usd1Mtokens ( 0.35 ) ,
970
- completion_tokens : usd1Mtokens ( 1.05 ) ,
969
+ prompt_tokens : usd1Mtokens ( 0.075 ) ,
970
+ completion_tokens : usd1Mtokens ( 0.3 ) ,
971
971
max_tokens : 8_000 ,
972
972
free : true ,
973
973
} ,
974
974
// https://mistral.ai/technology/
975
975
"mistral-small-latest" : {
976
- prompt_tokens : usd1Mtokens ( 1 ) ,
977
- completion_tokens : usd1Mtokens ( 3 ) ,
976
+ prompt_tokens : usd1Mtokens ( 0.2 ) ,
977
+ completion_tokens : usd1Mtokens ( 0.6 ) ,
978
978
max_tokens : 4096 , // TODO don't know the real value, see getMaxTokens
979
979
free : true ,
980
980
} ,
@@ -985,8 +985,8 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
985
985
free : true ,
986
986
} ,
987
987
"mistral-large-latest" : {
988
- prompt_tokens : usd1Mtokens ( 4 ) ,
989
- completion_tokens : usd1Mtokens ( 12 ) ,
988
+ prompt_tokens : usd1Mtokens ( 2 ) ,
989
+ completion_tokens : usd1Mtokens ( 6 ) ,
990
990
max_tokens : 4096 , // TODO don't know the real value, see getMaxTokens
991
991
free : false ,
992
992
} ,
@@ -1006,13 +1006,13 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = {
1006
1006
"claude-3-5-sonnet" : {
1007
1007
prompt_tokens : usd1Mtokens ( 3 ) ,
1008
1008
completion_tokens : usd1Mtokens ( 15 ) ,
1009
- max_tokens : 4_000 , // limited to 4k tokens, offered for free
1009
+ max_tokens : 200_000 ,
1010
1010
free : false ,
1011
1011
} ,
1012
1012
"claude-3-5-sonnet-4k" : {
1013
1013
prompt_tokens : usd1Mtokens ( 3 ) ,
1014
1014
completion_tokens : usd1Mtokens ( 15 ) ,
1015
- max_tokens : 4_000 , // limited to 4k tokens, offered for free
1015
+ max_tokens : 4_000 , // limited to 4k tokens
1016
1016
free : false ,
1017
1017
} ,
1018
1018
"claude-3-sonnet-4k" : {
0 commit comments