Skip to content

Commit ae09ced

Browse files
committed
updated model lists
This adds a CLI tool to fetch some model data from @BerriAI/litellm
1 parent f64fd6f commit ae09ced

File tree

8 files changed

+634
-62
lines changed

8 files changed

+634
-62
lines changed

Model/Anthropic/models.json

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,59 @@
22
"chat": {
33
"claude-3-opus-20240229": {
44
"description": "Most powerful model for highly complex tasks. Top-level performance, intelligence, fluency, and understanding.",
5-
"inputTokens": 150000,
6-
"inputTokenPrice": 15.00,
5+
"inputTokens": 200000,
6+
"inputTokenPrice": 15,
77
"outputTokens": 4096,
8-
"outputTokenPrice": 75.00
8+
"outputTokenPrice": 75
99
},
1010
"claude-3-5-sonnet-20240620": {
1111
"description": "Most intelligent model",
1212
"inputTokens": 200000,
13-
"inputTokenPrice": 3.00,
13+
"inputTokenPrice": 3,
1414
"outputTokens": 4096,
15-
"outputTokenPrice": 15.00
15+
"outputTokenPrice": 15
1616
},
1717
"claude-3-sonnet-20240229": {
1818
"description": "Ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments.",
19-
"inputTokens": 150000,
20-
"inputTokenPrice": 3.00,
19+
"inputTokens": 200000,
20+
"inputTokenPrice": 3,
2121
"outputTokens": 4096,
22-
"outputTokenPrice": 15.00
22+
"outputTokenPrice": 15
2323
},
2424
"claude-3-haiku-20240307": {
2525
"description": "Fastest and most compact model for near-instant responsiveness. Quick and accurate targeted performance.",
26-
"inputTokens": 150000,
26+
"inputTokens": 200000,
2727
"inputTokenPrice": 0.25,
2828
"outputTokens": 4096,
2929
"outputTokenPrice": 1.25
30+
},
31+
"claude-instant-1": {
32+
"description": "",
33+
"inputTokens": 100000,
34+
"inputTokenPrice": 1.63,
35+
"outputTokens": 8191,
36+
"outputTokenPrice": 5.51
37+
},
38+
"claude-instant-1.2": {
39+
"description": "",
40+
"inputTokens": 100000,
41+
"inputTokenPrice": 0.16,
42+
"outputTokens": 8191,
43+
"outputTokenPrice": 0.55
44+
},
45+
"claude-2": {
46+
"description": "",
47+
"inputTokens": 100000,
48+
"inputTokenPrice": 8,
49+
"outputTokens": 8191,
50+
"outputTokenPrice": 24
51+
},
52+
"claude-2.1": {
53+
"description": "",
54+
"inputTokens": 200000,
55+
"inputTokenPrice": 8,
56+
"outputTokens": 8191,
57+
"outputTokenPrice": 24
3058
}
3159
}
32-
}
60+
}

Model/Groq/models.json

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,37 +3,37 @@
33
"llama3-8b-8192": {
44
"description": "LLaMA3 8b",
55
"inputTokens": 8192,
6-
"inputTokenPrice": 0,
6+
"inputTokenPrice": 0.05,
77
"outputTokens": 8192,
8-
"outputTokenPrice": 0
8+
"outputTokenPrice": 0.08
99
},
1010
"llama3-70b-8192": {
1111
"description": "LLaMA3 70b",
1212
"inputTokens": 8192,
13-
"inputTokenPrice": 0,
13+
"inputTokenPrice": 0.59,
1414
"outputTokens": 8192,
15-
"outputTokenPrice": 0
15+
"outputTokenPrice": 0.79
1616
},
1717
"llama2-70b-4096": {
1818
"description": "LLaMA2 70b",
1919
"inputTokens": 4096,
20-
"inputTokenPrice": 0,
20+
"inputTokenPrice": 0.7,
2121
"outputTokens": 4096,
22-
"outputTokenPrice": 0
22+
"outputTokenPrice": 0.8
2323
},
2424
"mixtral-8x7b-32768": {
2525
"description": "Mixtral 8x7b",
2626
"inputTokens": 32768,
27-
"inputTokenPrice": 0,
27+
"inputTokenPrice": 0.24,
2828
"outputTokens": 32768,
29-
"outputTokenPrice": 0
29+
"outputTokenPrice": 0.24
3030
},
3131
"gemma-7b-it": {
3232
"description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.",
3333
"inputTokens": 8192,
34-
"inputTokenPrice": 0,
35-
"outputTokens": 8182,
36-
"outputTokenPrice": 0
34+
"inputTokenPrice": 0.07,
35+
"outputTokens": 8192,
36+
"outputTokenPrice": 0.07
3737
}
3838
}
39-
}
39+
}

Model/Mistral/models.json

Lines changed: 66 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,51 +4,100 @@
44
"description": "Our very first. A 7B transformer model, fast-deployed and easily customisable. Small, yet very powerful for a variety of use cases. English and code.",
55
"inputTokens": 32000,
66
"inputTokenPrice": 0.25,
7-
"outputTokens": 4096,
7+
"outputTokens": 8191,
88
"outputTokenPrice": 0.25
99
},
1010
"open-mixtral-8x7b": {
1111
"description": "A 7B sparse Mixture-of-Experts (SMoE). Uses 12B active parameters out of 45B total. Fluent in English, French, Italian, German, Spanish, and strong in code.",
1212
"inputTokens": 32000,
1313
"inputTokenPrice": 0.7,
14-
"outputTokens": 4096,
14+
"outputTokens": 8191,
1515
"outputTokenPrice": 0.7
1616
},
1717
"open-mixtral-8x22b": {
1818
"description": "A 22B sparse Mixture-of-Experts (SMoE). Uses only 39B active parameters out of 141B. Fluent in English, French, Italian, German, Spanish, and strong in code.",
1919
"inputTokens": 64000,
20-
"inputTokenPrice": 2.0,
21-
"outputTokens": 4096,
22-
"outputTokenPrice": 6.0
20+
"inputTokenPrice": 2,
21+
"outputTokens": 8191,
22+
"outputTokenPrice": 6
2323
},
2424
"mistral-small-latest": {
2525
"description": "Cost-efficient reasoning for low-latency workloads. Fluent in English, French, Italian, German, Spanish, and strong in code.",
2626
"inputTokens": 32000,
27-
"inputTokenPrice": 2.00,
28-
"outputTokens": 4096,
29-
"outputTokenPrice": 6.00
27+
"inputTokenPrice": 1,
28+
"outputTokens": 8191,
29+
"outputTokenPrice": 3
3030
},
3131
"mistral-medium-latest": {
3232
"description": "Balanced reasoning for a wide range of tasks. Fluent in English, French, Italian, German, Spanish, and strong in code.",
3333
"inputTokens": 32000,
34-
"inputTokenPrice": 2.70,
35-
"outputTokens": 4096,
36-
"outputTokenPrice": 8.10
34+
"inputTokenPrice": 2.7,
35+
"outputTokens": 8191,
36+
"outputTokenPrice": 8.1
3737
},
3838
"mistral-large-latest": {
3939
"description": "Top-tier reasoning for high-complexity tasks. Fluent in English, French, Italian, German, Spanish, and strong in code.",
4040
"inputTokens": 32000,
41-
"inputTokenPrice": 8.00,
42-
"outputTokens": 4096,
43-
"outputTokenPrice": 24.00
41+
"inputTokenPrice": 4,
42+
"outputTokens": 8191,
43+
"outputTokenPrice": 12
44+
},
45+
"mistral-tiny": {
46+
"description": "",
47+
"inputTokens": 32000,
48+
"inputTokenPrice": 0.25,
49+
"outputTokens": 8191,
50+
"outputTokenPrice": 0.25
51+
},
52+
"mistral-small": {
53+
"description": "",
54+
"inputTokens": 32000,
55+
"inputTokenPrice": 1,
56+
"outputTokens": 8191,
57+
"outputTokenPrice": 3
58+
},
59+
"mistral-medium": {
60+
"description": "",
61+
"inputTokens": 32000,
62+
"inputTokenPrice": 2.7,
63+
"outputTokens": 8191,
64+
"outputTokenPrice": 8.1
65+
},
66+
"codestral-latest": {
67+
"description": "",
68+
"inputTokens": 32000,
69+
"inputTokenPrice": 1,
70+
"outputTokens": 8191,
71+
"outputTokenPrice": 3
72+
},
73+
"open-mistral-nemo": {
74+
"description": "https:\/\/mistral.ai\/technology\/",
75+
"inputTokens": 128000,
76+
"inputTokenPrice": 0.3,
77+
"outputTokens": 128000,
78+
"outputTokenPrice": 0.3
79+
},
80+
"open-codestral-mamba": {
81+
"description": "https:\/\/mistral.ai\/technology\/",
82+
"inputTokens": 256000,
83+
"inputTokenPrice": 0.25,
84+
"outputTokens": 256000,
85+
"outputTokenPrice": 0.25
86+
},
87+
"codestral-mamba-latest": {
88+
"description": "https:\/\/mistral.ai\/technology\/",
89+
"inputTokens": 256000,
90+
"inputTokenPrice": 0.25,
91+
"outputTokens": 256000,
92+
"outputTokenPrice": 0.25
4493
}
4594
},
4695
"embedding": {
4796
"mistral-embed": {
4897
"description": "State-of-the-art semantic for extracting representation of text extracts. English only for now.",
49-
"inputTokenPrice": 0.10,
50-
"inputTokens": 4096,
98+
"inputTokens": 8192,
99+
"inputTokenPrice": 0.1,
51100
"dimensions": 1024
52101
}
53102
}
54-
}
103+
}

Model/OpenAI/models.json

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,50 +3,77 @@
33
"gpt-3.5-turbo": {
44
"description": "GPT-3.5 Turbo is a variant of GPT-3.5 that is optimized for speed and can generate responses in a fraction of the time.",
55
"inputTokens": 16385,
6-
"inputTokenPrice": 0.50,
6+
"inputTokenPrice": 1.5,
77
"outputTokens": 4096,
8-
"outputTokenPrice": 1.50
8+
"outputTokenPrice": 2
99
},
1010
"gpt-4": {
1111
"description": "GPT-4 is the latest version of OpenAI's Generative Pre-trained Transformer (GPT) language model. It is capable of generating human-like text and can be used for a wide range of natural language processing tasks.",
12-
"inputTokens": 81924,
13-
"inputTokenPrice": 30.00,
12+
"inputTokens": 8192,
13+
"inputTokenPrice": 30,
1414
"outputTokens": 4096,
15-
"outputTokenPrice": 60.00
15+
"outputTokenPrice": 60
1616
},
1717
"gpt-4-turbo": {
1818
"description": "GPT-4 Turbo is a variant of GPT-4 that is optimized for speed and can generate responses in a fraction of the time.",
1919
"inputTokens": 128000,
20-
"inputTokenPrice": 10.00,
20+
"inputTokenPrice": 10,
2121
"outputTokens": 4096,
22-
"outputTokenPrice": 30.00
22+
"outputTokenPrice": 30
2323
},
2424
"gpt-4o": {
2525
"description": "GPT-4o has the same high intelligence as GPT-4 Turbo but is much more efficient",
2626
"inputTokens": 128000,
27-
"inputTokenPrice": 5.00,
27+
"inputTokenPrice": 5,
28+
"outputTokens": 4096,
29+
"outputTokenPrice": 15
30+
},
31+
"gpt-4o-mini": {
32+
"description": "",
33+
"inputTokens": 128000,
34+
"inputTokenPrice": 0.15,
35+
"outputTokens": 4096,
36+
"outputTokenPrice": 0.6
37+
},
38+
"gpt-4-32k": {
39+
"description": "",
40+
"inputTokens": 32768,
41+
"inputTokenPrice": 60,
42+
"outputTokens": 4096,
43+
"outputTokenPrice": 120
44+
},
45+
"gpt-3.5-turbo-16k": {
46+
"description": "",
47+
"inputTokens": 16385,
48+
"inputTokenPrice": 3,
2849
"outputTokens": 4096,
29-
"outputTokenPrice": 15.00
50+
"outputTokenPrice": 4
3051
}
3152
},
3253
"embedding": {
3354
"text-embedding-ada-002": {
3455
"description": "Most capable 2nd generation embedding model",
35-
"inputTokens": 8192,
36-
"inputTokenPrice": 0.10,
56+
"inputTokens": 8191,
57+
"inputTokenPrice": 0.1,
3758
"dimensions": 1536
3859
},
3960
"text-embedding-3-small": {
4061
"description": "Increased performance over 2nd generation ada embedding model",
41-
"inputTokens": 8192,
62+
"inputTokens": 8191,
4263
"inputTokenPrice": 0.02,
4364
"dimensions": 1536
4465
},
4566
"text-embedding-3-large": {
4667
"description": "Most capable embedding model for both english and non-english tasks",
47-
"inputTokens": 8192,
68+
"inputTokens": 8191,
4869
"inputTokenPrice": 0.13,
4970
"dimensions": 3072
71+
},
72+
"text-embedding-ada-002-v2": {
73+
"description": "",
74+
"inputTokens": 8191,
75+
"inputTokenPrice": 0.1,
76+
"dimensions": 1536
5077
}
5178
}
52-
}
79+
}

Model/Reka/models.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,21 @@
55
"inputTokens": 128000,
66
"inputTokenPrice": 0.8,
77
"outputTokens": 8192,
8-
"outputTokenPrice": 2.0
8+
"outputTokenPrice": 2
99
},
1010
"reka-edge": {
1111
"description": "Lightweight model for local (i.e., on-hardware) or latency sensitive applications",
1212
"inputTokens": 64000,
1313
"inputTokenPrice": 0.4,
1414
"outputTokens": 8192,
15-
"outputTokenPrice": 1.0
15+
"outputTokenPrice": 1
1616
},
1717
"reka-core": {
1818
"description": "Superior capabilities for complex tasks",
1919
"inputTokens": 128000,
20-
"inputTokenPrice": 10.0,
20+
"inputTokenPrice": 10,
2121
"outputTokens": 8192,
22-
"outputTokenPrice": 25.0
22+
"outputTokenPrice": 25
2323
}
2424
}
25-
}
25+
}

0 commit comments

Comments
 (0)