|
20 | 20 | "openai/gpt-4o-mini-2024-07-18": OpenAIModelArgs( |
21 | 21 | model_name="gpt-4o-mini-2024-07-18", |
22 | 22 | max_total_tokens=128_000, |
23 | | - max_input_tokens=40_000, |
24 | | - max_new_tokens=4000, |
| 23 | + max_input_tokens=100_000, |
| 24 | + max_new_tokens=28_000, |
25 | 25 | vision_support=True, |
26 | 26 | ), |
27 | 27 | "openai/gpt-4-1106-preview": OpenAIModelArgs( |
28 | 28 | model_name="gpt-4-1106-preview", |
29 | 29 | max_total_tokens=128_000, |
30 | | - max_input_tokens=40_000, # make sure we don't bust budget |
31 | | - max_new_tokens=4000, |
| 30 | + max_input_tokens=100_000, |
| 31 | + max_new_tokens=28_000, |
32 | 32 | ), |
33 | 33 | "openai/gpt-4-vision-preview": OpenAIModelArgs( |
34 | 34 | model_name="gpt-4-vision-preview", |
35 | 35 | max_total_tokens=128_000, |
36 | | - max_input_tokens=40_000, # make sure we don't bust budget |
37 | | - max_new_tokens=4000, # I think this model has very small default value if we don't set max_new_tokens |
| 36 | + max_input_tokens=100_000, |
| 37 | + max_new_tokens=28_000, # set explicitly: this model appears to fall back to a very small default when max_new_tokens is omitted. NOTE(review): confirm 28_000 is within the model's output-token cap |
38 | 38 | vision_support=True, |
39 | 39 | ), |
40 | 40 | "openai/gpt-4o-2024-05-13": OpenAIModelArgs( |
41 | 41 | model_name="gpt-4o-2024-05-13", |
42 | 42 | max_total_tokens=128_000, |
43 | | - max_input_tokens=40_000, # make sure we don't bust budget |
44 | | - max_new_tokens=4000, # I think this model has very small default value if we don't set max_new_tokens |
| 43 | + max_input_tokens=100_000, |
| 44 | + max_new_tokens=28_000, # set explicitly: this model appears to fall back to a very small default when max_new_tokens is omitted. NOTE(review): confirm 28_000 is within the model's output-token cap |
45 | 45 | vision_support=True, |
46 | 46 | ), |
47 | 47 | "openai/gpt-3.5-turbo-0125": OpenAIModelArgs( |
|
67 | 67 | model_name="gpt-4o", |
68 | 68 | deployment_name="gpt-4o-2024-05-13", |
69 | 69 | max_total_tokens=128_000, |
70 | | - max_input_tokens=40_000, |
71 | | - max_new_tokens=4_000, |
| 70 | + max_input_tokens=100_000, |
| 71 | + max_new_tokens=28_000, |
| 72 | + vision_support=True, |
72 | 73 | ), |
73 | 74 | "azure/gpt-4o-2024-08-06": AzureModelArgs( |
74 | 75 | model_name="gpt-4o", |
75 | 76 | deployment_name="gpt-4o-2024-08-06", |
76 | 77 | max_total_tokens=128_000, |
77 | | - max_input_tokens=40_000, |
78 | | - max_new_tokens=4_000, |
| 78 | + max_input_tokens=100_000, |
| 79 | + max_new_tokens=28_000, |
| 80 | + vision_support=True, |
79 | 81 | ), |
80 | 82 | "azure/gpt-4o-mini-2024-07-18": AzureModelArgs( |
81 | 83 | model_name="gpt-4o-mini", |
82 | 84 | deployment_name="gpt-4o-mini-2024-07-18", |
83 | 85 | max_total_tokens=128_000, |
84 | | - max_input_tokens=40_000, |
85 | | - max_new_tokens=4_000, |
| 86 | + max_input_tokens=100_000, |
| 87 | + max_new_tokens=28_000, |
| 88 | + vision_support=True, |
86 | 89 | ), |
87 | 90 | # ---------------- OSS LLMs ----------------# |
88 | 91 | "meta-llama/Meta-Llama-3-70B-Instruct": SelfHostedModelArgs( |
|
113 | 116 | "openrouter/meta-llama/llama-3.1-405b-instruct": OpenRouterModelArgs( |
114 | 117 | model_name="meta-llama/llama-3.1-405b-instruct", |
115 | 118 | max_total_tokens=128_000, |
116 | | - max_input_tokens=40_000, |
117 | | - max_new_tokens=4000, |
| 119 | + max_input_tokens=100_000, |
| 120 | + max_new_tokens=28_000, |
118 | 121 | temperature=1e-1, |
119 | 122 | ), |
120 | 123 | "openrouter/meta-llama/llama-3.1-70b-instruct": OpenRouterModelArgs( |
121 | 124 | model_name="meta-llama/llama-3.1-70b-instruct", |
122 | 125 | max_total_tokens=128_000, |
123 | | - max_input_tokens=40_000, |
124 | | - max_new_tokens=4000, |
| 126 | + max_input_tokens=100_000, |
| 127 | + max_new_tokens=28_000, |
125 | 128 | temperature=1e-1, |
126 | 129 | ), |
127 | 130 | "openrouter/meta-llama/llama-3-70b-instruct": OpenRouterModelArgs( |
128 | 131 | model_name="meta-llama/llama-3-70b-instruct", |
129 | 132 | max_total_tokens=128_000, |
130 | | - max_input_tokens=40_000, |
131 | | - max_new_tokens=4000, |
| 133 | + max_input_tokens=100_000, |
| 134 | + max_new_tokens=28_000, |
132 | 135 | temperature=1e-1, |
133 | 136 | ), |
134 | 137 | "openrouter/meta-llama/llama-3.1-8b-instruct:free": OpenRouterModelArgs( |
135 | 138 | model_name="meta-llama/llama-3.1-8b-instruct:free", |
136 | 139 | max_total_tokens=128_000, |
137 | | - max_input_tokens=40_000, |
138 | | - max_new_tokens=4000, |
| 140 | + max_input_tokens=100_000, |
| 141 | + max_new_tokens=28_000, |
139 | 142 | temperature=1e-1, |
140 | 143 | ), |
141 | 144 | "openrouter/meta-llama/llama-3.1-8b-instruct": OpenRouterModelArgs( |
142 | 145 | model_name="meta-llama/llama-3.1-8b-instruct", |
143 | 146 | max_total_tokens=128_000, |
144 | | - max_input_tokens=40_000, |
145 | | - max_new_tokens=4000, |
| 147 | + max_input_tokens=100_000, |
| 148 | + max_new_tokens=28_000, |
146 | 149 | temperature=1e-1, |
147 | 150 | ), |
148 | 151 | "openrouter/anthropic/claude-3.5-sonnet:beta": OpenRouterModelArgs( |
149 | 152 | model_name="anthropic/claude-3.5-sonnet:beta", |
150 | 153 | max_total_tokens=200_000, |
151 | | - max_input_tokens=40_000, |
152 | | - max_new_tokens=4000, |
| 154 | + max_input_tokens=160_000, |
| 155 | + max_new_tokens=40_000, |
153 | 156 | temperature=1e-1, |
154 | 157 | vision_support=True, |
155 | 158 | ), |
|
163 | 166 | "openrouter/openai/o1-mini-2024-09-12": OpenRouterModelArgs( |
164 | 167 | model_name="openai/o1-mini-2024-09-12", |
165 | 168 | max_total_tokens=128_000, |
166 | | - max_input_tokens=40_000, |
167 | | - max_new_tokens=4000, |
| 169 | + max_input_tokens=100_000, |
| 170 | + max_new_tokens=28_000, |
168 | 171 | temperature=1e-1, |
169 | 172 | ), |
170 | 173 | } |
0 commit comments