|
1 | | -from .types import Model |
| 1 | +from .types import Model, SampleParam |
2 | 2 |
|
3 | 3 | _ANTHROPIC_MODELS = [ |
4 | 4 | Model( |
|
227 | 227 | input_token_price_1m=1.25, |
228 | 228 | output_token_price_1m=10, |
229 | 229 | ), |
| 230 | + Model( |
| 231 | + id="gemini-3-pro-preview", |
| 232 | + provider="google", |
| 233 | + name="Gemini 3 Pro", |
| 234 | + max_context_token=1_000_000, |
| 235 | + description="Google's most intelligent model family to date, built on a foundation of state-of-the-art reasoning", |
| 236 | + capabilities=["tool_use", "thinking", "vision", "structured_output"], |
| 237 | + force_sample_params=SampleParam(temperature=1.0), |
| 238 | + input_token_price_1m=2, |
| 239 | + output_token_price_1m=12, |
| 240 | + ), |
230 | 241 | ] |
231 | 242 |
|
232 | 243 | _DEEPINFRA_MODELS = [ |
|
254 | 265 | name="Qwen 3 Coder", |
255 | 266 | description="Qwen3-Coder-480B-A35B-Instruct is Qwen3's most agentic code model",
256 | 267 | capabilities=["tool_use", "stream", "structured_output"], |
| 268 | + force_sample_params=SampleParam( |
| 269 | + temperature=0.7, top_p=0.8, top_k=20, repetition_penalty=1.05 |
| 270 | + ), |
257 | 271 | input_token_price_1m=0.4, |
258 | 272 | output_token_price_1m=1.6, |
259 | 273 | ), |
|
263 | 277 | name="Qwen 3 Coder", |
264 | 278 | description="Qwen3-Coder-480B-A35B-Instruct is Qwen3's most agentic code model",
265 | 279 | capabilities=["tool_use", "stream", "structured_output"], |
| 280 | + force_sample_params=SampleParam( |
| 281 | + temperature=0.7, top_p=0.8, top_k=20, min_p=0.0 |
| 282 | + ), |
266 | 283 | input_token_price_1m=0.14, |
267 | 284 | output_token_price_1m=1.1, |
268 | 285 | ), |
|
272 | 289 | name="Qwen 3 MoE 235B-22B", |
273 | 290 | description="Qwen3 is the latest generation of large language models in the Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models",
274 | 291 | capabilities=["tool_use", "thinking", "stream", "structured_output"], |
| 292 | + force_sample_params=SampleParam( |
| 293 | + temperature=0.6, top_p=0.95, top_k=20, min_p=0.0 |
| 294 | + ), |
275 | 295 | input_token_price_1m=0.2, |
276 | 296 | output_token_price_1m=0.6, |
277 | 297 | ), |
|
280 | 300 | provider="deepinfra", |
281 | 301 | name="Zai GLM-4.6", |
282 | 302 | description="The GLM-4.6 series models are foundation models designed for intelligent agents", |
| 303 | + force_sample_params=SampleParam(temperature=1, top_p=0.95, top_k=40), |
283 | 304 | capabilities=["tool_use", "stream", "structured_output"], |
284 | 305 | input_token_price_1m=0.6, |
285 | 306 | output_token_price_1m=2.0, |
|
290 | 311 | name="Qwen 3 32B", |
291 | 312 | description="Qwen3 is the latest generation of large language models in the Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models",
292 | 313 | capabilities=["tool_use", "stream", "structured_output"], |
| 314 | + force_sample_params=SampleParam( |
| 315 | + temperature=0.6, top_p=0.95, top_k=20, min_p=0.0 |
| 316 | + ), |
293 | 317 | input_token_price_1m=0.1, |
294 | 318 | output_token_price_1m=0.3, |
295 | 319 | ), |
|
308 | 332 | name="DeepSeek R1 0528", |
309 | 333 | description="The DeepSeek R1 model has undergone a minor version upgrade, with the current version being DeepSeek-R1-0528.", |
310 | 334 | capabilities=["tool_use", "thinking", "stream", "structured_output"], |
| 335 | + force_sample_params=SampleParam(temperature=0.6), |
311 | 336 | input_token_price_1m=0.5, |
312 | 337 | output_token_price_1m=2.18, |
313 | 338 | ), |
|
317 | 342 | name="Kimi K2 Instruct", |
318 | 343 | description="Kimi K2 is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass", |
319 | 344 | capabilities=["tool_use", "stream", "structured_output"], |
| 345 | + force_sample_params=SampleParam(temperature=0.6), |
320 | 346 | input_token_price_1m=0.5, |
321 | 347 | output_token_price_1m=2.0, |
322 | 348 | ), |
|
342 | 368 | input_token_price_1m=0.0, |
343 | 369 | output_token_price_1m=0.0, |
344 | 370 | ), |
| 371 | + Model( |
| 372 | + id="gemini-3-pro-preview", |
| 373 | + provider="github_copilot", |
| 374 | + name="Gemini 3 Pro", |
| 375 | + description="", |
| 376 | + capabilities=["tool_use", "vision", "stream"], |
| 377 | + default=False, |
| 378 | + force_sample_params=SampleParam(temperature=1.0),
| 379 | + input_token_price_1m=0.0,
| 380 | + output_token_price_1m=0.0,
| 381 | + ), |
345 | 382 | Model( |
346 | 383 | id="gpt-4.1", |
347 | 384 | provider="github_copilot", |
|
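
For context on the change above: the import of SampleParam alongside Model and the new force_sample_params=... fields pin provider-recommended sampling settings directly on each model entry. Below is a minimal sketch of what such a type and an override step could look like. The field names (temperature, top_p, top_k, min_p, repetition_penalty) mirror the ones used in this diff, but the SampleParam definition and the apply_forced_params() merge logic are assumptions for illustration, not the repository's actual implementation in .types.

from dataclasses import dataclass, asdict
from typing import Optional

@dataclass
class SampleParam:
    # Sampling knobs a model entry can force; None means "not forced".
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    min_p: Optional[float] = None
    repetition_penalty: Optional[float] = None

def apply_forced_params(request_kwargs: dict, forced: Optional[SampleParam]) -> dict:
    # Overlay forced values on caller-supplied sampling kwargs; caller values
    # survive only where the model entry does not force a setting.
    merged = dict(request_kwargs)
    if forced is not None:
        merged.update({k: v for k, v in asdict(forced).items() if v is not None})
    return merged

# Example: the Qwen 3 Coder entry above forces temperature/top_p/top_k/repetition_penalty,
# so a caller-requested temperature of 0.2 is replaced by the forced 0.7.
forced = SampleParam(temperature=0.7, top_p=0.8, top_k=20, repetition_penalty=1.05)
print(apply_forced_params({"temperature": 0.2, "max_tokens": 512}, forced))
# {'temperature': 0.7, 'max_tokens': 512, 'top_p': 0.8, 'top_k': 20, 'repetition_penalty': 1.05}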