|
44 | 44 | input_token_price_1m=3.0, |
45 | 45 | output_token_price_1m=15.0, |
46 | 46 | ), |
| 47 | + Model( |
| 48 | + id="claude-sonnet-4-6", |
| 49 | + provider="claude", |
| 50 | + name="Claude 4.6 Sonnet", |
| 51 | + description="Our smartest model for complex agents and coding", |
| 52 | + capabilities=["thinking", "tool_use", "vision", "stream"], |
| 53 | + max_context_token=200_000, |
| 54 | + default=False, |
| 55 | + input_token_price_1m=3.0, |
| 56 | + output_token_price_1m=15.0, |
| 57 | + ), |
47 | 58 | Model( |
48 | 59 | id="claude-haiku-4-5", |
49 | 60 | provider="claude", |
|
281 | 292 | input_token_price_1m=2, |
282 | 293 | output_token_price_1m=12, |
283 | 294 | ), |
| 295 | + Model( |
| 296 | + id="gemini-3.1-pro-preview", |
| 297 | + provider="google", |
| 298 | + name="Gemini 3.1 Pro", |
| 299 | + max_context_token=1_000_000, |
| 300 | + description="Google's most intelligent model family to date, built on a foundation of state-of-the-art reasoning", |
| 301 | + capabilities=["tool_use", "thinking", "vision", "structured_output"], |
| 302 | + force_sample_params=SampleParam(temperature=1.0), |
| 303 | + input_token_price_1m=2, |
| 304 | + output_token_price_1m=12, |
| 305 | + ), |
284 | 306 | ] |
285 | 307 |
|
286 | 308 | _DEEPINFRA_MODELS = [ |
|
327 | 349 | output_token_price_1m=1.1, |
328 | 350 | ), |
329 | 351 | Model( |
330 | | - id="Qwen/Qwen3-235B-A22B-Thinking-2507", |
| 352 | + id="Qwen/Qwen3-Max-Thinking", |
331 | 353 | provider="deepinfra", |
332 | | - name="Qwen 3 MoE 235B-22B", |
| 354 | + name="Qwen 3 Max Thinking", |
333 | 355 | description="Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models", |
334 | 356 | capabilities=["tool_use", "thinking", "stream", "structured_output"], |
335 | | - force_sample_params=SampleParam( |
336 | | - temperature=0.6, top_p=0.95, top_k=20, min_p=0.0 |
337 | | - ), |
| 357 | + force_sample_params=SampleParam(temperature=0.7, top_p=0.9, top_k=0, min_p=0.0), |
| 358 | + max_context_token=256_000, |
338 | 359 | input_token_price_1m=0.2, |
339 | 360 | output_token_price_1m=0.6, |
340 | 361 | ), |
|
421 | 442 | output_token_price_1m=2.0, |
422 | 443 | ), |
423 | 444 | Model( |
424 | | - id="MiniMaxAI/MiniMax-M2", |
| 445 | + id="MiniMaxAI/MiniMax-M2.5", |
425 | 446 | provider="deepinfra", |
426 | | - name="MiniMax-M2", |
427 | | - description="MiniMax-M2 is a Mini model built for Max coding & agentic workflows with just 10 billion activated parameters", |
| 447 | + name="MiniMax-M2.5", |
| 448 | + description="MiniMax-M2.5 is a Mini model built for Max coding & agentic workflows with just 10 billion activated parameters", |
428 | 449 | capabilities=["tool_use", "stream", "structured_output"], |
429 | 450 | force_sample_params=SampleParam(temperature=0.6), |
430 | 451 | max_context_token=262_000, |
431 | | - input_token_price_1m=0.254, |
432 | | - output_token_price_1m=1.02, |
| 452 | + input_token_price_1m=0.27, |
| 453 | + output_token_price_1m=0.95, |
433 | 454 | ), |
434 | 455 | ] |
435 | 456 | _GITHUB_COPILOT_MODELS = [ |
|
465 | 486 | force_sample_params=SampleParam(temperature=1.0), |
466 | 487 | output_token_price_1m=0.0, |
467 | 488 | ), |
| 489 | + Model( |
| 490 | + id="gemini-3.1-pro-preview", |
| 491 | + provider="github_copilot", |
| 492 | + name="Gemini 3.1 Pro", |
| 493 | + description="", |
| 494 | + capabilities=["tool_use", "vision", "stream"], |
| 495 | + default=False, |
| 496 | + input_token_price_1m=0.0, |
| 497 | + force_sample_params=SampleParam(temperature=1.0), |
| 498 | + output_token_price_1m=0.0, |
| 499 | + ), |
468 | 500 | Model( |
469 | 501 | id="gemini-3-flash-preview", |
470 | 502 | provider="github_copilot", |
|
574 | 606 | input_token_price_1m=0.0, |
575 | 607 | output_token_price_1m=0.0, |
576 | 608 | ), |
| 609 | + Model( |
| 610 | + id="gpt-5.3-codex", |
| 611 | + provider="copilot_response", |
| 612 | + name="GPT 5.3 Codex", |
| 613 | + description="", |
| 614 | + capabilities=["tool_use", "vision", "stream", "structured_output"], |
| 615 | + max_context_token=400_000, |
| 616 | + default=False, |
| 617 | + input_token_price_1m=0.0, |
| 618 | + output_token_price_1m=0.0, |
| 619 | + endpoint="response", |
| 620 | + ), |
577 | 621 | Model( |
578 | 622 | id="gpt-5.1-codex", |
579 | 623 | provider="copilot_response", |
|
631 | 675 | input_token_price_1m=0.0, |
632 | 676 | output_token_price_1m=0.0, |
633 | 677 | ), |
| 678 | + Model( |
| 679 | + id="claude-sonnet-4.6", |
| 680 | + provider="github_copilot", |
| 681 | + name="Claude Sonnet 4.6", |
| 682 | + description="", |
| 683 | + capabilities=["tool_use", "vision", "stream", "thinking"], |
| 684 | + default=False, |
| 685 | + max_context_token=200_000, |
| 686 | + input_token_price_1m=0.0, |
| 687 | + output_token_price_1m=0.0, |
| 688 | + ), |
634 | 689 | Model( |
635 | 690 | id="claude-opus-41", |
636 | 691 | provider="github_copilot", |
|
0 commit comments