|
1 | 1 | { |
2 | 2 | "meta": { |
3 | | - "version": "2026.02.19", |
4 | | - "last_update": "2026-02-19T16:00:00Z", |
| 3 | + "version": "2026.02.24", |
| 4 | + "last_update": "2026-02-20T14:27:25Z", |
5 | 5 | "schema_version": "1.0" |
6 | 6 | }, |
7 | 7 | "models": [ |
8 | 8 | { |
9 | 9 | "id": "minimax-m2-1", |
10 | 10 | "name": "MiniMax M2.1", |
| 11 | + "aka": ["minimax-m2.1", "minimax-m21", "m2.1"], |
11 | 12 | "provider": "MiniMax", |
12 | 13 | "type": "open-source", |
13 | 14 | "release_date": "2025-12-23", |
|
76 | 77 | "latency_ttft_ms": 2170, |
77 | 78 | "source": "https://artificialanalysis.ai/models/minimax-m2-5" |
78 | 79 | }, |
79 | | - "editor_notes": "MiniMax's latest 229B MoE model focused on agentic coding. Officially reports 80.2% on SWE-Bench Verified with improved efficiency over M2.1.", |
| 80 | + "editor_notes": "MiniMax's latest 229B MoE model focused on agentic coding. Officially reports 80.2% on SWE-Bench Verified, with additional AIME and GPQA scores captured from MiniMax public release material.", |
80 | 81 | "benchmark_scores": { |
81 | 82 | "swe_bench": 80.2, |
82 | 83 | "terminal_bench": null, |
83 | 84 | "live_code_bench": null, |
84 | | - "gpqa_diamond": null, |
85 | | - "aime": null, |
| 85 | + "gpqa_diamond": 85.2, |
| 86 | + "aime": 86.3, |
86 | 87 | "mmlu_pro": null, |
87 | 88 | "humanity_last_exam": null, |
88 | 89 | "lmarena_coding_elo": null, |
|
202 | 203 | "latency_ttft_ms": 1650, |
203 | 204 | "source": "https://artificialanalysis.ai/models/claude-opus-4-6/providers" |
204 | 205 | }, |
205 | | - "editor_notes": "High-effort adaptive thinking profile for Claude Opus 4.6. Family-level benchmark claims are attributed here while base variant remains null to allow conservative inferior_of imputation.", |
| 206 | + "editor_notes": "High-effort adaptive thinking profile for Claude Opus 4.6. Family-level benchmark claims are attributed here while base variant remains null to allow conservative inferior_of imputation; SWE-Bench is populated from Anthropic's launch disclosure.", |
206 | 207 | "benchmark_scores": { |
207 | 208 | "aime": null, |
208 | 209 | "arc_agi_2": 68.8, |
|
388 | 389 | "latency_ttft_ms": 1200, |
389 | 390 | "source": "https://artificialanalysis.ai/models/claude-4-5-sonnet" |
390 | 391 | }, |
391 | | - "editor_notes": "Launch-day entry for Claude Sonnet 4.6. Benchmark values are intentionally null pending independent verification, and performance values are temporary placeholders from Sonnet 4.5 until provider measurements are published.", |
| 392 | + "editor_notes": "Base profile for Claude Sonnet 4.6. Family-level benchmark disclosures are attributed to the thinking variant, keeping base benchmarks null for conservative inferior_of imputation.", |
392 | 393 | "benchmark_scores": { |
393 | 394 | "aime": null, |
394 | 395 | "arc_agi_2": 58.3, |
|
452 | 453 | "latency_ttft_ms": 5500, |
453 | 454 | "source": "https://artificialanalysis.ai/models/claude-4-5-sonnet" |
454 | 455 | }, |
455 | | - "editor_notes": "High-effort adaptive thinking profile for Claude Sonnet 4.6. Benchmark values are intentionally null pending independent verification, and performance values are temporary placeholders from Sonnet 4.5 Thinking until provider measurements are published.", |
| 456 | + "editor_notes": "High-effort adaptive thinking profile for Claude Sonnet 4.6 with benchmark values populated from Anthropic's official system card (single-source fallback where independent mirrors are unavailable).", |
456 | 457 | "benchmark_scores": { |
457 | | - "aime": null, |
| 458 | + "aime": 95.6, |
458 | 459 | "arc_agi_2": 58.3, |
459 | 460 | "bfcl": null, |
460 | 461 | "frontiermath": null, |
461 | 462 | "gpqa_diamond": 89.9, |
462 | | - "humanity_last_exam": 49.0, |
| 463 | + "humanity_last_exam": 33.2, |
463 | 464 | "live_code_bench": null, |
464 | 465 | "livebench": null, |
465 | 466 | "lmarena_coding_elo": null, |
|
476 | 477 | "mmmlu": 89.3, |
477 | 478 | "simpleqa": null, |
478 | 479 | "mmmu": null, |
479 | | - "mmmu_pro": null, |
480 | | - "osworld": null, |
| 480 | + "mmmu_pro": 74.5, |
| 481 | + "osworld": 72.5, |
481 | 482 | "swe_bench": 79.6, |
482 | 483 | "tau_bench": null, |
483 | | - "terminal_bench": null, |
| 484 | + "terminal_bench": 59.1, |
484 | 485 | "webdev_arena_elo": null, |
485 | 486 | "livebench_reasoning": null, |
486 | 487 | "livebench_coding": null, |
|
626 | 627 | { |
627 | 628 | "id": "claude-opus-4-1-20250805", |
628 | 629 | "name": "Claude Opus 4.1", |
| 630 | + "aka": ["claude-opus-4-1", "claude-opus-4.1", "claude-4.1-opus", "opus-4.1"], |
629 | 631 | "provider": "Anthropic", |
630 | 632 | "type": "proprietary", |
631 | 633 | "release_date": "2025-08-05", |
|
681 | 683 | { |
682 | 684 | "id": "gpt-4o-2024-05-13", |
683 | 685 | "name": "GPT-4o", |
| 686 | + "aka": ["gpt-4o", "gpt4o", "gpt-4o-base"], |
684 | 687 | "provider": "OpenAI", |
685 | 688 | "type": "proprietary", |
686 | 689 | "release_date": "2024-05-13", |
|
1259 | 1262 | { |
1260 | 1263 | "id": "gemini-2.5-pro", |
1261 | 1264 | "name": "Gemini 2.5 Pro", |
| 1265 | + "aka": ["gemini-25-pro", "gemini-2-5-pro"], |
1262 | 1266 | "provider": "Google", |
1263 | 1267 | "type": "proprietary", |
1264 | 1268 | "release_date": "2025-03-25", |
|
1314 | 1318 | { |
1315 | 1319 | "id": "deepseek-v3.2", |
1316 | 1320 | "name": "DeepSeek V3.2", |
| 1321 | + "aka": ["deepseek-v3-2", "deepseek-v32", "deepseek-v3.2-base"], |
1317 | 1322 | "provider": "DeepSeek", |
1318 | 1323 | "type": "open-source", |
1319 | 1324 | "release_date": "2025-12-01", |
|
1369 | 1374 | { |
1370 | 1375 | "id": "deepseek-v3.2-thinking", |
1371 | 1376 | "name": "DeepSeek V3.2 Thinking", |
| 1377 | + "aka": ["deepseek-v3-2-thinking", "deepseek-v32-thinking", "deepseek-v3.2-reasoning"], |
1372 | 1378 | "superior_of": "deepseek-v3.2", |
1373 | 1379 | "provider": "DeepSeek", |
1374 | 1380 | "type": "open-source", |
|
1425 | 1431 | { |
1426 | 1432 | "id": "deepseek-r1", |
1427 | 1433 | "name": "DeepSeek R1", |
| 1434 | + "aka": ["deepseek-r1-2025-05-28", "deepseek-r1-0528", "deepseek-r1-reasoning"], |
1428 | 1435 | "provider": "DeepSeek", |
1429 | 1436 | "type": "open-source", |
1430 | 1437 | "release_date": "2025-05-28", |
|
1658 | 1665 | { |
1659 | 1666 | "id": "llama-4-maverick-17b-128e-instruct", |
1660 | 1667 | "name": "Llama 4 Maverick", |
| 1668 | + "aka": ["llama-4-maverick", "llama4-maverick", "llama-4-maverick-instruct"], |
1661 | 1669 | "provider": "Meta", |
1662 | 1670 | "type": "open-source", |
1663 | 1671 | "release_date": "2025-04-05", |
|
1722 | 1730 | { |
1723 | 1731 | "id": "llama-4-scout-17b-16e-instruct", |
1724 | 1732 | "name": "Llama 4 Scout", |
| 1733 | + "aka": ["llama-4-scout", "llama4-scout", "llama-4-scout-instruct"], |
1725 | 1734 | "provider": "Meta", |
1726 | 1735 | "type": "open-source", |
1727 | 1736 | "release_date": "2025-04-05", |
|
1786 | 1795 | { |
1787 | 1796 | "id": "qwen3-235b-a22b-instruct-2507", |
1788 | 1797 | "name": "Qwen3 235B", |
| 1798 | + "aka": ["qwen3-235b-a22b-instruct", "qwen3-235b", "qwen3-235b-2507"], |
1789 | 1799 | "provider": "Alibaba", |
1790 | 1800 | "type": "open-source", |
1791 | 1801 | "release_date": "2025-04-29", |
|
1841 | 1851 | { |
1842 | 1852 | "id": "qwen3-32b", |
1843 | 1853 | "name": "Qwen3 32B", |
| 1854 | + "aka": ["qwen-3-32b", "qwen3-32b-instruct", "qwen-32b"], |
1844 | 1855 | "provider": "Alibaba", |
1845 | 1856 | "type": "open-source", |
1846 | 1857 | "release_date": "2025-04-29", |
|
1905 | 1916 | { |
1906 | 1917 | "id": "qwen3-max-preview", |
1907 | 1918 | "name": "Qwen3 Max Preview", |
| 1919 | + "aka": ["qwen3-max", "qwen-max-preview", "qwen3-max-preview-2025"], |
1908 | 1920 | "provider": "Alibaba", |
1909 | 1921 | "type": "proprietary", |
1910 | 1922 | "release_date": "2025-12-01", |
|
1960 | 1972 | { |
1961 | 1973 | "id": "o3-2025-04-16", |
1962 | 1974 | "name": "OpenAI o3", |
| 1975 | + "aka": ["o3", "openai-o3"], |
1963 | 1976 | "provider": "OpenAI", |
1964 | 1977 | "type": "proprietary", |
1965 | 1978 | "release_date": "2025-04-16", |
|
2119 | 2132 | { |
2120 | 2133 | "id": "gemini-2.5-flash", |
2121 | 2134 | "name": "Gemini 2.5 Flash", |
| 2135 | + "aka": ["gemini-25-flash", "gemini-2-5-flash"], |
2122 | 2136 | "provider": "Google", |
2123 | 2137 | "type": "proprietary", |
2124 | 2138 | "release_date": "2025-05-20", |
|
2183 | 2197 | { |
2184 | 2198 | "id": "minimax-m2", |
2185 | 2199 | "name": "MiniMax M2", |
| 2200 | + "aka": ["minimax-m2.0", "minimax-m20", "m2"], |
2186 | 2201 | "provider": "MiniMax", |
2187 | 2202 | "type": "open-source", |
2188 | 2203 | "release_date": "2025-10-27", |
|
2425 | 2440 | { |
2426 | 2441 | "id": "kimi-k2.5-thinking", |
2427 | 2442 | "name": "Kimi K2.5 Thinking", |
| 2443 | + "aka": ["kimi-k25-thinking", "k2.5-thinking"], |
2428 | 2444 | "superior_of": "kimi-k2.5-instant", |
2429 | 2445 | "provider": "Moonshot AI", |
2430 | 2446 | "type": "open-source", |
|
2481 | 2497 | { |
2482 | 2498 | "id": "longcat-flash-chat", |
2483 | 2499 | "name": "Longcat Flash Chat", |
| 2500 | + "aka": ["longcat-flash", "longcat-chat", "longcat-flashchat"], |
2484 | 2501 | "provider": "Meituan", |
2485 | 2502 | "type": "open-source", |
2486 | 2503 | "release_date": "2025-12-01", |
|
2545 | 2562 | { |
2546 | 2563 | "id": "mistral-large-3", |
2547 | 2564 | "name": "Mistral Large 3", |
| 2565 | + "aka": ["mistral-large-3.0", "mistral-large3", "mistral-large-v3"], |
2548 | 2566 | "provider": "Mistral", |
2549 | 2567 | "type": "open-source", |
2550 | 2568 | "release_date": "2025-12-02", |
|
0 commit comments