Skip to content

Commit 970429d

Browse files
authored
update some models and scores
1 parent d3d769b commit 970429d

File tree

1 file changed

+40
-4
lines changed

1 file changed

+40
-4
lines changed

results.json

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,42 @@
371371
"prompted": true,
372372
"size": 7.0
373373
},
374+
"O1 Mini (Sept 2024)": {
375+
"link": "https://platform.openai.com/docs/models/",
376+
"open-data": "NONE",
377+
"pass@1": {
378+
"humaneval": 96.3,
379+
"humaneval+": 89.0,
380+
"mbpp": 93.1,
381+
"mbpp+": 78.8
382+
},
383+
"prompted": true,
384+
"size": null
385+
},
386+
"GPT 4o (Aug 2024)": {
387+
"link": "https://platform.openai.com/docs/models/",
388+
"open-data": "NONE",
389+
"pass@1": {
390+
"humaneval": 92.7,
391+
"humaneval+": 87.2,
392+
"mbpp": 87.6,
393+
"mbpp+": 72.2
394+
},
395+
"prompted": true,
396+
"size": null
397+
},
398+
"GPT 4o Mini (July 2024)": {
399+
"link": "https://platform.openai.com/docs/models/",
400+
"open-data": "NONE",
401+
"pass@1": {
402+
"humaneval": 88.4,
403+
"humaneval+": 83.5,
404+
"mbpp": 85.4,
405+
"mbpp+": 72.2
406+
},
407+
"prompted": true,
408+
"size": null
409+
},
374410
"GPT-3.5 (May 2023)": {
375411
"link": "https://openai.com/blog/chatgpt",
376412
"open-data": "NONE",
@@ -533,8 +569,8 @@
533569
"pass@1": {
534570
"humaneval": 69.5,
535571
"humaneval+": 62.8,
536-
"mbpp": 66.4,
537-
"mbpp+": 54.0
572+
"mbpp": 68.3,
573+
"mbpp+": 55.6
538574
},
539575
"prompted": true,
540576
"size": 8.0
@@ -545,8 +581,8 @@
545581
"pass@1": {
546582
"humaneval": 61.6,
547583
"humaneval+": 56.7,
548-
"mbpp": 70.1,
549-
"mbpp+": 59.3
584+
"mbpp": 64.6,
585+
"mbpp+": 54.8
550586
},
551587
"prompted": true,
552588
"size": 8.0

0 commit comments

Comments
 (0)