Skip to content

Commit a802e65

Browse files
authored
fix score inconsistency of new versions
1 parent 56ce8f1 commit a802e65

File tree

1 file changed

+28
-28
lines changed

1 file changed

+28
-28
lines changed

results.json

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@
6565
"pass@1": {
6666
"humaneval": 24.4,
6767
"humaneval+": 22.6,
68-
"mbpp": null,
69-
"mbpp+": null
68+
"mbpp": 46.3,
69+
"mbpp+": 36.0
7070
},
7171
"prompted": false,
7272
"size": 2.0
@@ -219,8 +219,8 @@
219219
"link": "https://huggingface.co/Qwen/Qwen1.5-72B-Chat",
220220
"open-data": "NONE",
221221
"pass@1": {
222-
"humaneval": 67.1,
223-
"humaneval+": 58.5,
222+
"humaneval": 68.3,
223+
"humaneval+": 59.1,
224224
"mbpp": 72.5,
225225
"mbpp+": 61.6
226226
},
@@ -245,8 +245,8 @@
245245
"pass@1": {
246246
"humaneval": 25.0,
247247
"humaneval+": 22.0,
248-
"mbpp": 48.1,
249-
"mbpp+": 37.8
248+
"mbpp": 48.4,
249+
"mbpp+": 38.1
250250
},
251251
"prompted": false,
252252
"size": 2.0
@@ -351,7 +351,7 @@
351351
"link": "https://huggingface.co/deepseek-ai/deepseek-coder-7b-instruct-v1.5",
352352
"open-data": "NONE",
353353
"pass@1": {
354-
"humaneval": 75.0,
354+
"humaneval": 75.6,
355355
"humaneval+": 71.3,
356356
"mbpp": 75.2,
357357
"mbpp+": 62.2
@@ -495,8 +495,8 @@
495495
"link": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct",
496496
"open-data": "NONE",
497497
"pass@1": {
498-
"humaneval": 76.2,
499-
"humaneval+": 70.7,
498+
"humaneval": 77.4,
499+
"humaneval+": 72.0,
500500
"mbpp": 82.3,
501501
"mbpp+": 69.0
502502
},
@@ -627,8 +627,8 @@
627627
"link": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
628628
"open-data": "NONE",
629629
"pass@1": {
630-
"humaneval": 75.0,
631-
"humaneval+": 70.1,
630+
"humaneval": 42.1,
631+
"humaneval+": 36.0,
632632
"mbpp": 44.7,
633633
"mbpp+": 37.0
634634
},
@@ -639,8 +639,8 @@
639639
"link": "https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1",
640640
"open-data": "NONE",
641641
"pass@1": {
642-
"humaneval": 39.6,
643-
"humaneval+": 34.1,
642+
"humaneval": 76.2,
643+
"humaneval+": 72.0,
644644
"mbpp": 73.8,
645645
"mbpp+": 64.3
646646
},
@@ -688,7 +688,7 @@
688688
"open-data": "Partial",
689689
"pass@1": {
690690
"humaneval": 77.4,
691-
"humaneval+": 72.0,
691+
"humaneval+": 73.8,
692692
"mbpp": 76.5,
693693
"mbpp+": 66.4
694694
},
@@ -784,7 +784,7 @@
784784
"open-data": "Full",
785785
"pass@1": {
786786
"humaneval": 73.8,
787-
"humaneval+": 70.7,
787+
"humaneval+": 71.3,
788788
"mbpp": 74.9,
789789
"mbpp+": 64.6
790790
},
@@ -976,7 +976,7 @@
976976
"open-data": "NONE",
977977
"pass@1": {
978978
"humaneval": 75.6,
979-
"humaneval+": 68.9,
979+
"humaneval+": 69.5,
980980
"mbpp": 77.0,
981981
"mbpp+": 64.8
982982
},
@@ -1073,8 +1073,8 @@
10731073
"pass@1": {
10741074
"humaneval": 26.8,
10751075
"humaneval+": 20.7,
1076-
"mbpp": null,
1077-
"mbpp+": null
1076+
"mbpp": 55.6,
1077+
"mbpp+": 46.6
10781078
},
10791079
"prompted": false,
10801080
"size": 2.0
@@ -1083,10 +1083,10 @@
10831083
"link": "https://huggingface.co/google/codegemma-7b",
10841084
"open-data": "NONE",
10851085
"pass@1": {
1086-
"humaneval": 40.2,
1087-
"humaneval+": 29.9,
1088-
"mbpp": null,
1089-
"mbpp+": null
1086+
"humaneval": 44.5,
1087+
"humaneval+": 41.5,
1088+
"mbpp": 65.1,
1089+
"mbpp+": 52.4
10901090
},
10911091
"prompted": false,
10921092
"size": 7.0
@@ -1096,9 +1096,9 @@
10961096
"open-data": "NONE",
10971097
"pass@1": {
10981098
"humaneval": 60.4,
1099-
"humaneval+": 51.2,
1100-
"mbpp": null,
1101-
"mbpp+": null
1099+
"humaneval+": 51.8,
1100+
"mbpp": 70.4,
1101+
"mbpp+": 56.9
11021102
},
11031103
"prompted": true,
11041104
"size": 7.0
@@ -1167,8 +1167,8 @@
11671167
"link": "https://huggingface.co/google/gemma-7b",
11681168
"open-data": "NONE",
11691169
"pass@1": {
1170-
"humaneval": 26.8,
1171-
"humaneval+": 20.1,
1170+
"humaneval": 35.4,
1171+
"humaneval+": 28.7,
11721172
"mbpp": 52.6,
11731173
"mbpp+": 43.4
11741174
},
@@ -1180,7 +1180,7 @@
11801180
"open-data": "NONE",
11811181
"pass@1": {
11821182
"humaneval": 28.7,
1183-
"humaneval+": 24.4,
1183+
"humaneval+": 25.0,
11841184
"mbpp": 47.1,
11851185
"mbpp+": 36.8
11861186
},

0 commit comments

Comments
 (0)