44        "open-data" : " None" 
55        "pass@1" : {
66            "instruct" : null ,
7-             "complete" : 41.7 
7+             "complete" : 38.73 
88        },
9-         "prompted" : false ,
9+         "prompted" : true ,
1010        "size" : 34 ,
11-         "direct_complete" : true ,
11+         "direct_complete" : false ,
1212        "lazy" : false ,
1313        "elo_mle" : 942 
1414    },
15-     "CodeLlama-13B-Python " : {
16-         "link" : " https://huggingface.co/codellama/CodeLlama-13b-hf " 
15+     "Meta-Llama-3-70B " : {
16+         "link" : " https://huggingface.co/meta-llama/Meta-Llama-3-70B " 
1717        "open-data" : " None" 
1818        "pass@1" : {
1919            "instruct" : null ,
20-             "complete" : 40.0 
20+             "complete" : 48.98 
2121        },
2222        "prompted" : false ,
23-         "size" : 13 ,
24-         "direct_complete" : true ,
23+         "size" : 70 ,
24+         "direct_complete" : false ,
25+         "lazy" : false ,
26+         "elo_mle" : 874 
27+     },
28+     "Meta-Llama-3-70B-Instruct" : {
29+         "link" : " https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct" 
30+         "open-data" : " None" 
31+         "pass@1" : {
32+             "instruct" : null ,
33+             "complete" : 62.45 
34+         },
35+         "prompted" : true ,
36+         "size" : 70 ,
37+         "direct_complete" : false ,
38+         "lazy" : false ,
39+         "elo_mle" : 874 
40+     },
41+     "Meta-Llama-3.1-70B-Instruct" : {
42+         "link" : " https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct" 
43+         "open-data" : " None" 
44+         "pass@1" : {
45+             "instruct" : null ,
46+             "complete" : 60 
47+         },
48+         "prompted" : true ,
49+         "size" : 70 ,
50+         "direct_complete" : false ,
2551        "lazy" : false ,
2652        "elo_mle" : 874 
2753    },
28-     "CodeQwen1.5-7B " : {
29-         "link" : " https://huggingface.co/Qwen/CodeQwen1.5-7B " 
54+     "Meta-Llama-3.1-70B " : {
55+         "link" : " https://huggingface.co/meta-llama/Llama-3.1-70B " 
3056        "open-data" : " None" 
3157        "pass@1" : {
3258            "instruct" : null ,
33-             "complete" : 31.8 
59+             "complete" : 37.56 
3460        },
3561        "prompted" : false ,
62+         "size" : 70 ,
63+         "direct_complete" : false ,
64+         "lazy" : false ,
65+         "elo_mle" : 874 
66+     },
67+     "Mistral-7B-Instruct-v0.3" : {
68+         "link" : " https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" 
69+         "open-data" : " None" 
70+         "pass@1" : {
71+             "instruct" : null ,
72+             "complete" : 43.33 
73+         },
74+         "prompted" : true ,
3675        "size" : 7 ,
37-         "direct_complete" : true ,
76+         "direct_complete" : false ,
3877        "lazy" : false ,
39-         "elo_mle" : 1056 
78+         "elo_mle" : 874 
4079    },
41-     "DeepSeek-Coder-33B-Base " : {
42-         "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-33b-base " 
80+     "Mixtral-8x7B-Instruct-v0.1 " : {
81+         "link" : " https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1 " 
4382        "open-data" : " None" 
4483        "pass@1" : {
4584            "instruct" : null ,
46-             "complete" : 33.5 
85+             "complete" : 42.96 
4786        },
48-         "prompted" : false ,
49-         "size" : 33 ,
50-         "direct_complete" : true ,
87+         "prompted" : true ,
88+         "size" : 7 ,
89+         "direct_complete" : false ,
90+         "lazy" : false ,
91+         "elo_mle" : 874 
92+     },
93+     "Codestral-22B-v0.1" : {
94+         "link" : " https://huggingface.co/mistralai/Codestral-22B-v0.1" 
95+         "open-data" : " None" 
96+         "pass@1" : {
97+             "instruct" : null ,
98+             "complete" : 47.6 
99+         },
100+         "prompted" : true ,
101+         "size" : 22 ,
102+         "direct_complete" : false ,
51103        "lazy" : false ,
52-         "elo_mle" : 1064 
104+         "elo_mle" : 874 
53105    },
54-     "StarCoder2-15B " : {
55-         "link" : " https://huggingface.co/bigcode/starcoder2-15b " 
56-         "open-data" : " Full " 
106+     "Phi-3-medium-128k-instruct " : {
107+         "link" : " https://huggingface.co/microsoft/Phi-3-medium-128k-instruct " 
108+         "open-data" : " None " 
57109        "pass@1" : {
58110            "instruct" : null ,
59-             "complete" : 28.2 
111+             "complete" : 48.03 
60112        },
61-         "prompted" : false ,
62-         "size" : 15 ,
63-         "direct_complete" : true ,
113+         "prompted" : true ,
114+         "size" : 14 ,
115+         "direct_complete" : false ,
64116        "lazy" : false ,
65-         "elo_mle" : 960 
117+         "elo_mle" : 874 
66118    },
67-     "DeepSeek-Coder-6.7B-Base " : {
68-         "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base " 
119+     "Phi-3-mini-128k-instruct " : {
120+         "link" : " https://huggingface.co/microsoft/Phi-3-mini-128k-instruct " 
69121        "open-data" : " None" 
70122        "pass@1" : {
71123            "instruct" : null ,
72-             "complete" : 28.4 
124+             "complete" : 37.93 
73125        },
74-         "prompted" : false ,
75-         "size" : 6.7 ,
76-         "direct_complete" : true ,
126+         "prompted" : true ,
127+         "size" : 3.8 ,
128+         "direct_complete" : false ,
77129        "lazy" : false ,
78-         "elo_mle" : 1002 
130+         "elo_mle" : 874 
79131    },
80-     "DeepSeek-Coder-33B -Instruct" : {
81-         "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct " 
132+     "Qwen2-57B-A14B -Instruct" : {
133+         "link" : " https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct " 
82134        "open-data" : " None" 
83135        "pass@1" : {
84136            "instruct" : null ,
85-             "complete" : 33.5 
137+             "complete" : 46.34 
86138        },
87139        "prompted" : true ,
88-         "size" : 33 ,
140+         "size" : 57 ,
89141        "direct_complete" : false ,
90142        "lazy" : false ,
91-         "elo_mle" : 1129 
143+         "elo_mle" : 874 
92144    },
93-     "Yi-1 .5-34B " : {
94-         "link" : " https://huggingface.co/01-ai/Yi-1 .5-34B " 
145+     "CodeQwen1 .5-7B-Chat " : {
146+         "link" : " https://huggingface.co/Qwen/CodeQwen1 .5-7B-Chat " 
95147        "open-data" : " None" 
96148        "pass@1" : {
97149            "instruct" : null ,
98-             "complete" : 34.9 
150+             "complete" : 49.82 
99151        },
100-         "prompted" : false ,
152+         "prompted" : true ,
153+         "size" : 7 ,
154+         "direct_complete" : false ,
155+         "lazy" : false ,
156+         "elo_mle" : 874 
157+     },
158+     "Yi-1.5-34B-Chat" : {
159+         "link" : " https://huggingface.co/01-ai/Yi-1.5-34B-Chat" 
160+         "open-data" : " None" 
161+         "pass@1" : {
162+             "instruct" : null ,
163+             "complete" : 49.39 
164+         },
165+         "prompted" : true ,
101166        "size" : 34 ,
102-         "direct_complete" : true ,
167+         "direct_complete" : false ,
168+         "lazy" : false ,
169+         "elo_mle" : 874 
170+     },
171+     "Yi-1.5-9B-Chat" : {
172+         "link" : " https://huggingface.co/01-ai/Yi-1.5-9B-Chat" 
173+         "open-data" : " None" 
174+         "pass@1" : {
175+             "instruct" : null ,
176+             "complete" : 47.23 
177+         },
178+         "prompted" : true ,
179+         "size" : 9 ,
180+         "direct_complete" : false ,
181+         "lazy" : false ,
182+         "elo_mle" : 874 
183+     },
184+     "DeepSeek-coder-7b-instruct-v1.5" : {
185+         "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-7b-instruct-v1.5" 
186+         "open-data" : " None" 
187+         "pass@1" : {
188+             "instruct" : null ,
189+             "complete" : 41.21 
190+         },
191+         "prompted" : true ,
192+         "size" : 7 ,
193+         "direct_complete" : false ,
103194        "lazy" : false ,
104-         "elo_mle" : 978 
195+         "elo_mle" : 874 
105196    },
106-     "OpenCodeInterpreter-DS-33B " : {
107-         "link" : " https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-33B " 
108-         "open-data" : " Partial " 
197+     "DeepSeek-coder-33b-instruct " : {
198+         "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct " 
199+         "open-data" : " None " 
109200        "pass@1" : {
110201            "instruct" : null ,
111-             "complete" : 31.0 
202+             "complete" : 36.6 
112203        },
113204        "prompted" : true ,
114205        "size" : 33 ,
115-         "direct_complete" : true ,
206+         "direct_complete" : false ,
116207        "lazy" : false ,
117-         "elo_mle" : 1131 
208+         "elo_mle" : 874 
118209    },
119-     "To be updated " : {
120-         "link" : " " 
210+     "DeepSeek-moe-16b-chat " : {
211+         "link" : " https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat " 
121212        "open-data" : " None" 
122213        "pass@1" : {
123214            "instruct" : null ,
124-             "complete" : 0 
215+             "complete" : 31.01 
125216        },
126-         "prompted" : false ,
217+         "prompted" : true ,
218+         "size" : 16.4 ,
219+         "direct_complete" : false ,
220+         "lazy" : false ,
221+         "elo_mle" : 874 
222+     },
223+     "DeepSeek-Coder-V2-Lite-Instruct" : {
224+         "link" : " https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct" 
225+         "open-data" : " None" 
226+         "pass@1" : {
227+             "instruct" : null ,
228+             "complete" : 46.51 
229+         },
230+         "prompted" : true ,
231+         "size" : 16 ,
232+         "direct_complete" : false ,
233+         "lazy" : false ,
234+         "elo_mle" : 874 
235+     },
236+     "InternLM2-5-20b-chat" : {
237+         "link" : " https://huggingface.co/internlm/internlm2_5-20b-chat" 
238+         "open-data" : " None" 
239+         "pass@1" : {
240+             "instruct" : null ,
241+             "complete" : 44.89 
242+         },
243+         "prompted" : true ,
244+         "size" : 20 ,
245+         "direct_complete" : false ,
246+         "lazy" : false ,
247+         "elo_mle" : 874 
248+     },
249+     "StarCoder2-15b-instruct-v0.1" : {
250+         "link" : " https://huggingface.co/bigcode/starcoder2-15b-instruct-v0.1" 
251+         "open-data" : " None" 
252+         "pass@1" : {
253+             "instruct" : null ,
254+             "complete" : 47.94 
255+         },
256+         "prompted" : true ,
127257        "size" : 15 ,
128-         "direct_complete" : true ,
258+         "direct_complete" : false ,
259+         "lazy" : false ,
260+         "elo_mle" : 874 
261+     },
262+     "Claude-3-sonnet@20240229" : {
263+         "link" : " " 
264+         "open-data" : " None" 
265+         "pass@1" : {
266+             "instruct" : null ,
267+             "complete" : 53.97 
268+         },
269+         "prompted" : true ,
270+         "size" : " None" 
271+         "direct_complete" : false ,
272+         "lazy" : false ,
273+         "elo_mle" : 874 
274+     },
275+     "GPT-4o-2024-05-13" : {
276+         "link" : " " 
277+         "open-data" : " None" 
278+         "pass@1" : {
279+             "instruct" : null ,
280+             "complete" : 67 
281+         },
282+         "prompted" : true ,
283+         "size" : " None" 
284+         "direct_complete" : false ,
285+         "lazy" : false ,
286+         "elo_mle" : 874 
287+     },
288+     "GPT-3.5-turbo-0613" : {
289+         "link" : " " 
290+         "open-data" : " None" 
291+         "pass@1" : {
292+             "instruct" : null ,
293+             "complete" : 51.7 
294+         },
295+         "prompted" : true ,
296+         "size" : " None" 
297+         "direct_complete" : false ,
129298        "lazy" : false ,
130-         "elo_mle" : 960 
299+         "elo_mle" : 874 
131300    }
132301}
0 commit comments