|
3 | 3 | "dps": 79.85512300275154, |
4 | 4 | "dps_norm": 76.4398130083651, |
5 | 5 | "pass@1": 77.63559322033899, |
6 | | - "win_rate": 0.49004524886877826 |
| 6 | + "win_rate": 0.4936570428696413 |
| 7 | + }, |
| 8 | + "Qwen--CodeQwen1.5-7B-Chat": { |
| 9 | + "dps": 80.70604298474056, |
| 10 | + "dps_norm": 77.70445772388321, |
| 11 | + "pass@1": 74.11016949152543, |
| 12 | + "win_rate": 0.3984565393988627 |
7 | 13 | }, |
8 | 14 | "mistralai--Codestral-22B-v0.1": { |
9 | 15 | "dps": 82.68924330620962, |
10 | 16 | "dps_norm": 79.07819827252048, |
11 | 17 | "pass@1": 72.83898305084746, |
12 | | - "win_rate": 0.48872506504770163 |
| 18 | + "win_rate": 0.4920368818105616 |
13 | 19 | }, |
14 | 20 | "Qwen--Qwen2.5-14B-Instruct": { |
15 | 21 | "dps": 82.48566826452165, |
16 | 22 | "dps_norm": 77.66575182191508, |
17 | 23 | "pass@1": 82.48305084745763, |
18 | | - "win_rate": 0.5291380008093889 |
| 24 | + "win_rate": 0.5283979631805719 |
19 | 25 | }, |
20 | 26 | "deepseek-ai--deepseek-coder-33b-instruct": { |
21 | 27 | "dps": 82.65680460618937, |
22 | 28 | "dps_norm": 77.67106700323255, |
23 | 29 | "pass@1": 77.50847457627118, |
24 | | - "win_rate": 0.46629901960784315 |
| 30 | + "win_rate": 0.46857707509881424 |
25 | 31 | }, |
26 | 32 | "01-ai--Yi-Coder-9B-Chat": { |
27 | 33 | "dps": 83.60305818801254, |
28 | 34 | "dps_norm": 78.76801867108128, |
29 | 35 | "pass@1": 78.80508474576271, |
30 | | - "win_rate": 0.4875316990701606 |
| 36 | + "win_rate": 0.4916189697465249 |
31 | 37 | }, |
32 | 38 | "google--gemma-2-27b-it": { |
33 | 39 | "dps": 84.20223252621882, |
34 | 40 | "dps_norm": 78.67511770954597, |
35 | 41 | "pass@1": 80.23728813559322, |
36 | | - "win_rate": 0.5551975945017182 |
37 | | - }, |
38 | | - "Qwen--Qwen2.5-Coder-7B-Instruct": { |
39 | | - "dps": 79.81023172966313, |
40 | | - "dps_norm": 76.74937875060594, |
41 | | - "pass@1": 76.42372881355932, |
42 | | - "win_rate": 0.3791905151267375 |
| 42 | + "win_rate": 0.5566625155666252 |
43 | 43 | }, |
44 | 44 | "Qwen--Qwen2.5-32B-Instruct": { |
45 | 45 | "dps": 83.54712283112897, |
46 | 46 | "dps_norm": 78.3588319852899, |
47 | 47 | "pass@1": 87.5677966101695, |
48 | | - "win_rate": 0.5474555735056543 |
| 48 | + "win_rate": 0.5515826494724502 |
49 | 49 | }, |
50 | 50 | "mistralai--Mistral-Nemo-Instruct-2407": { |
51 | 51 | "dps": 81.98436677712466, |
52 | 52 | "dps_norm": 77.61969110961331, |
53 | 53 | "pass@1": 57.389830508474574, |
54 | | - "win_rate": 0.4432494279176201 |
| 54 | + "win_rate": 0.44836716681376876 |
55 | 55 | }, |
56 | 56 | "Qwen--Qwen2.5-72B-Instruct": { |
57 | 57 | "dps": 84.69258296490358, |
58 | 58 | "dps_norm": 79.00610315513151, |
59 | 59 | "pass@1": 88.27966101694915, |
60 | | - "win_rate": 0.5433430111986728 |
| 60 | + "win_rate": 0.5473515248796148 |
61 | 61 | }, |
62 | 62 | "gpt-4o-mini-2024-07-18": { |
63 | 63 | "dps": 84.19186096830988, |
64 | 64 | "dps_norm": 79.21827803090933, |
65 | 65 | "pass@1": 85.51694915254237, |
66 | | - "win_rate": 0.5626810095159288 |
| 66 | + "win_rate": 0.5631009615384616 |
67 | 67 | }, |
68 | 68 | "Qwen--Qwen2.5-7B-Instruct": { |
69 | 69 | "dps": 84.68492179229716, |
70 | 70 | "dps_norm": 79.33664874489173, |
71 | 71 | "pass@1": 80.02542372881356, |
72 | | - "win_rate": 0.5323045267489712 |
| 72 | + "win_rate": 0.5342220453641067 |
73 | 73 | }, |
74 | 74 | "gemini-1.5-pro-002": { |
75 | 75 | "dps": 76.26393608564656, |
76 | 76 | "dps_norm": 75.16850711244093, |
77 | 77 | "pass@1": 83.71186440677967, |
78 | | - "win_rate": 0.44697294976384716 |
| 78 | + "win_rate": 0.4499168744804655 |
79 | 79 | }, |
80 | 80 | "deepseek-ai--deepseek-coder-6.7b-instruct": { |
81 | 81 | "dps": 83.55516548026816, |
82 | 82 | "dps_norm": 78.20569308671841, |
83 | 83 | "pass@1": 73.57627118644068, |
84 | | - "win_rate": 0.47451820128479655 |
| 84 | + "win_rate": 0.4788907284768212 |
85 | 85 | }, |
86 | 86 | "nvidia--Llama-3.1-Nemotron-70B-Instruct-HF": { |
87 | 87 | "dps": 78.40270067725943, |
88 | 88 | "dps_norm": 76.22281062678165, |
89 | 89 | "pass@1": 61.83050847457627, |
90 | | - "win_rate": 0.4121043627031651 |
| 90 | + "win_rate": 0.4110099337748344 |
91 | 91 | }, |
92 | 92 | "ise-uiuc--Magicoder-S-DS-6.7B": { |
93 | 93 | "dps": 83.62957240263601, |
94 | 94 | "dps_norm": 78.58003556526222, |
95 | 95 | "pass@1": 69.83898305084746, |
96 | | - "win_rate": 0.45699152542372884 |
| 96 | + "win_rate": 0.45927138763814984 |
97 | 97 | }, |
98 | 98 | "google--gemma-2-9b-it": { |
99 | 99 | "dps": 82.35863751376931, |
100 | 100 | "dps_norm": 78.14625766928611, |
101 | 101 | "pass@1": 68.07627118644068, |
102 | | - "win_rate": 0.5287128712871287 |
| 102 | + "win_rate": 0.5286123032904149 |
103 | 103 | }, |
104 | 104 | "mistralai--Mistral-Large-Instruct-2407": { |
105 | 105 | "dps": 85.58694758404829, |
106 | 106 | "dps_norm": 80.65167529745199, |
107 | 107 | "pass@1": 82.4322033898305, |
108 | | - "win_rate": 0.578808752025932 |
| 108 | + "win_rate": 0.5823852491173009 |
109 | 109 | }, |
110 | 110 | "deepseek-ai--DeepSeek-Coder-V2-Lite-Instruct": { |
111 | 111 | "dps": 82.81873138697289, |
112 | 112 | "dps_norm": 78.09379634032757, |
113 | 113 | "pass@1": 79.04237288135593, |
114 | | - "win_rate": 0.5301230377598642 |
| 114 | + "win_rate": 0.5299548625359048 |
| 115 | + }, |
| 116 | + "meta-llama--Meta-Llama-3-8B-Instruct": { |
| 117 | + "dps": 77.04079291937829, |
| 118 | + "dps_norm": 75.1570617360002, |
| 119 | + "pass@1": 43.69491525423729, |
| 120 | + "win_rate": 0.4024024024024024 |
115 | 121 | }, |
116 | 122 | "deepseek-chat": { |
117 | 123 | "dps": 86.84652683144942, |
118 | 124 | "dps_norm": 79.08774817854689, |
119 | 125 | "pass@1": 88.51694915254237, |
120 | | - "win_rate": 0.5940152801358234 |
| 126 | + "win_rate": 0.5948665297741274 |
121 | 127 | }, |
122 | 128 | "gpt-4o-2024-08-06": { |
123 | 129 | "dps": 87.8465793074948, |
124 | 130 | "dps_norm": 82.23308124778713, |
125 | 131 | "pass@1": 86.65254237288136, |
126 | | - "win_rate": 0.6191666666666666 |
| 132 | + "win_rate": 0.6226795803066989 |
127 | 133 | }, |
128 | 134 | "meta-llama--Llama-3.1-70B-Instruct": { |
129 | 135 | "dps": 80.40042339048047, |
130 | 136 | "dps_norm": 77.29818188061222, |
131 | 137 | "pass@1": 77.26271186440678, |
132 | | - "win_rate": 0.452760736196319 |
| 138 | + "win_rate": 0.45465346534653467 |
133 | 139 | }, |
134 | 140 | "mistralai--Mistral-Small-Instruct-2409": { |
135 | 141 | "dps": 82.25625421641564, |
136 | 142 | "dps_norm": 78.98044554401069, |
137 | 143 | "pass@1": 69.51694915254237, |
138 | | - "win_rate": 0.47498931167165453 |
| 144 | + "win_rate": 0.4797436957420422 |
139 | 145 | }, |
140 | 146 | "meta-llama--Llama-3.1-8B-Instruct": { |
141 | 147 | "dps": 80.93273308299496, |
142 | 148 | "dps_norm": 76.57037397653662, |
143 | 149 | "pass@1": 64.34745762711864, |
144 | | - "win_rate": 0.39338071968014215 |
| 150 | + "win_rate": 0.3991434689507495 |
145 | 151 | } |
146 | 152 | } |
0 commit comments