2929
3030 th ,
3131 td {
32- text-align : left ;
32+ text-align : center ;
3333 width : fit-content;
3434 font-size : larger;
3535 }
@@ -127,16 +127,30 @@ <h1 class="text-nowrap mt-5" style="font-size: xx-large;">
127127
128128 < div class ="container d-flex flex-column align-items-center gap-3 mt-5 ">
129129 < h3 > Win-rate Leaderboard</ h3 >
130- < p > 📊 Ranking metrics: WR (Win-Rate; %) based on task- and model-wise competiton (i.e., pairwise DPS).</ p >
131- < p > 📝 Notes: the default prompt does not emphasize efficiency requirements as our work shows such emphasis
130+ < p class ="align-self-start "> 📊 Ranking metrics: WR (Win-Rate; %) based on task- and model-wise competition
131+ (i.e., pairwise DPS).</ p >
132+ < p class ="align-self-start "> 📝 Notes: the default prompt does not emphasize efficiency requirements as our
133+ work shows such emphasis
132134 might degrade both efficiency and correctness for some weak models. Yet, "(⏩)" marks models using
133135 performance-encouraging prompts as they might be able to accurately understand such needs.</ p >
136+ < div class ="align-self-start d-inline-flex gap-3 ">
137+ < p > 📐 Show more metrics: </ p >
138+ < div class ="form-check ">
139+ < input class ="form-check-input " type ="checkbox " id ="passAt1Checkbox ">
140+ < label class ="form-check-label fs-5 " for ="passAt1Checkbox "> pass@1</ label >
141+ </ div >
142+ < div class ="form-check ">
143+ < input class ="form-check-input " type ="checkbox " id ="dpsCheckbox ">
144+ < label class ="form-check-label fs-5 " for ="dpsCheckbox "> DPS</ label >
145+ </ div >
146+ </ div >
134147 < table id ="leaderboard "
135148 class ="table table-responsive table-striped table-bordered flex-shrink-1 border border-5 ">
136149 </ table >
137- < p > 🏪 The detailed model generation data and results are available at our page < a
150+ < p class =" align-self-start " > 🏪 The detailed model generation data and results are available at our page < a
138151 href ="https://github.com/evalplus/evalplus.github.io/tree/main/results/evalperf "> repository</ a > .</ p >
139- < p > 💸 We use 50 samples (half) for o1 model series for cost saving; also because it's easy to sample desired
152+ < p class ="align-self-start "> 💸 We use 50 samples (half) for o1 model series for cost saving; also because it's
153+ easy to sample desired
140154 number of correct samples from strong models using fewer tries.</ p >
141155
142156 < br >
@@ -218,10 +232,12 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
218232 </ div >
219233
220234 < script >
221- const contextTable = document . getElementById ( "leaderboard" ) ;
235+ const metricTable = document . getElementById ( "leaderboard" ) ;
222236 const linkMapping = new Map ( [ ] ) ;
223237 const hfLinkPrefix = "https://huggingface.co/" ;
224238 const dataUrlPrefix = "results/evalperf" ;
239+ const passCheckBox = document . getElementById ( "passAt1Checkbox" ) ;
240+ const dpsCheckBox = document . getElementById ( "dpsCheckbox" ) ;
225241
226242 // Load data
227243 var data = null ;
@@ -286,8 +302,8 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
286302 const theaders = [
287303 "#" , // rank
288304 "Model" , // model name
289- // "DPS",
290- // "pass@1",
305+ "DPS" ,
306+ "pass@1" ,
291307 "Task WR" , // task winrate
292308 winrate_tag , // computed over the same set of passing solutions
293309 ] ;
@@ -297,8 +313,17 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
297313 var headerRow = document . createElement ( "tr" ) ;
298314 // headers
299315 theaders . forEach ( function ( header ) {
316+ if ( header == "DPS" && ! dpsCheckBox . checked ) {
317+ return ;
318+ }
319+ if ( header == "pass@1" && ! passCheckBox . checked ) {
320+ return ;
321+ }
300322 var th = document . createElement ( "th" ) ;
301323 th . classList . add ( "text-nowrap" ) ;
324+ if ( header == "Model" ) {
325+ th . style . textAlign = "left" ;
326+ }
302327 th . textContent = header ;
303328
304329 if ( header == winrate_tag ) {
@@ -358,38 +383,48 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
358383 modelLink . classList . add ( "link-underline-primary" ) ;
359384 modelLink . classList . add ( "text-nowrap" ) ;
360385 modelCell . appendChild ( modelLink ) ;
386+ modelCell . style . textAlign = "left" ;
361387 dataRow . appendChild ( modelCell ) ;
362388
363- // dpsRow = document.createElement("td");
364- // dpsRow.textContent = row.get("dps").toFixed(1);
365- // dataRow.appendChild(dpsRow);
366- // passRow = document.createElement("td");
367- // passRow.textContent = row.get("pass@1").toFixed(1);
368- // dataRow.appendChild(passRow);
389+ if ( dpsCheckBox . checked ) {
390+ dpsRow = document . createElement ( "td" ) ;
391+ dpsRow . textContent = row . get ( "dps" ) . toFixed ( 1 ) ;
392+ dataRow . appendChild ( dpsRow ) ;
393+ }
394+ if ( passCheckBox . checked ) {
395+ passRow = document . createElement ( "td" ) ;
396+ passRow . textContent = row . get ( "pass@1" ) . toFixed ( 1 ) ;
397+ dataRow . appendChild ( passRow ) ;
398+ }
369399
370400 taskWinRateRow = document . createElement ( "td" ) ;
371401 taskWinRateRow . textContent = ( row . get ( 'task_win_rate' ) * 100 ) . toFixed ( 1 ) ;
402+ // center-align
372403 dataRow . appendChild ( taskWinRateRow ) ;
373404
374405
375406 modelWinRateRow = document . createElement ( "td" ) ;
376407 modelWinRateRow . textContent = ( row . get ( 'model_win_rate' ) * 100 ) . toFixed ( 1 ) ;
377408 modelWinRateRow . style . backgroundColor = "#EEFFEE" ;
409+ // center-align
378410 dataRow . appendChild ( modelWinRateRow ) ;
379411 tbody . appendChild ( dataRow ) ;
380412 } ) ;
381413 table . appendChild ( tbody ) ;
382414 } ;
383415
384416 const clearTable = ( ) => {
385- contextTable . innerHTML = "" ;
417+ metricTable . innerHTML = "" ;
386418 } ;
387419
388420 const main = ( ) => {
389421 clearTable ( ) ;
390- displayTable ( contextTable ) ;
422+ displayTable ( metricTable ) ;
391423 } ;
392424
425+ passCheckBox . addEventListener ( "change" , main ) ;
426+ dpsCheckBox . addEventListener ( "change" , main ) ;
427+
393428 main ( ) ;
394429
395430 initializeHeatmap ( ) ;
0 commit comments