Skip to content

Commit 7979707

Browse files
FatPigeorz and ganler authored
feat: optionally display pass@1 and dps metric (#25)
* minor: text center for table * minor: only metric center * evalperf: add metrics display options and center-align table headers * minor: align text for leaderboard section and improve readability * tweak --------- Co-authored-by: ganler <[email protected]>
1 parent 608ede5 commit 7979707

File tree

1 file changed

+51
-16
lines changed

1 file changed

+51
-16
lines changed

evalperf.html

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
th,
3131
td {
32-
text-align: left;
32+
text-align: center;
3333
width: fit-content;
3434
font-size: larger;
3535
}
@@ -127,16 +127,30 @@ <h1 class="text-nowrap mt-5" style="font-size: xx-large;">
127127

128128
<div class="container d-flex flex-column align-items-center gap-3 mt-5">
129129
<h3>Win-rate Leaderboard</h3>
130-
<p>📊 Ranking metrics: WR (Win-Rate; %) based on task- and model-wise competition (i.e., pairwise DPS).</p>
131-
<p>📝 Notes: the default prompt does not emphasize efficiency requirements as our work shows such emphasis
130+
<p class="align-self-start">📊 Ranking metrics: WR (Win-Rate; %) based on task- and model-wise competition
131+
(i.e., pairwise DPS).</p>
132+
<p class="align-self-start">📝 Notes: the default prompt does not emphasize efficiency requirements as our
133+
work shows such emphasis
132134
might degrade both efficiency and correctness for some weak models. Yet, "(⏩)" marks models using
133135
performance-encouraging prompts as they might be able to accurately understand such needs.</p>
136+
<div class="align-self-start d-inline-flex gap-3">
137+
<p>📐 Show more metrics: </p>
138+
<div class="form-check">
139+
<input class="form-check-input" type="checkbox" id="passAt1Checkbox">
140+
<label class="form-check-label fs-5" for="passAt1Checkbox">pass@1</label>
141+
</div>
142+
<div class="form-check">
143+
<input class="form-check-input" type="checkbox" id="dpsCheckbox">
144+
<label class="form-check-label fs-5" for="dpsCheckbox">DPS</label>
145+
</div>
146+
</div>
134147
<table id="leaderboard"
135148
class="table table-responsive table-striped table-bordered flex-shrink-1 border border-5">
136149
</table>
137-
<p>🏪 The detailed model generation data and results are available at our page <a
150+
<p class="align-self-start">🏪 The detailed model generation data and results are available at our page <a
138151
href="https://github.com/evalplus/evalplus.github.io/tree/main/results/evalperf">repository</a>.</p>
139-
<p>💸 We use 50 samples (half) for o1 model series for cost saving; also because it's easy to sample desired
152+
<p class="align-self-start">💸 We use 50 samples (half) for o1 model series for cost saving; also because it's
153+
easy to sample desired
140154
amount of correct samples from strong models using fewer tries.</p>
141155

142156
<br>
@@ -218,10 +232,12 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
218232
</div>
219233

220234
<script>
221-
const contextTable = document.getElementById("leaderboard");
235+
const metricTable = document.getElementById("leaderboard");
222236
const linkMapping = new Map([]);
223237
const hfLinkPrefix = "https://huggingface.co/";
224238
const dataUrlPrefix = "results/evalperf";
239+
const passCheckBox = document.getElementById("passAt1Checkbox");
240+
const dpsCheckBox = document.getElementById("dpsCheckbox");
225241

226242
// Load data
227243
var data = null;
@@ -286,8 +302,8 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
286302
const theaders = [
287303
"#", // rank
288304
"Model", // model name
289-
// "DPS",
290-
// "pass@1",
305+
"DPS",
306+
"pass@1",
291307
"Task WR", // task winrate
292308
winrate_tag, // computed over the same set of passing solutions
293309
];
@@ -297,8 +313,17 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
297313
var headerRow = document.createElement("tr");
298314
// headers
299315
theaders.forEach(function (header) {
316+
if (header == "DPS" && !dpsCheckBox.checked) {
317+
return;
318+
}
319+
if (header == "pass@1" && !passCheckBox.checked) {
320+
return;
321+
}
300322
var th = document.createElement("th");
301323
th.classList.add("text-nowrap");
324+
if (header == "Model") {
325+
th.style.textAlign = "left";
326+
}
302327
th.textContent = header;
303328

304329
if (header == winrate_tag) {
@@ -358,38 +383,48 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
358383
modelLink.classList.add("link-underline-primary");
359384
modelLink.classList.add("text-nowrap");
360385
modelCell.appendChild(modelLink);
386+
modelCell.style.textAlign = "left";
361387
dataRow.appendChild(modelCell);
362388

363-
// dpsRow = document.createElement("td");
364-
// dpsRow.textContent = row.get("dps").toFixed(1);
365-
// dataRow.appendChild(dpsRow);
366-
// passRow = document.createElement("td");
367-
// passRow.textContent = row.get("pass@1").toFixed(1);
368-
// dataRow.appendChild(passRow);
389+
if (dpsCheckBox.checked) {
390+
dpsRow = document.createElement("td");
391+
dpsRow.textContent = row.get("dps").toFixed(1);
392+
dataRow.appendChild(dpsRow);
393+
}
394+
if (passCheckBox.checked) {
395+
passRow = document.createElement("td");
396+
passRow.textContent = row.get("pass@1").toFixed(1);
397+
dataRow.appendChild(passRow);
398+
}
369399

370400
taskWinRateRow = document.createElement("td");
371401
taskWinRateRow.textContent = (row.get('task_win_rate') * 100).toFixed(1);
402+
// center-align
372403
dataRow.appendChild(taskWinRateRow);
373404

374405

375406
modelWinRateRow = document.createElement("td");
376407
modelWinRateRow.textContent = (row.get('model_win_rate') * 100).toFixed(1);
377408
modelWinRateRow.style.backgroundColor = "#EEFFEE";
409+
// center-align
378410
dataRow.appendChild(modelWinRateRow);
379411
tbody.appendChild(dataRow);
380412
});
381413
table.appendChild(tbody);
382414
};
383415

384416
const clearTable = () => {
385-
contextTable.innerHTML = "";
417+
metricTable.innerHTML = "";
386418
};
387419

388420
const main = () => {
389421
clearTable();
390-
displayTable(contextTable);
422+
displayTable(metricTable);
391423
};
392424

425+
passCheckBox.addEventListener("change", main);
426+
dpsCheckBox.addEventListener("change", main);
427+
393428
main();
394429

395430
initializeHeatmap();

0 commit comments

Comments
 (0)