Skip to content

Commit 09622e0

Browse files
Sela Tachnai
authored and committed
table fix
1 parent 07b9dbc commit 09622e0

18 files changed

+954
-872
lines changed

energy-leaderboard-web/public/data/manifest.json

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@
77
"output_gemma3_4b_medium_26_11.json",
88
"output_gemma3_4b_mixed_02_12.json",
99
"output_gemma3_4b_mixed_26_11.json",
10+
"output_gpt-oss-120b_easy_16_12.json",
11+
"output_gpt-oss-120b_hard_16_12.json",
12+
"output_gpt-oss-120b_medium_16_12.json",
13+
"output_gpt-oss-120b_mixed_16_12.json",
14+
"output_gpt-oss-20b_easy_16_12.json",
15+
"output_gpt-oss-20b_easy_16_12_ollama.json",
16+
"output_gpt-oss-20b_hard_16_12.json",
17+
"output_gpt-oss-20b_hard_16_12_ollama.json",
18+
"output_gpt-oss-20b_medium_16_12.json",
19+
"output_gpt-oss-20b_medium_16_12_ollama.json",
20+
"output_gpt-oss-20b_mixed_16_12.json",
21+
"output_gpt-oss-20b_mixed_16_12_ollama.json",
1022
"output_gpt-oss_20b_easy_02_12.json",
1123
"output_gpt-oss_20b_hard_02_12.json",
1224
"output_gpt-oss_20b_medium_02_12.json",
@@ -23,6 +35,10 @@
2335
"output_llama3_8b_hard_26_11.json",
2436
"output_llama3_8b_medium_26_11.json",
2537
"output_llama3_8b_mixed_26_11.json",
38+
"output_nemotron-3-nano_easy_16_12_ollama.json",
39+
"output_nemotron-3-nano_hard_16_12_ollama.json",
40+
"output_nemotron-3-nano_medium_16_12_ollama.json",
41+
"output_nemotron-3-nano_mixed_16_12_ollama.json",
2642
"output_qwen3_8b_easy_02_12.json",
2743
"output_qwen3_8b_easy_26_11.json",
2844
"output_qwen3_8b_hard_02_12.json",

energy-leaderboard-web/public/data/output_gpt-oss-120b_easy_16_12.json

Lines changed: 56 additions & 56 deletions
Large diffs are not rendered by default.

energy-leaderboard-web/public/data/output_gpt-oss-120b_hard_16_12.json

Lines changed: 69 additions & 69 deletions
Large diffs are not rendered by default.

energy-leaderboard-web/public/data/output_gpt-oss-120b_medium_16_12.json

Lines changed: 57 additions & 57 deletions
Large diffs are not rendered by default.

energy-leaderboard-web/public/data/output_gpt-oss-120b_mixed_16_12.json

Lines changed: 62 additions & 62 deletions
Large diffs are not rendered by default.

energy-leaderboard-web/public/data/output_gpt-oss-20b_easy_16_12.json

Lines changed: 54 additions & 54 deletions
Large diffs are not rendered by default.

energy-leaderboard-web/public/data/output_gpt-oss-20b_easy_16_12_ollama.json

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,22 @@
1313
"region": "unknown",
1414
"notice": null,
1515
"sampling_ms": 100,
16-
"device_name": "Linux with GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
16+
"device_name": "AMD Radeon Graphics",
1717
"device_type": "amd",
1818
"os_name": "Linux",
1919
"os_version": "6.14.0-37-generic",
2020
"cpu_model": "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S",
21-
"gpu_model": "GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
21+
"gpu_model": "AMD Radeon Graphics",
2222
"ram_gb": 125.1,
2323
"chip_architecture": "x86_64",
2424
"testset_id": "ts1",
2525
"testset_name": "Short factual questions (low energy baseline)",
26-
"testset_goal": "Establish a low\u2011energy baseline by asking simple factual or arithmetic questions that require short, single\u2011sentence answers.",
26+
"testset_goal": "Establish a low‑energy baseline by asking simple factual or arithmetic questions that require short, single‑sentence answers.",
2727
"testset_notes": "Use this set to measure the minimum energy cost of a local LLM under controlled conditions. Each prompt is concise, and the expected output is a short factual answer with minimal reasoning. Run multiple models on the same hardware and compare average energy per response.",
2828
"question_id": "ts1_q1",
2929
"question_difficulty": "easy",
3030
"question_task_type": "qa",
31-
"expected_answer_description": "A one\u2011word answer naming Japan\u2019s capital (Tokyo).",
31+
"expected_answer_description": "A one‑word answer naming Japan’s capital (Tokyo).",
3232
"max_output_tokens_hint": 5,
3333
"energy_relevance": "Very short prompt and answer provide a baseline for minimal energy consumption.",
3434
"tags": [
@@ -54,17 +54,17 @@
5454
"region": "unknown",
5555
"notice": null,
5656
"sampling_ms": 100,
57-
"device_name": "Linux with GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
57+
"device_name": "AMD Radeon Graphics",
5858
"device_type": "amd",
5959
"os_name": "Linux",
6060
"os_version": "6.14.0-37-generic",
6161
"cpu_model": "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S",
62-
"gpu_model": "GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
62+
"gpu_model": "AMD Radeon Graphics",
6363
"ram_gb": 125.1,
6464
"chip_architecture": "x86_64",
6565
"testset_id": "ts1",
6666
"testset_name": "Short factual questions (low energy baseline)",
67-
"testset_goal": "Establish a low\u2011energy baseline by asking simple factual or arithmetic questions that require short, single\u2011sentence answers.",
67+
"testset_goal": "Establish a low‑energy baseline by asking simple factual or arithmetic questions that require short, single‑sentence answers.",
6868
"testset_notes": "Use this set to measure the minimum energy cost of a local LLM under controlled conditions. Each prompt is concise, and the expected output is a short factual answer with minimal reasoning. Run multiple models on the same hardware and compare average energy per response.",
6969
"question_id": "ts1_q2",
7070
"question_difficulty": "easy",
@@ -95,17 +95,17 @@
9595
"region": "unknown",
9696
"notice": null,
9797
"sampling_ms": 100,
98-
"device_name": "Linux with GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
98+
"device_name": "AMD Radeon Graphics",
9999
"device_type": "amd",
100100
"os_name": "Linux",
101101
"os_version": "6.14.0-37-generic",
102102
"cpu_model": "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S",
103-
"gpu_model": "GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
103+
"gpu_model": "AMD Radeon Graphics",
104104
"ram_gb": 125.1,
105105
"chip_architecture": "x86_64",
106106
"testset_id": "ts1",
107107
"testset_name": "Short factual questions (low energy baseline)",
108-
"testset_goal": "Establish a low\u2011energy baseline by asking simple factual or arithmetic questions that require short, single\u2011sentence answers.",
108+
"testset_goal": "Establish a low‑energy baseline by asking simple factual or arithmetic questions that require short, single‑sentence answers.",
109109
"testset_notes": "Use this set to measure the minimum energy cost of a local LLM under controlled conditions. Each prompt is concise, and the expected output is a short factual answer with minimal reasoning. Run multiple models on the same hardware and compare average energy per response.",
110110
"question_id": "ts1_q3",
111111
"question_difficulty": "easy",
@@ -136,22 +136,22 @@
136136
"region": "unknown",
137137
"notice": null,
138138
"sampling_ms": 100,
139-
"device_name": "Linux with GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
139+
"device_name": "AMD Radeon Graphics",
140140
"device_type": "amd",
141141
"os_name": "Linux",
142142
"os_version": "6.14.0-37-generic",
143143
"cpu_model": "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S",
144-
"gpu_model": "GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
144+
"gpu_model": "AMD Radeon Graphics",
145145
"ram_gb": 125.1,
146146
"chip_architecture": "x86_64",
147147
"testset_id": "ts1",
148148
"testset_name": "Short factual questions (low energy baseline)",
149-
"testset_goal": "Establish a low\u2011energy baseline by asking simple factual or arithmetic questions that require short, single\u2011sentence answers.",
149+
"testset_goal": "Establish a low‑energy baseline by asking simple factual or arithmetic questions that require short, single‑sentence answers.",
150150
"testset_notes": "Use this set to measure the minimum energy cost of a local LLM under controlled conditions. Each prompt is concise, and the expected output is a short factual answer with minimal reasoning. Run multiple models on the same hardware and compare average energy per response.",
151151
"question_id": "ts1_q4",
152152
"question_difficulty": "easy",
153153
"question_task_type": "qa",
154-
"expected_answer_description": "The two\u2011letter chemical formula (H2O).",
154+
"expected_answer_description": "The two‑letter chemical formula (H2O).",
155155
"max_output_tokens_hint": 5,
156156
"energy_relevance": "Short chemical knowledge question; minimal output.",
157157
"tags": [
@@ -164,7 +164,7 @@
164164
"g_co2": 0.017527
165165
},
166166
{
167-
"prompt": "Who wrote the play \u2018Romeo and Juliet\u2019?",
167+
"prompt": "Who wrote the play ‘Romeo and Juliet’?",
168168
"completion": "",
169169
"tokens_prompt": 78,
170170
"tokens_completion": 61,
@@ -177,22 +177,22 @@
177177
"region": "unknown",
178178
"notice": null,
179179
"sampling_ms": 100,
180-
"device_name": "Linux with GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
180+
"device_name": "AMD Radeon Graphics",
181181
"device_type": "amd",
182182
"os_name": "Linux",
183183
"os_version": "6.14.0-37-generic",
184184
"cpu_model": "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S",
185-
"gpu_model": "GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
185+
"gpu_model": "AMD Radeon Graphics",
186186
"ram_gb": 125.1,
187187
"chip_architecture": "x86_64",
188188
"testset_id": "ts1",
189189
"testset_name": "Short factual questions (low energy baseline)",
190-
"testset_goal": "Establish a low\u2011energy baseline by asking simple factual or arithmetic questions that require short, single\u2011sentence answers.",
190+
"testset_goal": "Establish a low‑energy baseline by asking simple factual or arithmetic questions that require short, single‑sentence answers.",
191191
"testset_notes": "Use this set to measure the minimum energy cost of a local LLM under controlled conditions. Each prompt is concise, and the expected output is a short factual answer with minimal reasoning. Run multiple models on the same hardware and compare average energy per response.",
192192
"question_id": "ts1_q5",
193193
"question_difficulty": "easy",
194194
"question_task_type": "qa",
195-
"expected_answer_description": "The author\u2019s name (William Shakespeare).",
195+
"expected_answer_description": "The author’s name (William Shakespeare).",
196196
"max_output_tokens_hint": 6,
197197
"energy_relevance": "Simple fact retrieval; low energy.",
198198
"tags": [
@@ -218,22 +218,22 @@
218218
"region": "unknown",
219219
"notice": null,
220220
"sampling_ms": 100,
221-
"device_name": "Linux with GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
221+
"device_name": "AMD Radeon Graphics",
222222
"device_type": "amd",
223223
"os_name": "Linux",
224224
"os_version": "6.14.0-37-generic",
225225
"cpu_model": "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S",
226-
"gpu_model": "GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
226+
"gpu_model": "AMD Radeon Graphics",
227227
"ram_gb": 125.1,
228228
"chip_architecture": "x86_64",
229229
"testset_id": "ts1",
230230
"testset_name": "Short factual questions (low energy baseline)",
231-
"testset_goal": "Establish a low\u2011energy baseline by asking simple factual or arithmetic questions that require short, single\u2011sentence answers.",
231+
"testset_goal": "Establish a low‑energy baseline by asking simple factual or arithmetic questions that require short, single‑sentence answers.",
232232
"testset_notes": "Use this set to measure the minimum energy cost of a local LLM under controlled conditions. Each prompt is concise, and the expected output is a short factual answer with minimal reasoning. Run multiple models on the same hardware and compare average energy per response.",
233233
"question_id": "ts1_q6",
234234
"question_difficulty": "easy",
235235
"question_task_type": "qa",
236-
"expected_answer_description": "A four\u2011digit year (1969).",
236+
"expected_answer_description": "A four‑digit year (1969).",
237237
"max_output_tokens_hint": 5,
238238
"energy_relevance": "Short question with numeric answer; minimal compute.",
239239
"tags": [
@@ -259,22 +259,22 @@
259259
"region": "unknown",
260260
"notice": null,
261261
"sampling_ms": 100,
262-
"device_name": "Linux with GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
262+
"device_name": "AMD Radeon Graphics",
263263
"device_type": "amd",
264264
"os_name": "Linux",
265265
"os_version": "6.14.0-37-generic",
266266
"cpu_model": "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S",
267-
"gpu_model": "GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
267+
"gpu_model": "AMD Radeon Graphics",
268268
"ram_gb": 125.1,
269269
"chip_architecture": "x86_64",
270270
"testset_id": "ts1",
271271
"testset_name": "Short factual questions (low energy baseline)",
272-
"testset_goal": "Establish a low\u2011energy baseline by asking simple factual or arithmetic questions that require short, single\u2011sentence answers.",
272+
"testset_goal": "Establish a low‑energy baseline by asking simple factual or arithmetic questions that require short, single‑sentence answers.",
273273
"testset_notes": "Use this set to measure the minimum energy cost of a local LLM under controlled conditions. Each prompt is concise, and the expected output is a short factual answer with minimal reasoning. Run multiple models on the same hardware and compare average energy per response.",
274274
"question_id": "ts1_q7",
275275
"question_difficulty": "easy",
276276
"question_task_type": "qa",
277-
"expected_answer_description": "The element\u2019s name (Carbon).",
277+
"expected_answer_description": "The element’s name (Carbon).",
278278
"max_output_tokens_hint": 5,
279279
"energy_relevance": "Simple factual recall; low energy cost.",
280280
"tags": [
@@ -300,24 +300,24 @@
300300
"region": "unknown",
301301
"notice": null,
302302
"sampling_ms": 100,
303-
"device_name": "Linux with GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
303+
"device_name": "AMD Radeon Graphics",
304304
"device_type": "amd",
305305
"os_name": "Linux",
306306
"os_version": "6.14.0-37-generic",
307307
"cpu_model": "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S",
308-
"gpu_model": "GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
308+
"gpu_model": "AMD Radeon Graphics",
309309
"ram_gb": 125.1,
310310
"chip_architecture": "x86_64",
311311
"testset_id": "ts1",
312312
"testset_name": "Short factual questions (low energy baseline)",
313-
"testset_goal": "Establish a low\u2011energy baseline by asking simple factual or arithmetic questions that require short, single\u2011sentence answers.",
313+
"testset_goal": "Establish a low‑energy baseline by asking simple factual or arithmetic questions that require short, single‑sentence answers.",
314314
"testset_notes": "Use this set to measure the minimum energy cost of a local LLM under controlled conditions. Each prompt is concise, and the expected output is a short factual answer with minimal reasoning. Run multiple models on the same hardware and compare average energy per response.",
315315
"question_id": "ts1_q8",
316316
"question_difficulty": "easy",
317317
"question_task_type": "qa",
318318
"expected_answer_description": "A single integer (7).",
319319
"max_output_tokens_hint": 4,
320-
"energy_relevance": "Basic arithmetic with one\u2011word output.",
320+
"energy_relevance": "Basic arithmetic with one‑word output.",
321321
"tags": [
322322
"math",
323323
"numeric",
@@ -341,17 +341,17 @@
341341
"region": "unknown",
342342
"notice": null,
343343
"sampling_ms": 100,
344-
"device_name": "Linux with GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
344+
"device_name": "AMD Radeon Graphics",
345345
"device_type": "amd",
346346
"os_name": "Linux",
347347
"os_version": "6.14.0-37-generic",
348348
"cpu_model": "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S",
349-
"gpu_model": "GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
349+
"gpu_model": "AMD Radeon Graphics",
350350
"ram_gb": 125.1,
351351
"chip_architecture": "x86_64",
352352
"testset_id": "ts1",
353353
"testset_name": "Short factual questions (low energy baseline)",
354-
"testset_goal": "Establish a low\u2011energy baseline by asking simple factual or arithmetic questions that require short, single\u2011sentence answers.",
354+
"testset_goal": "Establish a low‑energy baseline by asking simple factual or arithmetic questions that require short, single‑sentence answers.",
355355
"testset_notes": "Use this set to measure the minimum energy cost of a local LLM under controlled conditions. Each prompt is concise, and the expected output is a short factual answer with minimal reasoning. Run multiple models on the same hardware and compare average energy per response.",
356356
"question_id": "ts1_q9",
357357
"question_difficulty": "easy",
@@ -382,24 +382,24 @@
382382
"region": "unknown",
383383
"notice": null,
384384
"sampling_ms": 100,
385-
"device_name": "Linux with GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
385+
"device_name": "AMD Radeon Graphics",
386386
"device_type": "amd",
387387
"os_name": "Linux",
388388
"os_version": "6.14.0-37-generic",
389389
"cpu_model": "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S",
390-
"gpu_model": "GPU[0]\t\t: Card Series: \t\tAMD Radeon Graphics",
390+
"gpu_model": "AMD Radeon Graphics",
391391
"ram_gb": 125.1,
392392
"chip_architecture": "x86_64",
393393
"testset_id": "ts1",
394394
"testset_name": "Short factual questions (low energy baseline)",
395-
"testset_goal": "Establish a low\u2011energy baseline by asking simple factual or arithmetic questions that require short, single\u2011sentence answers.",
395+
"testset_goal": "Establish a low‑energy baseline by asking simple factual or arithmetic questions that require short, single‑sentence answers.",
396396
"testset_notes": "Use this set to measure the minimum energy cost of a local LLM under controlled conditions. Each prompt is concise, and the expected output is a short factual answer with minimal reasoning. Run multiple models on the same hardware and compare average energy per response.",
397397
"question_id": "ts1_q10",
398398
"question_difficulty": "easy",
399399
"question_task_type": "qa",
400400
"expected_answer_description": "The resulting secondary color (purple or violet).",
401401
"max_output_tokens_hint": 6,
402-
"energy_relevance": "Simple fact with one\u2011word answer to establish baseline energy usage.",
402+
"energy_relevance": "Simple fact with one‑word answer to establish baseline energy usage.",
403403
"tags": [
404404
"color-mixing",
405405
"factual",

0 commit comments

Comments
 (0)