File tree: 4 files changed, +241 -0 lines changed

Llama-3.3-70B-Instruct-FP8-dynamic/accuracy
Llama-3.3-70B-Instruct-quantized.w4a16/accuracy
Llama-3.3-70B-Instruct-quantized.w8a8/accuracy
meta-llama/Llama-3.3-70B-Instruct/accuracy

Llama-3.3-70B-Instruct-FP8-dynamic/accuracy

tasks:
  - name: arc_challenge
    metrics:
      - name: acc_norm,none
        value: 0.5196

  - name: gsm8k
    metrics:
      - name: exact_match,strict-match
        value: 0.9492

  - name: hellaswag
    metrics:
      - name: acc_norm,none
        value: 0.8643

  - name: mmlu
    metrics:
      - name: acc,none
        value: 0.8131

  - name: truthfulqa_mc2
    metrics:
      - name: acc,none
        value: 0.6321

  - name: winogrande
    metrics:
      - name: acc,none
        value: 0.8453

  # following are placeholders for mid-level "leaderboard_*" tasks
  # (OpenLLM v2) waiting for info on how to calculate the metric
  # values from the individual sub tasks.

  # - name: leaderboard_ifeval
  #   metrics:
  #     - name: inst_level_strict_acc,none
  #       value: 0.9092

  # - name: leaderboard_bbh
  #   metrics:
  #     - name: acc-norm,none
  #       value: 0.6284

  # TODO: need to identify if this is available
  # - name: leaderboard_math_v_5
  #   metrics:
  #     - name: exact_match,none
  #       value: 0.0033

  # - name: leaderboard_gpqa
  #   metrics:
  #     - name: acc-norm,none
  #       value: 0.463

  # - name: leaderboard_musr
  #   metrics:
  #     - name: acc-norm,none
  #       value: 0.4396

  # - name: leaderboard_mmlu_pro
  #   metrics:
  #     - name: acc,none
  #       value: 0.515

  # - name: humaneval
  #   metrics:
  #     - name: exact_match,none
  #       value: 0.837
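
Each of these accuracy files records per-task baseline metrics in the same tasks/metrics/value shape shown above. As a minimal sketch of how such a file could be consumed, the snippet below loads the YAML and flags measured scores that fall below the recorded baseline; the load_baseline and check_regressions helpers and the 5% relative tolerance are illustrative assumptions, not part of this PR or of any existing harness.

# Illustrative only: load one of the accuracy YAML files above and compare
# it against freshly measured scores. Function names and tolerance are
# assumptions, not taken from this PR.
import yaml  # PyYAML


def load_baseline(path):
    """Flatten the tasks/metrics layout into {(task, metric): value}."""
    with open(path) as f:
        doc = yaml.safe_load(f)
    return {
        (task["name"], metric["name"]): metric["value"]
        for task in doc["tasks"]
        for metric in task["metrics"]
    }


def check_regressions(measured, baseline, rel_tol=0.05):
    """Return messages for metrics that drop more than rel_tol below baseline."""
    failures = []
    for key, expected in baseline.items():
        got = measured.get(key)
        if got is not None and got < expected * (1 - rel_tol):
            failures.append(f"{key[0]}/{key[1]}: {got:.4f} < {expected * (1 - rel_tol):.4f}")
    return failures

For example, a measured gsm8k exact_match,strict-match of 0.93 checked against the FP8-dynamic baseline above (0.9492) would pass under a 5% relative tolerance, since the threshold works out to roughly 0.9017.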

Llama-3.3-70B-Instruct-quantized.w4a16/accuracy

tasks:
  - name: arc_challenge
    metrics:
      - name: acc_norm,none
        value: 0.4949

  - name: gsm8k
    metrics:
      - name: exact_match,strict-match
        value: 0.9447

  - name: hellaswag
    metrics:
      - name: acc_norm,none
        value: 0.8597

  - name: mmlu
    metrics:
      - name: acc,none
        value: 0.8062

  - name: truthfulqa_mc2
    metrics:
      - name: acc,none
        value: 0.6166

  # not available in model card as of 20250417
  # - name: winogrande
  #   metrics:
  #     - name: acc,none
  #       value: 0.8058

Llama-3.3-70B-Instruct-quantized.w8a8/accuracy

tasks:
  - name: arc_challenge
    metrics:
      - name: acc_norm,none
        value: 0.4804

  - name: gsm8k
    metrics:
      - name: exact_match,strict-match
        value: 0.9401

  - name: hellaswag
    metrics:
      - name: acc_norm,none
        value: 0.8647

  - name: mmlu
    metrics:
      - name: acc,none
        value: 0.8119

  - name: truthfulqa_mc2
    metrics:
      - name: acc,none
        value: 0.6309

  - name: winogrande
    metrics:
      - name: acc,none
        value: 0.8374

  # following are placeholders for mid-level "leaderboard_*" tasks
  # (OpenLLM v2) waiting for info on how to calculate the metric
  # values from the individual sub tasks.

  # - name: leaderboard_ifeval
  #   metrics:
  #     - name: inst_level_strict_acc,none
  #       value: 0.9068

  # - name: leaderboard_bbh
  #   metrics:
  #     - name: acc-norm,none
  #       value: 0.6254

  # TODO: need to identify if this is available
  # - name: leaderboard_math_v_5
  #   metrics:
  #     - name: exact_match,none
  #       value: 0

  # - name: leaderboard_gpqa
  #   metrics:
  #     - name: acc-norm,none
  #       value: 0.4644

  # - name: leaderboard_musr
  #   metrics:
  #     - name: acc-norm,none
  #       value: 0.4434

  # - name: leaderboard_mmlu_pro
  #   metrics:
  #     - name: acc,none
  #       value: 0.5159

  # - name: humaneval
  #   metrics:
  #     - name: exact_match,none
  #       value: 0.833

meta-llama/Llama-3.3-70B-Instruct/accuracy

tasks:
  - name: arc_challenge
    metrics:
      - name: acc_norm,none
        value: 0.4923

  - name: gsm8k
    metrics:
      - name: exact_match,strict-match
        value: 0.9416

  - name: hellaswag
    metrics:
      - name: acc_norm,none
        value: 0.8649

  - name: mmlu
    metrics:
      - name: acc,none
        value: 0.816

  - name: truthfulqa_mc2
    metrics:
      - name: acc,none
        value: 0.6275

  - name: winogrande
    metrics:
      - name: acc,none
        value: 0.8477

  # following are placeholders for mid-level "leaderboard_*" tasks
  # (OpenLLM v2) waiting for info on how to calculate the metric
  # values from the individual sub tasks.

  # - name: leaderboard_ifeval
  #   metrics:
  #     - name: inst_level_strict_acc,none
  #       value: 0.9089

  # - name: leaderboard_bbh
  #   metrics:
  #     - name: acc-norm,none
  #       value: 0.6315

  # TODO: need to identify if this is available
  # - name: leaderboard_math_v_5
  #   metrics:
  #     - name: exact_match,none
  #       value: 0.0017

  # - name: leaderboard_gpqa
  #   metrics:
  #     - name: acc-norm,none
  #       value: 0.461

  # - name: leaderboard_musr
  #   metrics:
  #     - name: acc-norm,none
  #       value: 0.4435

  # - name: leaderboard_mmlu_pro
  #   metrics:
  #     - name: acc,none
  #       value: 0.5189

  # - name: humaneval
  #   metrics:
  #     - name: exact_match,none
  #       value: 0.832
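
The commented-out placeholders above note that the aggregation rule for the OpenLLM v2 "leaderboard_*" groups is still an open question. One possible placeholder, shown purely as an assumption and not as the leaderboard's actual formula, is a sample-count-weighted mean over the sub-task scores:

# Assumption only: aggregate sub-task scores with a sample-count-weighted
# mean. The real OpenLLM v2 aggregation for the "leaderboard_*" groups is
# explicitly left open in the files above; the sub-task scores and counts
# below are made up for illustration.
def weighted_mean(subtasks):
    """subtasks: iterable of (score, n_samples) pairs."""
    total = sum(n for _, n in subtasks)
    return sum(score * n for score, n in subtasks) / total


# Hypothetical sub-task results feeding a single leaderboard_* value.
print(round(weighted_mean([(0.71, 250), (0.58, 250), (0.64, 198)]), 4))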