Skip to content

Commit 4b2e910

Browse files
author
github-actions
committed
leaderboard: add eval results
1 parent 19781ef commit 4b2e910

File tree

1 file changed

+89
-0
lines changed

1 file changed

+89
-0
lines changed
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
{
2+
"_submission_hash": "af5811b3e9bf9e0f8e5967898ce8e61b48772c2c267d10f5f6a47aa3f6221a67",
3+
"_results_hash": "9fec365a2e7272ddaeaa0c93d9a792b07bb371ab61ce58f8a183f701d2118905",
4+
"metadata": {
5+
"name": "Llama3.3 70b distilled DS - 32k ss",
6+
"authors": "SambaNova Systems",
7+
"url": null,
8+
"citation": "",
9+
"type": "PROMPT",
10+
"context": "32,000",
11+
"is_trained_for_function_calling": false,
12+
"details": "Additional model details (e.g. API model revision or Hugging Face model ID) - optional"
13+
},
14+
"closedbook": {
15+
"acc": {
16+
"loose": 0.5146399381846491,
17+
"strict": 0.11049723756906077
18+
},
19+
"rouge": {
20+
"rouge1": {
21+
"precision": 0.39506324834246265,
22+
"recall": 0.5981784417572024,
23+
"fscore": 0.44362514027396305
24+
},
25+
"rouge2": {
26+
"precision": 0.2185731210859842,
27+
"recall": 0.3152650236948555,
28+
"fscore": 0.24433595958015872
29+
},
30+
"rougeL": {
31+
"precision": 0.3279649663747149,
32+
"recall": 0.501721775810348,
33+
"fscore": 0.36860975010534547
34+
}
35+
},
36+
"bleurt": 0.4949774522107938,
37+
"gpt": 0.19337016574585636
38+
},
39+
"openbook": {
40+
"acc": {
41+
"loose": 0.5589082340608355,
42+
"strict": 0.12569060773480664
43+
},
44+
"rouge": {
45+
"rouge1": {
46+
"precision": 0.4648666571003417,
47+
"recall": 0.6315235877039482,
48+
"fscore": 0.5025006057388298
49+
},
50+
"rouge2": {
51+
"precision": 0.2666021904005115,
52+
"recall": 0.35322076393768814,
53+
"fscore": 0.2890946053464048
54+
},
55+
"rougeL": {
56+
"precision": 0.3952035318721278,
57+
"recall": 0.5424450065079786,
58+
"fscore": 0.4286490181909285
59+
}
60+
},
61+
"bleurt": 0.5296848898660906,
62+
"gpt": 0.27624309392265195
63+
},
64+
"evidenceprovided": {
65+
"acc": {
66+
"loose": 0.6585387888500631,
67+
"strict": 0.17955801104972377
68+
},
69+
"rouge": {
70+
"rouge1": {
71+
"precision": 0.5389617314855132,
72+
"recall": 0.7310799636512487,
73+
"fscore": 0.5854004243809355
74+
},
75+
"rouge2": {
76+
"precision": 0.3247304534580084,
77+
"recall": 0.4308209051742917,
78+
"fscore": 0.35338011510338374
79+
},
80+
"rougeL": {
81+
"precision": 0.45586973408819154,
82+
"recall": 0.6202056803486834,
83+
"fscore": 0.4946168890142627
84+
}
85+
},
86+
"bleurt": 0.5824599785431808,
87+
"gpt": 0.4129834254143646
88+
}
89+
}

0 commit comments

Comments
 (0)