{
  "_submission_hash": "af5811b3e9bf9e0f8e5967898ce8e61b48772c2c267d10f5f6a47aa3f6221a67",
  "_results_hash": "9fec365a2e7272ddaeaa0c93d9a792b07bb371ab61ce58f8a183f701d2118905",
  "metadata": {
    "name": "Llama3.3 70b distilled DS - 32k ss",
    "authors": "SambaNova Systems",
    "url": null,
    "citation": "",
    "type": "PROMPT",
    "context": "32,000",
    "is_trained_for_function_calling": false,
    "details": "Additional model details (e.g. API model revision or Hugging Face model ID) - optional"
  },
  "closedbook": {
    "acc": {
      "loose": 0.5146399381846491,
      "strict": 0.11049723756906077
    },
    "rouge": {
      "rouge1": {
        "precision": 0.39506324834246265,
        "recall": 0.5981784417572024,
        "fscore": 0.44362514027396305
      },
      "rouge2": {
        "precision": 0.2185731210859842,
        "recall": 0.3152650236948555,
        "fscore": 0.24433595958015872
      },
      "rougeL": {
        "precision": 0.3279649663747149,
        "recall": 0.501721775810348,
        "fscore": 0.36860975010534547
      }
    },
    "bleurt": 0.4949774522107938,
    "gpt": 0.19337016574585636
  },
  "openbook": {
    "acc": {
      "loose": 0.5589082340608355,
      "strict": 0.12569060773480664
    },
    "rouge": {
      "rouge1": {
        "precision": 0.4648666571003417,
        "recall": 0.6315235877039482,
        "fscore": 0.5025006057388298
      },
      "rouge2": {
        "precision": 0.2666021904005115,
        "recall": 0.35322076393768814,
        "fscore": 0.2890946053464048
      },
      "rougeL": {
        "precision": 0.3952035318721278,
        "recall": 0.5424450065079786,
        "fscore": 0.4286490181909285
      }
    },
    "bleurt": 0.5296848898660906,
    "gpt": 0.27624309392265195
  },
  "evidenceprovided": {
    "acc": {
      "loose": 0.6585387888500631,
      "strict": 0.17955801104972377
    },
    "rouge": {
      "rouge1": {
        "precision": 0.5389617314855132,
        "recall": 0.7310799636512487,
        "fscore": 0.5854004243809355
      },
      "rouge2": {
        "precision": 0.3247304534580084,
        "recall": 0.4308209051742917,
        "fscore": 0.35338011510338374
      },
      "rougeL": {
        "precision": 0.45586973408819154,
        "recall": 0.6202056803486834,
        "fscore": 0.4946168890142627
      }
    },
    "bleurt": 0.5824599785431808,
    "gpt": 0.4129834254143646
  }
}