Skip to content

Commit 60705c6

Browse files
authored
add mistralai/Mistral-Small-24B-Instruct-2501 model accuracy (#9)
1 parent 79ba6a5 commit 60705c6

File tree

4 files changed

+213
-0
lines changed

4 files changed

+213
-0
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
---
# llm-eval-test configs for https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501
model: "vllm"
model_args:
  # NOTE(review): `pretrained` assumed nested under `model_args` per
  # lm-evaluation-harness convention — confirm against the consumer.
  pretrained: "mistralai/Mistral-Small-24B-Instruct-2501"
# null → fall back to each task's default few-shot count
# (original had a bare empty value, which parses as null implicitly)
num_fewshot: null
apply_chat_template: true
fewshot_as_multiturn: true
add_bos_token: false
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
---
# server configs for https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501
model: "mistralai/Mistral-Small-24B-Instruct-2501"
trust-remote-code: true
enable-chunked-prefill: true
# context window cap for the server (tokens)
max-model-len: 4096
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
---
# collected vllm v0.8.3.post1 on k8s-a100-duo
# Per-task leaderboard accuracy for mistralai/Mistral-Small-24B-Instruct-2501.
# Metric names contain a comma (metric,filter) so they are quoted to stay
# safe if any entry is ever moved into flow style.
tasks:
  - name: leaderboard_math_algebra_hard
    metrics:
      - name: "exact_match,none"
        value: 0.703

  - name: leaderboard_math_counting_and_prob_hard
    metrics:
      - name: "exact_match,none"
        value: 0.489

  - name: leaderboard_math_geometry_hard
    metrics:
      - name: "exact_match,none"
        value: 0.366

  - name: leaderboard_math_intermediate_algebra_hard
    metrics:
      - name: "exact_match,none"
        value: 0.283

  - name: leaderboard_math_num_theory_hard
    metrics:
      - name: "exact_match,none"
        value: 0.476

  - name: leaderboard_math_prealgebra_hard
    metrics:
      - name: "exact_match,none"
        value: 0.695

  - name: leaderboard_math_precalculus_hard
    metrics:
      - name: "exact_match,none"
        value: 0.355

  - name: leaderboard_bbh_boolean_expressions
    metrics:
      - name: "acc_norm,none"
        value: 0.876

  - name: leaderboard_bbh_causal_judgement
    metrics:
      - name: "acc_norm,none"
        value: 0.652

  - name: leaderboard_bbh_date_understanding
    metrics:
      - name: "acc_norm,none"
        value: 0.796

  - name: leaderboard_bbh_disambiguation_qa
    metrics:
      - name: "acc_norm,none"
        value: 0.696

  - name: leaderboard_bbh_formal_fallacies
    metrics:
      - name: "acc_norm,none"
        value: 0.684

  - name: leaderboard_bbh_geometric_shapes
    metrics:
      - name: "acc_norm,none"
        value: 0.508

  - name: leaderboard_bbh_hyperbaton
    metrics:
      - name: "acc_norm,none"
        value: 0.78

  - name: leaderboard_bbh_logical_deduction_five_objects
    metrics:
      - name: "acc_norm,none"
        value: 0.632

  - name: leaderboard_bbh_logical_deduction_seven_objects
    metrics:
      - name: "acc_norm,none"
        value: 0.636

  - name: leaderboard_bbh_logical_deduction_three_objects
    metrics:
      - name: "acc_norm,none"
        value: 0.876

  - name: leaderboard_bbh_movie_recommendation
    metrics:
      - name: "acc_norm,none"
        value: 0.848

  - name: leaderboard_bbh_navigate
    metrics:
      - name: "acc_norm,none"
        value: 0.688

  - name: leaderboard_bbh_object_counting
    metrics:
      - name: "acc_norm,none"
        value: 0.42

  - name: leaderboard_bbh_penguins_in_a_table
    metrics:
      - name: "acc_norm,none"
        value: 0.767

  - name: leaderboard_bbh_reasoning_about_colored_objects
    metrics:
      - name: "acc_norm,none"
        value: 0.764

  - name: leaderboard_bbh_ruin_names
    metrics:
      - name: "acc_norm,none"
        value: 0.868

  - name: leaderboard_bbh_salient_translation_error_detection
    metrics:
      - name: "acc_norm,none"
        value: 0.684

  - name: leaderboard_bbh_snarks
    metrics:
      - name: "acc_norm,none"
        value: 0.725

  - name: leaderboard_bbh_sports_understanding
    metrics:
      - name: "acc_norm,none"
        value: 0.836

  - name: leaderboard_bbh_temporal_sequences
    metrics:
      - name: "acc_norm,none"
        value: 0.984

  - name: leaderboard_bbh_tracking_shuffled_objects_five_objects
    metrics:
      - name: "acc_norm,none"
        value: 0.288

  - name: leaderboard_bbh_tracking_shuffled_objects_seven_objects
    metrics:
      - name: "acc_norm,none"
        value: 0.224

  - name: leaderboard_bbh_tracking_shuffled_objects_three_objects
    metrics:
      - name: "acc_norm,none"
        value: 0.348

  - name: leaderboard_bbh_web_of_lies
    metrics:
      - name: "acc_norm,none"
        value: 0.52

  - name: leaderboard_gpqa_diamond
    metrics:
      - name: "acc_norm,none"
        value: 0.399

  - name: leaderboard_gpqa_extended
    metrics:
      - name: "acc_norm,none"
        value: 0.405

  - name: leaderboard_gpqa_main
    metrics:
      - name: "acc_norm,none"
        value: 0.393

  - name: leaderboard_musr_murder_mysteries
    metrics:
      - name: "acc_norm,none"
        value: 0.556

  - name: leaderboard_musr_object_placements
    metrics:
      - name: "acc_norm,none"
        value: 0.437

  - name: leaderboard_musr_team_allocation
    metrics:
      - name: "acc_norm,none"
        value: 0.404

  - name: leaderboard_ifeval
    metrics:
      - name: "prompt_level_strict_acc,none"
        value: 0.582
      - name: "prompt_level_loose_acc,none"
        value: 0.647
      - name: "inst_level_loose_acc,none"
        value: 0.748
      - name: "inst_level_strict_acc,none"
        value: 0.693
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
---
# storage configs for https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501
# "hf" presumably selects the Hugging Face Hub backend — TODO confirm
# against the consuming tool's schema.
model: "hf"
data: "hf"

0 commit comments

Comments
 (0)