2
2
# This software may be used and distributed according to the terms of the Llama 3 Community License Agreement.
3
3
4
4
import argparse
import ast
import errno
import glob
import os
import shutil
from pathlib import Path

import yaml
from datasets import Dataset, load_dataset
12
+
11
13
12
14
# get the ifeval from the evals dataset and join it with the original ifeval datasets
def get_ifeval_data(model_name, output_dir):
    """Join Meta's IFEval evals dataset with the original IFEval dataset.

    Downloads the `<model_name>-evals` strict-IFEval details and the original
    wis-k/instruction-following-eval dataset, joins them on the question text,
    and writes the result to `<output_dir>/joined_ifeval.parquet`.

    Args:
        model_name: one of the supported Meta-Llama-3.1 Instruct model names.
        output_dir: directory where the joined parquet file is written.

    Raises:
        ValueError: if `model_name` is not a supported Instruct model.
    """
    print(f"preparing the ifeval data using {model_name}'s evals dataset")
    if model_name not in [
        "Meta-Llama-3.1-8B-Instruct",
        "Meta-Llama-3.1-70B-Instruct",
        "Meta-Llama-3.1-405B-Instruct",
    ]:
        raise ValueError(
            "Only Meta-Llama-3.1-8B-Instruct, Meta-Llama-3.1-70B-Instruct, Meta-Llama-3.1-405B-Instruct models are supported for IFEval"
        )
    original_dataset_name = "wis-k/instruction-following-eval"
    meta_dataset_name = f"meta-llama/{model_name}-evals"
    meta_data = load_dataset(
        meta_dataset_name,
        name=f"{model_name}-evals__ifeval__strict__details",
        split="latest",
    )
    ifeval_data = load_dataset(original_dataset_name, split="train")
    meta_data = meta_data.map(get_question)
    meta_df = meta_data.to_pandas()
    ifeval_df = ifeval_data.to_pandas()
    ifeval_df = ifeval_df.rename(columns={"prompt": "input_question"})
    # join the two datasets on the input_question column
    joined = meta_df.join(ifeval_df.set_index("input_question"), on="input_question")
    joined = joined.rename(columns={"input_final_prompts": "prompt"})
    joined = joined.rename(columns={"is_correct": "previous_is_correct"})
    joined = Dataset.from_pandas(joined)
    joined = joined.select_columns(
        [
            "input_question",
            "prompt",
            "previous_is_correct",
            "instruction_id_list",
            "kwargs",
            "output_prediction_text",
            "key",
        ]
    )
    # BUG FIX: Dataset.rename_column returns a NEW dataset; the original code
    # discarded the result, so the column was never actually renamed.
    joined = joined.rename_column(
        "output_prediction_text", "previous_output_prediction_text"
    )
    # plain string — no placeholders, so the f-prefix was unnecessary (F541)
    joined.to_parquet(output_dir + "/joined_ifeval.parquet")
40
55
56
+
41
57
# get the math_hard data from the evals dataset and join it with the original math_hard dataset
def get_math_data(model_name, output_dir):
    """Join Meta's MATH-Hard evals dataset with the original MATH-Hard test split.

    Downloads the `<model_name>-evals` math_hard details and the original
    lighteval/MATH-Hard test split, joins them on the question text, and
    writes the result to `<output_dir>/joined_math.parquet`.

    Args:
        model_name: one of the supported Meta-Llama-3.1 Instruct model names.
        output_dir: directory where the joined parquet file is written.

    Raises:
        ValueError: if `model_name` is not a supported Instruct model.
    """
    print(f"preparing the math data using {model_name}'s evals dataset")
    if model_name not in [
        "Meta-Llama-3.1-8B-Instruct",
        "Meta-Llama-3.1-70B-Instruct",
        "Meta-Llama-3.1-405B-Instruct",
    ]:
        raise ValueError(
            "Only Meta-Llama-3.1-8B-Instruct, Meta-Llama-3.1-70B-Instruct, Meta-Llama-3.1-405B-Instruct models are supported for MATH_hard"
        )
    original_dataset_name = "lighteval/MATH-Hard"
    meta_dataset_name = f"meta-llama/{model_name}-evals"
    meta_data = load_dataset(
        meta_dataset_name,
        name=f"{model_name}-evals__math_hard__details",
        split="latest",
    )
    math_data = load_dataset(original_dataset_name, split="test")
    meta_df = meta_data.to_pandas()
    math_df = math_data.to_pandas()
    math_df = math_df.rename(columns={"problem": "input_question"})
    # join the two datasets on the input_question column
    joined = meta_df.join(math_df.set_index("input_question"), on="input_question")
    joined = Dataset.from_pandas(joined)
    joined = joined.select_columns(
        [
            "input_question",
            "input_correct_responses",
            "input_final_prompts",
            "is_correct",
            "solution",
            "output_prediction_text",
        ]
    )
    joined = joined.rename_column("is_correct", "previous_is_correct")
    joined = joined.rename_column(
        "output_prediction_text", "previous_output_prediction_text"
    )

    # plain string — no placeholders, so the f-prefix was unnecessary (F541)
    joined.to_parquet(output_dir + "/joined_math.parquet")
68
98
69
# get the question from the ifeval dataset
def get_question(example):
    """Extract the plain-text question from a serialized evals-dataset row.

    `example["input_question"]` arrives as a JSON-like string; JSON literals
    are rewritten to Python literals and the string is parsed with
    `ast.literal_eval` (safer than `eval` for data-supplied text), then the
    dialog body is pulled out and two known capitalization glitches are
    normalized. `example["input_final_prompts"]` is flattened to its first
    entry.

    Returns:
        The mutated example dict, or None if the row cannot be parsed
        (the offending question is printed for inspection).
    """
    try:
        example["input_question"] = (
            ast.literal_eval(
                example["input_question"]
                .replace("null", "None")
                .replace("true", "True")
                .replace("false", "False")
            )["dialog"][0]["body"]
            .replace("Is it True that the first song", "Is it true that the first song")
            .replace("Is the following True", "Is the following true")
        )
        example["input_final_prompts"] = example["input_final_prompts"][0]
        return example
    except Exception:
        # narrowed from a bare `except:`; print the bad row rather than
        # crashing the whole dataset .map()
        print(example["input_question"])
        return None
78
118
119
+
79
120
# change the yaml file to use the correct model name
def change_yaml(args, base_name):
    """Render every template yaml into the work dir.

    Each yaml found under the template directory is copied to the matching
    path under the work directory, with the hard-coded model name replaced by
    `base_name` and the WORK_DIR placeholder replaced by the destination's
    parent directory.
    """
    pattern = args.template_dir + "**/*/*.yaml"
    for yaml_file in glob.glob(pattern, recursive=True):
        output_path = yaml_file.replace(args.template_dir, args.work_dir)
        print(f"changing {yaml_file} to output_path: {output_path}")
        work_dir_value = str(Path(output_path).parent)
        with open(yaml_file, "r") as template:
            template_lines = template.readlines()
        with open(output_path, "w") as rendered:
            for raw_line in template_lines:
                substituted = raw_line.replace("Meta-Llama-3.1-8B", base_name)
                rendered.write(substituted.replace("WORK_DIR", work_dir_value))
136
+
91
137
92
138
# copy the files and change the yaml file to use the correct model name
def copy_and_prepare(args):
    """Populate the work dir from the template dir and rewrite its yamls.

    Copies the whole template folder (yaml + python files) into the work
    folder unless it already exists, then rewrites the yamls there to use the
    base model name derived from the evals dataset.
    """
    if os.path.exists(args.work_dir):
        print("work_dir already exists, no need to copy files")
    else:
        # Copy the all files, including yaml files and python files, from template folder to the work folder
        copy_dir(args.template_dir, args.work_dir)
    # Use the template yaml to get the correct model name in work_dir yaml
    dataset_tail = args.evals_dataset.split("/")[-1]
    base_name = dataset_tail.replace("-evals", "").replace("-Instruct", "")
    change_yaml(args, base_name)
103
151
152
+
104
153
def parse_eval_args ():
105
154
parser = argparse .ArgumentParser (formatter_class = argparse .RawTextHelpFormatter )
106
155
parser .add_argument (
@@ -111,50 +160,69 @@ def parse_eval_args():
111
160
)
112
161
return parser .parse_args ()
113
162
163
+
114
164
def prepare_datasets(args):
    """Build the joined IFEval / MATH-Hard datasets required by the tasks.

    The IFeval and MATH_Hard tasks need the original datasets joined with the
    evals dataset on the actual questions; the model name is derived from the
    evals_dataset name. "meta_instruct" implies both joins; otherwise each
    task triggers only its own join.
    """
    task_list = args.tasks.split(",")
    model_name = args.evals_dataset.split("/")[-1].replace("-evals", "")
    if "meta_instruct" in task_list:
        for prepare in (get_ifeval_data, get_math_data):
            prepare(model_name, args.work_dir)
        return
    if "meta_ifeval" in task_list:
        get_ifeval_data(model_name, args.work_dir)
    if "meta_math_hard" in task_list:
        get_math_data(model_name, args.work_dir)
178
+
179
+
128
180
# copy the files from src to dst
def copy_dir(src, dst):
    """Recursively copy `src` to `dst`.

    Tries a directory tree copy first (EAFP); when the OS reports that `src`
    is not a directory (ENOTDIR/EINVAL), falls back to a plain file copy.
    Any other OSError is re-raised.
    """
    try:
        shutil.copytree(src, dst)
    except OSError as exc:  # python >2.5
        if exc.errno not in (errno.ENOTDIR, errno.EINVAL):
            raise
        shutil.copy(src, dst)
189
+
190
+
136
191
# load the config yaml file
def load_config(config_path: str = "./config.yaml"):
    """Read the YAML configuration file at `config_path` and return its contents."""
    with open(config_path, "r") as cfg_file:
        return yaml.safe_load(cfg_file)
197
+
198
+
142
199
if __name__ == "__main__":
    args = parse_eval_args()
    config = load_config(args.config_path)
    # Overlay every config value onto the CLI args namespace
    # (idiom fix: setattr() instead of calling args.__setattr__ directly)
    for k, v in config.items():
        setattr(args, k, v)
    if not os.path.exists(args.template_dir):
        raise ValueError("The template_dir does not exist, please check the path")
    if args.evals_dataset not in [
        "meta-llama/Meta-Llama-3.1-8B-Instruct-evals",
        "meta-llama/Meta-Llama-3.1-70B-Instruct-evals",
        "meta-llama/Meta-Llama-3.1-405B-Instruct-evals",
        "meta-llama/Meta-Llama-3.1-8B-evals",
        "meta-llama/Meta-Llama-3.1-70B-evals",
        "meta-llama/Meta-Llama-3.1-405B-evals",
    ]:
        raise ValueError(
            "The evals dataset is not valid, please double check the name, must use the name in the Llama 3.1 Evals collection"
        )
    # Create VLLM model args
    args.model_args = f"pretrained={args.model_name},tensor_parallel_size={args.tensor_parallel_size},dtype=auto,gpu_memory_utilization={args.gpu_memory_utilization},data_parallel_size={args.data_parallel_size},max_model_len={args.max_model_len},add_bos_token=True,seed=42"
    # Copy the all files from template folder to the work folder
    copy_and_prepare(args)
    # Prepare the datasets for the IFeval and MATH_Hard tasks as we need to join the original dataset
    prepare_datasets(args)
    # typo fix in the user-facing message: "prepration" -> "preparation"
    print(
        f"preparation for the {args.model_name} using {args.evals_dataset} is done, all saved the work_dir: {args.work_dir}"
    )
    command_str = f"lm_eval --model vllm --model_args {args.model_args} --tasks {args.tasks} --batch_size auto --output_path {args.output_path} --include_path {os.path.abspath(args.work_dir)} --seed 42 "
    if args.limit:
        command_str += f" --limit {args.limit}"
0 commit comments