Skip to content

Commit 38e6de8

Browse files
committed
formating
1 parent eef8b88 commit 38e6de8

File tree

1 file changed

+116
-48
lines changed

1 file changed

+116
-48
lines changed

tools/benchmarks/llm_eval_harness/meta_eval_reproduce/prepare_meta_eval.py

Lines changed: 116 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -2,105 +2,154 @@
22
# This software may be used and distributed according to the terms of the Llama 3 Community License Agreement.
33

44
import argparse
5+
import errno, shutil
6+
import glob
57
import os
68
from pathlib import Path
7-
import glob
8-
import shutil, errno
9+
910
import yaml
10-
from datasets import load_dataset,Dataset
11+
from datasets import Dataset, load_dataset
12+
1113

1214
# get the ifeval from the evals dataset and join it with the original ifeval datasets
def get_ifeval_data(model_name, output_dir):
    """Join the Meta evals IFEval results with the original IFEval dataset.

    Rows are matched on the question text and the joined table is written to
    ``<output_dir>/joined_ifeval.parquet``.

    Args:
        model_name: One of the supported Meta-Llama-3.1 *-Instruct model names.
        output_dir: Directory where the joined parquet file is written.

    Raises:
        ValueError: If ``model_name`` is not a supported Instruct model.
    """
    print(f"preparing the ifeval data using {model_name}'s evals dataset")
    if model_name not in [
        "Meta-Llama-3.1-8B-Instruct",
        "Meta-Llama-3.1-70B-Instruct",
        "Meta-Llama-3.1-405B-Instruct",
    ]:
        raise ValueError(
            "Only Meta-Llama-3.1-8B-Instruct, Meta-Llama-3.1-70B-Instruct, Meta-Llama-3.1-405B-Instruct models are supported for IFEval"
        )
    original_dataset_name = "wis-k/instruction-following-eval"
    meta_dataset_name = f"meta-llama/{model_name}-evals"
    meta_data = load_dataset(
        meta_dataset_name,
        name=f"{model_name}-evals__ifeval__strict__details",
        split="latest",
    )
    ifeval_data = load_dataset(original_dataset_name, split="train")
    # normalize the evals rows so the question text matches the original prompts
    meta_data = meta_data.map(get_question)
    meta_df = meta_data.to_pandas()
    ifeval_df = ifeval_data.to_pandas()
    ifeval_df = ifeval_df.rename(columns={"prompt": "input_question"})
    # join the two datasets on the input_question column
    joined = meta_df.join(ifeval_df.set_index("input_question"), on="input_question")
    joined = joined.rename(columns={"input_final_prompts": "prompt"})
    joined = joined.rename(columns={"is_correct": "previous_is_correct"})
    joined = Dataset.from_pandas(joined)
    joined = joined.select_columns(
        [
            "input_question",
            "prompt",
            "previous_is_correct",
            "instruction_id_list",
            "kwargs",
            "output_prediction_text",
            "key",
        ]
    )
    # BUG FIX: Dataset.rename_column returns a NEW dataset; the original code
    # discarded the result, so the rename silently never happened (the sibling
    # get_math_data assigns it correctly).
    joined = joined.rename_column(
        "output_prediction_text", "previous_output_prediction_text"
    )
    joined.to_parquet(output_dir + "/joined_ifeval.parquet")
4055

56+
4157
# get the math_hard data from the evals dataset and join it with the original math_hard dataset
def get_math_data(model_name, output_dir):
    """Join the Meta evals MATH-Hard results with the original MATH-Hard test set.

    Rows are matched on the problem text and the joined table is written to
    ``<output_dir>/joined_math.parquet``.

    Args:
        model_name: One of the supported Meta-Llama-3.1 *-Instruct model names.
        output_dir: Directory where the joined parquet file is written.

    Raises:
        ValueError: If ``model_name`` is not a supported Instruct model.
    """
    print(f"preparing the math data using {model_name}'s evals dataset")
    if model_name not in [
        "Meta-Llama-3.1-8B-Instruct",
        "Meta-Llama-3.1-70B-Instruct",
        "Meta-Llama-3.1-405B-Instruct",
    ]:
        raise ValueError(
            "Only Meta-Llama-3.1-8B-Instruct, Meta-Llama-3.1-70B-Instruct, Meta-Llama-3.1-405B-Instruct models are supported for MATH_hard"
        )
    original_dataset_name = "lighteval/MATH-Hard"
    meta_dataset_name = f"meta-llama/{model_name}-evals"
    meta_data = load_dataset(
        meta_dataset_name,
        name=f"{model_name}-evals__math_hard__details",
        split="latest",
    )
    math_data = load_dataset(original_dataset_name, split="test")
    meta_df = meta_data.to_pandas()
    math_df = math_data.to_pandas()
    math_df = math_df.rename(columns={"problem": "input_question"})
    # join the two datasets on the input_question column
    joined = meta_df.join(math_df.set_index("input_question"), on="input_question")
    joined = Dataset.from_pandas(joined)
    joined = joined.select_columns(
        [
            "input_question",
            "input_correct_responses",
            "input_final_prompts",
            "is_correct",
            "solution",
            "output_prediction_text",
        ]
    )
    joined = joined.rename_column("is_correct", "previous_is_correct")
    joined = joined.rename_column(
        "output_prediction_text", "previous_output_prediction_text"
    )
    # plain string literal — the original used a pointless f-string prefix
    joined.to_parquet(output_dir + "/joined_math.parquet")
6898

69-
# get the question from the ifeval dataset
def get_question(example):
    """Recover the original question text from a Meta evals row.

    ``input_question`` holds a Python-literal-style dump of a dialog that uses
    JSON-style ``null``/``true``/``false``, so those tokens are normalized
    before evaluating and extracting the first message body. Two known
    capitalization artifacts caused by the blanket ``true -> True`` replacement
    are patched back so the text matches the original IFEval prompts on join.

    Returns:
        The mutated example, or None (after printing the offending question)
        when the field cannot be parsed — best-effort, matching the original
        silent-skip behavior.
    """
    try:
        # NOTE(security): eval on dataset content — tolerable only because the
        # input is a trusted Meta-published evals dataset; never reuse this on
        # untrusted data (prefer ast.literal_eval / json.loads).
        example["input_question"] = (
            eval(
                example["input_question"]
                .replace("null", "None")
                .replace("true", "True")
                .replace("false", "False")
            )["dialog"][0]["body"]
            .replace("Is it True that the first song", "Is it true that the first song")
            .replace("Is the following True", "Is the following true")
        )
        example["input_final_prompts"] = example["input_final_prompts"][0]
        return example
    except Exception:
        # was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit
        print(example["input_question"])
        return
78118

119+
79120
# change the yaml file to use the correct model name
def change_yaml(args, base_name):
    """Instantiate the template yaml configs for the chosen model.

    Every yaml found under ``args.template_dir`` is rewritten to the mirrored
    path under ``args.work_dir``, substituting ``base_name`` for the
    ``Meta-Llama-3.1-8B`` placeholder and the destination directory for
    ``WORK_DIR``.

    Args:
        args: Namespace providing ``template_dir`` and ``work_dir``.
        base_name: Model base name (e.g. ``Meta-Llama-3.1-70B``) spliced into
            the templates.
    """
    for yaml_file in glob.glob(args.template_dir + "**/*/*.yaml", recursive=True):
        with open(yaml_file, "r") as sources:
            lines = sources.readlines()
        output_path = yaml_file.replace(args.template_dir, args.work_dir)
        print(f"changing {yaml_file} to output_path: {output_path}")
        path = Path(output_path)
        yaml_dir = path.parent
        # BUG FIX: the destination subdirectory may not exist (e.g. work_dir
        # pre-existing without this folder); the original open() then raised
        # FileNotFoundError.
        yaml_dir.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w") as output:
            for line in lines:
                output.write(
                    line.replace("Meta-Llama-3.1-8B", base_name).replace(
                        "WORK_DIR", str(yaml_dir)
                    )
                )
136+
91137

92138
# copy the files and change the yaml file to use the correct model name
def copy_and_prepare(args):
    """Populate ``args.work_dir`` from the templates and specialize the yamls."""
    if os.path.exists(args.work_dir):
        print("work_dir already exists, no need to copy files")
    else:
        # Copy all files, including yaml files and python files, from the
        # template folder to the work folder.
        copy_dir(args.template_dir, args.work_dir)
    # Derive the model base name from the evals dataset id, then use the
    # template yamls to produce correctly-named yamls in work_dir.
    dataset_tail = args.evals_dataset.split("/")[-1]
    base_name = dataset_tail.replace("-evals", "").replace("-Instruct", "")
    change_yaml(args, base_name)
103151

152+
104153
def parse_eval_args():
105154
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
106155
parser.add_argument(
@@ -111,50 +160,69 @@ def parse_eval_args():
111160
)
112161
return parser.parse_args()
113162

163+
114164
def prepare_datasets(args):
    """Build the joined datasets needed by the requested eval tasks.

    The IFEval and MATH_Hard tasks require joining the original datasets with
    the evals dataset on the actual question text; the model name is derived
    from the evals dataset id.
    """
    requested = args.tasks.split(",")
    model_name = args.evals_dataset.split("/")[-1].replace("-evals", "")
    if "meta_instruct" in requested:
        # the instruct bundle needs both joined datasets
        get_ifeval_data(model_name, args.work_dir)
        get_math_data(model_name, args.work_dir)
    else:
        if "meta_ifeval" in requested:
            get_ifeval_data(model_name, args.work_dir)
        if "meta_math_hard" in requested:
            get_math_data(model_name, args.work_dir)
178+
179+
128180
# copy the files from src to dst
def copy_dir(src, dst):
    """Recursively copy ``src`` to ``dst``; fall back to a single-file copy
    when ``src`` turns out not to be a directory."""
    try:
        shutil.copytree(src, dst)
    except OSError as exc:
        # copytree refuses non-directory sources; anything else is re-raised
        if exc.errno not in (errno.ENOTDIR, errno.EINVAL):
            raise
        shutil.copy(src, dst)
189+
190+
136191
# load the config yaml file
def load_config(config_path: str = "./config.yaml"):
    """Read and parse the YAML configuration file at ``config_path``."""
    with open(config_path, "r") as fh:
        return yaml.safe_load(fh)
197+
198+
142199
if __name__ == "__main__":
    args = parse_eval_args()
    config = load_config(args.config_path)
    # Merge every config value onto the CLI namespace; idiomatic setattr
    # instead of calling args.__setattr__ directly.
    for k, v in config.items():
        setattr(args, k, v)
    if not os.path.exists(args.template_dir):
        raise ValueError("The template_dir does not exist, please check the path")
    if args.evals_dataset not in [
        "meta-llama/Meta-Llama-3.1-8B-Instruct-evals",
        "meta-llama/Meta-Llama-3.1-70B-Instruct-evals",
        "meta-llama/Meta-Llama-3.1-405B-Instruct-evals",
        "meta-llama/Meta-Llama-3.1-8B-evals",
        "meta-llama/Meta-Llama-3.1-70B-evals",
        "meta-llama/Meta-Llama-3.1-405B-evals",
    ]:
        raise ValueError(
            "The evals dataset is not valid, please double check the name, must use the name in the Llama 3.1 Evals collection"
        )
    # Create VLLM model args
    args.model_args = f"pretrained={args.model_name},tensor_parallel_size={args.tensor_parallel_size},dtype=auto,gpu_memory_utilization={args.gpu_memory_utilization},data_parallel_size={args.data_parallel_size},max_model_len={args.max_model_len},add_bos_token=True,seed=42"
    # Copy the all files from template folder to the work folder
    copy_and_prepare(args)
    # Prepare the datasets for the IFeval and MATH_Hard tasks as we need to join the original dataset
    prepare_datasets(args)
    # typo fix in the user-facing message: "prepration" -> "preparation"
    print(
        f"preparation for the {args.model_name} using {args.evals_dataset} is done, all saved the work_dir: {args.work_dir}"
    )
    # NOTE(review): command_str is assembled but never printed or executed in
    # the visible code — presumably meant to be shown to the user; confirm.
    command_str = f"lm_eval --model vllm --model_args {args.model_args} --tasks {args.tasks} --batch_size auto --output_path {args.output_path} --include_path {os.path.abspath(args.work_dir)} --seed 42 "
    if args.limit:
        command_str += f" --limit {args.limit}"

0 commit comments

Comments
 (0)