@@ -87,17 +87,16 @@ def check_taskset_path(dataset_name: str, taskset_path: str) -> str:
8787 subprocess.CalledProcessError: If the generation script fails (due to check=True).
8888
8989 Side Effects:
90- - Modifies `taskset_config` by setting the "path" key to the resolved path.
9190 - May create directories and files on disk via the external generation script.
9291 - Executes a subprocess to run the dataset generation script.
9392
9493 Examples:
95- For dataset_name='guru ' and taskset_config={"path": None},
94+ For dataset_name='guru_math ' and taskset_path=None,
9695 this function will run the following command and
97- generate the guru dataset to default location (DEFAULT_DATA_PATH in scripts/gen_guru_data .py):
96+ generate the guru_math dataset to default location (DEFAULT_DATA_PATH in scripts/gen_guru_math_data .py):
9897
9998 ```bash
100- python scripts/gen_guru_data .py --local_dir DEFAULT_DATA_PATH
99+ python scripts/gen_guru_math_data .py --local_dir DEFAULT_DATA_PATH
101100 ```
102101 """
103102 if taskset_path :
@@ -108,7 +107,7 @@ def check_taskset_path(dataset_name: str, taskset_path: str) -> str:
108107
109108 dataset_script_map = {
110109 "countdown" : "gen_countdown_data.py" ,
111- "guru " : "gen_guru_data .py" ,
110+ "guru_math " : "gen_guru_math_data .py" ,
112111 }
113112 if dataset_name not in dataset_script_map :
114113 raise ValueError (
@@ -223,16 +222,21 @@ def main(args):
223222 dist .barrier ()
224223 dist .destroy_process_group ()
225224 cmd_list .append ("--dlc" )
226- if args .dataset == "guru" :
227- base_path = os .path .dirname (os .path .abspath (__file__ ))
225+
226+ # load plugins
227+ base_path = os .path .dirname (os .path .abspath (__file__ ))
228+ plugin_dir = os .path .join (base_path , "plugins" , args .dataset )
229+ if os .path .exists (plugin_dir ):
228230 cmd_list .append ("--plugin-dir" )
229- cmd_list .append (os .path .join (base_path , "plugins" ))
231+ cmd_list .append (plugin_dir )
232+
233+ # run command
230234 subprocess .run (cmd_list , check = True )
231235
232236
233237if __name__ == "__main__" :
234238 parser = argparse .ArgumentParser ()
235- parser .add_argument ("dataset" , type = str .lower , choices = ["gsm8k" , "countdown" , "guru " ])
239+ parser .add_argument ("dataset" , type = str .lower , choices = ["gsm8k" , "countdown" , "guru_math " ])
236240 parser .add_argument (
237241 "--dlc" , action = "store_true" , help = "Specify when running in Aliyun PAI DLC."
238242 )
0 commit comments