File tree Expand file tree Collapse file tree 1 file changed +40
-0
lines changed
Expand file tree Collapse file tree 1 file changed +40
-0
lines changed Original file line number Diff line number Diff line change 1+ import os
2+ import argparse
3+ from datasets import Dataset , load_dataset
4+ from tqdm import tqdm
5+
6+
def make_map_fn(split):
    """Build the per-example converter used to reshape dataset rows.

    Args:
        split: Name of the dataset split; it is copied verbatim into each
            record's ``extra_info["split"]`` field.

    Returns:
        A callable ``process_fn(example, idx)`` that maps one source row
        (which must provide ``'conversations'`` and ``'id'``) and its index
        to a dict with the keys ``data_source``, ``prompt``, ``ability``,
        ``reward_model`` and ``extra_info``.
    """

    def process_fn(example, idx):
        # Read the source fields in the same order the record lists them.
        prompt = example["conversations"]

        # No reward signal is attached: the style is "none" and there is
        # no ground truth value.
        reward_model = {"style": "none", "ground_truth": None}

        # Provenance of the row: originating split, row index, source id.
        extra_info = {"split": split, "index": idx, "id": example["id"]}

        record = {}
        record["data_source"] = "openmanus-rl"
        record["prompt"] = prompt
        record["ability"] = "instruction-following"
        record["reward_model"] = reward_model
        record["extra_info"] = extra_info
        return record

    return process_fn
24+
25+
if __name__ == '__main__':
    # Command-line entry point: download one split of the OpenManus-RL
    # dataset, convert every row with make_map_fn, and write the result to
    # "<output_dir>/<split>.parquet".
    parser = argparse.ArgumentParser(
        description="Convert a split of CharlieDreemur/OpenManus-RL to parquet."
    )
    parser.add_argument('--output_dir', required=True, help="Output directory for processed parquet")
    parser.add_argument('--split', type=str, default="train", help="Dataset split to load and convert")

    args = parser.parse_args()

    # Load from Hugging Face Hub
    dataset = load_dataset("CharlieDreemur/OpenManus-RL", split=args.split)

    # Apply mapping to Verl format. with_indices=True makes map() pass the
    # row index as the second argument expected by the mapping function.
    dataset = dataset.map(function=make_map_fn(args.split), with_indices=True)

    # Create the output directory if needed; an existing one is not an error.
    os.makedirs(args.output_dir, exist_ok=True)

    # The file is named exactly "<split>.parquet" with no embedded whitespace,
    # so downstream consumers can locate it by split name.
    output_path = os.path.join(args.output_dir, f"{args.split}.parquet")
    dataset.to_parquet(output_path)
You can’t perform that action at this time.
0 commit comments