Skip to content

Commit 7995954

Browse files
committed
[Perf] Add a pretty-printed preview of the first sample of the dataset
1 parent 668b994 commit 7995954

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

data/generate_sft_verl.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import argparse
33
from datasets import Dataset, load_dataset
44
from tqdm import tqdm
5-
5+
from pprint import pprint
66

77
def make_map_fn(split):
88
def process_fn(example, idx):
@@ -36,5 +36,8 @@ def process_fn(example, idx):
3636
# Apply mapping to Verl format
3737
dataset = dataset.map(function=make_map_fn(args.split), with_indices=True)
3838

39+
# Save the dataset, then pretty-print its first sample as a preview
3940
os.makedirs(args.output_dir, exist_ok=True)
4041
dataset.to_parquet(os.path.join(args.output_dir, f"{args.split}.parquet"))
42+
43+
pprint(dataset[0])

0 commit comments

Comments
 (0)