@@ -61,9 +61,14 @@ class ShardInstruction:
61
61
62
62
def convert (self ) -> None :
63
63
"""Converts the shard to the desired file format."""
64
+
64
65
def read_in () -> Iterator [type_utils .KeySerializedExample ]:
65
66
in_dataset = self .in_file_adapter .make_tf_data (filename = self .in_path )
66
- for i , row in tqdm .tqdm (enumerate (in_dataset )):
67
+ for i , row in tqdm .tqdm (
68
+ enumerate (in_dataset ),
69
+ unit = ' examples' ,
70
+ desc = f'Shard { self .in_path .name } ' ,
71
+ ):
67
72
if self .convert_fn is not None :
68
73
yield i , self .convert_fn (row )
69
74
else :
@@ -258,7 +263,11 @@ def _convert_dataset(
258
263
)
259
264
260
265
else :
261
- for shard_instruction in shard_instructions :
266
+ for shard_instruction in tqdm .tqdm (
267
+ shard_instructions ,
268
+ unit = ' shards' ,
269
+ desc = f'Shards in { os .fspath (dataset_dir )} ' ,
270
+ ):
262
271
shard_instruction .convert ()
263
272
264
273
@@ -357,7 +366,9 @@ def _convert_dataset_dirs(
357
366
out_dir = out_dir ,
358
367
)
359
368
else :
360
- for dataset_dir , info in found_dataset_versions .items ():
369
+ for dataset_dir , info in tqdm .tqdm (
370
+ found_dataset_versions .items (), unit = ' datasets'
371
+ ):
361
372
out_dir = from_to_dirs [dataset_dir ]
362
373
convert_dataset_fn (
363
374
info = info ,
@@ -366,7 +377,9 @@ def _convert_dataset_dirs(
366
377
)
367
378
368
379
logging .info ('All shards have been converted. Now converting metadata.' )
369
- for dataset_dir , info in tqdm .tqdm (found_dataset_versions .items ()):
380
+ for dataset_dir , info in tqdm .tqdm (
381
+ found_dataset_versions .items (), unit = ' datasets'
382
+ ):
370
383
out_dir = from_to_dirs [dataset_dir ]
371
384
logging .info ('Converting metadata in %s.' , dataset_dir )
372
385
convert_metadata (
0 commit comments