Skip to content

Commit 24c1e98

Browse files
tomvdw and The TensorFlow Datasets Authors
authored and committed
Do not skip converting files if the out folder is different from the in folder
PiperOrigin-RevId: 651741618
1 parent 13d46ae commit 24c1e98

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

tensorflow_datasets/scripts/cli/convert_format_utils.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ def _convert_dataset_dirs(
275275
logging.info('Converting %d datasets.', len(from_to_dirs))
276276

277277
found_dataset_versions: dict[epath.Path, dataset_info.DatasetInfo] = {}
278-
for from_dir in from_to_dirs.keys():
278+
for from_dir, to_dir in from_to_dirs.items():
279279
builder = read_only_builder_lib.builder_from_directory(from_dir)
280280
if out_file_format == builder.info.file_format:
281281
raise ValueError(
@@ -289,14 +289,21 @@ def _convert_dataset_dirs(
289289
' format. Overwriting the shards!',
290290
out_file_format.value,
291291
)
292-
else:
292+
elif from_dir == to_dir:
293293
logging.info(
294294
'The file format to convert to (%s) is already an alternative file'
295295
' format of the dataset in %s. Skipping conversion.',
296296
os.fspath(from_dir),
297297
out_file_format.value,
298298
)
299299
continue
300+
else:
301+
logging.warning(
302+
'The file format to convert to (%s) is already an alternative file'
303+
' format, but the converted output is being written to a different'
304+
' folder, so the shards will be converted anyway.',
305+
out_file_format.value,
306+
)
300307
found_dataset_versions[from_dir] = builder.info
301308

302309
convert_dataset_fn = functools.partial(

0 commit comments

Comments
 (0)