Skip to content

Commit 65c6c60

Browse files
author
The TensorFlow Datasets Authors
committed
Fix bug in get_file_instructions when physical length differs from logical length.
PiperOrigin-RevId: 853730522
1 parent 6bde1a6 commit 65c6c60

File tree

2 files changed

+26
-1
lines changed

2 files changed

+26
-1
lines changed

tensorflow_datasets/core/utils/shard_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -249,7 +249,7 @@ def get_file_instructions(
249249
if examples_in_shards is not None:
250250
examples_in_shard = examples_in_shards[shard_index]
251251
if take == -1 and examples_in_shard != length:
252-
take = length
252+
take = length - skip
253253
else:
254254
examples_in_shard = length
255255
file_instructions.append(

tensorflow_datasets/core/utils/shard_utils_test.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,31 @@ def test_nothing_to_read(self):
155155
)
156156
self.assertEqual(res, [])
157157

158+
def test_get_file_instructions_with_subset_and_skip(self):
159+
logical_length = 412
160+
physical_length = 449
161+
from_index = 156
162+
to_index = 412
163+
164+
res = shard_utils.get_file_instructions(
165+
from_=from_index,
166+
to=to_index,
167+
filenames=['shard_0.tfrecord'],
168+
shard_lengths=[logical_length],
169+
examples_in_shards=[physical_length],
170+
)
171+
self.assertEqual(
172+
res,
173+
[
174+
shard_utils.FileInstruction(
175+
filename='shard_0.tfrecord',
176+
skip=from_index,
177+
take=logical_length - from_index,
178+
examples_in_shard=physical_length,
179+
)
180+
],
181+
)
182+
158183
def test_split_file_instruction(self):
159184
filename = 'data.tfrecord'
160185
file_instruction = shard_utils.FileInstruction(

0 commit comments

Comments
 (0)