Skip to content

Commit 606754a

Browse files
author
The TensorFlow Datasets Authors
committed
Add some clarification to Shuffler error message.
PiperOrigin-RevId: 713225767
1 parent 1a8fed7 commit 606754a

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

tensorflow_datasets/core/shuffle.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import math
2020
import os
2121
import struct
22+
import typing
2223
from typing import Optional
2324
import uuid
2425
from absl import logging
@@ -280,7 +281,9 @@ def add(self, key: type_utils.Key, data: bytes) -> bool:
280281
raise AssertionError('add() cannot be called after __iter__.')
281282
if not isinstance(data, bytes):
282283
raise AssertionError(
283-
f'Only bytes (not {type(data)}) can be stored in Shuffler!'
284+
f'Only bytes (not {type(data)}) can be stored in Shuffler! This'
285+
' likely indicates that non-integer keys were used when generating'
286+
' the dataset.'
284287
)
285288
hkey = self._hasher.hash_key(key)
286289
if self._ignore_duplicates:
@@ -289,7 +292,7 @@ def add(self, key: type_utils.Key, data: bytes) -> bool:
289292
self._seen_keys.add(hkey)
290293
if self._disable_shuffling:
291294
# Use the original key and not the hashed key to maintain the order.
292-
hkey = key
295+
hkey = typing.cast(int, key)
293296
self._total_bytes += len(data)
294297
if self._in_memory:
295298
self._add_to_mem_buffer(hkey, data)

0 commit comments

Comments
 (0)