Skip to content

Commit e1e02a2

Browse files
author
The TensorFlow Datasets Authors
committed
Add conversion options from non-TFDS to TFDS valid features.
PiperOrigin-RevId: 650913004
1 parent e9d777a commit e1e02a2

File tree

2 files changed

+12
-0
lines changed

2 files changed

+12
-0
lines changed

tensorflow_datasets/core/utils/huggingface_utils.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -238,6 +238,12 @@ def convert_hf_value(
238238
# Ensure RGB format for PNG encoding.
239239
return hf_value.convert('RGB')
240240
case feature_lib.Tensor():
241+
if isinstance(hf_value, float):
242+
# In some cases, for example when loading jsonline files using pandas,
243+
# empty non-float values, such as strings, are converted to float nan.
244+
# We spot those occurrences as the feature.np_dtype is not float.
245+
if np.isnan(hf_value) and not dtype_utils.is_floating(feature.np_dtype):
246+
return _get_default_value(feature)
241247
return hf_value
242248

243249
raise TypeError(

tensorflow_datasets/core/utils/huggingface_utils_test.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -211,6 +211,12 @@ def test_convert_value_raises(hf_value, feature):
211211
),
212212
{'foo': b''},
213213
),
214+
# nan, but the feature type is not float
215+
(
216+
np.nan,
217+
feature_lib.Text(),
218+
b'',
219+
),
214220
],
215221
)
216222
def test_convert_value(hf_value, feature, expected_value):

0 commit comments

Comments
 (0)