Skip to content

Commit 354315c

Browse files
author
The TensorFlow Datasets Authors
committed
Fix issue in audio conversion
PiperOrigin-RevId: 688126350
1 parent 7d88e04 commit 354315c

File tree

3 files changed

+10
-3
lines changed

3 files changed

+10
-3
lines changed

tensorflow_datasets/core/utils/conversion_utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,11 @@ def to_tfds_value(value: Any, feature: feature_lib.FeatureConnector) -> Any:
154154
case feature_lib.Audio():
155155
if (array := value.get('array')) is not None:
156156
# Hugging Face uses floats, TFDS uses integers.
157-
return [int(sample * feature.sample_rate) for sample in array]
157+
# Here we convert the float in [-1, 1] range into signed int32
158+
# range [-2**31, 2**31-1]. Nevertheless, the mantissa size of
159+
# float32 is 23 bits, therefore the maximum bit depth possible is 23.
160+
dtype = feature.dtype
161+
return (array * np.iinfo(dtype).max).astype(dtype=dtype)
158162
elif (path := value.get('path')) and (path := epath.Path(path)).exists():
159163
return path
160164
case feature_lib.Image():

tensorflow_datasets/core/utils/huggingface_utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,10 @@ def convert_hf_features(hf_features) -> feature_lib.FeatureConnector:
113113
case hf_datasets.Image():
114114
return feature_lib.Image(encoding_format=_IMAGE_ENCODING_FORMAT)
115115
case hf_datasets.Audio():
116-
return feature_lib.Audio(sample_rate=hf_features.sampling_rate)
116+
return feature_lib.Audio(
117+
sample_rate=hf_features.sampling_rate,
118+
dtype=np.int32,
119+
)
117120

118121
raise TypeError(f'Type {type(hf_features)} is not supported.')
119122

tensorflow_datasets/core/utils/huggingface_utils_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def test_convert_hf_features_raises_value_error():
100100
),
101101
(
102102
hf_datasets.Audio(sampling_rate=48000),
103-
feature_lib.Audio(sample_rate=48000),
103+
feature_lib.Audio(sample_rate=48000, dtype=np.int32),
104104
),
105105
],
106106
)

0 commit comments

Comments
 (0)