Skip to content

Commit cc46f9c

Browse files
author
The TensorFlow Datasets Authors
committed
CroissantBuilder supports sc:DateTime features.
PiperOrigin-RevId: 785862132
1 parent 5a8ac9c commit cc46f9c

File tree

2 files changed

+23
-6
lines changed

2 files changed

+23
-6
lines changed

tensorflow_datasets/core/dataset_builders/croissant_builder.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -134,10 +134,11 @@ def datatype_converter(
134134
np.float32.
135135
136136
Returns:
137-
Converted datatype for TFDS.
137+
Converted datatype for TFDS, or None when a Field does not specify a type.
138138
139139
Raises:
140-
NotImplementedError
140+
NotImplementedError when the feature is not supported yet, or ValueError
141+
when a Field is malformed.
141142
"""
142143
if field.is_enumeration:
143144
raise NotImplementedError('Not implemented yet.')
@@ -151,7 +152,7 @@ def datatype_converter(
151152
field_data_type = field.data_type
152153

153154
if not field_data_type:
154-
# Fields with sub fields are of type None
155+
# Fields with sub fields are of type None.
155156
if field.sub_fields:
156157
feature = features_dict.FeaturesDict(
157158
{
@@ -170,8 +171,8 @@ def datatype_converter(
170171
feature = dtype_mapping[field_data_type]
171172
elif enp.lazy.is_np_dtype(field_data_type):
172173
feature = field_data_type
173-
# We return a text feature for mlc.DataType.DATE and mlc.DataType.TIME
174-
# features.
174+
# We return a text feature for date-time features (mlc.DataType.DATE,
175+
# mlc.DataType.DATETIME, and mlc.DataType.TIME).
175176
elif field_data_type == pd.Timestamp or field_data_type == datetime.time:
176177
feature = text_feature.Text(doc=field.description)
177178
elif field_data_type == mlc.DataType.IMAGE_OBJECT:
@@ -195,7 +196,9 @@ def datatype_converter(
195196
doc=field.description, sample_rate=field.source.sampling_rate
196197
)
197198
else:
198-
raise ValueError(f'Unknown data type: {field_data_type}.')
199+
raise ValueError(
200+
f'Unknown data type: {field_data_type} for field {field.id}.'
201+
)
199202

200203
if feature and field.is_array:
201204
feature = array_datatype_converter(

tensorflow_datasets/core/dataset_builders/croissant_builder_test.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -165,6 +165,13 @@ def test_bbox_datatype_converter_with_invalid_format():
165165
text_feature.Text,
166166
None,
167167
),
168+
(
169+
mlc.Field(
170+
data_types=mlc.DataType.DATETIME, description="DateTime feature"
171+
),
172+
text_feature.Text,
173+
None,
174+
),
168175
(
169176
mlc.Field(data_types=mlc.DataType.TIME, description="Time feature"),
170177
text_feature.Text,
@@ -223,6 +230,13 @@ def test_complex_datatype_converter(field, feature_type, subfield_types):
223230
)
224231

225232

233+
def test_datatype_converter_none():
234+
field = mlc.Field(
235+
name="my_field", id="my_field", description="Field with empty data type."
236+
)
237+
assert croissant_builder.datatype_converter(field) is None
238+
239+
226240
def test_multidimensional_datatype_converter():
227241
field = mlc.Field(
228242
data_types=mlc.DataType.TEXT,

0 commit comments

Comments
 (0)