Skip to content

Commit a75711e

Browse files
author
The TensorFlow Datasets Authors
committed
Add naming fix for CroissantBuilder get_record_set.
PiperOrigin-RevId: 635857393
1 parent ecd99cd commit a75711e

File tree

2 files changed

+5
-8
lines changed

2 files changed

+5
-8
lines changed

tensorflow_datasets/core/dataset_builders/croissant_builder.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -42,15 +42,14 @@
4242
from tensorflow_datasets.core import dataset_builder
4343
from tensorflow_datasets.core import dataset_info
4444
from tensorflow_datasets.core import download
45-
from tensorflow_datasets.core import naming
4645
from tensorflow_datasets.core import split_builder as split_builder_lib
4746
from tensorflow_datasets.core import splits as splits_lib
4847
from tensorflow_datasets.core.features import feature as feature_lib
4948
from tensorflow_datasets.core.features import features_dict
5049
from tensorflow_datasets.core.features import image_feature
5150
from tensorflow_datasets.core.features import sequence_feature
5251
from tensorflow_datasets.core.features import text_feature
53-
from tensorflow_datasets.core.utils import py_utils
52+
from tensorflow_datasets.core.utils import huggingface_utils
5453
from tensorflow_datasets.core.utils import type_utils
5554
from tensorflow_datasets.core.utils import version as version_utils
5655
from tensorflow_datasets.core.utils.lazy_imports_utils import mlcroissant as mlc
@@ -174,9 +173,7 @@ def __init__(
174173
if mapping is None:
175174
mapping = {}
176175
self.dataset = mlc.Dataset(jsonld, mapping=mapping)
177-
self.name = py_utils.make_valid_name(
178-
naming.camelcase_to_snakecase(self.dataset.metadata.name)
179-
)
176+
self.name = huggingface_utils.convert_hf_name(self.dataset.metadata.name)
180177
self.metadata = self.dataset.metadata
181178

182179
# In TFDS, version is a mandatory attribute, while in Croissant it is only a
@@ -192,7 +189,7 @@ def __init__(
192189
record_set.id for record_set in self.metadata.record_sets
193190
]
194191
config_names = [
195-
py_utils.make_valid_name(naming.camelcase_to_snakecase(record_set))
192+
huggingface_utils.convert_hf_name(record_set)
196193
for record_set in record_set_ids
197194
]
198195
self.BUILDER_CONFIGS: Sequence[dataset_builder.BuilderConfig] = [ # pylint: disable=invalid-name
@@ -228,7 +225,7 @@ def _info(self) -> dataset_info.DatasetInfo:
228225
def get_record_set(self, record_set_id: str):
229226
"""Returns the desired record set from self.metadata."""
230227
for record_set in self.dataset.metadata.record_sets:
231-
if py_utils.make_valid_name(record_set.id) == record_set_id:
228+
if huggingface_utils.convert_hf_name(record_set.id) == record_set_id:
232229
return record_set
233230
raise ValueError(
234231
f'Did not find any record set with the name {record_set_id}.'

tensorflow_datasets/core/dataset_builders/croissant_builder_test.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ def setUpClass(cls):
194194
)
195195

196196
def test_dataset_info(self):
197-
assert self.builder.name == "dummy_dataset"
197+
assert self.builder.name == "dummydataset"
198198
assert self.builder.version == "1.2.0"
199199
assert (
200200
self.builder._info().citation

0 commit comments

Comments
 (0)