Skip to content

Commit ecd99cd

Browse files
author
The TensorFlow Datasets Authors
committed
Update dataset name and config names for CroissantBuilder.
PiperOrigin-RevId: 635736242
1 parent 4ba2eca commit ecd99cd

File tree

2 files changed

+7
-3
lines changed

2 files changed

+7
-3
lines changed

tensorflow_datasets/core/dataset_builders/croissant_builder.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242
from tensorflow_datasets.core import dataset_builder
4343
from tensorflow_datasets.core import dataset_info
4444
from tensorflow_datasets.core import download
45+
from tensorflow_datasets.core import naming
4546
from tensorflow_datasets.core import split_builder as split_builder_lib
4647
from tensorflow_datasets.core import splits as splits_lib
4748
from tensorflow_datasets.core.features import feature as feature_lib
@@ -173,7 +174,9 @@ def __init__(
173174
if mapping is None:
174175
mapping = {}
175176
self.dataset = mlc.Dataset(jsonld, mapping=mapping)
176-
self.name = py_utils.make_valid_name(self.dataset.metadata.name)
177+
self.name = py_utils.make_valid_name(
178+
naming.camelcase_to_snakecase(self.dataset.metadata.name)
179+
)
177180
self.metadata = self.dataset.metadata
178181

179182
# In TFDS, version is a mandatory attribute, while in Croissant it is only a
@@ -189,7 +192,8 @@ def __init__(
189192
record_set.id for record_set in self.metadata.record_sets
190193
]
191194
config_names = [
192-
py_utils.make_valid_name(record_set) for record_set in record_set_ids
195+
py_utils.make_valid_name(naming.camelcase_to_snakecase(record_set))
196+
for record_set in record_set_ids
193197
]
194198
self.BUILDER_CONFIGS: Sequence[dataset_builder.BuilderConfig] = [ # pylint: disable=invalid-name
195199
dataset_builder.BuilderConfig(name=config_name)

tensorflow_datasets/core/dataset_builders/croissant_builder_test.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ def setUpClass(cls):
194194
)
195195

196196
def test_dataset_info(self):
197-
assert self.builder.name == "DummyDataset"
197+
assert self.builder.name == "dummy_dataset"
198198
assert self.builder.version == "1.2.0"
199199
assert (
200200
self.builder._info().citation

0 commit comments

Comments
 (0)