@@ -217,15 +217,17 @@ def __init__(
217
217
"""
218
218
if mapping is None :
219
219
mapping = {}
220
- self .dataset = mlc .Dataset (jsonld , mapping = mapping )
221
- self .name = croissant_utils .get_tfds_dataset_name (self .dataset )
222
- self .metadata = self .dataset .metadata
220
+ self .jsonld = jsonld
221
+ self .mapping = mapping
222
+ dataset = mlc .Dataset (jsonld , mapping = mapping )
223
+ self .name = croissant_utils .get_tfds_dataset_name (dataset )
224
+ self .metadata = dataset .metadata
223
225
224
226
# In TFDS, version is a mandatory attribute, while in Croissant it is only a
225
227
# recommended attribute. If the version is unspecified in Croissant, we set
226
228
# it to `1.0.0` in TFDS.
227
229
self .VERSION = version_lib .Version ( # pylint: disable=invalid-name
228
- overwrite_version or self .dataset . metadata .version or '1.0.0'
230
+ overwrite_version or self .metadata .version or '1.0.0'
229
231
)
230
232
self .RELEASE_NOTES = {} # pylint: disable=invalid-name
231
233
@@ -260,11 +262,11 @@ def builder_config(self) -> dataset_builder.BuilderConfig:
260
262
def _info(self) -> dataset_info.DatasetInfo:
  """Assembles the `DatasetInfo` for this builder.

  The description, homepage, citation and license are read from
  `self.metadata`; the features come from `self.get_features()` and the
  shuffling flag from `self._disable_shuffling`.

  Returns:
    The `dataset_info.DatasetInfo` describing this builder.
  """
  croissant_metadata = self.metadata
  info_kwargs = dict(
      builder=self,
      description=croissant_metadata.description,
      features=self.get_features(),
      homepage=croissant_metadata.url,
      citation=croissant_metadata.cite_as,
      # The license is derived by the module-level helper rather than read
      # directly from a metadata attribute.
      license=_get_license(croissant_metadata),
      disable_shuffling=self._disable_shuffling,
  )
  return dataset_info.DatasetInfo(**info_kwargs)
270
272
@@ -331,7 +333,8 @@ def _generate_examples(
331
333
record_set = croissant_utils .get_record_set (
332
334
self .builder_config .name , metadata = self .metadata
333
335
)
334
- records = self .dataset .records (record_set .id , filters = filters )
336
+ dataset = mlc .Dataset (self .jsonld , mapping = self .mapping )
337
+ records = dataset .records (record_set .id , filters = filters )
335
338
336
339
def convert_to_tfds_format (
337
340
global_index : int ,
0 commit comments