@@ -226,17 +226,21 @@ def __init__(
226
226
)
227
227
self .VERSION = version_lib .Version (version ) # pylint: disable=invalid-name
228
228
self .name = conversion_utils .to_tfds_name (hf_repo_id )
229
+ self .homepage = f'https://huggingface.co/datasets/{ hf_repo_id } '
229
230
self ._hf_hub_token = hf_hub_token
230
231
self ._hf_num_proc = hf_num_proc
231
232
self ._tfds_num_proc = tfds_num_proc
232
233
self ._verification_mode = (
233
234
'no_checks' if ignore_verifications else 'all_checks'
234
235
)
235
236
if self ._hf_config :
237
+ description = self ._get_text_field ('description' )
238
+ if self ._is_gated ():
239
+ description = self ._gated_text + '\n ' + description
236
240
self ._converted_builder_config = dataset_builder .BuilderConfig (
237
241
name = tfds_config ,
238
242
version = self .VERSION ,
239
- description = self . _get_text_field ( ' description' ) ,
243
+ description = description ,
240
244
)
241
245
else :
242
246
self ._converted_builder_config = None
@@ -277,6 +281,48 @@ def _hf_hub_info(self) -> huggingface_hub.hf_api.DatasetInfo:
277
281
self ._hf_repo_id , token = self ._hf_hub_token
278
282
)
279
283
284
+ def _is_gated (self ) -> bool :
285
+ """Whether the dataset is gated."""
286
+ # Gated datasets return a string ('manual' or 'automatic').
287
+ if isinstance (self ._hf_hub_info .gated , str ):
288
+ return True
289
+ return False
290
+
291
+ @property
292
+ def _gated_dataset_warning (self ) -> str :
293
+ """The warning message for a gated dataset."""
294
+ return (
295
+ 'WARNING: This dataset is gated. Before using it, make sure to sign'
296
+ f' the conditions at: { self .homepage } . Important: access requests are'
297
+ ' always granted to individual users rather than to entire'
298
+ ' organizations.'
299
+ )
300
+
301
+ @property
302
+ def _gated_text (self ) -> str | None :
303
+ """Returns the conditions for a dataset, if it is gated.
304
+
305
+ All datasets share the same default conditions. Extra conditions are stored
306
+ in the dataset card:
307
+ https://huggingface.co/docs/hub/en/datasets-gated
308
+
309
+ Returns:
310
+ The gated text if the dataset is gated. None otherwise.
311
+ """
312
+ if self ._is_gated ():
313
+ # This condition is the same for all gated datasets.
314
+ conditions = (
315
+ 'The conditions consist of:\n By agreeing you accept to share your'
316
+ ' contact information (email and username) with the repository'
317
+ ' authors.'
318
+ )
319
+ if dataset_card := self ._hf_hub_info .card_data :
320
+ gated_text = dataset_card .get ('extra_gated_prompt' , None )
321
+ if gated_text :
322
+ conditions = conditions + '\n ' + gated_text
323
+ return self ._gated_dataset_warning + '\n ' + conditions
324
+ return None
325
+
280
326
def _hf_features (self ) -> hf_datasets .Features :
281
327
if not self ._hf_info .features :
282
328
# We need to download and prepare the data to know its features.
@@ -285,13 +331,19 @@ def _hf_features(self) -> hf_datasets.Features:
285
331
return self ._hf_info .features
286
332
287
333
def _info(self) -> dataset_info_lib.DatasetInfo:
    """Builds the `DatasetInfo` describing this dataset.

    For a gated dataset the gated text is prepended to the description and the
    gated dataset warning is appended to the license.

    Returns:
        A `dataset_info_lib.DatasetInfo` populated with the description,
        converted features, citation, license, supervised keys and homepage.
    """
    ds_description = self._get_text_field('description')
    ds_license = self._get_license()
    if self._is_gated():
        gated_warning = self._gated_dataset_warning
        # NOTE(review): presumably `_get_text_field` / `_get_license` can yield
        # None or '' when the metadata is missing -- confirm against their
        # definitions. Plain `+` concatenation would raise TypeError on None,
        # so missing values are tolerated here.
        ds_description = self._gated_text + '\n ' + (ds_description or '')
        ds_license = (
            ds_license + ' ' + gated_warning if ds_license else gated_warning
        )
    return dataset_info_lib.DatasetInfo(
        builder=self,
        description=ds_description,
        features=huggingface_utils.convert_hf_features(self._hf_features()),
        citation=self._get_text_field('citation'),
        license=ds_license,
        supervised_keys=_extract_supervised_keys(self._hf_info),
        homepage=self.homepage,
    )
296
348
297
349
def _split_generators (
0 commit comments