Skip to content

Commit b60ebb8

Browse files
Updated inconsistent output in documentation examples for ClassLabel (#7293)
* Updated ClassLabel output in features.py * Updated ClassLabel output in load.py * Updated ClassLabel output in iterable_dataset.py * Updated ClassLabel output in dataset_dict.py * Updated ClassLabel output in builder.py * Updated ClassLabel output in arrow_dataset.py * Updated docs * Added missing comma * Updated python code * Update src/datasets/builder.py Co-authored-by: Steven Liu <[email protected]> --------- Co-authored-by: Steven Liu <[email protected]>
1 parent c9d3450 commit b60ebb8

File tree

11 files changed

+34
-34
lines changed

11 files changed

+34
-34
lines changed

docs/source/about_dataset_features.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ Let's have a look at the features of the MRPC dataset from the GLUE benchmark:
1111
>>> dataset = load_dataset('glue', 'mrpc', split='train')
1212
>>> dataset.features
1313
{'idx': Value(dtype='int32', id=None),
14-
'label': ClassLabel(num_classes=2, names=['not_equivalent', 'equivalent'], names_file=None, id=None),
14+
'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None),
1515
'sentence1': Value(dtype='string', id=None),
1616
'sentence2': Value(dtype='string', id=None),
1717
}

docs/source/load_hub.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ Movie Review Dataset. This is a dataset of containing 5,331 positive and 5,331 n
2020

2121
# Inspect dataset features
2222
>>> ds_builder.info.features
23-
{'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None),
23+
{'label': ClassLabel(names=['neg', 'pos'], id=None),
2424
'text': Value(dtype='string', id=None)}
2525
```
2626

docs/source/loading.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ Now when you look at your dataset features, you can see it uses the custom label
435435
```py
436436
>>> dataset['train'].features
437437
{'text': Value(dtype='string', id=None),
438-
'label': ClassLabel(num_classes=6, names=['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'], names_file=None, id=None)}
438+
'label': ClassLabel(names=['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'], id=None)}
439439
```
440440

441441
## (Legacy) Local loading script

docs/source/process.mdx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ The [`~Dataset.cast`] function transforms the feature type of one or more column
225225
>>> dataset.features
226226
{'sentence1': Value(dtype='string', id=None),
227227
'sentence2': Value(dtype='string', id=None),
228-
'label': ClassLabel(num_classes=2, names=['not_equivalent', 'equivalent'], names_file=None, id=None),
228+
'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None),
229229
'idx': Value(dtype='int32', id=None)}
230230

231231
>>> from datasets import ClassLabel, Value
@@ -236,7 +236,7 @@ The [`~Dataset.cast`] function transforms the feature type of one or more column
236236
>>> dataset.features
237237
{'sentence1': Value(dtype='string', id=None),
238238
'sentence2': Value(dtype='string', id=None),
239-
'label': ClassLabel(num_classes=2, names=['negative', 'positive'], names_file=None, id=None),
239+
'label': ClassLabel(names=['negative', 'positive'], id=None),
240240
'idx': Value(dtype='int64', id=None)}
241241
```
242242

docs/source/stream.mdx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ When you need to remove one or more columns, give [`IterableDataset.remove_colum
229229
>>> dataset.features
230230
{'sentence1': Value(dtype='string', id=None),
231231
'sentence2': Value(dtype='string', id=None),
232-
'label': ClassLabel(num_classes=2, names=['not_equivalent', 'equivalent'], names_file=None, id=None),
232+
'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None),
233233
'idx': Value(dtype='int32', id=None)}
234234

235235
>>> from datasets import ClassLabel, Value
@@ -240,7 +240,7 @@ When you need to remove one or more columns, give [`IterableDataset.remove_colum
240240
>>> dataset.features
241241
{'sentence1': Value(dtype='string', id=None),
242242
'sentence2': Value(dtype='string', id=None),
243-
'label': ClassLabel(num_classes=2, names=['negative', 'positive'], names_file=None, id=None),
243+
'label': ClassLabel(names=['negative', 'positive'], id=None),
244244
'idx': Value(dtype='int64', id=None)}
245245
```
246246

src/datasets/arrow_dataset.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2023,14 +2023,14 @@ def cast(
20232023
>>> from datasets import load_dataset, ClassLabel, Value
20242024
>>> ds = load_dataset("rotten_tomatoes", split="validation")
20252025
>>> ds.features
2026-
{'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None),
2026+
{'label': ClassLabel(names=['neg', 'pos'], id=None),
20272027
'text': Value(dtype='string', id=None)}
20282028
>>> new_features = ds.features.copy()
20292029
>>> new_features['label'] = ClassLabel(names=['bad', 'good'])
20302030
>>> new_features['text'] = Value('large_string')
20312031
>>> ds = ds.cast(new_features)
20322032
>>> ds.features
2033-
{'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None),
2033+
{'label': ClassLabel(names=['bad', 'good'], id=None),
20342034
'text': Value(dtype='large_string', id=None)}
20352035
```
20362036
"""
@@ -2078,14 +2078,14 @@ def cast_column(self, column: str, feature: FeatureType, new_fingerprint: Option
20782078
Example:
20792079
20802080
```py
2081-
>>> from datasets import load_dataset
2081+
>>> from datasets import load_dataset, ClassLabel
20822082
>>> ds = load_dataset("rotten_tomatoes", split="validation")
20832083
>>> ds.features
2084-
{'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None),
2084+
{'label': ClassLabel(names=['neg', 'pos'], id=None),
20852085
'text': Value(dtype='string', id=None)}
20862086
>>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good']))
20872087
>>> ds.features
2088-
{'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None),
2088+
{'label': ClassLabel(names=['bad', 'good'], id=None),
20892089
'text': Value(dtype='string', id=None)}
20902090
```
20912091
"""

src/datasets/builder.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -510,9 +510,9 @@ def get_all_exported_dataset_infos(cls) -> DatasetInfosDict:
510510
511511
```py
512512
>>> from datasets import load_dataset_builder
513-
>>> ds_builder = load_dataset_builder('rotten_tomatoes')
513+
>>> ds_builder = load_dataset_builder('vivos')
514514
>>> ds_builder.get_all_exported_dataset_infos()
515-
{'default': DatasetInfo(description="Movie Review Dataset.\nThis is a dataset of containing 5,331 positive and 5,331 negative processed\nsentences from Rotten Tomatoes movie reviews. This data was first used in Bo\nPang and Lillian Lee, ``Seeing stars: Exploiting class relationships for\nsentiment categorization with respect to rating scales.'', Proceedings of the\nACL, 2005.\n", citation='@InProceedings{Pang+Lee:05a,\n author = {Bo Pang and Lillian Lee},\n title = {Seeing stars: Exploiting class relationships for sentiment\n categorization with respect to rating scales},\n booktitle = {Proceedings of the ACL},\n year = 2005\n}\n', homepage='http://www.cs.cornell.edu/people/pabo/movie-review-data/', license='', features={'text': Value(dtype='string', id=None), 'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None)}, post_processed=None, supervised_keys=SupervisedKeysData(input='', output=''), builder_name='rotten_tomatoes_movie_review', config_name='default', version=1.0.0, splits={'train': SplitInfo(name='train', num_bytes=1074810, num_examples=8530, dataset_name='rotten_tomatoes_movie_review'), 'validation': SplitInfo(name='validation', num_bytes=134679, num_examples=1066, dataset_name='rotten_tomatoes_movie_review'), 'test': SplitInfo(name='test', num_bytes=135972, num_examples=1066, dataset_name='rotten_tomatoes_movie_review')}, download_checksums={'https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz': {'num_bytes': 487770, 'checksum': 'a05befe52aafda71d458d188a1c54506a998b1308613ba76bbda2e5029409ce9'}}, download_size=487770, post_processing_size=None, dataset_size=1345461, size_in_bytes=1833231)}
515+
{'default': DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value(dtype='string', id=None), 'path': Value(dtype='string', id=None), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value(dtype='string', id=None)}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None)}
516516
```
517517
"""
518518
return DatasetInfosDict.from_directory(cls.get_imported_module_dir())
@@ -526,7 +526,7 @@ def get_exported_dataset_info(self) -> DatasetInfo:
526526
>>> from datasets import load_dataset_builder
527527
>>> ds_builder = load_dataset_builder('vivos')
528528
>>> ds_builder.get_exported_dataset_info()
529-
DatasetInfo(description="Movie Review Dataset.\nThis is a dataset of containing 5,331 positive and 5,331 negative processed\nsentences from Rotten Tomatoes movie reviews. This data was first used in Bo\nPang and Lillian Lee, ``Seeing stars: Exploiting class relationships for\nsentiment categorization with respect to rating scales.'', Proceedings of the\nACL, 2005.\n", citation='@InProceedings{Pang+Lee:05a,\n author = {Bo Pang and Lillian Lee},\n title = {Seeing stars: Exploiting class relationships for sentiment\n categorization with respect to rating scales},\n booktitle = {Proceedings of the ACL},\n year = 2005\n}\n', homepage='http://www.cs.cornell.edu/people/pabo/movie-review-data/', license='', features={'text': Value(dtype='string', id=None), 'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None)}, post_processed=None, supervised_keys=SupervisedKeysData(input='', output=''), builder_name='rotten_tomatoes_movie_review', config_name='default', version=1.0.0, splits={'train': SplitInfo(name='train', num_bytes=1074810, num_examples=8530, dataset_name='rotten_tomatoes_movie_review'), 'validation': SplitInfo(name='validation', num_bytes=134679, num_examples=1066, dataset_name='rotten_tomatoes_movie_review'), 'test': SplitInfo(name='test', num_bytes=135972, num_examples=1066, dataset_name='rotten_tomatoes_movie_review')}, download_checksums={'https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz': {'num_bytes': 487770, 'checksum': 'a05befe52aafda71d458d188a1c54506a998b1308613ba76bbda2e5029409ce9'}}, download_size=487770, post_processing_size=None, dataset_size=1345461, size_in_bytes=1833231)
529+
DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value(dtype='string', id=None), 'path': Value(dtype='string', id=None), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value(dtype='string', id=None)}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None)
530530
```
531531
"""
532532
return self.get_all_exported_dataset_infos().get(self.config.name, DatasetInfo())

src/datasets/dataset_dict.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -275,17 +275,17 @@ def cast(self, features: Features) -> "DatasetDict":
275275
Example:
276276
277277
```py
278-
>>> from datasets import load_dataset
278+
>>> from datasets import load_dataset, ClassLabel, Value
279279
>>> ds = load_dataset("rotten_tomatoes")
280280
>>> ds["train"].features
281-
{'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None),
281+
{'label': ClassLabel(names=['neg', 'pos'], id=None),
282282
'text': Value(dtype='string', id=None)}
283283
>>> new_features = ds["train"].features.copy()
284284
>>> new_features['label'] = ClassLabel(names=['bad', 'good'])
285285
>>> new_features['text'] = Value('large_string')
286286
>>> ds = ds.cast(new_features)
287287
>>> ds["train"].features
288-
{'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None),
288+
{'label': ClassLabel(names=['bad', 'good'], id=None),
289289
'text': Value(dtype='large_string', id=None)}
290290
```
291291
"""
@@ -307,14 +307,14 @@ def cast_column(self, column: str, feature) -> "DatasetDict":
307307
Example:
308308
309309
```py
310-
>>> from datasets import load_dataset
310+
>>> from datasets import load_dataset, ClassLabel
311311
>>> ds = load_dataset("rotten_tomatoes")
312312
>>> ds["train"].features
313-
{'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None),
313+
{'label': ClassLabel(names=['neg', 'pos'], id=None),
314314
'text': Value(dtype='string', id=None)}
315315
>>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good']))
316316
>>> ds["train"].features
317-
{'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None),
317+
{'label': ClassLabel(names=['bad', 'good'], id=None),
318318
'text': Value(dtype='string', id=None)}
319319
```
320320
"""
@@ -2201,14 +2201,14 @@ def cast_column(self, column: str, feature: FeatureType) -> "IterableDatasetDict
22012201
Example:
22022202
22032203
```py
2204-
>>> from datasets import load_dataset
2204+
>>> from datasets import load_dataset, ClassLabel
22052205
>>> ds = load_dataset("rotten_tomatoes", streaming=True)
22062206
>>> ds["train"].features
2207-
{'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None),
2207+
{'label': ClassLabel(names=['neg', 'pos'], id=None),
22082208
'text': Value(dtype='string', id=None)}
22092209
>>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good']))
22102210
>>> ds["train"].features
2211-
{'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None),
2211+
{'label': ClassLabel(names=['bad', 'good'], id=None),
22122212
'text': Value(dtype='string', id=None)}
22132213
```
22142214
"""
@@ -2240,14 +2240,14 @@ def cast(
22402240
>>> from datasets import load_dataset, ClassLabel, Value
22412241
>>> ds = load_dataset("rotten_tomatoes", streaming=True)
22422242
>>> ds["train"].features
2243-
{'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None),
2243+
{'label': ClassLabel(names=['neg', 'pos'], id=None),
22442244
'text': Value(dtype='string', id=None)}
22452245
>>> new_features = ds["train"].features.copy()
22462246
>>> new_features['label'] = ClassLabel(names=['bad', 'good'])
22472247
>>> new_features['text'] = Value('large_string')
22482248
>>> ds = ds.cast(new_features)
22492249
>>> ds["train"].features
2250-
{'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None),
2250+
{'label': ClassLabel(names=['bad', 'good'], id=None),
22512251
'text': Value(dtype='large_string', id=None)}
22522252
```
22532253
"""

src/datasets/features/features.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -966,10 +966,10 @@ class ClassLabel:
966966
Example:
967967
968968
```py
969-
>>> from datasets import Features
969+
>>> from datasets import Features, ClassLabel
970970
>>> features = Features({'label': ClassLabel(num_classes=3, names=['bad', 'ok', 'good'])})
971971
>>> features
972-
{'label': ClassLabel(num_classes=3, names=['bad', 'ok', 'good'], id=None)}
972+
{'label': ClassLabel(names=['bad', 'ok', 'good'], id=None)}
973973
```
974974
"""
975975

@@ -1156,7 +1156,7 @@ class Sequence:
11561156
>>> from datasets import Features, Sequence, Value, ClassLabel
11571157
>>> features = Features({'post': Sequence(feature={'text': Value(dtype='string'), 'upvotes': Value(dtype='int32'), 'label': ClassLabel(num_classes=2, names=['hot', 'cold'])})})
11581158
>>> features
1159-
{'post': Sequence(feature={'text': Value(dtype='string', id=None), 'upvotes': Value(dtype='int32', id=None), 'label': ClassLabel(num_classes=2, names=['hot', 'cold'], id=None)}, length=-1, id=None)}
1159+
{'post': Sequence(feature={'text': Value(dtype='string', id=None), 'upvotes': Value(dtype='int32', id=None), 'label': ClassLabel(names=['hot', 'cold'], id=None)}, length=-1, id=None)}
11601160
```
11611161
"""
11621162

@@ -2110,7 +2110,7 @@ def copy(self) -> "Features":
21102110
>>> ds = load_dataset("rotten_tomatoes", split="train")
21112111
>>> copy_of_features = ds.features.copy()
21122112
>>> copy_of_features
2113-
{'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None),
2113+
{'label': ClassLabel(names=['neg', 'pos'], id=None),
21142114
'text': Value(dtype='string', id=None)}
21152115
```
21162116
"""

src/datasets/iterable_dataset.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2951,17 +2951,17 @@ def cast(
29512951
Example:
29522952
29532953
```py
2954-
>>> from datasets import load_dataset
2954+
>>> from datasets import load_dataset, ClassLabel, Value
29552955
>>> ds = load_dataset("rotten_tomatoes", split="train", streaming=True)
29562956
>>> ds.features
2957-
{'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None),
2957+
{'label': ClassLabel(names=['neg', 'pos'], id=None),
29582958
'text': Value(dtype='string', id=None)}
29592959
>>> new_features = ds.features.copy()
29602960
>>> new_features["label"] = ClassLabel(names=["bad", "good"])
29612961
>>> new_features["text"] = Value("large_string")
29622962
>>> ds = ds.cast(new_features)
29632963
>>> ds.features
2964-
{'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None),
2964+
{'label': ClassLabel(names=['bad', 'good'], id=None),
29652965
'text': Value(dtype='large_string', id=None)}
29662966
```
29672967
"""

0 commit comments

Comments
 (0)