diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index e499fb6f9ba..81dbe7634b8 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -2041,7 +2041,8 @@ def flatten(self, new_fingerprint: Optional[str] = None, max_depth=16) -> "Datas 'question': Value('string'), 'answers': {'text': List(Value('string')), 'answer_start': List(Value('int32'))}} - >>> ds.flatten() + >>> ds = ds.flatten() + >>> ds Dataset({ features: ['id', 'title', 'context', 'question', 'answers.text', 'answers.answer_start'], num_rows: 87599 @@ -2399,7 +2400,8 @@ def select_columns(self, column_names: Union[str, list[str]], new_fingerprint: O ```py >>> from datasets import load_dataset >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation") - >>> ds.select_columns(['text']) + >>> ds = ds.select_columns(['text']) + >>> ds Dataset({ features: ['text'], num_rows: 1066 @@ -3869,12 +3871,14 @@ def filter( ```py >>> from datasets import load_dataset >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation") - >>> ds.filter(lambda x: x["label"] == 1) + >>> ds = ds.filter(lambda x: x["label"] == 1) + >>> ds Dataset({ features: ['text', 'label'], num_rows: 533 }) ``` + """ if len(self.list_indexes()) > 0: raise DatasetTransformationNotAllowedError( @@ -4041,7 +4045,8 @@ def select( ```py >>> from datasets import load_dataset >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation") - >>> ds.select(range(4)) + >>> ds = ds.select(range(4)) + >>> ds Dataset({ features: ['text', 'label'], num_rows: 4 @@ -4936,7 +4941,8 @@ def shard( features: ['text', 'label'], num_rows: 1066 }) - >>> ds.shard(num_shards=2, index=0) + >>> ds = ds.shard(num_shards=2, index=0) + >>> ds Dataset({ features: ['text', 'label'], num_rows: 533 @@ -6005,7 +6011,8 @@ def add_column( >>> from datasets import load_dataset >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation") >>> more_text = ds["text"] - >>> ds.add_column(name="text_2", column=more_text) + >>> ds = ds.add_column(name="text_2", column=more_text) + >>> ds Dataset({ features: ['text', 'label', 'text_2'], num_rows: 1066