huggingface · lhoestq · Jul 17, 2025 · Jun 27, 2025 · Jul 7, 2025 · Jul 7, 2025
diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py
@@ -2041,7 +2041,9 @@ def flatten(self, new_fingerprint: Optional[str] = None, max_depth=16) -> "Datas
          'question': Value('string'),
          'answers': {'text': List(Value('string')),
          'answer_start': List(Value('int32'))}}
-        >>> ds.flatten()
+        >>> # Note: this method returns a new dataset and does not modify the original in place
+        >>> ds = ds.flatten()
+        >>> ds
         Dataset({
             features: ['id', 'title', 'context', 'question', 'answers.text', 'answers.answer_start'],
             num_rows: 87599
@@ -2399,7 +2401,9 @@ def select_columns(self, column_names: Union[str, list[str]], new_fingerprint: O
         ```py
         >>> from datasets import load_dataset
         >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation")
-        >>> ds.select_columns(['text'])
+        >>> # Note: this method returns a new dataset and does not modify the original in place
+        >>> ds = ds.select_columns(['text'])
+        >>> ds
         Dataset({
             features: ['text'],
             num_rows: 1066
@@ -3869,12 +3873,15 @@ def filter(
         ```py
         >>> from datasets import load_dataset
         >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation")
-        >>> ds.filter(lambda x: x["label"] == 1)
+        >>> # Note: this method returns a new dataset and does not modify the original in place
+        >>> ds = ds.filter(lambda x: x["label"] == 1)
+        >>> ds
         Dataset({
             features: ['text', 'label'],
             num_rows: 533
         })
         ```
+
         """
         if len(self.list_indexes()) > 0:
             raise DatasetTransformationNotAllowedError(
@@ -4041,7 +4048,9 @@ def select(
         ```py
         >>> from datasets import load_dataset
         >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation")
-        >>> ds.select(range(4))
+        >>> # Note: this method returns a new dataset and does not modify the original in place
+        >>> ds = ds.select(range(4))
+        >>> ds
         Dataset({
             features: ['text', 'label'],
             num_rows: 4
@@ -4936,7 +4945,9 @@ def shard(
             features: ['text', 'label'],
             num_rows: 1066
         })
-        >>> ds.shard(num_shards=2, index=0)
+        >>> # Note: this method returns a new dataset and does not modify the original in place
+        >>> ds = ds.shard(num_shards=2, index=0)
+        >>> ds
         Dataset({
             features: ['text', 'label'],
             num_rows: 533
@@ -6005,7 +6016,9 @@ def add_column(
         >>> from datasets import load_dataset
         >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation")
         >>> more_text = ds["text"]
-        >>> ds.add_column(name="text_2", column=more_text)
+        >>> # Note: this method returns a new dataset and does not modify the original in place
+        >>> ds = ds.add_column(name="text_2", column=more_text)
+        >>> ds
         Dataset({
             features: ['text', 'label', 'text_2'],
             num_rows: 1066