
Commit 77e1e6a

modify automatic batching doc
1 parent d7816cc commit 77e1e6a

File tree

2 files changed: +19 −2 lines changed


pina/data/data_module.py

Lines changed: 10 additions & 1 deletion

@@ -81,7 +81,16 @@ def __init__(
         :param dict max_conditions_lengths: ``dict`` containing the maximum
             number of data points to consider in a single batch for
             each condition.
-        :param bool automatic_batching: Whether to enable automatic batching.
+        :param bool automatic_batching: Whether to enable automatic batching.
+            If ``True``, automatic PyTorch batching is performed, which
+            consists of extracting one element at a time from the dataset
+            and collating them into a batch. This is useful when the dataset
+            is too large to fit into memory. On the other hand, if ``False``,
+            the items are retrieved from the dataset all at once, avoiding
+            the overhead of collating them into a batch and reducing the
+            number of ``__getitem__`` calls to the dataset. This is useful
+            when the dataset fits into memory. Avoid automatic batching when
+            ``batch_size`` is large. Default is ``False``.
         :param PinaDataset dataset: The dataset where the data is stored.
         """

pina/trainer.py

Lines changed: 9 additions & 1 deletion

@@ -170,7 +170,15 @@ def _create_datamodule(
             validation dataset.
         :param int batch_size: The number of samples per batch to load.
         :param bool automatic_batching: Whether to perform automatic batching
-            with PyTorch.
+            with PyTorch. If ``True``, automatic PyTorch batching is
+            performed, which consists of extracting one element at a time
+            from the dataset and collating them into a batch. This is useful
+            when the dataset is too large to fit into memory. On the other
+            hand, if ``False``, the items are retrieved from the dataset all
+            at once, avoiding the overhead of collating them into a batch and
+            reducing the number of ``__getitem__`` calls. This is useful when
+            the dataset fits into memory. Avoid automatic batching when
+            ``batch_size`` is large. Default is ``False``.
         :param bool pin_memory: Whether to use pinned memory for faster data
             transfer to GPU.
         :param int num_workers: The number of worker threads for data loading.
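The trade-off described in both docstrings can be illustrated with plain PyTorch, independent of PINA. A minimal sketch, assuming only standard ``torch.utils.data`` behavior (the ``CountingDataset`` helper is hypothetical, written just to count ``__getitem__`` calls):

```python
import torch
from torch.utils.data import BatchSampler, DataLoader, Dataset, SequentialSampler


class CountingDataset(Dataset):
    """Toy dataset that counts how many times __getitem__ is invoked."""

    def __init__(self, n=8):
        self.data = list(range(n))
        self.getitem_calls = 0

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        self.getitem_calls += 1
        # With a batch sampler, idx is a list of indices; otherwise an int.
        if isinstance(idx, list):
            return [self.data[i] for i in idx]
        return self.data[idx]


# Automatic batching: the DataLoader fetches one element per __getitem__
# call and collates the elements into a batch.
auto = CountingDataset()
for batch in DataLoader(auto, batch_size=4):
    pass
print(auto.getitem_calls)  # 8 -> one call per element

# Batching disabled (batch_size=None): a BatchSampler hands the whole index
# list to a single __getitem__ call, skipping per-element collation.
manual = CountingDataset()
sampler = BatchSampler(SequentialSampler(manual), batch_size=4, drop_last=False)
for batch in DataLoader(manual, sampler=sampler, batch_size=None):
    pass
print(manual.getitem_calls)  # 2 -> one call per batch
```

With eight elements and a batch size of four, the automatic path issues eight ``__getitem__`` calls while the manual path issues two, which is why the docstrings recommend disabling automatic batching when the dataset fits in memory and ``batch_size`` is large.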
