Enable augmentation to parts of the data or the whole data

stefanradev93 · stefanradev93 · commit 8d38aa041859 · 2025-05-08T13:49:14.000-04:00
diff --git a/bayesflow/datasets/disk_dataset.py b/bayesflow/datasets/disk_dataset.py
@@ -36,7 +36,7 @@ def __init__(
         load_fn: Callable = None,
         adapter: Adapter | None,
         stage: str = "training",
-        augmentations: Mapping[str, Callable] = None,
+        augmentations: Mapping[str, Callable] | Callable = None,
         **kwargs,
     ):
         """
@@ -57,9 +57,11 @@ def __init__(
             Optional adapter to transform the loaded batch.
         stage : str, default="training"
             Current stage (e.g., "training", "validation", etc.) used by the adapter.
-        augmentations : Mapping[str, Callable], optional
-            Dictionary of augmentation functions to apply to each corresponding key in the batch.
-            Note - augmentations are applied before the adapter.
+        augmentations : dict of str to Callable or Callable, optional
+            Dictionary of augmentation functions to apply to each corresponding key in the batch
+            or a function to apply to the entire batch (possibly adding new keys).
+            Note - augmentations are applied before the adapter is called and are generally
+            transforms that you only want to apply during training.
         **kwargs
             Additional keyword arguments passed to the base `PyDataset`.
         """
@@ -85,9 +87,14 @@ def __getitem__(self, item) -> dict[str, np.ndarray]:
 
         batch = tree_stack(batch)
 
-        if self.augmentations is not None:
+        if isinstance(self.augmentations, Mapping):
             for key in self.augmentations:
                 batch[key] = self.augmentations[key](batch[key])
+        elif isinstance(self.augmentations, Callable):
+            batch = self.augmentations(batch)
+        elif self.augmentations is not None:
+            # None (the default) means "no augmentations" and must pass through without raising.
+            raise RuntimeError(f"Could not apply augmentations of type {type(self.augmentations)}.")
 
         if self.adapter is not None:
             batch = self.adapter(batch, stage=self.stage)
diff --git a/bayesflow/datasets/offline_dataset.py b/bayesflow/datasets/offline_dataset.py
@@ -23,7 +23,7 @@ def __init__(
         num_samples: int = None,
         *,
         stage: str = "training",
-        augmentations: Mapping[str, Callable] = None,
+        augmentations: Mapping[str, Callable] | Callable = None,
         **kwargs,
     ):
         """
@@ -41,9 +41,11 @@ def __init__(
             Number of samples in the dataset. If None, it will be inferred from the data.
         stage : str, default="training"
             Current stage (e.g., "training", "validation", etc.) used by the adapter.
-        augmentations : Mapping[str, Callable], optional
-            Dictionary of augmentation functions to apply to each corresponding key in the batch.
-            Note - augmentations are applied before the adapter.
+        augmentations : dict of str to Callable or Callable, optional
+            Dictionary of augmentation functions to apply to each corresponding key in the batch
+            or a function to apply to the entire batch (possibly adding new keys).
+            Note - augmentations are applied before the adapter is called and are generally
+            transforms that you only want to apply during training.
         **kwargs
             Additional keyword arguments passed to the base `PyDataset`.
         """
@@ -95,9 +97,14 @@ def __getitem__(self, item: int) -> dict[str, np.ndarray]:
             for key, value in self.data.items()
         }
 
-        if self.augmentations is not None:
+        if isinstance(self.augmentations, Mapping):
             for key in self.augmentations:
                 batch[key] = self.augmentations[key](batch[key])
+        elif isinstance(self.augmentations, Callable):
+            batch = self.augmentations(batch)
+        elif self.augmentations is not None:
+            # None (the default) means "no augmentations" and must pass through without raising.
+            raise RuntimeError(f"Could not apply augmentations of type {type(self.augmentations)}.")
 
         if self.adapter is not None:
             batch = self.adapter(batch, stage=self.stage)
diff --git a/bayesflow/datasets/online_dataset.py b/bayesflow/datasets/online_dataset.py
@@ -20,7 +20,7 @@ def __init__(
         adapter: Adapter | None,
         *,
         stage: str = "training",
-        augmentations: Mapping[str, Callable] = None,
+        augmentations: Mapping[str, Callable] | Callable = None,
         **kwargs,
     ):
         """
@@ -38,9 +38,11 @@ def __init__(
             Optional adapter to transform the simulated batch.
         stage : str, default="training"
             Current stage (e.g., "training", "validation", etc.) used by the adapter.
-        augmentations : dict of str to Callable, optional
-            Dictionary of augmentation functions to apply to each corresponding key in the batch.
-            Note - augmentations are applied before the adapter.
+        augmentations : dict of str to Callable or Callable, optional
+            Dictionary of augmentation functions to apply to each corresponding key in the batch
+            or a function to apply to the entire batch (possibly adding new keys).
+            Note - augmentations are applied before the adapter is called and are generally
+            transforms that you only want to apply during training.
         **kwargs
             Additional keyword arguments passed to the base `PyDataset`.
         """
@@ -69,9 +71,14 @@ def __getitem__(self, item: int) -> dict[str, np.ndarray]:
         """
         batch = self.simulator.sample((self.batch_size,))
 
-        if self.augmentations is not None:
+        if isinstance(self.augmentations, Mapping):
             for key in self.augmentations:
                 batch[key] = self.augmentations[key](batch[key])
+        elif isinstance(self.augmentations, Callable):
+            batch = self.augmentations(batch)
+        elif self.augmentations is not None:
+            # None (the default) means "no augmentations" and must pass through without raising.
+            raise RuntimeError(f"Could not apply augmentations of type {type(self.augmentations)}.")
 
         if self.adapter is not None:
             batch = self.adapter(batch, stage=self.stage)