update datasets for usage with numpy arrays

LarsKue · LarsKue · commit 69e238764cae · 2025-03-25T17:58:42.000+01:00
Since we no longer use keras tensors, we can remove the multiprocessing start-method workaround (forcing "spawn" on the torch backend when use_multiprocessing is set).
diff --git a/bayesflow/datasets/disk_dataset.py b/bayesflow/datasets/disk_dataset.py
@@ -42,7 +42,7 @@ def __init__(
 
         self.shuffle()
 
-    def __getitem__(self, item):
+    def __getitem__(self, item) -> dict[str, np.ndarray]:
         if not 0 <= item < self.num_batches:
             raise IndexError(f"Index {item} is out of bounds for dataset with {self.num_batches} batches.")
 
diff --git a/bayesflow/datasets/online_dataset.py b/bayesflow/datasets/online_dataset.py
@@ -1,8 +1,8 @@
 import keras
+import numpy as np
 
 from bayesflow.adapters import Adapter
 from bayesflow.simulators.simulator import Simulator
-from bayesflow.types import Tensor
 
 
 class OnlineDataset(keras.utils.PyDataset):
@@ -20,18 +20,12 @@ def __init__(
     ):
         super().__init__(**kwargs)
 
-        if keras.backend.backend() == "torch" and kwargs.get("use_multiprocessing"):
-            # keras workaround: https://github.com/keras-team/keras/issues/19346
-            import multiprocessing as mp
-
-            mp.set_start_method("spawn", force=True)
-
         self.batch_size = batch_size
         self._num_batches = num_batches
         self.adapter = adapter
         self.simulator = simulator
 
-    def __getitem__(self, item: int) -> dict[str, Tensor]:
+    def __getitem__(self, item: int) -> dict[str, np.ndarray]:
         batch = self.simulator.sample((self.batch_size,))
 
         if self.adapter is not None:
diff --git a/bayesflow/datasets/rounds_dataset.py b/bayesflow/datasets/rounds_dataset.py
@@ -1,8 +1,8 @@
 import keras
+import numpy as np
 
 from bayesflow.adapters import Adapter
 from bayesflow.simulators.simulator import Simulator
-from bayesflow.types import Tensor
 from bayesflow.utils import logging
 
 
@@ -22,12 +22,6 @@ def __init__(
     ):
         super().__init__(**kwargs)
 
-        if keras.backend.backend() == "torch" and kwargs.get("use_multiprocessing"):
-            # keras workaround: https://github.com/keras-team/keras/issues/19346
-            import multiprocessing as mp
-
-            mp.set_start_method("spawn", force=True)
-
         self.batches = None
         self._num_batches = num_batches
         self.batch_size = batch_size
@@ -46,7 +40,7 @@ def __init__(
 
         self.regenerate()
 
-    def __getitem__(self, item: int) -> dict[str, Tensor]:
+    def __getitem__(self, item: int) -> dict[str, np.ndarray]:
         """Get a batch of pre-simulated data"""
         batch = self.batches[item]