Commit d79b17a

Fix init bugs, adapt notebooks
1 parent ceab303 commit d79b17a

File tree (4 files changed, +368 -149 lines):

bayesflow/approximators/continuous_approximator.py
bayesflow/networks/standardization/standardization.py
examples/Linear_Regression_Starter.ipynb
examples/SIR_Posterior_Estimation.ipynb

bayesflow/approximators/continuous_approximator.py

Lines changed: 14 additions & 22 deletions
@@ -49,6 +49,7 @@ def __init__(
         self.inference_network = inference_network
         self.summary_network = summary_network
         self.standardize = standardize
+
         self.inference_variables_norm = None
         self.summary_variables_norm = None
         self.inference_conditions_norm = None
@@ -59,7 +60,6 @@ def build_adapter(
         inference_variables: Sequence[str],
         inference_conditions: Sequence[str] = None,
         summary_variables: Sequence[str] = None,
-        standardize: bool = True,
         sample_weight: str = None,
     ) -> Adapter:
         """Create an :py:class:`~bayesflow.adapters.Adapter` suited for the approximator.
@@ -72,8 +72,6 @@ def build_adapter(
             Names of the inference conditions in the data
         summary_variables : Sequence of str, optional
             Names of the summary variables in the data
-        standardize : bool, optional
-            Decide whether to standardize all variables, default is True
         sample_weight : str, optional
             Name of the sample weights
         """
@@ -95,9 +93,6 @@ def build_adapter(

         adapter.keep(["inference_variables", "inference_conditions", "summary_variables", "sample_weight"])

-        if standardize:
-            adapter.standardize(exclude="sample_weight")
-
         return adapter

     def compile(
@@ -118,7 +113,7 @@ def compile(

         return super().compile(*args, **kwargs)

-    def build_from_data(self, adapted_data: dict[str, any]) -> None:
+    def build_from_data(self, adapted_data: dict[str, any]):
         # Determine input standardization
         if self.standardize == "all":
             keys = ["inference_variables", "summary_variables", "inference_conditions"]
@@ -129,13 +124,15 @@ def build_from_data(self, adapted_data: dict[str, any]) -> None:
         else:
             keys = []

-        if "inference_variables" in keys:
+        if "inference_variables" in adapted_data and "inference_variables" in keys:
             self.inference_variables_norm = Standardization()
             self.inference_variables_norm(adapted_data["inference_variables"])
-        if "summary_variables" in keys and self.summary_network:
+
+        if "summary_variables" in adapted_data and "summary_variables" in keys and self.summary_network:
             self.summary_variables_norm = Standardization()
             self.summary_variables_norm(adapted_data["summary_variables"])
-        if "inference_conditions" in keys:
+
+        if "inference_conditions" in adapted_data and "inference_conditions" in keys:
             self.inference_conditions_norm = Standardization()
             self.inference_conditions_norm(adapted_data["inference_conditions"])
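
The rewritten build_from_data only creates a Standardization layer for a variable group that is both present in `adapted_data` and selected via `self.standardize`. A minimal sketch of that key selection; the branch between `"all"` and the final `else` falls outside the diff context, so the handling of a list-valued `standardize` below is an assumption, and `standardize_keys` is a hypothetical helper name:

    # Hypothetical helper mirroring the key selection in build_from_data.
    # The elif branch is hidden in the diff, so the list/tuple case is assumed.
    def standardize_keys(standardize) -> list[str]:
        all_keys = ["inference_variables", "summary_variables", "inference_conditions"]
        if standardize == "all":
            return all_keys
        if isinstance(standardize, (list, tuple)):  # assumed branch
            return [key for key in all_keys if key in standardize]
        return []
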
@@ -394,21 +391,18 @@ def sample(

         # Optionally standardize conditions
         if "summary_variables" in conditions and self.summary_variables_norm:
-            conditions["summary_variables"] = self.summary_variables_norm(
-                conditions["summary_variables"], stage="inference"
-            )
+            conditions["summary_variables"] = self.summary_variables_norm(conditions["summary_variables"])

         if "inference_conditions" in conditions and self.inference_conditions_norm:
-            conditions["inference_conditions"] = self.inference_conditions_norm(
-                conditions["inference_conditions"], stage="inference"
-            )
+            conditions["inference_conditions"] = self.inference_conditions_norm(conditions["inference_conditions"])
+
         conditions = keras.tree.map_structure(keras.ops.convert_to_tensor, conditions)

         # Sample and undo optional standardization
         samples = self._sample(num_samples=num_samples, **conditions, **kwargs)

         if self.inference_variables_norm:
-            samples = self.inference_variables_norm(samples, stage="inference", forward=False)
+            samples = self.inference_variables_norm(samples, forward=False)

         samples = {"inference_variables": samples}
         samples = keras.tree.map_structure(keras.ops.convert_to_numpy, samples)
@@ -512,16 +506,14 @@ def log_prob(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray | dict

         # Optionally standardize conditions and variables
         if "summary_variables" in data and self.summary_variables_norm:
-            data["summary_variables"] = self.summary_variables_norm(data["summary_variables"], stage="inference")
+            data["summary_variables"] = self.summary_variables_norm(data["summary_variables"])

         if "inference_conditions" in data and self.inference_conditions_norm:
-            data["inference_conditions"] = self.inference_conditions_norm(
-                data["inference_conditions"], stage="inference"
-            )
+            data["inference_conditions"] = self.inference_conditions_norm(data["inference_conditions"])

         if self.inference_variables_norm:
             data["inference_variables"], log_det_jac = self.summary_variables_norm(
-                data["inference_variables"], stage="inference", log_det_jac=True
+                data["inference_variables"], log_det_jac=True
             )
             log_det_jac = keras.ops.convert_to_numpy(log_det_jac)
         else:
bayesflow/networks/standardization/standardization.py

Lines changed: 22 additions & 13 deletions
@@ -3,13 +3,13 @@
 import keras

 from bayesflow.types import Tensor, Shape
-from bayesflow.utils.serialization import serialize, serializable
+from bayesflow.utils.serialization import serialize, deserialize, serializable
 from bayesflow.utils import expand_left_as


 @serializable("bayesflow.networks")
 class Standardization(keras.Layer):
-    def __init__(self, momentum: float = 0.99):
+    def __init__(self, momentum: float = 0.95, epsilon: float = 1e-6):
         """
         Initializes a Standardization layer that will keep track of the running mean and
         running standard deviation across a batch of tensors.
@@ -19,27 +19,28 @@ def __init__(self, momentum: float = 0.99):
         momentum : float, optional
             Momentum for the exponential moving average used to update the mean and
             standard deviation during training. Must be between 0 and 1.
-            Default is 0.99.
+            Default is 0.95.
+        epsilon : float, optional
+            Stability parameter to avoid division by zero.
         """
         super().__init__()

         self.momentum = momentum
+        self.epsilon = epsilon
         self.moving_mean = None
         self.moving_std = None

-    def build(self, input_shape: Shape, **kwargs):
-        self.moving_mean = self.add_weight(shape=(input_shape[-1],), initializer="ones", name="scale", trainable=False)
-        self.moving_std = self.add_weight(shape=(input_shape[-1],), initializer="zeros", name="bias", trainable=False)
+    def build(self, input_shape: Shape):
+        self.moving_mean = self.add_weight(shape=(input_shape[-1],), initializer="zeros", trainable=False)
+        self.moving_std = self.add_weight(shape=(input_shape[-1],), initializer="ones", trainable=False)

     def get_config(self) -> dict:
-        config = {"momentum": self.momentum}
+        config = {"momentum": self.momentum, "epsilon": self.epsilon}
         return serialize(config)

-    def _update_moments(self, x: Tensor):
-        mean = keras.ops.mean(x, axis=list(range(keras.ops.ndim(x)))[:-1])
-        std = keras.ops.std(x, axis=list(range(keras.ops.ndim(x)))[:-1])
-        self.moving_mean.assign(self.momentum * self.moving_mean + (1.0 - self.momentum) * mean)
-        self.moving_std.assign(self.momentum * self.moving_std + (1.0 - self.momentum) * std)
+    @classmethod
+    def from_config(cls, config, custom_objects=None):
+        return cls(**deserialize(config, custom_objects=custom_objects))

     def call(
         self, x: Tensor, stage: str = "inference", forward: bool = True, log_det_jac: bool = False, **kwargs
@@ -53,7 +54,7 @@ def call(
             Input tensor of shape (..., dim).
         stage : str, optional
             Indicates the stage of computation. If "training", the running statistics
-            are updated. Default is "training".
+            are updated. Default is "inference".
         forward : bool, optional
             If True, apply standardization: (x - mean) / std.
             If False, apply inverse transformation: x * std + mean and return the log-determinant
@@ -84,3 +85,11 @@ def call(
             return x, ldj

         return x
+
+    def _update_moments(self, x: Tensor):
+        mean = keras.ops.mean(x, axis=tuple(range(keras.ops.ndim(x) - 1)))
+        std = keras.ops.std(x, axis=tuple(range(keras.ops.ndim(x) - 1)))
+        std = keras.ops.maximum(std, self.epsilon)
+
+        self.moving_mean.assign(self.momentum * self.moving_mean + (1.0 - self.momentum) * mean)
+        self.moving_std.assign(self.momentum * self.moving_std + (1.0 - self.momentum) * std)
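
The relocated _update_moments now reduces over every axis except the trailing feature axis, clamps the batch standard deviation at `epsilon`, and blends the result into the running statistics with the new default momentum of 0.95. A self-contained sketch of that update rule using keras.ops directly (shapes and data are arbitrary examples):

    import numpy as np
    import keras

    momentum, epsilon = 0.95, 1e-6
    x = keras.ops.convert_to_tensor(np.random.randn(32, 10, 4).astype("float32"))

    # Batch moments over all axes except the last (feature) axis.
    axes = tuple(range(keras.ops.ndim(x) - 1))
    batch_mean = keras.ops.mean(x, axis=axes)
    batch_std = keras.ops.maximum(keras.ops.std(x, axis=axes), epsilon)

    # Exponential moving average, starting from the corrected build()
    # initializers: mean from zeros, std from ones.
    moving_mean = keras.ops.zeros((4,))
    moving_std = keras.ops.ones((4,))
    moving_mean = momentum * moving_mean + (1.0 - momentum) * batch_mean
    moving_std = momentum * moving_std + (1.0 - momentum) * batch_std

    z = (x - moving_mean) / moving_std     # forward standardization in call()
    x_back = z * moving_std + moving_mean  # inverse transform (forward=False)

This also makes the "init bugs" of the commit title concrete: the old build() initialized the moving mean with "ones" and the moving std with "zeros", swapping the two roles and inviting division by zero before any epsilon clamping.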

examples/Linear_Regression_Starter.ipynb

Lines changed: 3 additions & 6 deletions
@@ -382,7 +382,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2025-02-14T10:51:30.684080Z",
@@ -401,7 +401,6 @@
    "    .broadcast(\"N\", to=\"x\")\n",
    "    .as_set([\"x\", \"y\"])\n",
    "    .constrain(\"sigma\", lower=0)\n",
-    "    .standardize(exclude=[\"N\"])\n",
    "    .sqrt(\"N\")\n",
    "    .convert_dtype(\"float64\", \"float32\")\n",
    "    .concatenate([\"beta\", \"sigma\"], into=\"inference_variables\")\n",
@@ -424,9 +423,6 @@
    "\n",
    "The `.constrain(\"sigma\", lower=0)` transform ensures that the residual standard deviation parameter `sigma` will always be positive. Without this constrain, the neural networks may attempt to predict negative `sigma` which of course would not make much sense.\n",
    "\n",
-    "Standardidazation via `.standardize()` is important for neural networks to learn\n",
-    "reliably without, for example, exploding or vanishing gradients during training. However, we need to exclude the variable `N` from standardization, via `standardize(exclude=[\"N\"])`. This is because `N` is a constant within each batch of training data and can hence not be standardized. In the future, bayesflow will automatically detect this case so that we don't have to manually exclude such constant variables from standardization.\n",
-    "\n",
    "Let's check the shape of our processed data to be passed to the neural networks:"
   ]
  },
@@ -1028,7 +1024,8 @@
    "name": "python3"
   },
   "language_info": {
-   "name": "python"
+   "name": "python",
+   "version": "3.11.12"
   },
   "widgets": {
    "application/vnd.jupyter.widget-state+json": {

examples/SIR_Posterior_Estimation.ipynb

Lines changed: 329 additions & 108 deletions (large diff, not rendered)

0 commit comments