Commit ceab303

Add standardization to continuous approximator and test
1 parent 4781e2e commit ceab303

5 files changed: +232 -20 lines changed
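At a glance, the commit adds an optional `standardize` argument to `ContinuousApproximator`, backed by a new `Standardization` layer that tracks running moments. A minimal usage sketch — the adapter and network choices below are illustrative placeholders, not part of this commit:

import bayesflow as bf

# hypothetical setup; any adapter/network combination works the same way
adapter = bf.approximators.ContinuousApproximator.build_adapter(
    inference_variables=["theta"],
    inference_conditions=["x"],
)

approximator = bf.approximators.ContinuousApproximator(
    adapter=adapter,
    inference_network=bf.networks.CouplingFlow(),
    standardize="all",  # default; also accepts a single key, a list of keys, or None
)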

bayesflow/approximators/approximator.py

Lines changed: 2 additions & 4 deletions
@@ -1,5 +1,3 @@
-from collections.abc import Mapping
-
 import multiprocessing as mp
 
 import keras
@@ -22,8 +20,8 @@ def build_adapter(cls, **kwargs) -> Adapter:
         # implemented by each respective architecture
         raise NotImplementedError
 
-    def build_from_data(self, data: Mapping[str, any]) -> None:
-        self.compute_metrics(**filter_kwargs(data, self.compute_metrics), stage="training")
+    def build_from_data(self, adapted_data: dict[str, any]) -> None:
+        self.compute_metrics(**filter_kwargs(adapted_data, self.compute_metrics), stage="training")
         self.built = True
 
     @classmethod

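The renamed `adapted_data` parameter makes explicit that `build_from_data` receives the adapter's output. `filter_kwargs` then forwards only the entries that `compute_metrics` accepts; a minimal sketch of that behavior (an assumption about the helper, not the bayesflow implementation):

import inspect

def filter_kwargs(kwargs: dict, f) -> dict:
    # keep only keys that match parameters in f's signature
    accepted = set(inspect.signature(f).parameters)
    return {k: v for k, v in kwargs.items() if k in accepted}

def compute_metrics(inference_variables, inference_conditions=None, stage="training"):
    pass

adapted_data = {"inference_variables": 1, "inference_conditions": 2, "unused_key": 3}
print(filter_kwargs(adapted_data, compute_metrics))
# -> {'inference_variables': 1, 'inference_conditions': 2}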
bayesflow/approximators/continuous_approximator.py

Lines changed: 92 additions & 16 deletions
@@ -11,6 +11,7 @@
 from bayesflow.utils.serialization import serialize, deserialize, serializable
 
 from .approximator import Approximator
+from ..networks.standardization import Standardization
 
 
 @serializable("bayesflow.approximators")
@@ -40,12 +41,17 @@ def __init__(
         adapter: Adapter,
         inference_network: InferenceNetwork,
         summary_network: SummaryNetwork = None,
+        standardize: str | Sequence[str] | None = "all",
         **kwargs,
     ):
         super().__init__(**kwargs)
         self.adapter = adapter
         self.inference_network = inference_network
         self.summary_network = summary_network
+        self.standardize = standardize
+        self.inference_variables_norm = None
+        self.summary_variables_norm = None
+        self.inference_conditions_norm = None
 
     @classmethod
     def build_adapter(
@@ -112,6 +118,31 @@ def compile(
 
         return super().compile(*args, **kwargs)
 
+    def build_from_data(self, adapted_data: dict[str, any]) -> None:
+        # Determine which inputs to standardize
+        if self.standardize == "all":
+            keys = ["inference_variables", "summary_variables", "inference_conditions"]
+        elif isinstance(self.standardize, str):
+            keys = [self.standardize]
+        elif isinstance(self.standardize, Sequence):
+            keys = self.standardize
+        else:
+            keys = []
+
+        if "inference_variables" in keys:
+            self.inference_variables_norm = Standardization()
+            self.inference_variables_norm(adapted_data["inference_variables"])
+        if "summary_variables" in keys and self.summary_network:
+            self.summary_variables_norm = Standardization()
+            self.summary_variables_norm(adapted_data["summary_variables"])
+        if "inference_conditions" in keys:
+            self.inference_conditions_norm = Standardization()
+            self.inference_conditions_norm(adapted_data["inference_conditions"])
+
+        # Call compute_metrics once to build the inner networks
+        self.compute_metrics(**filter_kwargs(adapted_data, self.compute_metrics), stage="training")
+        self.built = True
+
     def compile_from_config(self, config):
         self.compile(**deserialize(config))
         if hasattr(self, "optimizer") and self.built:
@@ -126,6 +157,10 @@ def compute_metrics(
         sample_weight: Tensor = None,
         stage: str = "training",
     ) -> dict[str, Tensor]:
+        # Standardize the optional inference conditions, if configured
+        if inference_conditions is not None and self.inference_conditions_norm:
+            inference_conditions = self.inference_conditions_norm(inference_conditions, stage=stage)
+
         if self.summary_network is None:
             if summary_variables is not None:
                 raise ValueError("Cannot compute summary metrics without a summary network.")
@@ -135,6 +170,9 @@
             if summary_variables is None:
                 raise ValueError("Summary variables are required when a summary network is present.")
 
+            if self.summary_variables_norm is not None:
+                summary_variables = self.summary_variables_norm(summary_variables, stage=stage)
+
             summary_metrics = self.summary_network.compute_metrics(summary_variables, stage=stage)
             summary_outputs = summary_metrics.pop("outputs")
 
@@ -146,6 +184,10 @@
 
         # Force a conversion to Tensor
         inference_variables = keras.tree.map_structure(keras.ops.convert_to_tensor, inference_variables)
+
+        if self.inference_variables_norm is not None:
+            inference_variables = self.inference_variables_norm(inference_variables, stage=stage)
+
         inference_metrics = self.inference_network.compute_metrics(
             inference_variables, conditions=inference_conditions, sample_weight=sample_weight, stage=stage
         )
@@ -223,6 +265,7 @@ def get_config(self):
             "adapter": self.adapter,
             "inference_network": self.inference_network,
             "summary_network": self.summary_network,
+            "standardize": self.standardize,
         }
 
         return base_config | serialize(config)
@@ -349,16 +392,33 @@ def sample(
         # Ensure only keys relevant for sampling are present in the conditions dictionary
         conditions = {k: v for k, v in conditions.items() if k in ContinuousApproximator.SAMPLE_KEYS}
 
+        # Optionally standardize conditions
+        if "summary_variables" in conditions and self.summary_variables_norm:
+            conditions["summary_variables"] = self.summary_variables_norm(
+                conditions["summary_variables"], stage="inference"
+            )
+
+        if "inference_conditions" in conditions and self.inference_conditions_norm:
+            conditions["inference_conditions"] = self.inference_conditions_norm(
+                conditions["inference_conditions"], stage="inference"
+            )
+
         conditions = keras.tree.map_structure(keras.ops.convert_to_tensor, conditions)
-        conditions = {"inference_variables": self._sample(num_samples=num_samples, **conditions, **kwargs)}
-        conditions = keras.tree.map_structure(keras.ops.convert_to_numpy, conditions)
+
+        # Sample, then undo the optional standardization
+        samples = self._sample(num_samples=num_samples, **conditions, **kwargs)
+
+        if self.inference_variables_norm:
+            samples = self.inference_variables_norm(samples, stage="inference", forward=False)
+
+        samples = {"inference_variables": samples}
+        samples = keras.tree.map_structure(keras.ops.convert_to_numpy, samples)
 
         # Back-transform quantities and samples
-        conditions = self.adapter(conditions, inverse=True, strict=False, **kwargs)
+        samples = self.adapter(samples, inverse=True, strict=False, **kwargs)
 
         if split:
-            conditions = split_arrays(conditions, axis=-1)
-        return conditions
+            samples = split_arrays(samples, axis=-1)
+        return samples
 
     def _sample(
         self,
@@ -400,37 +460,35 @@ def _sample(
             **filter_kwargs(kwargs, self.inference_network.sample),
         )
 
-    def summaries(self, data: Mapping[str, np.ndarray], **kwargs):
+    def summaries(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray:
         """
-        Computes the summaries of given data.
+        Computes the learned summary statistics of given inputs.
 
         The `data` dictionary is preprocessed using the `adapter` and passed through the summary network.
 
         Parameters
         ----------
         data : Mapping[str, np.ndarray]
-            Dictionary of data as NumPy arrays.
+            Dictionary of simulated or real quantities as NumPy arrays.
         **kwargs : dict
             Additional keyword arguments for the adapter and the summary network.
 
         Returns
         -------
         summaries : np.ndarray
-            Log-probabilities of the distribution `p(inference_variables | inference_conditions, h(summary_conditions))`
-
-        Raises
-        ------
-        ValueError
-            If the approximator does not have a summary network, or the adapter does not produce the output required
-            by the summary network.
+            The learned summary statistics.
         """
         if self.summary_network is None:
-            raise ValueError("A summary network is required to compute summeries.")
+            raise ValueError("A summary network is required to compute summaries.")
+
         data_adapted = self.adapter(data, strict=False, stage="inference", **kwargs)
         if "summary_variables" not in data_adapted or data_adapted["summary_variables"] is None:
             raise ValueError("Summary variables are required to compute summaries.")
+
         summary_variables = keras.ops.convert_to_tensor(data_adapted["summary_variables"])
         summaries = self.summary_network(summary_variables, **filter_kwargs(kwargs, self.summary_network.call))
+        summaries = keras.ops.convert_to_numpy(summaries)
+
         return summaries
 
     def log_prob(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray | dict[str, np.ndarray]:
@@ -451,6 +509,24 @@ def log_prob(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray | dict[str, np.ndarray]:
             Log-probabilities of the distribution `p(inference_variables | inference_conditions, h(summary_conditions))`
         """
         data, log_det_jac = self.adapter(data, strict=False, stage="inference", log_det_jac=True, **kwargs)
+
+        # Optionally standardize conditions and variables
+        if "summary_variables" in data and self.summary_variables_norm:
+            data["summary_variables"] = self.summary_variables_norm(data["summary_variables"], stage="inference")
+
+        if "inference_conditions" in data and self.inference_conditions_norm:
+            data["inference_conditions"] = self.inference_conditions_norm(
+                data["inference_conditions"], stage="inference"
+            )
+
+        if self.inference_variables_norm:
+            data["inference_variables"], log_det_jac = self.inference_variables_norm(
+                data["inference_variables"], stage="inference", log_det_jac=True
+            )
+            log_det_jac = keras.ops.convert_to_numpy(log_det_jac)
+        else:
+            log_det_jac = 0.0
+
         data = keras.tree.map_structure(keras.ops.convert_to_tensor, data)
         log_prob = self._log_prob(**data, **kwargs)
         log_prob = keras.tree.map_structure(keras.ops.convert_to_numpy, log_prob)
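For reference, how `build_from_data` above resolves the new `standardize` argument into the set of standardized inputs (a restatement of the branch logic, not additional API):

standardize = "all"  # -> inference_variables, summary_variables, inference_conditions (default)
standardize = "inference_variables"  # a single key -> just that input
standardize = ["summary_variables", "inference_conditions"]  # a sequence -> used as-is
standardize = None  # -> no standardization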
bayesflow/networks/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+from .standardization import Standardization
bayesflow/networks/standardization.py

Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
+from collections.abc import Sequence
+
+import keras
+
+from bayesflow.types import Tensor, Shape
+from bayesflow.utils.serialization import serialize, serializable
+from bayesflow.utils import expand_left_as
+
+
+@serializable("bayesflow.networks")
+class Standardization(keras.Layer):
+    def __init__(self, momentum: float = 0.99):
+        """
+        Initializes a Standardization layer that keeps track of the running mean and
+        running standard deviation across batches of tensors.
+
+        Parameters
+        ----------
+        momentum : float, optional
+            Momentum for the exponential moving average used to update the mean and
+            standard deviation during training. Must be between 0 and 1.
+            Default is 0.99.
+        """
+        super().__init__()
+
+        self.momentum = momentum
+        self.moving_mean = None
+        self.moving_std = None
+
+    def build(self, input_shape: Shape, **kwargs):
+        self.moving_mean = self.add_weight(shape=(input_shape[-1],), initializer="zeros", name="moving_mean", trainable=False)
+        self.moving_std = self.add_weight(shape=(input_shape[-1],), initializer="ones", name="moving_std", trainable=False)
+
+    def get_config(self) -> dict:
+        config = {"momentum": self.momentum}
+        return serialize(config)
+
+    def _update_moments(self, x: Tensor):
+        # Compute batch moments over all axes except the last (feature) axis
+        reduce_axes = list(range(keras.ops.ndim(x) - 1))
+        mean = keras.ops.mean(x, axis=reduce_axes)
+        std = keras.ops.std(x, axis=reduce_axes)
+        self.moving_mean.assign(self.momentum * self.moving_mean + (1.0 - self.momentum) * mean)
+        self.moving_std.assign(self.momentum * self.moving_std + (1.0 - self.momentum) * std)
+
+    def call(
+        self, x: Tensor, stage: str = "inference", forward: bool = True, log_det_jac: bool = False, **kwargs
+    ) -> Tensor | Sequence[Tensor]:
+        """
+        Applies standardization or its inverse to the input tensor, optionally computing
+        the log-determinant of the Jacobian.
+
+        Parameters
+        ----------
+        x : Tensor
+            Input tensor of shape (..., dim).
+        stage : str, optional
+            Indicates the stage of computation. If "training", the running statistics
+            are updated. Default is "inference".
+        forward : bool, optional
+            If True, applies the standardization (x - mean) / std.
+            If False, applies the inverse transformation x * std + mean. Default is True.
+        log_det_jac : bool, optional
+            Whether to also return the log-determinant of the Jacobian of the applied
+            transformation. Default is False.
+
+        Returns
+        -------
+        Tensor or Sequence[Tensor]
+            The standardized tensor if `forward` is True, otherwise the un-standardized tensor.
+            If `log_det_jac` is True, a tuple (transformed tensor, log-determinant) is returned.
+        """
+        if stage == "training":
+            self._update_moments(x)
+
+        if forward:
+            x = (x - expand_left_as(self.moving_mean, x)) / expand_left_as(self.moving_std, x)
+        else:
+            x = expand_left_as(self.moving_mean, x) + expand_left_as(self.moving_std, x) * x
+
+        if log_det_jac:
+            ldj = keras.ops.sum(keras.ops.log(keras.ops.abs(self.moving_std)), axis=-1)
+            ldj = keras.ops.broadcast_to(ldj, keras.ops.shape(x)[:-1])
+            if forward:
+                ldj = -ldj
+            return x, ldj
+
+        return x
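A quick numeric check of the log-determinant logic above: standardization is a per-feature affine map z = (x - mean) / std, so log|det J| = -sum_i log|std_i| in the forward direction, and the inverse contributes the same magnitude with opposite sign. A sketch with NumPy stand-ins for the moving statistics:

import numpy as np

std = np.array([2.0, 0.5, 4.0])
ldj_forward = -np.sum(np.log(np.abs(std)))  # log|det diag(1/std)|
ldj_inverse = np.sum(np.log(np.abs(std)))   # log|det diag(std)|
print(ldj_forward, ldj_inverse)             # -1.386..., 1.386...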
Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+import numpy as np
+import keras
+
+from bayesflow.networks.standardization import Standardization
+
+
+def test_forward_standardization_training():
+    random_input = keras.random.normal((8, 4))
+
+    layer = Standardization(momentum=0.0)  # no EMA for test stability
+    layer.build(random_input.shape)
+
+    out = layer(random_input, stage="training", forward=True)
+
+    moving_mean = keras.ops.convert_to_numpy(layer.moving_mean)
+    moving_std = keras.ops.convert_to_numpy(layer.moving_std)
+    random_input = keras.ops.convert_to_numpy(random_input)
+    out = keras.ops.convert_to_numpy(out)
+
+    # the moments should now match the batch statistics
+    np.testing.assert_allclose(moving_mean, np.mean(random_input, axis=0), atol=1e-5)
+    np.testing.assert_allclose(moving_std, np.std(random_input, axis=0), atol=1e-5)
+
+    assert out.shape == random_input.shape
+    assert not np.any(np.isnan(out))
+
+
+def test_inverse_standardization_ldj():
+    random_input = keras.random.normal((1, 3))
+
+    layer = Standardization(momentum=0.0)
+    layer.build(random_input.shape)
+
+    _ = layer(random_input, stage="training", forward=True)  # trigger moment update
+    inv_x, ldj = layer(random_input, stage="inference", forward=False, log_det_jac=True)
+
+    assert inv_x.shape == random_input.shape
+    assert ldj.shape == random_input.shape[:-1]
+
+
+def test_consistency_forward_inverse():
+    random_input = keras.random.normal((4, 20, 5))
+    layer = Standardization(momentum=0.0)
+    layer.build((5,))
+    standardized = layer(random_input, stage="training", forward=True)
+    recovered = layer(standardized, stage="inference", forward=False)
+
+    random_input = keras.ops.convert_to_numpy(random_input)
+    recovered = keras.ops.convert_to_numpy(recovered)
+
+    np.testing.assert_allclose(random_input, recovered, atol=1e-4)
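Why `momentum=0.0` makes these tests exact: `_update_moments` computes `new = momentum * old + (1 - momentum) * batch`, so a zero momentum replaces the running moments with the current batch statistics outright, and the assertions can compare directly against `np.mean`/`np.std`. A one-line check:

momentum, old_mean, batch_mean = 0.0, 1.0, 3.7
new_mean = momentum * old_mean + (1 - momentum) * batch_mean
assert new_mean == batch_mean  # running moment equals the batch statistic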
