Skip to content

Commit 0b32998

Browse files
committed
breaking: fix bugs regarding count in standardization layer
Fixes #524 This fixes the two bugs described in c4cc133: - count was accidentally updated, leading to wrong values - count was calculated wrongly, as only the batch size was used. The correct value is the product of all reduce dimensions. This led to wrong standard deviations. While the batch dimension is the same for all inputs, the size of the second dimension might vary. For this reason, we need to introduce an input-specific `count` variable. This breaks serialization.
1 parent c4cc133 commit 0b32998

File tree

2 files changed

+7
-6
lines changed

2 files changed

+7
-6
lines changed

bayesflow/networks/standardization/standardization.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def moving_std(self, index: int) -> Tensor:
4040
"""
4141
return keras.ops.where(
4242
self.moving_m2[index] > 0,
43-
keras.ops.sqrt(self.moving_m2[index] / self.count),
43+
keras.ops.sqrt(self.moving_m2[index] / self.count[index]),
4444
1.0,
4545
)
4646

@@ -53,7 +53,7 @@ def build(self, input_shape: Shape):
5353
self.moving_m2 = [
5454
self.add_weight(shape=(shape[-1],), initializer="zeros", trainable=False) for shape in flattened_shapes
5555
]
56-
self.count = self.add_weight(shape=(), initializer="zeros", trainable=False)
56+
self.count = [self.add_weight(shape=(), initializer="zeros", trainable=False) for _ in flattened_shapes]
5757

5858
def call(
5959
self,
@@ -150,7 +150,7 @@ def _update_moments(self, x: Tensor, index: int):
150150
"""
151151

152152
reduce_axes = tuple(range(x.ndim - 1))
153-
batch_count = keras.ops.cast(keras.ops.shape(x)[0], self.count.dtype)
153+
batch_count = keras.ops.cast(keras.ops.prod(keras.ops.shape(x)[:-1]), self.count[index].dtype)
154154

155155
# Compute batch mean and M2 per feature
156156
batch_mean = keras.ops.mean(x, axis=reduce_axes)
@@ -159,7 +159,7 @@ def _update_moments(self, x: Tensor, index: int):
159159
# Read current totals
160160
mean = self.moving_mean[index]
161161
m2 = self.moving_m2[index]
162-
count = self.count
162+
count = self.count[index]
163163

164164
total_count = count + batch_count
165165
delta = batch_mean - mean
@@ -169,4 +169,4 @@ def _update_moments(self, x: Tensor, index: int):
169169

170170
self.moving_mean[index].assign(new_mean)
171171
self.moving_m2[index].assign(new_m2)
172-
self.count.assign(total_count)
172+
self.count[index].assign(total_count)

tests/test_approximators/test_approximator_standardization/test_approximator_standardization.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import keras
22
from tests.utils import assert_models_equal
3+
import numpy as np
34

45

56
def test_save_and_load(tmp_path, approximator, train_dataset, validation_dataset):
@@ -8,7 +9,7 @@ def test_save_and_load(tmp_path, approximator, train_dataset, validation_dataset
89
approximator.build(data_shapes)
910
for layer in approximator.standardize_layers.values():
1011
assert layer.built
11-
assert layer.count == 0
12+
np.testing.assert_allclose([c.value.numpy() for c in layer.count], 0.0)
1213
approximator.compute_metrics(**train_dataset[0])
1314

1415
keras.saving.save_model(approximator, tmp_path / "model.keras")

0 commit comments

Comments
 (0)