Skip to content

Commit 22c75d1

Browse files
committed
Remove aggregate and fix sample weight
1 parent 0564092 commit 22c75d1

File tree

6 files changed

+40
-20
lines changed

6 files changed

+40
-20
lines changed

bayesflow/networks/consistency_models/consistency_model.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88

99
from bayesflow.types import Tensor
10-
from bayesflow.utils import find_network, keras_kwargs, serialize_value_or_type, deserialize_value_or_type
10+
from bayesflow.utils import find_network, keras_kwargs, serialize_value_or_type, deserialize_value_or_type, weighted_sum
1111

1212

1313
from ..inference_network import InferenceNetwork
@@ -285,7 +285,9 @@ def consistency_function(self, x: Tensor, t: Tensor, conditions: Tensor = None,
285285
out = skip * x + out * f
286286
return out
287287

288-
def compute_metrics(self, x: Tensor, conditions: Tensor = None, stage: str = "training") -> dict[str, Tensor]:
288+
def compute_metrics(
289+
self, x: Tensor, conditions: Tensor = None, sample_weight: Tensor = None, stage: str = "training"
290+
) -> dict[str, Tensor]:
289291
base_metrics = super().compute_metrics(x, conditions=conditions, stage=stage)
290292

291293
# The discretization schedule requires the number of passed training steps.
@@ -328,6 +330,7 @@ def compute_metrics(self, x: Tensor, conditions: Tensor = None, stage: str = "tr
328330
lam = 1 / (t2 - t1)
329331

330332
# Pseudo-huber loss, see [2], Section 3.3
331-
loss = ops.mean(lam * (ops.sqrt(ops.square(teacher_out - student_out) + self.c_huber2) - self.c_huber))
333+
loss = lam * (ops.sqrt(ops.square(teacher_out - student_out) + self.c_huber2) - self.c_huber)
334+
loss = weighted_sum(loss, sample_weight)
332335

333336
return base_metrics | {"loss": loss}

bayesflow/networks/coupling_flow/coupling_flow.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,13 @@
22
from keras.saving import register_keras_serializable as serializable
33

44
from bayesflow.types import Tensor
5-
from bayesflow.utils import find_permutation, keras_kwargs, serialize_value_or_type, deserialize_value_or_type
5+
from bayesflow.utils import (
6+
find_permutation,
7+
keras_kwargs,
8+
serialize_value_or_type,
9+
deserialize_value_or_type,
10+
weighted_sum,
11+
)
612

713
from .actnorm import ActNorm
814
from .couplings import DualCoupling
@@ -158,11 +164,9 @@ def _inverse(
158164
def compute_metrics(
159165
self, x: Tensor, conditions: Tensor = None, sample_weight: Tensor = None, stage: str = "training"
160166
) -> dict[str, Tensor]:
161-
if sample_weight is not None:
162-
print(sample_weight)
163-
base_metrics = super().compute_metrics(x, conditions=conditions, sample_weight=sample_weight, stage=stage)
167+
base_metrics = super().compute_metrics(x, conditions=conditions, stage=stage)
164168

165169
z, log_density = self(x, conditions=conditions, inverse=False, density=True)
166-
loss = self.aggregate(-log_density, sample_weight)
170+
loss = weighted_sum(-log_density, sample_weight)
167171

168172
return base_metrics | {"loss": loss}

bayesflow/networks/flow_matching/flow_matching.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
optimal_transport,
1414
serialize_value_or_type,
1515
deserialize_value_or_type,
16+
weighted_sum,
1617
)
1718
from ..inference_network import InferenceNetwork
1819

@@ -254,11 +255,11 @@ def compute_metrics(
254255
x = t * x1 + (1 - t) * x0
255256
target_velocity = x1 - x0
256257

257-
base_metrics = super().compute_metrics(x1, conditions, sample_weight, stage)
258+
base_metrics = super().compute_metrics(x1, conditions=conditions, stage=stage)
258259

259260
predicted_velocity = self.velocity(x, time=t, conditions=conditions, training=stage == "training")
260261

261262
loss = self.loss_fn(target_velocity, predicted_velocity)
262-
loss = self.aggregate(loss, sample_weight)
263+
loss = weighted_sum(loss, sample_weight)
263264

264265
return base_metrics | {"loss": loss}

bayesflow/networks/inference_network.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,7 @@ def log_prob(self, samples: Tensor, conditions: Tensor = None, **kwargs) -> Tens
4848
_, log_density = self(samples, conditions=conditions, inverse=False, density=True, **kwargs)
4949
return log_density
5050

51-
def compute_metrics(
52-
self, x: Tensor, conditions: Tensor = None, sample_weight: Tensor = None, stage: str = "training"
53-
) -> dict[str, Tensor]:
51+
def compute_metrics(self, x: Tensor, conditions: Tensor = None, stage: str = "training") -> dict[str, Tensor]:
5452
if not self.built:
5553
xz_shape = keras.ops.shape(x)
5654
conditions_shape = None if conditions is None else keras.ops.shape(conditions)
@@ -66,10 +64,3 @@ def compute_metrics(
6664
metrics[metric.name] = metric(samples, x)
6765

6866
return metrics
69-
70-
def aggregate(self, losses: Tensor, weights: Tensor = None):
71-
if weights is not None:
72-
weighted = losses * weights
73-
else:
74-
weighted = losses
75-
return keras.ops.mean(weighted)

bayesflow/utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
tree_concatenate,
7272
tree_stack,
7373
fill_triangular_matrix,
74+
weighted_sum,
7475
)
7576
from .validators import check_lengths_same
7677
from .workflow_utils import find_inference_network, find_summary_network

bayesflow/utils/tensor_utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,26 @@ def pad(x: Tensor, value: float | Tensor, n: int, axis: int, side: str = "both")
140140
raise TypeError(f"Invalid side type {type(side)!r}. Must be str.")
141141

142142

143+
def weighted_sum(elements: Tensor, weights: Tensor = None) -> Tensor:
144+
"""
145+
Compute the (optionally) weighted mean of the input tensor.
146+
147+
Parameters
148+
----------
149+
elements : Tensor
150+
A tensor containing the elements to average.
151+
weights : Tensor, optional
152+
A tensor of the same shape as `elements` representing weights.
153+
If None, the mean is computed without weights.
154+
155+
Returns
156+
-------
157+
Tensor
158+
A scalar tensor representing the (weighted) mean.
159+
"""
160+
return keras.ops.mean(elements * weights if weights is not None else elements)
161+
162+
143163
def searchsorted(sorted_sequence: Tensor, values: Tensor, side: str = "left") -> Tensor:
144164
"""
145165
Find indices where elements should be inserted to maintain order.

0 commit comments

Comments (0)