automatic head building for multiple scoring rules

han-ol · han-ol · commit 0cd3110a3815 · 2025-01-08T14:59:46.000+01:00
diff --git a/bayesflow/__init__.py b/bayesflow/__init__.py
@@ -12,7 +12,7 @@
 )
 
 from .workflows import BasicWorkflow
-from .approximators import ContinuousApproximator, ContinuousPointApproximator
+from .approximators import ContinuousApproximator, PointApproximator
 from .adapters import Adapter
 from .datasets import OfflineDataset, OnlineDataset, DiskDataset
 from .simulators import make_simulator
diff --git a/bayesflow/approximators/__init__.py b/bayesflow/approximators/__init__.py
@@ -1,4 +1,4 @@
 from .approximator import Approximator
 from .continuous_approximator import ContinuousApproximator
-from .continuous_point_approximator import ContinuousPointApproximator
+from .point_approximator import PointApproximator
 from .model_comparison_approximator import ModelComparisonApproximator
diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py
@@ -16,7 +16,7 @@
 
 
 @serializable(package="bayesflow.approximators")
-class ContinuousPointApproximator(Approximator):
+class PointApproximator(Approximator):
     """
     Defines a workflow for performing fast posterior or likelihood inference.
     The distribution is approximated by a point with a feed-forward network and an optional summary network.
@@ -142,7 +142,12 @@ def estimate(
         conditions = keras.tree.map_structure(keras.ops.convert_to_tensor, conditions)
         conditions = {"inference_variables": self._estimate(**conditions)}
         conditions = keras.tree.map_structure(keras.ops.convert_to_numpy, conditions)
-        conditions = self.adapter(conditions, inverse=True, strict=False, **kwargs)
+        conditions["inference_variables"] = {
+            key: self.adapter(
+                dict(inference_variables=conditions["inference_variables"][key]), inverse=True, strict=False, **kwargs
+            )
+            for key in conditions["inference_variables"].keys()
+        }
 
         if split:
             conditions = split_arrays(conditions, axis=-1)
diff --git a/bayesflow/networks/point_inference_network.py b/bayesflow/networks/point_inference_network.py
@@ -1,14 +1,89 @@
 import keras
 
+from math import prod
+
+from collections.abc import Callable
+
+from bayesflow.utils import keras_kwargs, find_network
 from bayesflow.types import Shape, Tensor
+from bayesflow.scoring_rules import ScoringRule
+
+# TODO:
+# * [ ] weight initialization
+# * [ ] serializable ?
+# * [ ] testing
+# * [ ] docstrings
 
 
 class PointInferenceNetwork(keras.Layer):
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+    def __init__(
+        self,
+        scoring_rules: dict[str, ScoringRule],
+        body_subnet: str | type = "mlp",  # naming: shared_subnet / body / subnet ?
+        heads_subnet: dict[str, str | keras.Layer] = None,  # TODO: `type` instead of `keras.Layer` ? Too specific ?
+        activations: dict[str, keras.layers.Activation | Callable | str] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            **keras_kwargs(kwargs)
+        )  # TODO: need for bf.utils.keras_kwargs in regular InferenceNetwork class? seems to be a bug
+
+        self.scoring_rules = scoring_rules
+        # For now PointInferenceNetwork uses the same scoring rules for all parameters
+        # To support using different sets of scoring rules for different parameter (blocks),
+        # we can look into renaming this class to sth like `HeadCollection` and
+        # handle the split in a higher-level object. (PointApproximator?)
+
+        self.body_subnet = find_network(body_subnet, **kwargs.get("body_subnet_kwargs", {}))
+
+        if heads_subnet:
+            self.heads = {
+                key: [find_network(value, **kwargs.get("heads_subnet_kwargs", {}).get(key, {}))]
+                for key, value in heads_subnet.items()
+            }
+        else:
+            self.heads = {key: [] for key in self.scoring_rules.keys()}
+
+        if activations:
+            self.activations = {
+                key: (value if isinstance(value, keras.layers.Activation) else keras.layers.Activation(value))
+                for key, value in activations.items()
+            }  # make sure that each value is an Activation object
+        else:
+            self.activations = {key: keras.layers.Activation("linear") for key in self.scoring_rules.keys()}
+            # TODO: Stefan suggested to call these link functions, decide on this
+
+        for key in self.heads.keys():
+            self.heads[key] += [
+                keras.layers.Dense(units=None),
+                keras.layers.Reshape(target_shape=(None,)),
+                self.activations[key],
+            ]
+
+        # TODO: allow key-wise overriding of the default, instead of just complete default or totally custom choices
+
+        assert set(self.scoring_rules.keys()) == set(self.heads.keys()) == set(self.activations.keys())
 
     def build(self, xz_shape: Shape, conditions_shape: Shape = None) -> None:
-        pass
+        # build the shared body network
+        input_shape = conditions_shape
+        self.body_subnet.build(input_shape)
+        body_output_shape = self.body_subnet.compute_output_shape(input_shape)
+
+        for key in self.heads.keys():
+            # head_output_shape (excluding batch_size) convention is (*prediction_shape, *parameter_block_shape)
+            prediction_shape = self.scoring_rules[key].prediction_shape
+            head_output_shape = prediction_shape + xz_shape[1:]
+
+            # set correct head shape
+            self.heads[key][-3].units = prod(head_output_shape)
+            self.heads[key][-2].target_shape = head_output_shape
+
+            # build head block by block
+            input_shape = body_output_shape
+            for head_block in self.heads[key]:
+                head_block.build(input_shape)
+                input_shape = head_block.compute_output_shape(input_shape)
 
     def call(
         self,
@@ -17,19 +92,37 @@ def call(
         training: bool = False,
         **kwargs,
     ) -> Tensor | tuple[Tensor, Tensor]:
+        # TODO: remove unnecessary similarity with InferenceNetwork
         return self._forward(xz, conditions=conditions, training=training, **kwargs)
 
     def _forward(
         self, x: Tensor, conditions: Tensor = None, training: bool = False, **kwargs
     ) -> Tensor | tuple[Tensor, Tensor]:
-        raise NotImplementedError
+        body_output = self.body_subnet(conditions)
+
+        output = dict()
+        for key, head in self.heads.items():
+            y = body_output
+            for head_block in head:
+                y = head_block(y)
+
+            output |= {key: y}
+        return output
 
     def compute_metrics(self, x: Tensor, conditions: Tensor = None, stage: str = "training") -> dict[str, Tensor]:
         if not self.built:
             xz_shape = keras.ops.shape(x)
             conditions_shape = None if conditions is None else keras.ops.shape(conditions)
             self.build(xz_shape, conditions_shape=conditions_shape)
 
+        output = self(x, conditions)
+
+        # calculate negative score as mean over all heads
+        neg_score = 0
+        for key, rule in self.scoring_rules.items():
+            neg_score += rule.score(output[key], x)
+        neg_score /= len(self.scoring_rules)
+
         metrics = {}
 
         if stage != "training" and any(self.metrics):
@@ -41,7 +134,7 @@ def compute_metrics(self, x: Tensor, conditions: Tensor = None, stage: str = "tr
             pass
             # TODO: instead compute estimate based metrics
 
-        return metrics
+        return metrics | {"loss": neg_score}
 
     def estimate(self, conditions: Tensor = None) -> Tensor:
         return self._forward(None, conditions)
diff --git a/bayesflow/scoring_rules/__init__.py b/bayesflow/scoring_rules/__init__.py
@@ -0,0 +1 @@
+from .scoring_rules import ScoringRule, NormedDifferenceLoss, QuantileLoss
diff --git a/bayesflow/scoring_rules/scoring_rules.py b/bayesflow/scoring_rules/scoring_rules.py
@@ -0,0 +1,77 @@
+from collections.abc import Callable, Sequence
+
+from bayesflow.types import Tensor
+
+import keras
+
+
+class ScoringRule:
+    """
+    Base class for scoring rules that compare a network output against reference values.
+
+    The base class only stores an optional name; concrete rules override :py:meth:`score`.
+    """
+
+    def __init__(self, name: str | None = None):
+        """
+        Parameters
+        ----------
+        name : str, optional
+            Identifier of the scoring rule. Defaults to ``None``.
+        """
+        # TODO: names for scoring rules may be unnecessary ?
+        self.name = name
+
+    def score(self, target, reference):
+        """
+        Compute the score of ``target`` with respect to ``reference``.
+
+        Raises
+        ------
+        NotImplementedError
+            Always; subclasses must provide the actual computation.
+        """
+        raise NotImplementedError
+
+
+class NormedDifferenceLoss(ScoringRule):
+    """
+    Scoring rule based on the ``k``-th power of the absolute difference between estimate and reference.
+
+    Attributes
+    ----------
+    k : int
+        Exponent applied to the absolute difference.
+    prediction_shape : tuple of int
+        Shape of the prediction axes a network head has to produce for this rule, here ``(1,)``.
+    target_shape : tuple of int
+        Alias of ``prediction_shape`` kept for code that still reads the previous attribute name.
+    """
+
+    def __init__(
+        self,
+        k: int = 2,  # results in an estimator for the mean
+        name: str = "normed_difference",
+    ):
+        """
+        Parameters
+        ----------
+        k : int, optional
+            Exponent of the absolute difference. The default ``2`` results in an estimator for the mean.
+        name : str, optional
+            Identifier of the scoring rule.
+        """
+        super().__init__(name)
+
+        self.k = k
+        # PointInferenceNetwork.build() looks up `prediction_shape` on every scoring rule,
+        # so the attribute has to exist under exactly that name.
+        self.prediction_shape = (1,)
+        self.target_shape = self.prediction_shape
+
+    def score(self, target: Tensor, reference: Tensor) -> Tensor:
+        """
+        Return the mean of ``|target - reference| ** k`` over all entries.
+
+        Parameters
+        ----------
+        target : Tensor
+            Estimates; assumed to carry the prediction axis at position 1,
+            e.g. ``(batch_size, 1, num_variables)`` -- TODO confirm against the network head output.
+        reference : Tensor
+            Reference values without the prediction axis, e.g. ``(batch_size, num_variables)``.
+
+        Returns
+        -------
+        Tensor
+            Scalar score (lower is better).
+        """
+        # insert a singleton axis at position 1 so that reference broadcasts against the prediction axis
+        pointwise_difference = target - reference[:, None, :]
+        score = keras.ops.absolute(pointwise_difference) ** self.k
+        score = keras.ops.mean(score)
+        return score
+
+
+class WeightedNormedDifferenceLoss(ScoringRule):
+    """
+    Normed difference scoring rule whose pointwise terms are weighted by a function of the reference.
+
+    Attributes
+    ----------
+    weighting_function : Callable
+        Maps the reference tensor to weights; a constant weight of ``1`` is used if none is given.
+    k : int
+        Exponent applied to the absolute difference.
+    prediction_shape : tuple of int
+        Shape of the prediction axes a network head has to produce for this rule, here ``(1,)``.
+    target_shape : tuple of int
+        Alias of ``prediction_shape`` kept for code that still reads the previous attribute name.
+    """
+
+    def __init__(
+        self,
+        weighting_function: Callable = None,
+        k: int = 2,
+        name: str = "weighted_normed_difference",
+    ):
+        """
+        Parameters
+        ----------
+        weighting_function : Callable, optional
+            Called with the reference tensor and expected to return weights that broadcast against it.
+            If omitted, every term receives weight ``1``.
+        k : int, optional
+            Exponent of the absolute difference.
+        name : str, optional
+            Identifier of the scoring rule.
+        """
+        super().__init__(name)
+
+        if weighting_function is None:
+            # unweighted fallback; the argument is ignored on purpose
+            self.weighting_function = lambda _reference: 1
+        else:
+            self.weighting_function = weighting_function
+        self.k = k
+        # PointInferenceNetwork.build() looks up `prediction_shape` on every scoring rule,
+        # so the attribute has to exist under exactly that name.
+        self.prediction_shape = (1,)
+        self.target_shape = self.prediction_shape
+
+    def score(self, target: Tensor, reference: Tensor) -> Tensor:
+        """
+        Return the mean of ``w(reference) * |target - reference| ** k`` over all entries.
+
+        Parameters
+        ----------
+        target : Tensor
+            Estimates; assumed to carry the prediction axis at position 1,
+            e.g. ``(batch_size, 1, num_variables)`` -- TODO confirm against the network head output.
+        reference : Tensor
+            Reference values without the prediction axis, e.g. ``(batch_size, num_variables)``.
+
+        Returns
+        -------
+        Tensor
+            Scalar score (lower is better).
+        """
+        # insert a singleton axis at position 1 so that reference broadcasts against the prediction axis
+        pointwise_difference = target - reference[:, None, :]
+
+        weights = self.weighting_function(reference)
+        # Weights with the same rank as `reference` are laid out like `reference` and need the same
+        # singleton axis; otherwise (batch, dim) weights would broadcast against the (batch, 1, dim)
+        # difference into a (batch, batch, dim) cross product. Scalars and other ranks are left as-is.
+        if keras.ops.ndim(weights) == keras.ops.ndim(reference):
+            weights = keras.ops.expand_dims(weights, axis=1)
+
+        score = weights * keras.ops.absolute(pointwise_difference) ** self.k
+        score = keras.ops.mean(score)
+        return score
+
+
+class QuantileLoss(ScoringRule):
+    """
+    Quantile (pinball) loss evaluated jointly for a set of quantile levels.
+
+    Attributes
+    ----------
+    quantile_levels : Tensor
+        The quantile levels, converted to a tensor.
+    prediction_shape : tuple of int
+        Shape of the prediction axes a network head has to produce for this rule: one entry per quantile level.
+    target_shape : tuple of int
+        Alias of ``prediction_shape`` kept for code that still reads the previous attribute name.
+    """
+
+    def __init__(
+        self,
+        quantile_levels: Sequence[float] = (0.1, 0.5, 0.9),
+        name: str = "quantile",
+    ):
+        """
+        Parameters
+        ----------
+        quantile_levels : Sequence[float], optional
+            Levels of the quantiles to estimate. Defaults to ``(0.1, 0.5, 0.9)``.
+        name : str, optional
+            Identifier of the scoring rule.
+        """
+        super().__init__(name)
+        self.quantile_levels = keras.ops.convert_to_tensor(quantile_levels)
+        # Take the length from the input sequence rather than the converted tensor.
+        # PointInferenceNetwork.build() looks up `prediction_shape` on every scoring rule,
+        # so the attribute has to exist under exactly that name.
+        self.prediction_shape = (len(quantile_levels),)
+        self.target_shape = self.prediction_shape
+
+    def score(self, target: Tensor, reference: Tensor) -> Tensor:
+        """
+        Return the mean pinball loss over all entries and quantile levels.
+
+        For the difference ``d = target - reference`` and level ``tau`` each term is
+        ``d * (1[d > 0] - tau)``.
+
+        Parameters
+        ----------
+        target : Tensor
+            Quantile estimates; assumed to carry the quantile axis at position 1,
+            e.g. ``(batch_size, num_quantiles, num_variables)`` -- TODO confirm against the network head output.
+        reference : Tensor
+            Reference values without the quantile axis, e.g. ``(batch_size, num_variables)``.
+
+        Returns
+        -------
+        Tensor
+            Scalar score (lower is better).
+        """
+        # insert a singleton axis at position 1 so that reference broadcasts against the quantile axis
+        pointwise_difference = target - reference[:, None, :]
+
+        # indicator of over-estimation minus the level, with levels aligned to the quantile axis
+        score = pointwise_difference * (
+            keras.ops.cast(pointwise_difference > 0, float) - self.quantile_levels[None, :, None]
+        )
+        score = keras.ops.mean(score)
+        return score

Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@`
`12`	`12`	`)`
`13`	`13`
`14`	`14`	`from .workflows import BasicWorkflow`
`15`		`-from .approximators import ContinuousApproximator, ContinuousPointApproximator`
	`15`	`+from .approximators import ContinuousApproximator, PointApproximator`
`16`	`16`	`from .adapters import Adapter`
`17`	`17`	`from .datasets import OfflineDataset, OnlineDataset, DiskDataset`
`18`	`18`	`from .simulators import make_simulator`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+from .scoring_rules import ScoringRule, NormedDifferenceLoss, QuantileLoss`