
Commit 9767f6a

Add docs to backend approximator interfaces
1 parent 85d8149 commit 9767f6a


3 files changed: +301 -17 lines changed


bayesflow/approximators/backend_approximators/jax_approximator.py

Lines changed: 141 additions & 13 deletions
@@ -5,9 +5,40 @@
 
 
 class JAXApproximator(keras.Model):
+    """
+    Base class for approximators using JAX and Keras' stateless training interface.
+
+    This class enables stateless training and evaluation steps with JAX, supporting
+    JAX-compatible gradient computation and variable updates through the `StatelessScope`.
+
+    Notes
+    -----
+    Subclasses must implement:
+        - compute_metrics(self, *args, **kwargs) -> dict[str, jax.Array]
+        - _batch_size_from_data(self, data: dict[str, any]) -> int
+    """
+
     # noinspection PyMethodOverriding
     def compute_metrics(self, *args, **kwargs) -> dict[str, jax.Array]:
-        # implemented by each respective architecture
+        """
+        Compute and return a dictionary of metrics for the current batch.
+
+        This method is expected to be implemented by each subclass to compute
+        task-specific metrics using JAX arrays. It is compatible with stateless
+        execution and must be differentiable under JAX's `grad` system.
+
+        Parameters
+        ----------
+        *args : tuple
+            Positional arguments passed to the metric computation function.
+        **kwargs : dict
+            Keyword arguments passed to the metric computation function.
+
+        Returns
+        -------
+        dict of str to jax.Array
+            Dictionary containing named metric values as JAX arrays.
+        """
         raise NotImplementedError
 
     def stateless_compute_metrics(
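The Notes section above spells out the subclass contract. As a rough illustration, a concrete approximator could look like the sketch below; the class name, layer, and data keys are hypothetical, and the import path is only inferred from the file location in this commit (assumes KERAS_BACKEND=jax).

import jax
import keras

# Hypothetical module path, inferred from the file path shown in this diff.
from bayesflow.approximators.backend_approximators.jax_approximator import JAXApproximator


class ToyRegressionApproximator(JAXApproximator):
    """Illustrative subclass that fulfils the documented contract."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.net = keras.layers.Dense(1)

    def compute_metrics(self, x, y, stage: str = "training") -> dict[str, jax.Array]:
        # Must return a dict containing at least "loss", because
        # stateless_compute_metrics reads metrics["loss"] for the gradient.
        pred = self.net(x)
        loss = keras.ops.mean((pred - y) ** 2)
        return {"loss": loss}

    def _batch_size_from_data(self, data: dict[str, any]) -> int:
        # Used by the base classes when weighting metric updates.
        return keras.ops.shape(data["x"])[0]

Here `x` and `y` stand in for whatever keys the data dictionary of a real approximator carries.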
@@ -19,17 +50,34 @@ def stateless_compute_metrics(
         stage: str = "training",
     ) -> (jax.Array, tuple):
         """
-        Things we do for jax:
-        1. Accept trainable variables as the first argument
-           (can be at any position as indicated by the argnum parameter
-           in autograd, but needs to be an explicit arg)
-        2. Accept, potentially modify, and return other state variables
-        3. Return just the loss tensor as the first value
-        4. Return all other values in a tuple as the second value
-
-        This ensures:
-        1. The function is stateless
-        2. The function can be differentiated with jax autograd
+        Stateless computation of metrics required for JAX autograd.
+
+        This method performs a stateless forward pass using the given model
+        variables and returns both the loss and auxiliary information for
+        further updates.
+
+        Parameters
+        ----------
+        trainable_variables : Any
+            Current values of the trainable weights.
+        non_trainable_variables : Any
+            Current values of non-trainable variables (e.g., batch norm statistics).
+        metrics_variables : Any
+            Current values of metric tracking variables.
+        data : dict of str to any
+            Input data dictionary passed to `compute_metrics`.
+        stage : str, default="training"
+            Whether the computation is for "training" or "validation".
+
+        Returns
+        -------
+        loss : jax.Array
+            Scalar loss tensor for gradient computation.
+        aux : tuple
+            Tuple containing:
+                - metrics (dict of str to jax.Array)
+                - updated non-trainable variables
+                - updated metrics variables
         """
         state_mapping = []
         state_mapping.extend(zip(self.trainable_variables, trainable_variables))
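The removed comment block described why the signature is shaped this way: trainable variables come first, the scalar loss is the first return value, and everything else travels in an auxiliary tuple, which is exactly what `jax.value_and_grad(..., has_aux=True)` expects. A standalone toy that follows the same convention (illustration only, not bayesflow code):

import jax
import jax.numpy as jnp


def stateless_loss(trainable_variables, non_trainable_variables, data):
    # trainable variables first, loss first in the return value,
    # everything else bundled into the auxiliary tuple
    w, b = trainable_variables
    pred = data["x"] @ w + b
    loss = jnp.mean((pred - data["y"]) ** 2)
    metrics = {"loss": loss}
    return loss, (metrics, non_trainable_variables)


grad_fn = jax.value_and_grad(stateless_loss, has_aux=True)
params = (jnp.ones((3, 1)), jnp.zeros((1,)))
data = {"x": jnp.ones((8, 3)), "y": jnp.zeros((8, 1))}
(loss, (metrics, _)), grads = grad_fn(params, (), data)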
@@ -48,6 +96,23 @@ def stateless_compute_metrics(
         return metrics["loss"], (metrics, non_trainable_variables, metrics_variables)
 
     def stateless_test_step(self, state: tuple, data: dict[str, any]) -> (dict[str, jax.Array], tuple):
+        """
+        Stateless validation step compatible with JAX.
+
+        Parameters
+        ----------
+        state : tuple
+            Tuple of (trainable_variables, non_trainable_variables, metrics_variables).
+        data : dict of str to any
+            Input data for validation.
+
+        Returns
+        -------
+        metrics : dict of str to jax.Array
+            Dictionary of computed evaluation metrics.
+        state : tuple
+            Updated state tuple after evaluation.
+        """
         trainable_variables, non_trainable_variables, metrics_variables = state
 
         loss, aux = self.stateless_compute_metrics(
@@ -61,6 +126,25 @@ def stateless_test_step(self, state: tuple, data: dict[str, any]) -> (dict[str,
         return metrics, state
 
     def stateless_train_step(self, state: tuple, data: dict[str, any]) -> (dict[str, jax.Array], tuple):
+        """
+        Stateless training step compatible with JAX autograd and stateless optimization.
+
+        Computes gradients and applies optimizer updates in a purely functional style.
+
+        Parameters
+        ----------
+        state : tuple
+            Tuple of (trainable_variables, non_trainable_variables, optimizer_variables, metrics_variables).
+        data : dict of str to any
+            Input data for training.
+
+        Returns
+        -------
+        metrics : dict of str to jax.Array
+            Dictionary of computed training metrics.
+        state : tuple
+            Updated state tuple after training.
+        """
         trainable_variables, non_trainable_variables, optimizer_variables, metrics_variables = state
 
         grad_fn = jax.value_and_grad(self.stateless_compute_metrics, has_aux=True)
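The train step wraps `stateless_compute_metrics` in `jax.value_and_grad(..., has_aux=True)` and then, per the docstring, applies the optimizer purely functionally. A self-contained sketch of that update pattern using the Keras 3 stateless optimizer API (toy loss and variables, not bayesflow code; assumes the JAX backend):

import os

os.environ["KERAS_BACKEND"] = "jax"  # the stateless path assumes the JAX backend

import jax
import jax.numpy as jnp
import keras


def loss_fn(trainable_values, data):
    w, b = trainable_values
    pred = data["x"] @ w + b
    loss = jnp.mean((pred - data["y"]) ** 2)
    return loss, {"loss": loss}


w = keras.Variable(jnp.zeros((3, 1)))
b = keras.Variable(jnp.zeros((1,)))
optimizer = keras.optimizers.SGD(learning_rate=0.1)
optimizer.build([w, b])

trainable_values = [w.value, b.value]
optimizer_values = [v.value for v in optimizer.variables]
data = {"x": jnp.ones((8, 3)), "y": jnp.ones((8, 1))}

# gradients via value_and_grad, then a functional optimizer update:
# no variable is mutated in place, new values are returned instead
(loss, metrics), grads = jax.value_and_grad(loss_fn, has_aux=True)(trainable_values, data)
trainable_values, optimizer_values = optimizer.stateless_apply(
    optimizer_values, grads, trainable_values
)

In `stateless_train_step` itself the auxiliary tuple additionally carries the updated non-trainable and metrics variables, as documented for `stateless_compute_metrics` above.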
@@ -80,17 +164,61 @@ def stateless_train_step(self, state: tuple, data: dict[str, any]) -> (dict[str,
         return metrics, state
 
     def test_step(self, *args, **kwargs):
+        """
+        Alias to `stateless_test_step` for compatibility with `keras.Model`.
+
+        Parameters
+        ----------
+        *args, **kwargs : Any
+            Passed through to `stateless_test_step`.
+
+        Returns
+        -------
+        See `stateless_test_step`.
+        """
         return self.stateless_test_step(*args, **kwargs)
 
     def train_step(self, *args, **kwargs):
+        """
+        Alias to `stateless_train_step` for compatibility with `keras.Model`.
+
+        Parameters
+        ----------
+        *args, **kwargs : Any
+            Passed through to `stateless_train_step`.
+
+        Returns
+        -------
+        See `stateless_train_step`.
+        """
         return self.stateless_train_step(*args, **kwargs)
 
     def _update_metrics(self, loss: jax.Array, metrics_variables: any, sample_weight: any = None) -> any:
-        # update the loss progress bar, and possibly metrics variables along with it
+        """
+        Updates metric tracking variables in a stateless JAX-compatible way.
+
+        This method updates the loss tracker (and any other Keras metrics)
+        and returns updated metric variable states for downstream use.
+
+        Parameters
+        ----------
+        loss : jax.Array
+            Scalar loss used for metric tracking.
+        metrics_variables : Any
+            Current metric variable states.
+        sample_weight : Any, optional
+            Sample weights to apply during update.
+
+        Returns
+        -------
+        metrics_variables : Any
+            Updated metrics variable states.
+        """
         state_mapping = list(zip(self.metrics_variables, metrics_variables))
         with keras.StatelessScope(state_mapping) as scope:
             self._loss_tracker.update_state(loss, sample_weight=sample_weight)
 
+        # JAX is stateless, so we need to return the metrics as state in downstream functions
         metrics_variables = [scope.get_current_value(v) for v in self.metrics_variables]
 
         return metrics_variables
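`_update_metrics` relies on `keras.StatelessScope`: variable objects are mapped to their current values, the usual stateful-looking `update_state` call runs inside the scope, and the new values are read back out with `get_current_value` instead of being written into the variables. A standalone sketch of that pattern with a single `keras.metrics.Mean` tracker (illustration only, assumes the JAX backend):

import os

os.environ["KERAS_BACKEND"] = "jax"

import keras

tracker = keras.metrics.Mean(name="loss")
metrics_variables = [v.value for v in tracker.variables]

# map each variable object to the value it should have inside the scope
state_mapping = list(zip(tracker.variables, metrics_variables))
with keras.StatelessScope(state_mapping) as scope:
    tracker.update_state(2.0)

# the variables themselves are untouched; the updated state is read from the scope
metrics_variables = [scope.get_current_value(v) for v in tracker.variables]

This is why the method returns `metrics_variables` instead of mutating anything: with JAX, the caller has to thread the updated state forward, as the added comment in the diff notes.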

bayesflow/approximators/backend_approximators/tensorflow_approximator.py

Lines changed: 79 additions & 2 deletions
@@ -5,18 +5,83 @@
 
 
 class TensorFlowApproximator(keras.Model):
+    """
+    Base class for approximators using TensorFlow and Keras training logic.
+
+    This class supports training and evaluation loops using TensorFlow backends.
+    Subclasses are responsible for implementing the `compute_metrics` method and
+    `_batch_size_from_data`, which extracts batch size information from data inputs.
+
+    Notes
+    -----
+    Subclasses must implement:
+        - compute_metrics(self, *args, **kwargs) -> dict[str, tf.Tensor]
+        - _batch_size_from_data(self, data: dict[str, any]) -> int
+    """
+
     # noinspection PyMethodOverriding
     def compute_metrics(self, *args, **kwargs) -> dict[str, tf.Tensor]:
-        # implemented by each respective architecture
+        """
+        Compute and return a dictionary of metrics for the current batch.
+
+        This method is expected to be implemented by each subclass to compute task-specific
+        metrics (e.g., loss, accuracy). The arguments are dynamically filtered based on the
+        architecture's metric signature.
+
+        Parameters
+        ----------
+        *args : tuple
+            Positional arguments passed to the metric computation function.
+        **kwargs : dict
+            Keyword arguments passed to the metric computation function.
+
+        Returns
+        -------
+        dict of str to tf.Tensor
+            Dictionary containing named metric values as TensorFlow tensors.
+        """
         raise NotImplementedError
 
     def test_step(self, data: dict[str, any]) -> dict[str, tf.Tensor]:
+        """
+        Performs a single validation step.
+
+        Filters relevant keyword arguments for metric computation and updates internal
+        metric trackers using the validation data.
+
+        Parameters
+        ----------
+        data : dict of str to any
+            Input dictionary containing model inputs and possibly additional information
+            such as sample_weight or mask.
+
+        Returns
+        -------
+        dict of str to tf.Tensor
+            Dictionary of computed validation metrics.
+        """
         kwargs = filter_kwargs(data | {"stage": "validation"}, self.compute_metrics)
         metrics = self.compute_metrics(**kwargs)
         self._update_metrics(metrics, self._batch_size_from_data(data))
         return metrics
 
     def train_step(self, data: dict[str, any]) -> dict[str, tf.Tensor]:
+        """
+        Performs a single training step with gradient update.
+
+        Computes gradients of the loss with respect to the trainable variables, applies
+        the update, and updates internal metric trackers.
+
+        Parameters
+        ----------
+        data : dict of str to any
+            Input dictionary containing model inputs and training targets.
+
+        Returns
+        -------
+        dict of str to tf.Tensor
+            Dictionary of computed training metrics.
+        """
         with tf.GradientTape() as tape:
             kwargs = filter_kwargs(data | {"stage": "training"}, self.compute_metrics)
             metrics = self.compute_metrics(**kwargs)
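`filter_kwargs` (whose import is not shown in this diff) narrows the merged `data | {"stage": ...}` dictionary down to the keyword arguments that the subclass's `compute_metrics` signature actually accepts. The following is not bayesflow's implementation, just a hypothetical sketch of that idea using `inspect`:

import inspect


def keep_accepted_kwargs(kwargs: dict, fn) -> dict:
    # keep only the keys that appear as parameters of fn
    accepted = inspect.signature(fn).parameters
    return {key: value for key, value in kwargs.items() if key in accepted}


def compute_metrics(x, y, stage: str = "training") -> dict:
    return {"loss": 0.0}


data = {"x": 1.0, "y": 2.0, "sample_weight": None}
kwargs = keep_accepted_kwargs(data | {"stage": "validation"}, compute_metrics)
# -> {"x": 1.0, "y": 2.0, "stage": "validation"}; "sample_weight" is dropped

The real `filter_kwargs` may handle more cases; this only shows why extra entries such as `sample_weight` or `mask` in `data` do not break the `compute_metrics` call.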
@@ -29,7 +94,19 @@ def train_step(self, data: dict[str, any]) -> dict[str, tf.Tensor]:
         self._update_metrics(metrics, self._batch_size_from_data(data))
         return metrics
 
-    def _update_metrics(self, metrics, sample_weight=None):
+    def _update_metrics(self, metrics: dict[str, any], sample_weight: tf.Tensor = None):
+        """
+        Updates internal Keras metric objects with the given values.
+
+        If a new metric name is encountered, it is added as a new `keras.metrics.Mean` instance.
+
+        Parameters
+        ----------
+        metrics : dict of str to any
+            Dictionary of computed metric values to update.
+        sample_weight : tf.Tensor, optional
+            Sample weights to apply during metric update.
+        """
         for name, value in metrics.items():
             try:
                 metric_index = self.metrics_names.index(name)
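The method body is truncated here, but the docstring describes the intended behavior: look up each metric name and register a fresh `keras.metrics.Mean` when the name is new. A standalone sketch of that bookkeeping (hedged illustration, not the verbatim bayesflow code):

import keras

trackers: dict[str, keras.metrics.Mean] = {}


def update_trackers(metrics: dict, sample_weight=None) -> None:
    for name, value in metrics.items():
        if name not in trackers:
            # unseen metric names get a fresh running-mean tracker
            trackers[name] = keras.metrics.Mean(name=name)
        trackers[name].update_state(value, sample_weight=sample_weight)


update_trackers({"loss": 0.25, "kl": 0.05})
averaged = {name: float(tracker.result()) for name, tracker in trackers.items()}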
