Add estimate method to BasicWorkflow and improve docs for PointApproximator

han-ol · han-ol · commit 4251bbda1884 · 2025-03-24T17:56:16.000+01:00
diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py
@@ -23,17 +23,17 @@ def estimate(
         conditions: dict[str, np.ndarray],
         split: bool = False,
         **kwargs,
-    ) -> dict[str, dict[str, np.ndarray]]:
+    ) -> dict[str, dict[str, np.ndarray | dict[str, np.ndarray]]]:
         """
-        Provides point estimates based on provided conditions (e.g., observables).
+        Estimates point summaries of inference variables based on specified conditions.
 
         This method processes input conditions, computes estimates, applies necessary adapter transformations,
         and optionally splits the resulting arrays along the last axis.
 
         Parameters
         ----------
         conditions : dict[str, np.ndarray]
-            A dictionary mapping variable names to NumPy arrays representing the conditions
+            A dictionary mapping variable names to arrays representing the conditions
             for the estimation process.
         split : bool, optional
             If True, the estimated arrays are split along the last axis, by default False.
@@ -42,9 +42,15 @@ def estimate(
 
         Returns
         -------
-        dict[str, dict[str, np.ndarray]]
-            A nested dictionary where the top-level keys correspond to original variable names,
-            and values contain dictionaries mapping estimation results to NumPy arrays.
+        estimates : dict[str, dict[str, np.ndarray or dict[str, np.ndarray]]]
+            The estimates of inference variables in a nested dictionary.
+
+            1. Each first-level key is the name of an inference variable.
+            2. Each second-level key is the name of a scoring rule.
+            3. (If the scoring rule comprises multiple estimators, each third-level key is the name of an estimator.)
+
+            Each estimator output (i.e., dictionary value that is not itself a dictionary) is an array
+            of shape (num_datasets, point_estimate_size, variable_block_size).
         """
 
         conditions = self._prepare_conditions(conditions, **kwargs)
@@ -67,39 +73,43 @@ def sample(
         conditions: dict[str, np.ndarray],
         split: bool = False,
         **kwargs,
-    ) -> dict[str, np.ndarray]:
+    ) -> dict[str, np.ndarray | dict[str, np.ndarray]]:
         """
-        Generate samples from point estimates based on provided conditions. These samples
-        will generally not correspond to samples from the fully Bayesian posterior, since
-        they will assume some parametric form (e.g., Gaussian in the case of mean score).
+        Draws samples from a parametric distribution based on point estimates for given input conditions.
 
-        This method draws a specified number of samples according to the given conditions,
-        applies necessary transformations, and optionally splits the resulting arrays along the last axis.
+        These samples will generally not correspond to samples from the fully Bayesian posterior, since
+        they will assume some parametric form (e.g., multivariate normal when using the MultivariateNormalScore).
 
         Parameters
         ----------
         num_samples : int
             The number of samples to generate.
         conditions : dict[str, np.ndarray]
-            A dictionary mapping variable names to NumPy arrays representing the conditions
+            A dictionary mapping variable names to arrays representing the conditions
             for the sampling process.
         split : bool, optional
             If True, the sampled arrays are split along the last axis, by default False.
+            Currently not supported for `PointApproximator`; passing True raises `NotImplementedError`.
         **kwargs
             Additional keyword arguments passed to underlying processing functions.
 
         Returns
         -------
-        dict[str, np.ndarray]
-            A dictionary where keys correspond to variable names and values are NumPy arrays
-            containing the generated samples.
-        """
+        samples : dict[str, np.ndarray or dict[str, np.ndarray]]
+            Samples for all inference variables and all parametric scoring rules in a nested dictionary.
+
+            1. Each first-level key is the name of an inference variable.
+            2. (If there are multiple parametric scores, each second-level key is the name of such a score.)
 
+            Each output (i.e., dictionary value that is not itself a dictionary) is an array
+            of shape (num_datasets, num_samples, variable_block_size).
+        """
         conditions = self._prepare_conditions(conditions, **kwargs)
         samples = self._sample(num_samples, **conditions, **kwargs)
         samples = self._apply_inverse_adapter_to_samples(samples, **kwargs)
         # Optionally split the arrays along the last axis.
         if split:
+            raise NotImplementedError("split=True is currently not supported for `PointApproximator`.")
             samples = split_arrays(samples, axis=-1)
         # Squeeze samples if there's only one key-value pair.
         samples = self._squeeze_samples(samples)
diff --git a/bayesflow/networks/point_inference_network.py b/bayesflow/networks/point_inference_network.py
@@ -128,7 +128,7 @@ def call(
         conditions: Tensor = None,
         training: bool = False,
         **kwargs,
-    ) -> dict[str, Tensor]:
+    ) -> dict[str, dict[str, Tensor]]:
         if xz is None and not self.built:
             raise ValueError("Cannot build inference network without inference variables.")
         if conditions is None:  # unconditional estimation uses a fixed input vector
diff --git a/bayesflow/workflows/basic_workflow.py b/bayesflow/workflows/basic_workflow.py
@@ -290,6 +290,36 @@ def sample(
         """
         return self.approximator.sample(num_samples=num_samples, conditions=conditions, **kwargs)
 
+    def estimate(
+        self,
+        *,
+        conditions: dict[str, np.ndarray],
+        **kwargs,
+    ) -> dict[str, dict[str, np.ndarray | dict[str, np.ndarray]]]:
+        """
+        Estimates point summaries of inference variables based on specified conditions.
+
+        Parameters
+        ----------
+        conditions : dict[str, np.ndarray]
+            A dictionary mapping variable names to arrays representing the conditions for the estimation process.
+        **kwargs
+            Additional keyword arguments forwarded unchanged to `self.approximator.estimate`.
+
+        Returns
+        -------
+        estimates : dict[str, dict[str, np.ndarray or dict[str, np.ndarray]]]
+            The estimates of inference variables in a nested dictionary.
+
+            1. Each first-level key is the name of an inference variable.
+            2. Each second-level key is the name of a scoring rule.
+            3. (If the scoring rule comprises multiple estimators, each third-level key is the name of an estimator.)
+
+            Each estimator output (i.e., dictionary value that is not itself a dictionary) is an array
+            of shape (num_datasets, point_estimate_size, variable_block_size).
+        """
+        return self.approximator.estimate(conditions=conditions, **kwargs)
+
     def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray:
         """
         Compute the log probability of given variables under the approximator.