Document how step methods provide progress bar stats

jessegrabowski · jessegrabowski · commit a8af2e8ddcfd · 2025-03-20T16:11:36.000+08:00
diff --git a/pymc/step_methods/compound.py b/pymc/step_methods/compound.py
@@ -21,14 +21,15 @@
 import warnings
 
 from abc import ABC, abstractmethod
-from collections.abc import Iterable, Mapping, Sequence
+from collections.abc import Callable, Iterable, Mapping, Sequence
 from dataclasses import field
 from enum import IntEnum, unique
 from typing import Any
 
 import numpy as np
 
 from pytensor.graph.basic import Variable
+from rich.progress import ProgressColumn
 
 from pymc.blocking import PointType, StatDtype, StatsDict, StatShape, StatsType
 from pymc.model import modelcontext
@@ -181,17 +182,72 @@ def __new__(cls, *args, **kwargs):
             step.__newargs = (vars, *args), kwargs
             return step
 
-    @staticmethod
-    def _progressbar_config(n_chains=1):
+    def _progressbar_config(self, n_chains: int = 1):
+        """
+        Get progressbar configuration for this step sampler.
+
+        By default, the progress bar displays no stats columns, only basic info (number of draws and sampling time).
+        Specific step methods should override this method to specify which stats to display and how.
+
+        Parameters
+        ----------
+        n_chains: int
+            Number of chains being sampled. This controls the number of progress bars that will be displayed.
+
+        Returns
+        -------
+        columns: list of rich.progress.ProgressColumn
+            List of columns to display in the progress bar.
+
+        stats: dict
+            Dictionary of statistics associated with each column.
+        """
         columns = []
         stats = {}
 
         return columns, stats
 
-    @staticmethod
-    def _make_update_stats_function():
-        def update_stats(stats, step_stats, chain_idx):
-            return stats
+    def _make_update_stats_function(self) -> Callable[[dict, dict, int], dict]:
+        """
+        Create an update function used by the progress bar to update statistics during sampling.
+
+        By default, the update is a no-op. Specific step methods should implement special logic for which
+        statistics to display and how.
+
+        Returns
+        -------
+        update_stats: Callable
+            Function that updates displayed statistics for the current chain, given statistics generated by the step
+            sampler during the most recent step.
+        """
+
+        def update_stats(
+            displayed_stats: dict[str, np.ndarray],
+            step_stats: dict[str, str | float | int | bool | None],
+            chain_idx: int,
+        ) -> dict[str, np.ndarray]:
+            """
+            Update the statistics displayed in the progress bar after each step.
+
+            Parameters
+            ----------
+            displayed_stats: dict
+                Dictionary of statistics displayed in the progress bar. The keys are the names of the statistics and
+                the values are the current values of the statistics, with one value per chain being sampled.
+
+            step_stats: dict
+                Dictionary of statistics generated by the step sampler when taking the current step. The keys are the
+                names of the statistics and the values are the values of the statistics generated by the step sampler.
+
+            chain_idx: int
+                Index of the chain associated with the current step.
+
+            Returns
+            -------
+            dict
+                The updated statistics dictionary to be displayed in the progress bar.
+            """
+            return displayed_stats
 
         return update_stats
 
@@ -311,7 +367,28 @@ def set_rng(self, rng: RandomGenerator):
         for method, _rng in zip(self.methods, _rngs):
             method.set_rng(_rng)
 
-    def _progressbar_config(self, n_chains=1):
+    def _progressbar_config(
+        self, n_chains: int = 1
+    ) -> tuple[list[ProgressColumn], dict[str, np.ndarray | float]]:
+        """
+        Get progressbar configuration for this step sampler.
+
+        The columns of the rich progress bar displayed during sampling are chosen by the step samplers themselves. In
+        the compound step case, we display the set union of all columns from the sub-step samplers.
+
+        Parameters
+        ----------
+        n_chains: int
+            Number of chains being sampled. This controls the number of progress bars that will be displayed.
+
+        Returns
+        -------
+        columns: list of rich.progress.ProgressColumn
+            List of columns to display in the progress bar.
+
+        stats: dict
+            Dictionary of statistics associated with each column.
+        """
         from functools import reduce
 
         column_lists, stat_dict_list = zip(
@@ -332,14 +409,56 @@ def _progressbar_config(self, n_chains=1):
 
         return columns, stats
 
-    def _make_update_stats_function(self):
+    def _make_update_stats_function(self) -> Callable[[dict, list[dict], int], dict]:
+        """
+        Create an update function used by the progress bar to update statistics during sampling.
+
+        Returns
+        -------
+        update_stats: Callable
+            Function that updates displayed statistics for the current chain, given statistics generated by the step
+            samplers during the most recent step.
+        """
         update_fns = [method._make_update_stats_function() for method in self.methods]
 
-        def update_stats(stats, step_stats, chain_idx):
+        def update_stats(
+            displayed_stats: dict[str, np.ndarray],
+            step_stats: list[dict[str, str | float | int | bool | None]],
+            chain_idx: int,
+        ) -> dict[str, np.ndarray]:
+            """
+            Update the statistics displayed in the progress bar after each step.
+
+            Parameters
+            ----------
+            displayed_stats: dict
+                Dictionary of statistics displayed in the progress bar. The keys are the names of the statistics and
+                the values are the current values of the statistics, with one value per chain being sampled.
+
+            step_stats: list of dict
+                List of dictionaries containing statistics generated by **each** step sampler in the CompoundStep when
+                taking the current step. For each dictionary, the keys are names of statistics and the values are
+                the values of the statistics generated by the step sampler.
+
+            chain_idx: int
+                Index of the chain associated with the current step.
+
+            Returns
+            -------
+            dict
+                The updated statistics dictionary to be displayed in the progress bar.
+            """
+            # TODO: The compound step is commonly made of many instances of the same step (e.g. 3 Metropolis steps).
+            #  In this case, the current loop logic just overwrites each Metropolis step's stats with those of the
+            #  next step (so the user only ever sees the 3rd step's stats). We should have a better way to aggregate
+            #  the stats from each step.
+            if not isinstance(step_stats, list):
+                step_stats = [step_stats]
+
             for step_stat, update_fn in zip(step_stats, update_fns):
-                stats = update_fn(stats, step_stat, chain_idx)
+                displayed_stats = update_fn(displayed_stats, step_stat, chain_idx)
 
-            return stats
+            return displayed_stats
 
         return update_stats
 
diff --git a/pymc/step_methods/hmc/nuts.py b/pymc/step_methods/hmc/nuts.py
@@ -15,12 +15,13 @@
 from __future__ import annotations
 
 from collections import namedtuple
+from collections.abc import Callable
 from dataclasses import field
 
 import numpy as np
 
 from pytensor import config
-from rich.progress import TextColumn
+from rich.progress import ProgressColumn, TextColumn
 from rich.table import Column
 
 from pymc.stats.convergence import SamplerWarning
@@ -231,8 +232,25 @@ def competence(var, has_grad):
             return Competence.PREFERRED
         return Competence.INCOMPATIBLE
 
-    @staticmethod
-    def _progressbar_config(n_chains=1):
+    def _progressbar_config(
+        self, n_chains: int = 1
+    ) -> tuple[list[ProgressColumn], dict[str, np.ndarray | float]]:
+        """
+        Get progressbar configuration for this step sampler.
+
+        Parameters
+        ----------
+        n_chains: int
+            Number of chains being sampled. This controls the number of progress bars that will be displayed.
+
+        Returns
+        -------
+        columns: list of rich.progress.ProgressColumn
+            List of columns to display in the progress bar.
+
+        stats: dict
+            Dictionary of statistics associated with each column.
+        """
         columns = [
             TextColumn("{task.fields[divergences]}", table_column=Column("Divergences", ratio=1)),
             TextColumn("{task.fields[step_size]:0.2f}", table_column=Column("Step size", ratio=1)),
@@ -247,18 +265,52 @@ def _progressbar_config(n_chains=1):
 
         return columns, stats
 
-    @staticmethod
-    def _make_update_stats_function():
-        def update_stats(stats, step_stats, chain_idx):
+    def _make_update_stats_function(self) -> Callable[[dict, dict, int], dict]:
+        """
+        Create an update function used by the progress bar to update statistics during sampling.
+
+        Returns
+        -------
+        update_stats: Callable
+            Function that updates displayed statistics for the current chain, given statistics generated by the step
+            sampler during the most recent step.
+        """
+
+        def update_stats(
+            displayed_stats: dict[str, np.ndarray],
+            step_stats: dict[str, str | float | int | bool | None],
+            chain_idx: int,
+        ) -> dict[str, np.ndarray]:
+            """
+            Update the statistics displayed in the progress bar after each step.
+
+            Parameters
+            ----------
+            displayed_stats: dict
+                Dictionary of statistics displayed in the progress bar. The keys are the names of the statistics and
+                the values are the current values of the statistics, with one value per chain being sampled.
+
+            step_stats: dict
+                Dictionary of statistics generated by the step sampler when taking the current step, keyed by
+                statistic name. If a list of such dictionaries is passed instead, only the first entry is used.
+
+            chain_idx: int
+                Index of the chain associated with the current step.
+
+            Returns
+            -------
+            dict
+                The updated statistics dictionary to be displayed in the progress bar.
+            """
             if isinstance(step_stats, list):
                 step_stats = step_stats[0]
 
             if not step_stats["tune"]:
-                stats["divergences"][chain_idx] += step_stats["diverging"]
+                displayed_stats["divergences"][chain_idx] += step_stats["diverging"]
 
-            stats["step_size"][chain_idx] = step_stats["step_size"]
-            stats["tree_size"][chain_idx] = step_stats["tree_size"]
-            return stats
+            displayed_stats["step_size"][chain_idx] = step_stats["step_size"]
+            displayed_stats["tree_size"][chain_idx] = step_stats["tree_size"]
+            return displayed_stats
 
         return update_stats
 
diff --git a/pymc/step_methods/metropolis.py b/pymc/step_methods/metropolis.py
@@ -24,7 +24,7 @@
 from pytensor import tensor as pt
 from pytensor.graph.fg import MissingInputError
 from pytensor.tensor.random.basic import BernoulliRV, CategoricalRV
-from rich.progress import TextColumn
+from rich.progress import ProgressColumn, TextColumn
 from rich.table import Column
 
 import pymc as pm
@@ -327,8 +327,25 @@ def astep(self, q0: RaveledVars) -> tuple[RaveledVars, StatsType]:
     def competence(var, has_grad):
         return Competence.COMPATIBLE
 
-    @staticmethod
-    def _progressbar_config(n_chains=1):
+    def _progressbar_config(
+        self, n_chains: int = 1
+    ) -> tuple[list[ProgressColumn], dict[str, np.ndarray | float]]:
+        """
+        Get progressbar configuration for this step sampler.
+
+        Parameters
+        ----------
+        n_chains: int
+            Number of chains being sampled. This controls the number of progress bars that will be displayed.
+
+        Returns
+        -------
+        columns: list of rich.progress.ProgressColumn
+            List of columns to display in the progress bar.
+
+        stats: dict
+            Dictionary of statistics associated with each column.
+        """
         columns = [
             TextColumn("{task.fields[tune]}", table_column=Column("Tuning", ratio=1)),
             TextColumn("{task.fields[scaling]:0.2f}", table_column=Column("Scaling", ratio=1)),
@@ -345,17 +362,51 @@ def _progressbar_config(n_chains=1):
 
         return columns, stats
 
-    @staticmethod
-    def _make_update_stats_function():
-        def update_stats(stats, step_stats, chain_idx):
+    def _make_update_stats_function(self) -> Callable[[dict, dict, int], dict]:
+        """
+        Create an update function used by the progress bar to update statistics during sampling.
+
+        Returns
+        -------
+        update_stats: Callable
+            Function that updates displayed statistics for the current chain, given statistics generated by the step
+            sampler during the most recent step.
+        """
+
+        def update_stats(
+            displayed_stats: dict[str, np.ndarray],
+            step_stats: dict[str, str | float | int | bool | None],
+            chain_idx: int,
+        ) -> dict[str, np.ndarray]:
+            """
+            Update the statistics displayed in the progress bar after each step.
+
+            Parameters
+            ----------
+            displayed_stats: dict
+                Dictionary of statistics displayed in the progress bar. The keys are the names of the statistics and
+                the values are the current values of the statistics, with one value per chain being sampled.
+
+            step_stats: dict
+                Dictionary of statistics generated by the step sampler when taking the current step, keyed by
+                statistic name. If a list of such dictionaries is passed instead, only the first entry is used.
+
+            chain_idx: int
+                Index of the chain associated with the current step.
+
+            Returns
+            -------
+            dict
+                The updated statistics dictionary to be displayed in the progress bar.
+            """
             if isinstance(step_stats, list):
                 step_stats = step_stats[0]
 
-            stats["tune"][chain_idx] = step_stats["tune"]
-            stats["accept_rate"][chain_idx] = step_stats["accept"]
-            stats["scaling"][chain_idx] = step_stats["scaling"]
+            displayed_stats["tune"][chain_idx] = step_stats["tune"]
+            displayed_stats["accept_rate"][chain_idx] = step_stats["accept"]
+            displayed_stats["scaling"][chain_idx] = step_stats["scaling"]
 
-            return stats
+            return displayed_stats
 
         return update_stats
 
diff --git a/pymc/step_methods/slicer.py b/pymc/step_methods/slicer.py