Refactor where the initial point function is created

jessegrabowski · jessegrabowski · commit 075b47e82241 · 2024-11-14T18:33:32.000+08:00
diff --git a/python/nutpie/compile_pymc.py b/python/nutpie/compile_pymc.py
@@ -2,14 +2,15 @@
 import itertools
 import warnings
 from dataclasses import dataclass
-from functools import partial
+from functools import wraps
 from importlib.util import find_spec
 from math import prod
-from typing import TYPE_CHECKING, Any, Callable, Literal, Optional
+from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union
 
 import numpy as np
 import pandas as pd
 from numpy.typing import NDArray
+from pymc.initial_point import make_initial_point_fn
 
 from nutpie import _lib
 from nutpie.compiled_pyfunc import SeedType, from_pyfunc
@@ -26,6 +27,59 @@ def intrinsic(f):
 if TYPE_CHECKING:
     import numba.core.ccallback
     import pymc as pm
+    from pytensor.tensor import TensorVariable, Variable
+
+
+def rv_dict_to_flat_array_wrapper(
+    fn: Callable[[SeedType], dict[str, np.ndarray]],
+    names: list[str],
+    shapes: list[tuple[int]],
+) -> Callable[[SeedType], np.ndarray]:
+    """
+    Wrap a function that returns a dictionary of name -> array pairs so that
+    it returns a single flat float64 array instead.
+
+    Parameters
+    ----------
+    fn: Callable
+        Function that takes a seed and returns a dictionary of variable names
+        to initial values. This function should be the output of
+        pymc.initial_point.make_initial_point_fn
+    names: list of str
+        List of random variable names in the model
+    shapes: list of tuple of int
+        Shape of random variables in the model
+
+    Returns
+    -------
+    seeded_array_fn: Callable
+        Function that takes a seed and returns a flat, contiguous float64
+        array of initial values. The ordering of the random variables inside
+        the array is controlled by the ``names`` parameter. It raises a
+        ValueError if an initial value does not have its expected shape.
+    """
+    # np.prod(()) is the float 1.0, so sizes are cast to int: a float total
+    # (any scalar variable) would make np.empty raise a TypeError.
+    expected_shapes = [tuple(int(dim) for dim in shape) for shape in shapes]
+    total_size = sum(int(np.prod(shape)) for shape in expected_shapes)
+
+    @wraps(fn)
+    def seeded_array_fn(seed: SeedType = None):
+        initial_value_dict = fn(seed)
+        flat_array = np.empty(total_size, dtype="float64", order="C")
+        cursor = 0
+        for name, shape in zip(names, expected_shapes):
+            initial_value = np.asarray(initial_value_dict[name])
+            if initial_value.shape != shape:
+                raise ValueError(
+                    f"Size of initial value for {name} is {initial_value.shape}, "
+                    f"expected {shape}"
+                )
+            flat_array[cursor : cursor + initial_value.size] = initial_value.ravel()
+            cursor += initial_value.size
+        return flat_array
+
+    return seeded_array_fn
 
 
 @intrinsic
@@ -159,7 +213,11 @@ def make_user_data(shared_vars, shared_data):
     return user_data
 
 
-def _compile_pymc_model_numba(model: "pm.Model", **kwargs) -> CompiledPyMCModel:
+def _compile_pymc_model_numba(
+    model: "pm.Model",
+    initial_point_fn: Callable[[SeedType], dict[str, np.ndarray]],
+    **kwargs,
+) -> CompiledPyMCModel:
     if find_spec("numba") is None:
         raise ImportError(
             "Numba is not installed in the current environment. "
@@ -174,7 +232,6 @@ def _compile_pymc_model_numba(model: "pm.Model", **kwargs) -> CompiledPyMCModel:
         n_expanded,
         logp_fn_pt,
         expand_fn_pt,
-        initial_fn_pt,
         shape_info,
     ) = _make_functions(model, mode="NUMBA", compute_grad=True, join_expanded=True)
 
@@ -223,15 +280,17 @@ def _compile_pymc_model_numba(model: "pm.Model", **kwargs) -> CompiledPyMCModel:
         expand_numba = numba.cfunc(c_sig_expand, **kwargs)(expand_numba_raw)
 
     dims, coords = _prepare_dims_and_coords(model, shape_info)
-
+    initial_point_fn_array = rv_dict_to_flat_array_wrapper(
+        initial_point_fn, names=shape_info[0], shapes=shape_info[-1]
+    )
     return CompiledPyMCModel(
         _n_dim=n_dim,
         dims=dims,
         _coords=coords,
         _shapes={name: tuple(shape) for name, _, shape in zip(*shape_info)},
         compiled_logp_func=logp_numba,
         compiled_expand_func=expand_numba,
-        initial_point_func=initial_fn_pt,
+        initial_point_func=initial_point_fn_array,
         shared_data=shared_data,
         user_data=user_data,
         n_expanded=n_expanded,
@@ -266,7 +325,13 @@ def _prepare_dims_and_coords(model, shape_info):
     return dims, coords
 
 
-def _compile_pymc_model_jax(model, *, gradient_backend=None, **kwargs):
+def _compile_pymc_model_jax(
+    model,
+    *,
+    gradient_backend=None,
+    initial_point_fn: Callable[[SeedType], dict[str, np.ndarray]],
+    **kwargs,
+):
     if find_spec("jax") is None:
         raise ImportError(
             "Jax is not installed in the current environment. "
@@ -286,7 +351,6 @@ def _compile_pymc_model_jax(model, *, gradient_backend=None, **kwargs):
         _,
         logp_fn_pt,
         expand_fn_pt,
-        make_initial_point_py,
         shape_info,
     ) = _make_functions(
         model,
@@ -343,11 +407,15 @@ def expand(x, **shared):
 
     dims, coords = _prepare_dims_and_coords(model, shape_info)
 
+    initial_point_fn_array = rv_dict_to_flat_array_wrapper(
+        initial_point_fn, names=shape_info[0], shapes=shape_info[-1]
+    )
+
     return from_pyfunc(
         ndim=n_dim,
         make_logp_fn=make_logp_func,
         make_expand_fn=make_expand_func,
-        make_initial_point_fn=make_initial_point_py,
+        make_initial_point_fn=initial_point_fn_array,
         expanded_dtypes=dtypes,
         expanded_shapes=shapes,
         expanded_names=names,
@@ -362,6 +430,9 @@ def compile_pymc_model(
     *,
     backend: Literal["numba", "jax"] = "numba",
     gradient_backend: Literal["pytensor", "jax"] = "pytensor",
+    overrides: dict[Union["Variable", str], np.ndarray | float | int] | None = None,
+    jitter_rvs: set["TensorVariable"] | None = None,
+    default_strategy: Literal["support_point", "prior"] = "support_point",
     **kwargs,
 ) -> CompiledModel:
     """Compile necessary functions for sampling a pymc model.
@@ -375,7 +446,13 @@ def compile_pymc_model(
     gradient_backend: ["pytensor", "jax"]
         Which library is used to compute the gradients. This can only be
         changed to "jax" if the jax backend is used.
-
+    jitter_rvs : set
+        The set (or list or tuple) of random variables for which a U(-1, +1) jitter should be
+        added to the initial value. Only available for variables that have a transform or real-valued support.
+    default_strategy : str
+        Which of { "support_point", "prior" } to prefer if the initval setting for an RV is None.
+    overrides : dict
+        Initial value (strategies) to use instead of what's specified in `Model.initial_values`.
     Returns
     -------
     compiled_model : CompiledPyMCModel
@@ -390,13 +467,26 @@ def compile_pymc_model(
             "and restart your kernel in case you are in an interactive session."
         )
 
+    initial_point_fn = make_initial_point_fn(
+        model=model,
+        overrides=overrides,
+        default_strategy=default_strategy,
+        jitter_rvs=jitter_rvs,
+        return_transformed=False,
+    )
+
     if backend.lower() == "numba":
         if gradient_backend == "jax":
             raise ValueError("Gradient backend cannot be jax when using numba backend")
-        return _compile_pymc_model_numba(model, **kwargs)
+        return _compile_pymc_model_numba(
+            model, initial_point_fn=initial_point_fn, **kwargs
+        )
     elif backend.lower() == "jax":
         return _compile_pymc_model_jax(
-            model, gradient_backend=gradient_backend, **kwargs
+            model,
+            gradient_backend=gradient_backend,
+            initial_point_fn=initial_point_fn,
+            **kwargs,
         )
     else:
         raise ValueError(f"Backend must be one of numba and jax. Got {backend}")
@@ -434,12 +524,7 @@ def _compute_shapes(model):
 def _make_functions(
     model, *, mode, compute_grad, join_expanded
 ) -> tuple[
-    int,
-    int,
-    Callable,
-    Callable,
-    Callable,
-    tuple[list[str], list[slice], list[tuple[int, ...]]]
+    int, int, Callable, Callable, tuple[list[str], list[slice], list[tuple[int, ...]]]
 ]:
     """
     Compile functions required by nuts-rs from a given PyMC model.
@@ -468,18 +553,14 @@ def _make_functions(
         and the gradient, otherwise only the logp is returned.
     expand_fn_pt: Callable
         Compiled pytensor function that computes the remaining variables for the trace
-    init_point_fn_pt: Callable
-        ...
     param_data: tuple of lists
         Tuple containing data necessary to unravel a flat array of model variables back into a ragged list of arrays.
         The first list contains the names of the variables, the second list contains the slices that correspond to the
         variables in the flat array, and the third list contains the shapes of the variables.
     """
     import pytensor
     import pytensor.tensor as pt
-    from pymc.initial_point import make_initial_point_fn
     from pymc.pytensorf import compile_pymc
-    from pymc.initial_point import make_initial_point_fn
 
     shapes = _compute_shapes(model)
 
@@ -549,10 +630,6 @@ def _make_functions(
         with model:
             logp_fn_pt = compile_pymc((joined,), (logp,), mode=mode)
 
-    make_initial_point_py = partial(make_initial_point_fn,
-                                    model=model,
-                                    return_transformed=True)
-
     # Make function that computes remaining variables for the trace
     remaining_rvs = [
         var for var in model.unobserved_value_vars if var.name not in joined_names
@@ -591,7 +668,6 @@ def _make_functions(
         num_expanded,
         logp_fn_pt,
         expand_fn_pt,
-        make_initial_point_py,
         (all_names, all_slices, all_shapes),
     )
 
diff --git a/python/nutpie/compiled_pyfunc.py b/python/nutpie/compiled_pyfunc.py
@@ -1,7 +1,7 @@
 import dataclasses
 from dataclasses import dataclass
-from functools import partial, wraps
-from typing import TYPE_CHECKING, Any, Callable, Literal, Union
+from functools import partial
+from typing import Any, Callable
 
 import numpy as np
 
@@ -11,62 +11,6 @@
 SeedType = int | float | np.random.Generator | None
 
 
-if TYPE_CHECKING:
-    from pytensor.tensor import TensorVariable, Variable
-
-
-def rv_dict_to_flat_array_wrapper(
-    fn: Callable[[SeedType], dict[str, np.ndarray]],
-    names: list[str],
-    shapes: list[tuple[int]],
-) -> Callable[[SeedType], np.ndarray]:
-    """
-    Wraps a function that returns a dictionary of string:array key:value pairs
-    and returns a single flat float64 array. Also checks that the shapes of
-    the arrays match the expected shapes.
-
-    Parameters
-    ----------
-    fn: Callable
-        Function that takes a seed and return a dictionary of variable names
-        to initial values. This function should be the output of
-        pymc.initial_point.make_initial_point_fn
-    names: list of str
-        List of random variable names in the model
-    shapes: list of tuple of int
-        Shape of random variables in the model
-
-    Returns
-    -------
-    seeded_array_fn: Callable
-        Function that takes a seed and returns a flat, contiguous float64
-        array of initial values. The ordering of the random variables inside
-        the array is controlled by the ``names`` parameter.
-    """
-
-    @wraps(fn)
-    def seeded_array_fn(seed: SeedType = None):
-        inital_value_dict = fn(seed)
-        total_size = sum(np.prod(shape) for shape in shapes)
-        flat_array = np.empty(total_size, dtype="float64", order="C")
-        cursor = 0
-
-        for name, shape in zip(names, shapes):
-            initial_value = inital_value_dict[name]
-            n = int(np.prod(initial_value.shape))
-            if initial_value.shape != shape:
-                raise ValueError(
-                    f"Size of initial value for {name} is {initial_value.shape}, "
-                    f"expected {shape}"
-                )
-            flat_array[cursor : cursor + n] = initial_value.ravel().astype("float64")
-            cursor += n
-
-        return flat_array
-
-    return seeded_array_fn
-
-
 @dataclass(frozen=True)
 class PyFuncModel(CompiledModel):
     _make_logp_func: Callable
@@ -129,15 +73,11 @@ def from_pyfunc(
     ndim: int,
     make_logp_fn: Callable,
     make_expand_fn: Callable,
-    make_initial_point_fn: Callable[[Any, Any, Any], Callable[[SeedType], np.ndarray]],
+    make_initial_point_fn: Callable[[SeedType], dict[str, np.ndarray]],
     expanded_dtypes: list[np.dtype],
     expanded_shapes: list[tuple[int, ...]],
     expanded_names: list[str],
     *,
-    initial_values: dict[Union["Variable", str], np.ndarray | float | int]
-    | None = None,
-    jitter_rvs: set["TensorVariable"] | None = None,
-    default_initialization: Literal["support_point", "prior"] = "support_point",
     coords: dict[str, Any] | None = None,
     dims: dict[str, tuple[str, ...]] | None = None,
     shared_data: dict[str, Any] | None = None,
@@ -162,13 +102,10 @@ def from_pyfunc(
     if shared_data is None:
         shared_data = {}
 
-    initial_point_fn = make_initial_point_fn(
-        overrides=initial_values,
-        default_strategy=default_initialization,
-        jitter_rvs=jitter_rvs,
-    )
+    from nutpie.compile_pymc import rv_dict_to_flat_array_wrapper
+
     initial_point_fn = rv_dict_to_flat_array_wrapper(
-        initial_point_fn, names=expanded_names, shapes=expanded_shapes
+        make_initial_point_fn, names=expanded_names, shapes=expanded_shapes
     )
 
     return PyFuncModel(