
Commit 27a6bec

Benchmark radon model function

1 parent 474ef9e commit 27a6bec

2 files changed (+222, -4 lines)

pytensor/compile/mode.py

Lines changed: 8 additions & 3 deletions
@@ -459,8 +459,11 @@ def clone(self, link_kwargs=None, optimizer="", **kwargs):
     RewriteDatabaseQuery(include=["fast_run", "py_only"]),
 )
 
+C = Mode("c", "fast_run")
+C_VM = Mode("cvm", "fast_run")
+
 NUMBA = Mode(
-    NumbaLinker(),
+    "numba",
     RewriteDatabaseQuery(
         include=["fast_run", "numba"],
         exclude=[
@@ -473,7 +476,7 @@ def clone(self, link_kwargs=None, optimizer="", **kwargs):
 )
 
 JAX = Mode(
-    JAXLinker(),
+    "jax",
     RewriteDatabaseQuery(
         include=["fast_run", "jax"],
         exclude=[
@@ -489,7 +492,7 @@ def clone(self, link_kwargs=None, optimizer="", **kwargs):
     ),
 )
 PYTORCH = Mode(
-    PytorchLinker(),
+    "pytorch",
     RewriteDatabaseQuery(
         include=["fast_run"],
         exclude=[
@@ -508,6 +511,8 @@ def clone(self, link_kwargs=None, optimizer="", **kwargs):
 predefined_modes = {
     "FAST_COMPILE": FAST_COMPILE,
     "FAST_RUN": FAST_RUN,
+    "C": C,
+    "C_VM": C_VM,
     "JAX": JAX,
     "NUMBA": NUMBA,
     "PYTORCH": PYTORCH,

tests/compile/function/test_types.py

Lines changed: 214 additions & 1 deletion
@@ -12,7 +12,9 @@
 from pytensor.compile.io import In, Out
 from pytensor.compile.mode import Mode, get_default_mode
 from pytensor.configdefaults import config
-from pytensor.graph.basic import Constant
+from pytensor.graph.basic import Constant, explicit_graph_inputs
+from pytensor.graph.replace import graph_replace
+from pytensor.graph.rewriting import rewrite_graph
 from pytensor.graph.rewriting.basic import PatternNodeRewriter, WalkingGraphRewriter
 from pytensor.graph.utils import MissingInputError
 from pytensor.link.vm import VMLinker
@@ -1357,3 +1359,214 @@ def test_minimal_random_function_call_benchmark(trust_input, benchmark):
 
     rng_val = np.random.default_rng()
     benchmark(f, rng_val)
+
+
+def create_radon_model(
+    intercept_dist="normal", sigma_dist="halfnormal", centered=False
+):
+    def halfnormal(name, *, sigma=1.0, model_logp):
+        log_value = pt.scalar(f"{name}_log")
+        value = pt.exp(log_value)
+
+        logp = (
+            -0.5 * ((value / sigma) ** 2) + pt.log(pt.sqrt(2.0 / np.pi)) - pt.log(sigma)
+        )
+        logp = pt.switch(value >= 0, logp, -np.inf)
+        model_logp.append(logp + value)
+        return value
+
+    def normal(name, *, mu=0.0, sigma=1.0, model_logp, observed=None):
+        value = pt.scalar(name) if observed is None else pt.as_tensor(observed)
+
+        logp = (
+            -0.5 * (((value - mu) / sigma) ** 2)
+            - pt.log(pt.sqrt(2.0 * np.pi))
+            - pt.log(sigma)
+        )
+        model_logp.append(logp)
+        return value
+
+    def lognormal(name, *, mu=0.0, sigma=1.0, model_logp):
+        value = normal(name, mu=mu, sigma=sigma, model_logp=model_logp)
+        return pt.exp(value)
+
+    def zerosumnormal(name, *, sigma=1.0, size, model_logp):
+        raw_value = pt.vector(f"{name}_zerosum", shape=(size - 1,))
+        n = raw_value.shape[0] + 1
+        sum_vals = raw_value.sum(0, keepdims=True)
+        norm = sum_vals / (pt.sqrt(n) + n)
+        fill_value = norm - sum_vals / pt.sqrt(n)
+        value = pt.concatenate([raw_value, fill_value]) - norm
+
+        shape = value.shape
+        _full_size = pt.prod(shape)
+        _degrees_of_freedom = pt.prod(shape[-1:].inc(-1))
+        logp = pt.sum(
+            -0.5 * ((value / sigma) ** 2)
+            - (pt.log(pt.sqrt(2.0 * np.pi)) + pt.log(sigma))
+            * (_degrees_of_freedom / _full_size)
+        )
+        model_logp.append(logp)
+        return value
+
+    dist_fn_map = {
+        fn.__name__: fn for fn in (halfnormal, normal, lognormal, zerosumnormal)
+    }
+
+    rng = np.random.default_rng(1)
+    n_counties = 85
+    county_idx = rng.integers(n_counties, size=919)
+    county_idx.sort()
+    floor = rng.binomial(n=1, p=0.5, size=919).astype(np.float64)
+    log_radon = rng.normal(size=919)
+
+    model_logp = []
+    intercept = dist_fn_map[intercept_dist](
+        "intercept", sigma=10, model_logp=model_logp
+    )
+
+    # County effects
+    county_sd = halfnormal("county_sd", model_logp=model_logp)
+    if centered:
+        county_effect = zerosumnormal(
+            "county_raw", sigma=county_sd, size=n_counties, model_logp=model_logp
+        )
+    else:
+        county_raw = zerosumnormal("county_raw", size=n_counties, model_logp=model_logp)
+        county_effect = county_raw * county_sd
+
+    # Global floor effect
+    floor_effect = normal("floor_effect", sigma=2, model_logp=model_logp)
+
+    county_floor_sd = halfnormal("county_floor_sd", model_logp=model_logp)
+    if centered:
+        county_floor_effect = zerosumnormal(
+            "county_floor_raw",
+            sigma=county_floor_sd,
+            size=n_counties,
+            model_logp=model_logp,
+        )
+    else:
+        county_floor_raw = zerosumnormal(
+            "county_floor_raw", size=n_counties, model_logp=model_logp
+        )
+        county_floor_effect = county_floor_raw * county_floor_sd
+
+    mu = (
+        intercept
+        + county_effect[county_idx]
+        + floor_effect * floor
+        + county_floor_effect[county_idx] * floor
+    )
+
+    sigma = dist_fn_map[sigma_dist]("sigma", model_logp=model_logp)
+    _ = normal(
+        "log_radon",
+        mu=mu,
+        sigma=sigma,
+        observed=log_radon,
+        model_logp=model_logp,
+    )
+
+    model_logp = pt.sum([logp.sum() for logp in model_logp])
+    model_logp = rewrite_graph(
+        model_logp, include=("canonicalize", "stabilize"), clone=False
+    )
+    params = list(explicit_graph_inputs(model_logp))
+    model_dlogp = pt.concatenate([term.ravel() for term in pt.grad(model_logp, params)])
+
+    size = sum(int(np.prod(p.type.shape)) for p in params)
+    joined_inputs = pt.vector("joined_inputs", shape=(size,))
+    idx = 0
+    replacement = {}
+    for param in params:
+        param_shape = param.type.shape
+        param_size = int(np.prod(param_shape))
+        replacement[param] = joined_inputs[idx : idx + param_size].reshape(param_shape)
+        idx += param_size
+    assert idx == joined_inputs.type.shape[0]
+
+    model_logp, model_dlogp = graph_replace([model_logp, model_dlogp], replacement)
+    return joined_inputs, [model_logp, model_dlogp]
+
+
+@pytest.fixture(scope="session")
+def radon_model():
+    return create_radon_model()
+
+
+@pytest.fixture(scope="session")
+def radon_model_variants():
+    # Convert to list comp
+    return [
+        create_radon_model(
+            intercept_dist=intercept_dist,
+            sigma_dist=sigma_dist,
+            centered=centered,
+        )
+        for centered in (True, False)
+        for intercept_dist in ("normal", "lognormal")
+        for sigma_dist in ("halfnormal", "lognormal")
+    ]
+
+
+@pytest.mark.parametrize("mode", ["C", "C_VM", "NUMBA"])
+def test_radon_model_repeated_compile_benchmark(mode, radon_model, benchmark):
+    joined_inputs, [model_logp, model_dlogp] = radon_model
+    rng = np.random.default_rng(1)
+    x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
+
+    def compile_and_call_once():
+        fn = function(
+            [joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
+        )
+        fn(x)
+
+    benchmark(compile_and_call_once)
+
+
+@pytest.mark.parametrize("mode", ["C", "C_VM", "NUMBA"])
+def test_radon_model_variants_compile_benchmark(
+    mode, radon_model, radon_model_variants, benchmark
+):
"""Test compilation speed when a slightly variant of a function is compiled each time.
1533+
1534+
This test more realistically simulates a use case where a model is recompiled
1535+
multiple times with small changes, such as in an interactive environment.
1536+
1537+
NOTE: For this test to be meaningful on subsequent runs, the cache must be cleared
1538+
"""
+    joined_inputs, [model_logp, model_dlogp] = radon_model
+    rng = np.random.default_rng(1)
+    x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
+
+    # Compile base function once to populate the cache
+    fn = function(
+        [joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
+    )
+    fn(x)
+
+    def compile_and_call_once():
+        for joined_inputs, [model_logp, model_dlogp] in radon_model_variants:
+            fn = function(
+                [joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
+            )
+            fn(x)
+
+    benchmark.pedantic(compile_and_call_once, rounds=1, iterations=1)
+
+
+@pytest.mark.parametrize("mode", ["C", "C_VM", "C_VM_NOGC", "NUMBA"])
+def test_radon_model_call_benchmark(mode, radon_model, benchmark):
+    joined_inputs, [model_logp, model_dlogp] = radon_model
+
+    real_mode = "C_VM" if mode == "C_VM_NOGC" else mode
+    fn = function(
+        [joined_inputs], [model_logp, model_dlogp], mode=real_mode, trust_input=True
+    )
+    if mode == "C_VM_NOGC":
+        fn.vm.allow_gc = False
+
+    rng = np.random.default_rng(1)
+    x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
+    benchmark(fn, x)
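
The least obvious step in create_radon_model is the final flattening: every free parameter is replaced by a reshaped slice of a single joined_inputs vector via graph_replace, so each benchmarked function takes one flat array. A standalone sketch of the same pattern on a hypothetical two-input graph (the names a, b, and joined are illustrative, not from the commit):

    import numpy as np
    import pytensor
    import pytensor.tensor as pt
    from pytensor.graph.basic import explicit_graph_inputs
    from pytensor.graph.replace import graph_replace

    # Hypothetical toy graph with two named inputs.
    a = pt.scalar("a")
    b = pt.vector("b", shape=(3,))
    out = (a + b).sum()

    # Collect the non-constant inputs and allocate one flat vector covering all of them.
    params = list(explicit_graph_inputs(out))
    size = sum(int(np.prod(p.type.shape)) for p in params)
    joined = pt.vector("joined", shape=(size,))

    # Replace each parameter with a reshaped slice of the joined vector.
    replacement, idx = {}, 0
    for p in params:
        n = int(np.prod(p.type.shape))
        replacement[p] = joined[idx : idx + n].reshape(p.type.shape)
        idx += n

    (out_joined,) = graph_replace([out], replacement)
    fn = pytensor.function([joined], out_joined)
    fn(np.zeros(size, dtype=pytensor.config.floatX))  # a single flat input vector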
