from pytensor.link.numba.dispatch import numba_funcify


-# @numba_funcify.register(LogLike)  # DISABLED
-def _disabled_numba_funcify_LogLike(op, node, **kwargs):
-    """DISABLED: LogLike Op registration for Numba.
-
-    This registration is intentionally disabled because LogLike Op
-    cannot be compiled with Numba due to function closure limitations.
-
-    The error would be:
-    numba.core.errors.TypingError: Untyped global name 'actual_logp_func':
-    Cannot determine Numba type of <class 'function'>
-
-    Instead, use the scan-based approach in vectorized_logp module.
-    """
-    raise NotImplementedError(
-        "LogLike Op cannot be compiled with Numba due to function closure limitations. "
-        "Use scan-based vectorization instead."
-    )
-
-
class NumbaChiMatrixOp(Op):
    """Numba-optimized Chi matrix computation.

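The deleted registration above documents a genuine Numba restriction: a jitted function cannot call a plain Python function object captured from an enclosing scope, which is exactly the `TypingError` quoted in its docstring. A minimal sketch of the failing pattern and one common workaround, with purely illustrative names that are not taken from this codebase:

```python
import numpy as np
from numba import njit


def make_logp_kernel(actual_logp_func):
    @njit
    def logp_kernel(x):
        # First call triggers numba.core.errors.TypingError:
        # "Untyped global name 'actual_logp_func': Cannot determine Numba
        # type of <class 'function'>" -- the captured object is a plain
        # Python function that Numba cannot type.
        return actual_logp_func(x)

    return logp_kernel


# Closing over an already-jitted callable is fine, because Numba can type it:
@njit
def gaussian_logp(x):
    return -0.5 * np.sum(x * x)


@njit
def logp_kernel_ok(x):
    return gaussian_logp(x)
```

Rather than keep the dead registration around, this change drops it entirely and leaves the scan-based path in `vectorized_logp` as the Numba route for `LogLike`.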
@@ -78,7 +59,7 @@ def make_node(self, diff):

        output = pt.tensor(
            dtype=diff.dtype,
-            shape=(None, None, self.J),  # Only J is static
+            shape=(None, None, self.J),
        )
        return Apply(self, [diff], [output])

@@ -122,7 +103,6 @@ def __hash__(self):
def numba_funcify_ChiMatrixOp(op, node, **kwargs):
    """Numba implementation for ChiMatrix sliding window computation with smart parallelization.

-    Phase 6: Uses intelligent parallelization and optimized memory access patterns.
    Automatically selects between parallel and sequential versions based on problem size.

    Parameters
@@ -392,7 +372,7 @@ def numba_funcify_BfgsSampleOp(op, node, **kwargs):
392372 """
393373
394374 REGULARISATION_TERM = 1e-8
395- USE_CUSTOM_THRESHOLD = 100 # Use custom linear algebra for N < 100
375+ CUSTOM_THRESHOLD = 100
396376
397377 @numba_basic .numba_njit (
398378 fastmath = True , cache = True , error_model = "numpy" , boundscheck = False , inline = "never"
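Renaming `USE_CUSTOM_THRESHOLD` to `CUSTOM_THRESHOLD` keeps the same dispatch rule used in every hunk below: factor small matrices with the hand-rolled Numba kernels (`cholesky_small`, `qr_small`) and fall back to LAPACK via `np.linalg` above the cut-off, where BLAS-backed routines win. A self-contained sketch of that pattern; `cholesky_upper_small` here is an illustrative stand-in, not the project's implementation:

```python
import numpy as np
from numba import njit

CUSTOM_THRESHOLD = 100  # below this size, call overhead dominates LAPACK's advantage


@njit(cache=True, fastmath=True)
def cholesky_upper_small(A):
    # Unblocked Cholesky returning the upper factor U with A = U.T @ U.
    n = A.shape[0]
    U = np.zeros_like(A)
    for i in range(n):
        s = A[i, i]
        for k in range(i):
            s -= U[k, i] * U[k, i]
        U[i, i] = np.sqrt(s)
        for j in range(i + 1, n):
            t = A[i, j]
            for k in range(i):
                t -= U[k, i] * U[k, j]
            U[i, j] = t / U[i, i]
    return U


def upper_cholesky(A):
    # Size-based dispatch mirroring the `if N <= CUSTOM_THRESHOLD` branches below.
    if A.shape[0] <= CUSTOM_THRESHOLD:
        return cholesky_upper_small(A)
    return np.linalg.cholesky(A).T  # LAPACK lower factor, transposed to upper
```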
@@ -899,7 +879,7 @@ def dense_bfgs_with_memory_pool(
            matmul_inplace(sqrt_alpha_diag_l, temp_matrix_NN3, temp_matrix_NN)
            matmul_inplace(temp_matrix_NN, sqrt_alpha_diag_l, H_inv_buffer)

-            if N <= USE_CUSTOM_THRESHOLD:
+            if N <= CUSTOM_THRESHOLD:
                Lchol_l = cholesky_small(H_inv_buffer, upper=True)
            else:
                Lchol_l = np.linalg.cholesky(H_inv_buffer).T
@@ -968,7 +948,7 @@ def sparse_bfgs_with_memory_pool(
        for l in range(L):  # noqa: E741
            matmul_inplace(inv_sqrt_alpha_diag[l], beta[l], qr_input_buffer)

-            if N <= USE_CUSTOM_THRESHOLD:
+            if N <= CUSTOM_THRESHOLD:
                Q_l, R_l = qr_small(qr_input_buffer)
                copy_matrix_inplace(Q_l, Q_buffer)
                copy_matrix_inplace(R_l, R_buffer)
@@ -986,7 +966,7 @@ def sparse_bfgs_with_memory_pool(
                    temp_matrix_JJ2[i, j] = sum_val
            add_inplace(Id_JJ_reg, temp_matrix_JJ2, temp_matrix_JJ)

-            if JJ <= USE_CUSTOM_THRESHOLD:
+            if JJ <= CUSTOM_THRESHOLD:
                Lchol_l = cholesky_small(temp_matrix_JJ, upper=True)
            else:
                Lchol_l = np.linalg.cholesky(temp_matrix_JJ).T
@@ -1101,7 +1081,7 @@ def dense_bfgs_numba(
                sqrt_alpha_diag_l, matmul_contiguous(temp_matrix, sqrt_alpha_diag_l)
            )

-            if N <= USE_CUSTOM_THRESHOLD:
+            if N <= CUSTOM_THRESHOLD:
                # 3-5x speedup over BLAS
                Lchol_l = cholesky_small(H_inv_l, upper=True)
            else:
@@ -1188,8 +1168,7 @@ def sparse_bfgs_numba(
        for l in range(L):  # noqa: E741
            qr_input_l = inv_sqrt_alpha_diag[l] @ beta[l]

-            if N <= USE_CUSTOM_THRESHOLD:
-                # 3-5x speedup over BLAS
+            if N <= CUSTOM_THRESHOLD:
                Q_l, R_l = qr_small(qr_input_l)
            else:
                Q_l, R_l = np.linalg.qr(qr_input_l)
@@ -1203,10 +1182,9 @@ def sparse_bfgs_numba(

            Lchol_input_l = temp_RgammaRT.copy()
            for i in range(JJ):
-                Lchol_input_l[i, i] += IdJJ[i, i]  # Add identity efficiently
+                Lchol_input_l[i, i] += IdJJ[i, i]

-            if JJ <= USE_CUSTOM_THRESHOLD:
-                # 3-5x speedup over BLAS
+            if JJ <= CUSTOM_THRESHOLD:
                Lchol_l = cholesky_small(Lchol_input_l, upper=True)
            else:
                Lchol_l = np.linalg.cholesky(Lchol_input_l).T
@@ -1346,10 +1324,6 @@ def bfgs_sample_numba(
            x, g, alpha, beta, gamma, alpha_diag, inv_sqrt_alpha_diag, sqrt_alpha_diag, u
        )

-    # ===============================================================================
-    # Phase 6: Smart Parallelization
-    # ===============================================================================
-
    @numba_basic.numba_njit(
        dense_bfgs_signature,
        fastmath=True,
@@ -1426,7 +1400,7 @@ def dense_bfgs_parallel(
                sqrt_alpha_diag_l, matmul_contiguous(temp_matrix, sqrt_alpha_diag_l)
            )

-            if N <= USE_CUSTOM_THRESHOLD:
+            if N <= CUSTOM_THRESHOLD:
                Lchol_l = cholesky_small(H_inv_l, upper=True)
            else:
                Lchol_l = np.linalg.cholesky(H_inv_l).T
@@ -1504,7 +1478,7 @@ def sparse_bfgs_parallel(
            beta_l = ensure_contiguous_2d(beta[l])
            qr_input_l = matmul_contiguous(inv_sqrt_alpha_diag_l, beta_l)

-            if N <= USE_CUSTOM_THRESHOLD:
+            if N <= CUSTOM_THRESHOLD:
                Q_l, R_l = qr_small(qr_input_l)
            else:
                Q_l, R_l = np.linalg.qr(qr_input_l)
@@ -1520,7 +1494,7 @@ def sparse_bfgs_parallel(
            for i in range(JJ):
                Lchol_input_l[i, i] += IdJJ[i, i]

-            if JJ <= USE_CUSTOM_THRESHOLD:
+            if JJ <= CUSTOM_THRESHOLD:
                Lchol_l = cholesky_small(Lchol_input_l, upper=True)
            else:
                Lchol_l = np.linalg.cholesky(Lchol_input_l).T
@@ -1643,7 +1617,6 @@ def smart_dispatcher(
16431617 """
16441618 L , M , N = u .shape
16451619
1646- # This avoids thread overhead for small problems
16471620 if L >= 4 :
16481621 return bfgs_sample_parallel (
16491622 x , g , alpha , beta , gamma , alpha_diag , inv_sqrt_alpha_diag , sqrt_alpha_diag , u
@@ -1655,5 +1628,4 @@ def smart_dispatcher(

        return smart_dispatcher

-    # Phase 6: Return intelligent parallel dispatcher
    return create_parallel_dispatcher()
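For context, the dispatcher returned here encodes a simple batch-size rule: the thread-parallel kernels are only worth their start-up cost once at least four independent BFGS systems (the leading `L` dimension of `u`) are available; smaller batches run the sequential kernels. A stripped-down sketch of that pattern with made-up kernels (the real ones take the full `x, g, alpha, ...` argument list):

```python
import numpy as np
from numba import njit, prange

MIN_PARALLEL_BATCH = 4  # mirrors the `if L >= 4:` check above


@njit(parallel=True, cache=True)
def kernel_parallel(u):
    L = u.shape[0]
    out = np.empty(L)
    for i in prange(L):  # one batch element per thread
        out[i] = u[i].sum()
    return out


@njit(cache=True)
def kernel_sequential(u):
    L = u.shape[0]
    out = np.empty(L)
    for i in range(L):  # no threading start-up cost for tiny batches
        out[i] = u[i].sum()
    return out


def smart_dispatcher(u):
    # Choose the kernel from the number of independent problems in the batch.
    if u.shape[0] >= MIN_PARALLEL_BATCH:
        return kernel_parallel(u)
    return kernel_sequential(u)
```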