Enable new assume_a in Solve

ricardoV94 · ricardoV94 · commit 15fb803ac50f · 2025-03-06T18:46:10.000+01:00
diff --git a/pytensor/link/jax/dispatch/slinalg.py b/pytensor/link/jax/dispatch/slinalg.py
@@ -1,3 +1,5 @@
+import warnings
+
 import jax
 
 from pytensor.link.jax.dispatch.basic import jax_funcify
@@ -39,13 +41,39 @@ def cholesky(a, lower=lower):
 
 @jax_funcify.register(Solve)
 def jax_funcify_Solve(op, **kwargs):
-    if op.assume_a != "gen" and op.lower:
-        lower = True
+    assume_a = op.assume_a
+    lower = op.lower
+
+    if assume_a == "tridiagonal":
+        # jax.scipy.linalg.solve does not yet support tridiagonal matrices,
+        # but there's a jax.lax.linalg.tridiagonal_solve we can use instead.
+        b_is_vec = op.b_ndim == 1
+
+        def solve(a, b):
+            dl = jax.numpy.diagonal(a, offset=-1, axis1=-2, axis2=-1)
+            d = jax.numpy.diagonal(a, offset=0, axis1=-2, axis2=-1)
+            du = jax.numpy.diagonal(a, offset=1, axis1=-2, axis2=-1)
+            # tridiagonal_solve has no `lower` argument and needs dl, d and du to
+            # share one shape: pad so that dl[..., 0] == 0 and du[..., -1] == 0.
+            batch_pad = [(0, 0)] * (d.ndim - 1)
+            dl = jax.numpy.pad(dl, [*batch_pad, (1, 0)])
+            du = jax.numpy.pad(du, [*batch_pad, (0, 1)])
+            # The right-hand side must be a matrix, so vectors get a dummy column.
+            if b_is_vec:
+                b = b[..., None]
+            x = jax.lax.linalg.tridiagonal_solve(dl, d, du, b)
+            return x[..., 0] if b_is_vec else x
+
     else:
-        lower = False
+        if assume_a not in ("gen", "sym", "her", "pos"):
+            warnings.warn(
+                f"JAX solve does not support assume_a={op.assume_a}. Defaulting to assume_a='gen'.",
+                UserWarning,
+            )
+            assume_a = "gen"
 
-    def solve(a, b, lower=lower):
-        return jax.scipy.linalg.solve(a, b, lower=lower)
+        def solve(a, b):
+            return jax.scipy.linalg.solve(a, b, lower=lower, assume_a=assume_a)
 
     return solve
 
diff --git a/pytensor/link/numba/dispatch/slinalg.py b/pytensor/link/numba/dispatch/slinalg.py
@@ -1,3 +1,4 @@
+import warnings
 from collections.abc import Callable
 
 import numba
@@ -1070,14 +1071,16 @@ def numba_funcify_Solve(op, node, **kwargs):
     elif assume_a == "sym":
         solve_fn = _solve_symmetric
     elif assume_a == "her":
-        raise NotImplementedError(
-            'Use assume_a = "sym" for symmetric real matrices. If you need compelx support, '
-            "please open an issue on github."
-        )
+        # We already ruled out complex inputs
+        solve_fn = _solve_symmetric
     elif assume_a == "pos":
         solve_fn = _solve_psd
     else:
-        raise NotImplementedError(f"Assumption {assume_a} not supported in Numba mode")
+        warnings.warn(
+            f"Numba assume_a={assume_a} not implemented. Falling back to general solve.",
+            UserWarning,
+        )
+        solve_fn = _solve_gen
 
     @numba_basic.numba_njit(inline="always")
     def solve(a, b):
diff --git a/pytensor/tensor/slinalg.py b/pytensor/tensor/slinalg.py
@@ -5,8 +5,10 @@
 from typing import Literal, cast
 
 import numpy as np
+import scipy
 import scipy.linalg as scipy_linalg
 from numpy.exceptions import ComplexWarning
+from packaging.version import parse as parse_version
 
 import pytensor
 import pytensor.tensor as pt
@@ -15,6 +17,7 @@
 from pytensor.tensor import TensorLike, as_tensor_variable
 from pytensor.tensor import basic as ptb
 from pytensor.tensor import math as ptm
+from pytensor.tensor.basic import diagonal
 from pytensor.tensor.blockwise import Blockwise
 from pytensor.tensor.nlinalg import kron, matrix_dot
 from pytensor.tensor.shape import reshape
@@ -260,10 +263,10 @@ def make_node(self, A, b):
             raise ValueError(f"`b` must have {self.b_ndim} dims; got {b.type} instead.")
 
         # Infer dtype by solving the most simple case with 1x1 matrices
-        inp_arr = [np.eye(1).astype(A.dtype), np.eye(1).astype(b.dtype)]
-        out_arr = [[None]]
-        self.perform(None, inp_arr, out_arr)
-        o_dtype = out_arr[0][0].dtype
+        o_dtype = scipy_linalg.solve(
+            np.eye(1).astype(A.dtype),
+            np.eye(1).astype(b.dtype),
+        ).dtype
         x = tensor(dtype=o_dtype, shape=b.type.shape)
         return Apply(self, [A, b], [x])
 
@@ -315,7 +318,7 @@ def _default_b_ndim(b, b_ndim):
 
     b = as_tensor_variable(b)
     if b_ndim is None:
-        return min(b.ndim, 2)  # By default assume the core case is a matrix
+        return min(b.ndim, 2)  # By default, assume the core case is a matrix
 
 
 class CholeskySolve(SolveBase):
@@ -332,6 +335,19 @@ def __init__(self, **kwargs):
         kwargs.setdefault("lower", True)
         super().__init__(**kwargs)
 
+    def make_node(self, *inputs):
+        # Let the base class validate the inputs; only the output dtype is redone here
+        super_apply = super().make_node(*inputs)
+        A, b = super_apply.inputs
+        [super_out] = super_apply.outputs
+        # cho_solve takes a `(c, lower)` tuple; `lower` is irrelevant for a 1x1 identity
+        dtype = scipy_linalg.cho_solve(
+            (np.eye(1).astype(A.dtype), True),
+            np.eye(1).astype(b.dtype),
+        ).dtype
+        out = tensor(dtype=dtype, shape=super_out.type.shape)
+        return Apply(self, [A, b], [out])
+
     def perform(self, node, inputs, output_storage):
         C, b = inputs
         rval = scipy_linalg.cho_solve(
@@ -499,8 +515,32 @@ class Solve(SolveBase):
     )
 
     def __init__(self, *, assume_a="gen", **kwargs):
-        if assume_a not in ("gen", "sym", "her", "pos"):
-            raise ValueError(f"{assume_a} is not a recognized matrix structure")
+        # Triangular and diagonal are handled outside of Solve
+        valid_options = ["gen", "sym", "her", "pos", "tridiagonal", "banded"]
+
+        assume_a = assume_a.lower()
+        # We use the old names as the different dispatches are more likely to support them
+        if assume_a == "general":
+            assume_a = "gen"
+        elif assume_a == "symmetric":
+            assume_a = "sym"
+        elif assume_a == "hermitian":
+            assume_a = "her"
+        elif assume_a == "positive definite":
+            assume_a = "pos"
+        if assume_a not in valid_options:
+            raise ValueError(
+                f"Invalid assume_a: {assume_a}. It must be one of {valid_options}"
+            )
+
+        if assume_a in ("tridiagonal", "banded") and parse_version(
+            scipy.__version__
+        ) < parse_version("1.15.0"):
+            warnings.warn(
+                f"assume_a={assume_a} requires scipy>=1.15.0. Defaulting to assume_a='gen'.",
+                UserWarning,
+            )
+            assume_a = "gen"
 
         super().__init__(**kwargs)
         self.assume_a = assume_a
@@ -536,10 +576,12 @@ def solve(
     a,
     b,
     *,
-    assume_a="gen",
-    lower=False,
-    transposed=False,
-    check_finite=True,
+    lower: bool = False,
+    overwrite_a: bool = False,
+    overwrite_b: bool = False,
+    check_finite: bool = True,
+    assume_a: str = "gen",
+    transposed: bool = False,
     b_ndim: int | None = None,
 ):
     """Solves the linear equation set ``a * x = b`` for the unknown ``x`` for square ``a`` matrix.
@@ -548,14 +590,19 @@ def solve(
     corresponding string to ``assume_a`` key chooses the dedicated solver.
     The available options are
 
-    ===================  ========
-    generic matrix       'gen'
-    symmetric            'sym'
-    hermitian            'her'
-    positive definite    'pos'
-    ===================  ========
+    ===================  ================================
+     diagonal             'diagonal'
+     tridiagonal          'tridiagonal'
+     banded               'banded'
+     upper triangular     'upper triangular'
+     lower triangular     'lower triangular'
+     symmetric            'symmetric' (or 'sym')
+     hermitian            'hermitian' (or 'her')
+     positive definite    'positive definite' (or 'pos')
+     general              'general' (or 'gen')
+    ===================  ================================
 
-    If omitted, ``'gen'`` is the default structure.
+    If omitted, ``'general'`` is the default structure.
 
     The datatype of the arrays define which solver is called regardless
     of the values. In other words, even when the complex array entries have
@@ -568,23 +615,52 @@ def solve(
         Square input data
     b : (..., N, NRHS) array_like
         Input data for the right hand side.
-    lower : bool, optional
-        If True, use only the data contained in the lower triangle of `a`. Default
-        is to use upper triangle. (ignored for ``'gen'``)
-    transposed: bool, optional
-        If True, solves the system A^T x = b. Default is False.
+    lower : bool, default False
+        Ignored unless ``assume_a`` is one of ``'sym'``, ``'her'``, or ``'pos'``.
+        If True, the calculation uses only the data in the lower triangle of `a`;
+        entries above the diagonal are ignored. If False (default), the
+        calculation uses only the data in the upper triangle of `a`; entries
+        below the diagonal are ignored.
+    overwrite_a : bool
+        Ignored argument. PyTensor will perform the operation in-place if possible.
+    overwrite_b : bool
+        Ignored argument. PyTensor will perform the operation in-place if possible.
     check_finite : bool, optional
         Whether to check that the input matrices contain only finite numbers.
         Disabling may give a performance gain, but may result in problems
         (crashes, non-termination) if the inputs do contain infinities or NaNs.
     assume_a : str, optional
         Valid entries are explained above.
+    transposed: bool, default False
+        If True, solves the system A^T x = b. Default is False.
     b_ndim : int
         Whether the core case of b is a vector (1) or matrix (2).
         This will influence how batched dimensions are interpreted.
+        By default, ``b_ndim = min(b.ndim, 2)``: `b` is treated as a matrix unless it is a vector.
     """
+    assume_a = assume_a.lower()
+
+    if assume_a in ("lower triangular", "upper triangular"):
+        lower = "lower" in assume_a
+        return solve_triangular(
+            a,
+            b,
+            lower=lower,
+            trans=transposed,
+            check_finite=check_finite,
+            b_ndim=b_ndim,
+        )
+
     b_ndim = _default_b_ndim(b, b_ndim)
 
+    if assume_a == "diagonal":
+        a_diagonal = diagonal(a, axis1=-2, axis2=-1)
+        b_transposed = b[None, :] if b_ndim == 1 else b.mT
+        x = (b_transposed / pt.expand_dims(a_diagonal, -2)).mT
+        if b_ndim == 1:
+            x = x.squeeze(-1)
+        return x
+
     if transposed:
         a = a.mT
         lower = not lower