Fix contiguity bugs in Numba LAPACK routines

ricardoV94 · ricardoV94 · commit 3f6654486d9d · 2025-03-20T15:54:40.000+01:00
Also removes redundant tests
diff --git a/pytensor/link/numba/dispatch/slinalg.py b/pytensor/link/numba/dispatch/slinalg.py
@@ -26,6 +26,12 @@
 )
 
 
+@numba_basic.numba_njit(inline="always")
+def _copy_to_fortran_order_even_if_1d(x):
+    # Numba's _copy_to_fortran_order doesn't do anything for vectors, so 1d inputs are copied explicitly
+    return x.copy() if x.ndim == 1 else _copy_to_fortran_order(x)
+
+
 @numba_basic.numba_njit(inline="always")
 def _solve_check(n, info, lamch=False, rcond=None):
     """
@@ -130,20 +136,23 @@ def impl(A, B, trans, lower, unit_diagonal, b_ndim, overwrite_b):
         B_is_1d = B.ndim == 1
 
         # This will only copy if A is not already fortran contiguous
-        A_f = np.asfortranarray(A)
-
-        if overwrite_b:
-            if B_is_1d:
-                B_copy = np.expand_dims(B, -1)
-            else:
-                # This *will* allow inplace destruction of B, but only if it is already fortran contiguous.
-                # Otherwise, there's no way to get around the need to copy the data before going into TRTRS
-                B_copy = np.asfortranarray(B)
+        if A.flags.f_contiguous or (A.flags.c_contiguous and trans in (0, 1)):
+            A_f = A
+            if A.flags.c_contiguous:
+                # An upper/lower triangular c_contiguous is the same as a lower/upper triangular f_contiguous
+                # Is this valid for complex matrices that had .conj().mT applied by PyTensor?
+                lower = not lower
+                trans = 1 - trans
         else:
-            if B_is_1d:
-                B_copy = np.copy(np.expand_dims(B, -1))
-            else:
-                B_copy = _copy_to_fortran_order(B)
+            A_f = np.asfortranarray(A)
+
+        if overwrite_b and B.flags.f_contiguous:
+            B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
+
+        if B_is_1d:
+            B_copy = np.expand_dims(B_copy, -1)
 
         NRHS = 1 if B_is_1d else int(B_copy.shape[-1])
 
@@ -247,10 +256,10 @@ def impl(A, lower=0, overwrite_a=False, check_finite=True):
         LDA = val_to_int_ptr(_N)
         INFO = val_to_int_ptr(0)
 
-        if not overwrite_a:
-            A_copy = _copy_to_fortran_order(A)
-        else:
+        if overwrite_a and A.flags.f_contiguous:
             A_copy = A
+        else:
+            A_copy = _copy_to_fortran_order(A)
 
         numba_potrf(
             UPLO,
@@ -283,7 +292,7 @@ def numba_funcify_Cholesky(op, node, **kwargs):
     In particular, the `inplace` argument is not supported, which is why we choose to implement our own version.
     """
     lower = op.lower
-    overwrite_a = False
+    overwrite_a = op.overwrite_a
     check_finite = op.check_finite
     on_error = op.on_error
 
@@ -497,10 +506,10 @@ def impl(
     ) -> tuple[np.ndarray, np.ndarray, int]:
         _M, _N = np.int32(A.shape[-2:])  # type: ignore
 
-        if not overwrite_a:
-            A_copy = _copy_to_fortran_order(A)
-        else:
+        if overwrite_a and A.flags.f_contiguous:
             A_copy = A
+        else:
+            A_copy = _copy_to_fortran_order(A)
 
         M = val_to_int_ptr(_M)  # type: ignore
         N = val_to_int_ptr(_N)  # type: ignore
@@ -545,10 +554,10 @@ def impl(
 
         B_is_1d = B.ndim == 1
 
-        if not overwrite_b:
-            B_copy = _copy_to_fortran_order(B)
-        else:
+        if overwrite_b and B.flags.f_contiguous:
             B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
 
         if B_is_1d:
             B_copy = np.expand_dims(B_copy, -1)
@@ -576,7 +585,7 @@ def impl(
         )
 
         if B_is_1d:
-            return B_copy[..., 0], int_ptr_to_val(INFO)
+            B_copy = B_copy[..., 0]
 
         return B_copy, int_ptr_to_val(INFO)
 
@@ -681,19 +690,23 @@ def impl(
         _LDA, _N = np.int32(A.shape[-2:])  # type: ignore
         _solve_check_input_shapes(A, B)
 
-        if not overwrite_a:
-            A_copy = _copy_to_fortran_order(A)
-        else:
+        if overwrite_a and (A.flags.f_contiguous or A.flags.c_contiguous):
             A_copy = A
+            if A.flags.c_contiguous:
+                # An upper/lower symmetric c_contiguous is the same as a lower/upper symmetric f_contiguous
+                lower = not lower
+        else:
+            A_copy = _copy_to_fortran_order(A)
 
         B_is_1d = B.ndim == 1
 
-        if not overwrite_b:
-            B_copy = _copy_to_fortran_order(B)
-        else:
+        if overwrite_b and B.flags.f_contiguous:
             B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
+
         if B_is_1d:
-            B_copy = np.asfortranarray(np.expand_dims(B_copy, -1))
+            B_copy = np.expand_dims(B_copy, -1)
 
         NRHS = 1 if B_is_1d else int(B.shape[-1])
 
@@ -904,17 +917,20 @@ def impl(
 
         _N = np.int32(A.shape[-1])
 
-        if not overwrite_a:
-            A_copy = _copy_to_fortran_order(A)
-        else:
+        if overwrite_a and (A.flags.f_contiguous or A.flags.c_contiguous):
             A_copy = A
+            if A.flags.c_contiguous:
+                # An upper/lower symmetric c_contiguous is the same as a lower/upper symmetric f_contiguous
+                lower = not lower
+        else:
+            A_copy = _copy_to_fortran_order(A)
 
         B_is_1d = B.ndim == 1
 
-        if not overwrite_b:
-            B_copy = _copy_to_fortran_order(B)
-        else:
+        if overwrite_b and B.flags.f_contiguous:
             B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
 
         if B_is_1d:
             B_copy = np.expand_dims(B_copy, -1)
@@ -1106,12 +1122,15 @@ def solve(a, b):
     return solve
 
 
-def _cho_solve(A_and_lower, B, overwrite_a=False, overwrite_b=False, check_finite=True):
+def _cho_solve(
+    C: np.ndarray, B: np.ndarray, lower: bool, overwrite_b: bool, check_finite: bool
+) -> np.ndarray:
     """
     Solve a positive-definite linear system using the Cholesky decomposition.
     """
-    A, lower = A_and_lower
-    return linalg.cho_solve((A, lower), B)
+    return linalg.cho_solve(
+        (C, lower), b=B, overwrite_b=overwrite_b, check_finite=check_finite
+    )
 
 
 @overload(_cho_solve)
@@ -1127,13 +1146,22 @@ def impl(C, B, lower=False, overwrite_b=False, check_finite=True):
         _solve_check_input_shapes(C, B)
 
         _N = np.int32(C.shape[-1])
-        C_f = np.asfortranarray(C)
+        if C.flags.f_contiguous or C.flags.c_contiguous:
+            C_f = C
+            if C.flags.c_contiguous:
+                # An upper/lower triangular c_contiguous can be seen as the lower/upper triangular f_contiguous
+                lower = not lower
+        else:
+            C_f = np.asfortranarray(C)
+
+        if overwrite_b and B.flags.f_contiguous:
+            B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
 
         B_is_1d = B.ndim == 1
         if B_is_1d:
-            B_copy = np.asfortranarray(np.expand_dims(B, -1))
-        else:
-            B_copy = _copy_to_fortran_order(B)
+            B_copy = np.expand_dims(B_copy, -1)
 
         NRHS = 1 if B_is_1d else int(B.shape[-1])
 
@@ -1155,9 +1183,11 @@ def impl(C, B, lower=False, overwrite_b=False, check_finite=True):
             INFO,
         )
 
+        _solve_check(_N, int_ptr_to_val(INFO))
+
         if B_is_1d:
-            return B_copy[..., 0], int_ptr_to_val(INFO)
-        return B_copy, int_ptr_to_val(INFO)
+            return B_copy[..., 0]
+        return B_copy
 
     return impl
 
@@ -1186,16 +1216,8 @@ def cho_solve(c, b):
                     "Non-numeric values (nan or inf) in input b to cho_solve"
                 )
 
-        res, info = _cho_solve(
+        return _cho_solve(
             c, b, lower=lower, overwrite_b=overwrite_b, check_finite=check_finite
         )
 
-        if info < 0:
-            raise np.linalg.LinAlgError("Illegal values found in input to cho_solve")
-        elif info > 0:
-            raise np.linalg.LinAlgError(
-                "Matrix is not positive definite in input to cho_solve"
-            )
-        return res
-
     return cho_solve
diff --git a/tests/link/numba/test_basic.py b/tests/link/numba/test_basic.py
@@ -7,6 +7,7 @@
 import numpy as np
 import pytest
 
+from pytensor.compile import SymbolicInput
 from tests.tensor.test_math_scipy import scipy
 
 
@@ -120,6 +121,7 @@ def perform(self, node, inputs, outputs):
 numba_mode = Mode(
     NumbaLinker(), opts.including("numba", "local_useless_unbatched_blockwise")
 )
+numba_inplace_mode = numba_mode.including("inplace")
 py_mode = Mode("py", opts)
 
 rng = np.random.default_rng(42849)
@@ -261,7 +263,11 @@ def assert_fn(x, y):
                 x, y
             )
 
-    if any(inp.owner is not None for inp in graph_inputs):
+    if any(
+        inp.owner is not None
+        for inp in graph_inputs
+        if not isinstance(inp, SymbolicInput)
+    ):
         raise ValueError("Inputs must be root variables")
 
     pytensor_py_fn = function(
diff --git a/tests/link/numba/test_slinalg.py b/tests/link/numba/test_slinalg.py