Fix contiguity bugs in Numba lapack routines

ricardoV94 · ricardoV94 · commit c93eb06df873 · 2025-03-19T16:58:37.000+01:00
diff --git a/pytensor/link/numba/dispatch/slinalg.py b/pytensor/link/numba/dispatch/slinalg.py
@@ -26,6 +26,12 @@
 )
 
 
+@numba_basic.numba_njit(inline="always")
+def _copy_to_fortran_order_even_if_1d(x):
+    # Numba's _copy_to_fortran_order is a no-op for vectors (no copy is made), so copy them explicitly
+    return x.copy() if x.ndim == 1 else _copy_to_fortran_order(x)
+
+
 @numba_basic.numba_njit(inline="always")
 def _solve_check(n, info, lamch=False, rcond=None):
     """
@@ -497,10 +503,10 @@ def impl(
     ) -> tuple[np.ndarray, np.ndarray, int]:
         _M, _N = np.int32(A.shape[-2:])  # type: ignore
 
-        if not overwrite_a:
-            A_copy = _copy_to_fortran_order(A)
-        else:
+        if overwrite_a and A.flags.f_contiguous:
             A_copy = A
+        else:
+            A_copy = _copy_to_fortran_order(A)
 
         M = val_to_int_ptr(_M)  # type: ignore
         N = val_to_int_ptr(_N)  # type: ignore
@@ -545,13 +551,14 @@ def impl(
 
         B_is_1d = B.ndim == 1
 
-        if not overwrite_b:
-            B_copy = _copy_to_fortran_order(B)
-        else:
+        if overwrite_b and B.flags.f_contiguous:
             B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
 
         if B_is_1d:
             B_copy = np.expand_dims(B_copy, -1)
+            assert B_copy.flags.f_contiguous
 
         NRHS = 1 if B_is_1d else int(B_copy.shape[-1])
 
@@ -681,19 +688,22 @@ def impl(
         _LDA, _N = np.int32(A.shape[-2:])  # type: ignore
         _solve_check_input_shapes(A, B)
 
-        if not overwrite_a:
-            A_copy = _copy_to_fortran_order(A)
-        else:
+        if overwrite_a and (A.flags.f_contiguous or A.flags.c_contiguous):
+            # A symmetric matrix equals its transpose, so its C-contiguous buffer is also a valid F-contiguous one
             A_copy = A
+        else:
+            A_copy = _copy_to_fortran_order(A)
 
         B_is_1d = B.ndim == 1
 
-        if not overwrite_b:
-            B_copy = _copy_to_fortran_order(B)
-        else:
+        if overwrite_b and B.flags.f_contiguous:
             B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
+
         if B_is_1d:
-            B_copy = np.asfortranarray(np.expand_dims(B_copy, -1))
+            B_copy = np.expand_dims(B_copy, -1)
+            assert B_copy.flags.f_contiguous
 
         NRHS = 1 if B_is_1d else int(B.shape[-1])
 
@@ -903,17 +913,20 @@ def impl(
 
         _N = np.int32(A.shape[-1])
 
-        if not overwrite_a:
-            A_copy = _copy_to_fortran_order(A)
-        else:
+        if overwrite_a and (A.flags.f_contiguous or A.flags.c_contiguous):
             A_copy = A
+            if A.flags.c_contiguous:
+                # Reading a C-contiguous buffer as F-contiguous transposes it, swapping the lower and upper triangles
+                lower = not lower
+        else:
+            A_copy = _copy_to_fortran_order(A)
 
         B_is_1d = B.ndim == 1
 
-        if not overwrite_b:
-            B_copy = _copy_to_fortran_order(B)
-        else:
+        if overwrite_b and B.flags.f_contiguous:
             B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
 
         if B_is_1d:
             B_copy = np.expand_dims(B_copy, -1)
diff --git a/tests/link/numba/test_basic.py b/tests/link/numba/test_basic.py
@@ -7,6 +7,7 @@
 import numpy as np
 import pytest
 
+from pytensor.compile import SymbolicInput
 from tests.tensor.test_math_scipy import scipy
 
 
@@ -261,7 +262,11 @@ def assert_fn(x, y):
                 x, y
             )
 
-    if any(inp.owner is not None for inp in graph_inputs):
+    if any(
+        inp.owner is not None
+        for inp in graph_inputs
+        if not isinstance(inp, SymbolicInput)
+    ):
         raise ValueError("Inputs must be root variables")
 
     pytensor_py_fn = function(
diff --git a/tests/link/numba/test_slinalg.py b/tests/link/numba/test_slinalg.py
@@ -8,8 +8,8 @@
 
 import pytensor
 import pytensor.tensor as pt
-from pytensor import config
-from pytensor.tensor.slinalg import SolveTriangular
+from pytensor import In, config
+from pytensor.tensor.slinalg import Solve, SolveTriangular
 from tests import unittest_tools as utt
 from tests.link.numba.test_basic import compare_numba_and_py
 
@@ -399,75 +399,98 @@ def lu_solve(a, b, trans, overwrite_a, overwrite_b):
     assert_allclose(x, x_sp)
 
 
+@pytest.mark.filterwarnings(
+    'ignore:Cannot cache compiled function "numba_funcified_fgraph"'
+)
 @pytest.mark.parametrize(
     "b_shape",
     [(5, 1), (5, 5), (5,)],
     ids=["b_col_vec", "b_matrix", "b_vec"],
 )
 @pytest.mark.parametrize("assume_a", ["gen", "sym", "pos"], ids=str)
-@pytest.mark.filterwarnings(
-    'ignore:Cannot cache compiled function "numba_funcified_fgraph"'
+@pytest.mark.parametrize(
+    "overwrite_a, overwrite_b",
+    [(False, False), (True, False), (False, True)],
+    ids=["no_overwrite", "overwrite_a", "overwrite_b"],
 )
-def test_solve(b_shape: tuple[int], assume_a: Literal["gen", "sym", "pos"]):
-    A = pt.matrix("A", dtype=floatX)
-    b = pt.tensor("b", shape=b_shape, dtype=floatX)
-
-    A_val = np.asfortranarray(np.random.normal(size=(5, 5)).astype(floatX))
-    b_val = np.asfortranarray(np.random.normal(size=b_shape).astype(floatX))
-
+def test_solve(
+    b_shape: tuple[int],
+    assume_a: Literal["gen", "sym", "pos"],
+    overwrite_a: bool,
+    overwrite_b: bool,
+):
     def A_func(x):
         if assume_a == "pos":
             x = x @ x.T
         elif assume_a == "sym":
             x = (x + x.T) / 2
         return x
 
+    A = pt.matrix("A", dtype=floatX)
+    b = pt.tensor("b", shape=b_shape, dtype=floatX)
+
+    rng = np.random.default_rng(418)
+    A_val = np.asfortranarray(A_func(rng.normal(size=(5, 5))).astype(floatX))
+    b_val = np.asfortranarray(rng.normal(size=b_shape).astype(floatX))
+
     X = pt.linalg.solve(
-        A_func(A),
+        A,
         b,
         assume_a=assume_a,
         b_ndim=len(b_shape),
     )
-    f = pytensor.function(
-        [pytensor.In(A, mutable=True), pytensor.In(b, mutable=True)], X, mode="NUMBA"
-    )
-    op = f.maker.fgraph.outputs[0].owner.op
 
-    compare_numba_and_py([A, b], [X], test_inputs=[A_val, b_val], inplace=True)
-
-    # Calling this is destructive and will rewrite b_val to be the answer. Store copies of the inputs first.
-    A_val_copy = A_val.copy()
-    b_val_copy = b_val.copy()
-
-    X_np = f(A_val, b_val)
-
-    # overwrite_b is preferred when both inputs can be destroyed
-    assert op.destroy_map == {0: [1]}
-
-    # Confirm inputs were destroyed by checking against the copies
-    assert (A_val == A_val_copy).all() == (op.destroy_map.get(0, None) != [0])
-    assert (b_val == b_val_copy).all() == (op.destroy_map.get(0, None) != [1])
-
-    ATOL = 1e-8 if floatX.endswith("64") else 1e-4
-    RTOL = 1e-8 if floatX.endswith("64") else 1e-4
-
-    # Confirm b_val is used to store to solution
-    np.testing.assert_allclose(X_np, b_val, atol=ATOL, rtol=RTOL)
-    assert not np.allclose(b_val, b_val_copy)
-
-    # Test that the result is numerically correct. Need to use the unmodified copy
-    np.testing.assert_allclose(
-        A_func(A_val_copy) @ X_np, b_val_copy, atol=ATOL, rtol=RTOL
+    f, res = compare_numba_and_py(
+        [In(A, mutable=overwrite_a), In(b, mutable=overwrite_b)],
+        X,
+        test_inputs=[A_val, b_val],
+        inplace=True,
+        numba_mode="NUMBA",  # Default numba mode inplace rewrites get triggered
     )
 
-    # See the note in tensor/test_slinalg.py::test_solve_correctness for details about the setup here
-    utt.verify_grad(
-        lambda A, b: pt.linalg.solve(
-            A_func(A), b, lower=False, assume_a=assume_a, b_ndim=len(b_shape)
-        ),
-        [A_val_copy, b_val_copy],
-        mode="NUMBA",
+    op = f.maker.fgraph.outputs[0].owner.op
+    assert isinstance(op, Solve)
+    destroy_map = op.destroy_map
+    if overwrite_a and overwrite_b:
+        raise NotImplementedError(
+            "Test not implemented for symultaneous overwrite_a and overwrite_b, as that's not currently supported by PyTensor"
+        )
+    elif overwrite_a:
+        assert destroy_map == {0: [0]}
+    elif overwrite_b:
+        assert destroy_map == {0: [1]}
+    else:
+        assert destroy_map == {}
+
+    # Test inputs are destroyed if possible
+    A_val_f_contig = np.copy(A_val, order="F")
+    b_val_f_contig = np.copy(b_val, order="F")
+    res_f_contig = f(A_val_f_contig, b_val_f_contig)
+    np.testing.assert_allclose(res_f_contig, res)
+    assert (A_val == A_val_f_contig).all() == (not overwrite_a)
+    assert (b_val == b_val_f_contig).all() == (not overwrite_b)
+
+    # Test right results even if input cannot be destroyed because it is not F-contiguous
+    A_val_c_contig = np.copy(A_val, order="C")
+    b_val_c_contig = np.copy(b_val, order="C")
+    res_c_contig = f(A_val_c_contig, b_val_c_contig)
+    np.testing.assert_allclose(res_c_contig, res)
+    # With assume_a "sym" or "pos", A can be destroyed whether it is C- or F-contiguous
+    assert np.allclose(A_val_c_contig, A_val) == (
+        not (overwrite_a and assume_a in ("sym", "pos"))
     )
+    # Vectors are always f_contiguous if also c_contiguous
+    assert np.allclose(b_val_c_contig, b_val) == (
+        not (overwrite_b and b_val_c_contig.flags.f_contiguous)
+    )
+
+    # Test right results if inputs are not contiguous in either format
+    A_val_not_contig = np.repeat(A_val, 2, axis=0)[::2]
+    b_val_not_contig = np.repeat(b_val, 2, axis=0)[::2]
+    res_not_contig = f(A_val_not_contig, b_val_not_contig)
+    np.testing.assert_allclose(res_not_contig, res)
+    np.testing.assert_allclose(A_val_not_contig, A_val)
+    np.testing.assert_allclose(b_val_not_contig, b_val)
 
 
 @pytest.mark.parametrize(