@@ -1,9 +1,11 @@
 import re
 from functools import partial
+from typing import Literal

 import numpy as np
 import pytest
 from numpy.testing import assert_allclose
+from scipy import linalg as scipy_linalg

 import pytensor
 import pytensor.tensor as pt
@@ -31,59 +33,79 @@ def transpose_func(x, trans):


 @pytest.mark.parametrize(
-    "b_func, b_size",
-    [(pt.matrix, (5, 1)), (pt.matrix, (5, 5)), (pt.vector, (5,))],
+    "b_shape",
+    [(5, 1), (5, 5), (5,)],
     ids=["b_col_vec", "b_matrix", "b_vec"],
 )
 @pytest.mark.parametrize("lower", [True, False], ids=["lower=True", "lower=False"])
 @pytest.mark.parametrize("trans", [0, 1, 2], ids=["trans=N", "trans=C", "trans=T"])
 @pytest.mark.parametrize(
     "unit_diag", [True, False], ids=["unit_diag=True", "unit_diag=False"]
 )
-@pytest.mark.parametrize("complex", [True, False], ids=["complex", "real"])
+@pytest.mark.parametrize("is_complex", [True, False], ids=["complex", "real"])
 @pytest.mark.filterwarnings(
     'ignore:Cannot cache compiled function "numba_funcified_fgraph"'
 )
-def test_solve_triangular(b_func, b_size, lower, trans, unit_diag, complex):
-    if complex:
+def test_solve_triangular(b_shape: tuple[int, ...], lower, trans, unit_diag, is_complex):
+    if is_complex:
         # TODO: Complex raises ValueError: To change to a dtype of a different size, the last axis must be contiguous,
         #  why?
         pytest.skip("Complex inputs currently not supported to solve_triangular")

     complex_dtype = "complex64" if floatX.endswith("32") else "complex128"
-    dtype = complex_dtype if complex else floatX
+    dtype = complex_dtype if is_complex else floatX

     A = pt.matrix("A", dtype=dtype)
-    b = b_func("b", dtype=dtype)
+    b = pt.tensor("b", shape=b_shape, dtype=dtype)
+
+    def A_func(x):
+        x = x @ x.conj().T
+        x_tri = scipy_linalg.cholesky(x, lower=lower).astype(dtype)
+
+        if unit_diag:
+            x_tri[np.diag_indices_from(x_tri)] = 1.0

-    X = pt.linalg.solve_triangular(
-        A, b, lower=lower, trans=trans, unit_diagonal=unit_diag
+        return x_tri.astype(dtype)
+
+    solve_op = partial(
+        pt.linalg.solve_triangular, lower=lower, trans=trans, unit_diagonal=unit_diag
     )
+
+    X = solve_op(A, b)
     f = pytensor.function([A, b], X, mode="NUMBA")

     A_val = np.random.normal(size=(5, 5))
-    b_val = np.random.normal(size=b_size)
+    b_val = np.random.normal(size=b_shape)

-    if complex:
+    if is_complex:
         A_val = A_val + np.random.normal(size=(5, 5)) * 1j
-        b_val = b_val + np.random.normal(size=b_size) * 1j
-    A_sym = A_val @ A_val.conj().T
+        b_val = b_val + np.random.normal(size=b_shape) * 1j

-    A_tri = np.linalg.cholesky(A_sym).astype(dtype)
-    if unit_diag:
-        adj_mat = np.ones((5, 5))
-        adj_mat[np.diag_indices(5)] = 1 / np.diagonal(A_tri)
-        A_tri = A_tri * adj_mat
+    X_np = f(A_func(A_val.copy()), b_val.copy())

-    A_tri = A_tri.astype(dtype)
-    b_val = b_val.astype(dtype)
+    test_input = transpose_func(A_func(A_val.copy()), trans)
+    np.testing.assert_allclose(test_input @ X_np, b_val, atol=ATOL, rtol=RTOL)

-    if not lower:
-        A_tri = A_tri.T
+    compare_numba_and_py(f.maker.fgraph, [A_func(A_val.copy()), b_val.copy()])

-    X_np = f(A_tri, b_val)
-    np.testing.assert_allclose(
-        transpose_func(A_tri, trans) @ X_np, b_val, atol=ATOL, rtol=RTOL
+    # utt.verify_grad uses small perturbations of the input matrix to compute the finite-difference gradient. When
+    # a non-triangular matrix is passed to scipy.linalg.solve_triangular, no error is raised, but the result will be
+    # wrong, and so will the gradients. It is therefore necessary to add a mapping from the space of all
+    # matrices to the space of triangular matrices, and test the gradient of that entire graph.
+    def A_func_pt(x):
+        x = x @ x.conj().T
+        x_tri = pt.linalg.cholesky(x, lower=lower).astype(dtype)
+
+        if unit_diag:
+            n = A_val.shape[0]
+            x_tri = x_tri[np.diag_indices(n)].set(1.0)
+
+        return transpose_func(x_tri.astype(dtype), trans)
+
+    utt.verify_grad(
+        lambda A, b: solve_op(A_func_pt(A), b),
+        [A_val.copy(), b_val.copy()],
+        mode="NUMBA",
     )

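The mapping through a Cholesky factor above is needed because scipy.linalg.solve_triangular never checks that its input is actually triangular; it only reads the triangle selected by `lower`. A minimal standalone sketch of that behaviour (plain NumPy/SciPy, separate from the test itself):

    import numpy as np
    from scipy import linalg

    rng = np.random.default_rng(0)
    A = rng.normal(size=(4, 4))  # dense, NOT triangular
    b = rng.normal(size=(4,))

    # No error is raised even though A is not triangular: the LAPACK routine
    # underneath only reads the lower triangle when lower=True.
    x = linalg.solve_triangular(A, b, lower=True)

    # x solves tril(A) @ x = b, not A @ x = b, which is why finite-difference
    # perturbations must be mapped back into the triangular space.
    np.testing.assert_allclose(np.tril(A) @ x, b)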
@@ -166,7 +188,8 @@ def test_numba_Cholesky_grad(lower, trans):
     L = rng.normal(size=(5, 5)).astype(floatX)
     X = L @ L.T

-    utt.verify_grad(pt.linalg.cholesky, [X])
+    chol_op = partial(pt.linalg.cholesky, lower=lower, trans=trans)
+    utt.verify_grad(chol_op, [X], mode="NUMBA")


 def test_block_diag():
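For reference, utt.verify_grad compares the symbolic gradient against a finite-difference approximation. A rough sketch of the idea for a scalar-valued function; the helper below is illustrative only, not PyTensor's actual implementation:

    import numpy as np

    def finite_diff_grad(f, x, eps=1e-6):
        """Central-difference gradient of a scalar-valued f at x (hypothetical helper)."""
        g = np.zeros_like(x)
        for idx in np.ndindex(*x.shape):
            dx = np.zeros_like(x)
            dx[idx] = eps
            g[idx] = (f(x + dx) - f(x - dx)) / (2 * eps)
        return g

    # e.g. for f(X) = np.sum(np.linalg.cholesky(X)), the entries of
    # finite_diff_grad(f, X) should match the symbolic gradient.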
@@ -319,69 +342,72 @@ def lu_solve(a, b, trans, overwrite_a, overwrite_b):


 @pytest.mark.parametrize(
-    "b_func, b_size",
-    [(pt.matrix, (5, 1)), (pt.matrix, (5, 5)), (pt.vector, (5,))],
+    "b_shape",
+    [(5, 1), (5, 5), (5,)],
     ids=["b_col_vec", "b_matrix", "b_vec"],
 )
 @pytest.mark.parametrize("assume_a", ["gen", "sym", "pos"], ids=str)
-@pytest.mark.parametrize("transposed", [True, False], ids=["trans", "no_trans"])
 @pytest.mark.filterwarnings(
     'ignore:Cannot cache compiled function "numba_funcified_fgraph"'
 )
-def test_solve(b_func, b_size, assume_a, transposed):
+def test_solve(b_shape: tuple[int, ...], assume_a: Literal["gen", "sym", "pos"]):
     A = pt.matrix("A", dtype=floatX)
-    b = b_func("b", dtype=floatX)
+    b = pt.tensor("b", shape=b_shape, dtype=floatX)
+
+    A_val = np.asfortranarray(np.random.normal(size=(5, 5)).astype(floatX))
+    b_val = np.asfortranarray(np.random.normal(size=b_shape).astype(floatX))
+
+    def A_func(x):
+        if assume_a == "pos":
+            x = x.T @ x
+        elif assume_a == "sym":
+            x = (x.T + x) / 2
+
+        return x

     X = pt.linalg.solve(
-        A,
+        A_func(A),
         b,
-        lower=False,
         assume_a=assume_a,
-        transposed=transposed,
-        b_ndim=len(b_size),
+        b_ndim=len(b_shape),
     )
     f = pytensor.function(
         [pytensor.In(A, mutable=True), pytensor.In(b, mutable=True)], X, mode="NUMBA"
     )
+    op = f.maker.fgraph.outputs[0].owner.op

-    A_val = np.random.normal(size=(5, 5)).astype(floatX)
-
-    if assume_a in ["sym", "pos"]:
-        A_val = A_val @ A_val.conj().T
-    A_val = np.asfortranarray(A_val)
-
-    b_val = np.random.normal(size=b_size)
-    b_val = b_val.astype(floatX)
-    b_val = np.asfortranarray(b_val)
+    compare_numba_and_py(f.maker.fgraph, inputs=[A_func(A_val.copy()), b_val.copy()])

+    # Calling f is destructive and will overwrite b_val with the answer, so store copies of the inputs first.
     A_val_copy = A_val.copy()
     b_val_copy = b_val.copy()

-    X_np = f(A_val, b_val)
-    op = f.maker.fgraph.outputs[0].owner.op
+    X_np = f(A_func(A_val), b_val)

     # overwrite_b is preferred when both inputs can be destroyed
     assert op.destroy_map == {0: [1]}

-    # Test that the result is numerically correct
-    np.testing.assert_allclose(
-        transpose_func(A_val_copy, transposed) @ X_np, b_val_copy, atol=ATOL, rtol=RTOL
-    )
-
-    # Confirm input was destroyed
+    # Confirm inputs were destroyed by checking against the copies
     assert (A_val == A_val_copy).all() == (op.destroy_map.get(0, None) != [0])
     assert (b_val == b_val_copy).all() == (op.destroy_map.get(0, None) != [1])

-    # Test gradients
-    solve = partial(
-        pt.linalg.solve,
-        lower=False,
-        assume_a=assume_a,
-        transposed=transposed,
-        b_ndim=len(b_size),
+    # Confirm b_val was used to store the solution
+    np.testing.assert_allclose(X_np, b_val, atol=ATOL, rtol=RTOL)
+    assert not np.allclose(b_val, b_val_copy)
+
+    # Test that the result is numerically correct. Need to use the unmodified copies
+    np.testing.assert_allclose(
+        A_func(A_val_copy) @ X_np, b_val_copy, atol=ATOL, rtol=RTOL
     )

-    utt.verify_grad(solve, [A_val_copy, b_val_copy], mode="NUMBA")
+    # See the note in tensor/test_slinalg.py::test_solve_correctness for details about the setup here
+    utt.verify_grad(
+        lambda A, b: pt.linalg.solve(
+            A_func(A), b, lower=False, assume_a=assume_a, b_ndim=len(b_shape)
+        ),
+        [A_val_copy, b_val_copy],
+        mode="NUMBA",
+    )


 @pytest.mark.parametrize(
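The destroy_map assertions above mirror SciPy's own overwrite flags. A small standalone sketch of the behaviour being tested, using plain SciPy and independent of PyTensor:

    import numpy as np
    from scipy import linalg

    rng = np.random.default_rng(0)
    A = np.asfortranarray(rng.normal(size=(3, 3)))
    b = np.asfortranarray(rng.normal(size=(3, 1)))
    b_copy = b.copy()

    # With overwrite_b=True, LAPACK may reuse b's buffer to store the solution,
    # so b's original contents cannot be relied on after the call.
    x = linalg.solve(A, b, overwrite_b=True)
    np.testing.assert_allclose(A @ x, b_copy)  # x still solves A @ x = b_copy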