Commit 1182791: Blockwise optimal linear control ops
1 parent a377c22

File tree: 2 files changed (+142, -99 lines)

pytensor/tensor/slinalg.py

Lines changed: 84 additions & 43 deletions
@@ -778,6 +778,7 @@ def perform(self, node, inputs, outputs):
 
 class SolveContinuousLyapunov(Op):
     __props__ = ()
+    gufunc_signature = "(m,m),(m,m)->(m,m)"
 
     def make_node(self, A, B):
         A = as_tensor_variable(A)
@@ -814,6 +815,8 @@ def grad(self, inputs, output_grads):
 
 
 class BilinearSolveDiscreteLyapunov(Op):
+    gufunc_signature = "(m,m),(m,m)->(m,m)"
+
     def make_node(self, A, B):
         A = as_tensor_variable(A)
         B = as_tensor_variable(B)
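
Both Lyapunov Ops now declare a gufunc_signature in NumPy's generalized-ufunc notation: two (m, m) core matrices in, one (m, m) matrix out. Wrapping an Op that carries this attribute in Blockwise loops the matrix-only implementation over any leading batch dimensions. A minimal sketch of the same broadcasting semantics in plain NumPy/SciPy (illustrative only; Blockwise does this symbolically inside the PyTensor graph):

import numpy as np
from scipy import linalg

# np.vectorize with the same signature string maps a matrix-only solver
# over leading batch dimensions, mirroring what Blockwise does for the Op.
batched_lyap = np.vectorize(
    linalg.solve_continuous_lyapunov, signature="(m,m),(m,m)->(m,m)"
)

rng = np.random.default_rng(0)
A = rng.normal(size=(10, 5, 5))  # ten stacked 5 x 5 problems
Q = rng.normal(size=(10, 5, 5))
X = batched_lyap(A, Q)  # shape (10, 5, 5)
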
@@ -849,84 +852,102 @@ def grad(self, inputs, output_grads):
         return [A_bar, Q_bar]
 
 
-_solve_continuous_lyapunov = SolveContinuousLyapunov()
-_solve_bilinear_direct_lyapunov = cast(typing.Callable, BilinearSolveDiscreteLyapunov())
+_solve_continuous_lyapunov = Blockwise(SolveContinuousLyapunov())
+_solve_bilinear_direct_lyapunov = cast(
+    typing.Callable, Blockwise(BilinearSolveDiscreteLyapunov())
+)
 
 
-def _direct_solve_discrete_lyapunov(A: "TensorLike", Q: "TensorLike") -> TensorVariable:
-    A_ = as_tensor_variable(A)
-    Q_ = as_tensor_variable(Q)
+def _direct_solve_discrete_lyapunov(
+    A: TensorVariable, Q: TensorVariable
+) -> TensorVariable:
+    # By default kron acts on tensors, but we need a vectorized version over matrices for this function
+    vec_kron = pt.vectorize(kron, "(m,n),(o,p)->(q,r)")
 
-    if "complex" in A_.type.dtype:
-        AA = kron(A_, A_.conj())
+    if A.type.dtype.startswith("complex"):
+        AxA = vec_kron(A, A.conj())
     else:
-        AA = kron(A_, A_)
+        AxA = vec_kron(A, A)
+
+    eye = pt.eye(AxA.shape[-1])
+    q_shape = pt.concatenate([Q.shape[:-2], [-1]])
+
+    vec_Q = Q.reshape(q_shape)
+    vec_X = solve(eye - AxA, vec_Q, b_ndim=1)
 
-    X = solve(pt.eye(AA.shape[0]) - AA, Q_.ravel())
-    return cast(TensorVariable, reshape(X, Q_.shape))
+    return cast(TensorVariable, reshape(vec_X, A.shape))
 
 
 def solve_discrete_lyapunov(
-    A: "TensorLike", Q: "TensorLike", method: Literal["direct", "bilinear"] = "direct"
+    A: TensorVariable,
+    Q: TensorVariable,
+    method: Literal["direct", "bilinear"] = "direct",
 ) -> TensorVariable:
     """Solve the discrete Lyapunov equation :math:`A X A^H - X = Q`.
 
     Parameters
     ----------
-    A
-        Square matrix of shape N x N; must have the same shape as Q
-    Q
-        Square matrix of shape N x N; must have the same shape as A
-    method
-        Solver method used, one of ``"direct"`` or ``"bilinear"``. ``"direct"``
-        solves the problem directly via matrix inversion. This has a pure
-        PyTensor implementation and can thus be cross-compiled to supported
-        backends, and should be preferred when ``N`` is not large. The direct
-        method scales poorly with the size of ``N``, and the bilinear can be
+    A: TensorVariable
+        Square matrix of shape N x N
+    Q: TensorVariable
+        Square matrix of shape N x N
+    method: str, one of ``"direct"`` or ``"bilinear"``
+        Solver method used. ``"direct"`` solves the problem directly via matrix inversion. This has a pure
+        PyTensor implementation and can thus be cross-compiled to supported backends, and should be preferred when
+        ``N`` is not large. The direct method scales poorly with the size of ``N``, and the bilinear can be
        used in these cases.
 
     Returns
     -------
-    Square matrix of shape ``N x N``, representing the solution to the
-    Lyapunov equation
+    X: TensorVariable
+        Square matrix of shape ``N x N``. Solution to the Lyapunov equation
 
     """
     if method not in ["direct", "bilinear"]:
         raise ValueError(
             f'Parameter "method" must be one of "direct" or "bilinear", found {method}'
         )
 
+    A = as_tensor_variable(A)
+    Q = as_tensor_variable(Q)
+
     if method == "direct":
         return _direct_solve_discrete_lyapunov(A, Q)
+
     if method == "bilinear":
         return cast(TensorVariable, _solve_bilinear_direct_lyapunov(A, Q))
 
 
-def solve_continuous_lyapunov(A: "TensorLike", Q: "TensorLike") -> TensorVariable:
-    """Solve the continuous Lyapunov equation :math:`A X + X A^H + Q = 0`.
+def solve_continuous_lyapunov(A: TensorVariable, Q: TensorVariable) -> TensorVariable:
+    """
+    Solve the continuous Lyapunov equation :math:`A X + X A^H + Q = 0`.
 
     Parameters
     ----------
-    A
-        Square matrix of shape ``N x N``; must have the same shape as `Q`.
-    Q
-        Square matrix of shape ``N x N``; must have the same shape as `A`.
+    A: TensorVariable
+        Square matrix of shape ``N x N``.
+    Q: TensorVariable
+        Square matrix of shape ``N x N``.
 
     Returns
     -------
-    Square matrix of shape ``N x N``, representing the solution to the
-    Lyapunov equation
+    X: TensorVariable
+        Square matrix of shape ``N x N``
 
     """
 
     return cast(TensorVariable, _solve_continuous_lyapunov(A, Q))
 
 
 class SolveDiscreteARE(pt.Op):
-    __props__ = ("enforce_Q_symmetric",)
+    __props__ = ("enforce_Q_symmetric", "use_bilinear_lyapunov")
+    gufunc_signature = "(m,m),(m,n),(m,m),(n,n)->(m,m)"
 
-    def __init__(self, enforce_Q_symmetric=False):
+    def __init__(
+        self, enforce_Q_symmetric: bool = False, use_bilinear_lyapunov: bool = True
+    ):
         self.enforce_Q_symmetric = enforce_Q_symmetric
+        self.use_bilinear_lyapunov = use_bilinear_lyapunov
 
     def make_node(self, A, B, Q, R):
         A = as_tensor_variable(A)
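
The rewritten _direct_solve_discrete_lyapunov deserves a note. With C-order (row-major) raveling, vec(A X B) = (A ⊗ B^T) vec(X), so vec(A X A^H) = (A ⊗ conj(A)) vec(X) and the Lyapunov equation collapses to a single linear system of size N²: (I - A ⊗ conj(A)) vec(X) = vec(Q), whose solution satisfies X - A X A^H = Q (SciPy's convention for solve_discrete_lyapunov). Vectorizing kron and reshaping only the trailing two dimensions keeps the construction valid for batched inputs, and b_ndim=1 tells solve that only the last axis of vec_Q is a core dimension. A quick NumPy check of the identity (non-batched, real-valued; the 0.1 scaling just keeps I - kron(A, A) well conditioned):

import numpy as np

rng = np.random.default_rng(0)
N = 4
A = rng.normal(size=(N, N)) * 0.1
Q = rng.normal(size=(N, N))

# Solve (I - kron(A, conj(A))) vec(X) = vec(Q) with C-order ravel
vec_X = np.linalg.solve(np.eye(N * N) - np.kron(A, A.conj()), Q.ravel())
X = vec_X.reshape(N, N)

# The recovered X satisfies X - A X A^H = Q
np.testing.assert_allclose(X - A @ X @ A.conj().T, Q)
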
@@ -961,13 +982,20 @@ def grad(self, inputs, output_grads):
         X = self(A, B, Q, R)
 
         K_inner = R + pt.linalg.matrix_dot(B.T, X, B)
-        K_inner_inv = pt.linalg.solve(K_inner, pt.eye(R.shape[0]))
-        K = matrix_dot(K_inner_inv, B.T, X, A)
+
+        # K_inner is guaranteed to be symmetric, because X and R are symmetric
+        K_inner_inv_BT = pt.linalg.solve(K_inner, B.T, assume_a="sym")
+        K = matrix_dot(K_inner_inv_BT, X, A)
 
         A_tilde = A - B.dot(K)
 
         dX_symm = 0.5 * (dX + dX.T)
-        S = solve_discrete_lyapunov(A_tilde, dX_symm).astype(dX.type.dtype)
+        method: Literal["bilinear", "direct"] = (
+            "bilinear" if self.use_bilinear_lyapunov else "direct"
+        )
+        S = solve_discrete_lyapunov(A_tilde, dX_symm, method=method).astype(
+            dX.type.dtype
+        )
 
         A_bar = 2 * matrix_dot(X, A_tilde, S)
         B_bar = -2 * matrix_dot(X, A_tilde, S, K.T)
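
The gradient rewrite also removes an explicit matrix inverse: instead of forming (R + B^T X B)^{-1} and multiplying through, it solves the symmetric system (R + B^T X B) Y = B^T once, and assume_a="sym" lets the solver use a symmetric factorization rather than a general LU. A small NumPy/SciPy sketch of the equivalence (the names here are illustrative, not from the commit):

import numpy as np
from scipy import linalg

rng = np.random.default_rng(0)
B = rng.normal(size=(4, 3))  # m = 4 states, n = 3 controls
M = rng.normal(size=(3, 3))
K_inner = M @ M.T + 3 * np.eye(3)  # stand-in for the symmetric R + B^T X B

# Old pattern: form the inverse explicitly, then multiply
old = np.linalg.inv(K_inner) @ B.T

# New pattern: a single symmetric solve, no explicit inverse
new = linalg.solve(K_inner, B.T, assume_a="sym")

np.testing.assert_allclose(old, new)
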
@@ -977,30 +1005,43 @@ def grad(self, inputs, output_grads):
         return [A_bar, B_bar, Q_bar, R_bar]
 
 
-def solve_discrete_are(A, B, Q, R, enforce_Q_symmetric=False) -> TensorVariable:
+def solve_discrete_are(
+    A: TensorVariable,
+    B: TensorVariable,
+    Q: TensorVariable,
+    R: TensorVariable,
+    enforce_Q_symmetric: bool = False,
+    use_bilinear_lyapunov: bool = True,
+) -> TensorVariable:
     """
     Solve the discrete Algebraic Riccati equation :math:`A^TXA - X - (A^TXB)(R + B^TXB)^{-1}(B^TXA) + Q = 0`.
 
     Parameters
     ----------
-    A: ArrayLike
+    A: TensorVariable
         Square matrix of shape M x M
-    B: ArrayLike
+    B: TensorVariable
         Square matrix of shape M x M
-    Q: ArrayLike
+    Q: TensorVariable
         Symmetric square matrix of shape M x M
-    R: ArrayLike
+    R: TensorVariable
         Square matrix of shape N x N
     enforce_Q_symmetric: bool
         If True, the provided Q matrix is transformed to 0.5 * (Q + Q.T) to ensure symmetry
+    use_bilinear_lyapunov: bool
+        If True, the bilinear method is used to solve a discrete Lyapunov equation when computing the gradients of
+        the ARE. If False, the direct method is used instead. See the docstring for ``solve_discrete_lyapunov`` for
+        details.
 
     Returns
     -------
-    X: pt.matrix
+    X: TensorVariable
         Square matrix of shape M x M, representing the solution to the DARE
     """
 
-    return cast(TensorVariable, SolveDiscreteARE(enforce_Q_symmetric)(A, B, Q, R))
+    op = SolveDiscreteARE(enforce_Q_symmetric, use_bilinear_lyapunov)
+    return cast(TensorVariable, Blockwise(op)(A, B, Q, R))
 
 
 def _largest_common_dtype(tensors: typing.Sequence[TensorVariable]) -> np.dtype:
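
With SolveDiscreteARE wrapped in Blockwise, solve_discrete_are (like the Lyapunov solvers above) now accepts stacked inputs directly. A usage sketch, assuming a float64 config.floatX (shapes and variable names here are illustrative):

import numpy as np
import pytensor
import pytensor.tensor as pt
from pytensor.tensor.slinalg import solve_discrete_are

# A batch of DAREs: A, Q are (batch, 2, 2); B is (batch, 2, 1); R is (batch, 1, 1)
A = pt.tensor(name="A", shape=(None, 2, 2))
B = pt.tensor(name="B", shape=(None, 2, 1))
Q = pt.tensor(name="Q", shape=(None, 2, 2))
R = pt.tensor(name="R", shape=(None, 1, 1))

X = solve_discrete_are(A, B, Q, R)  # Blockwise maps the core Op over the batch axis
f = pytensor.function([A, B, Q, R], X)
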

tests/tensor/test_slinalg.py

Lines changed: 58 additions & 56 deletions
@@ -1,5 +1,6 @@
 import functools
 import itertools
+from typing import Literal
 
 import numpy as np
 import pytest
@@ -514,105 +515,106 @@ def test_expm_grad_3():
     utt.verify_grad(expm, [A], rng=rng)
 
 
-def test_solve_discrete_lyapunov_via_direct_real():
-    N = 5
-    rng = np.random.default_rng(utt.fetch_seed())
-    a = pt.dmatrix("a")
-    q = pt.dmatrix("q")
-    f = function([a, q], [solve_discrete_lyapunov(a, q, method="direct")])
-
-    A = rng.normal(size=(N, N))
-    Q = rng.normal(size=(N, N))
+def recover_Q(A, X, continuous=True):
+    if continuous:
+        return A @ X + X @ A.conj().T
+    else:
+        return X - A @ X @ A.conj().T
 
-    X = f(A, Q)
-    assert np.allclose(A @ X @ A.T - X + Q, 0.0)
 
-    utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng)
+vec_recover_Q = np.vectorize(recover_Q, signature="(m,m),(m,m),()->(m,m)")
 
 
+@pytest.mark.parametrize("use_complex", [False, True])
+@pytest.mark.parametrize("shape", [(5, 5), (5, 5, 5)], ids=["matrix", "batch"])
+@pytest.mark.parametrize("method", ["direct", "bilinear"])
 @pytest.mark.filterwarnings("ignore::UserWarning")
-def test_solve_discrete_lyapunov_via_direct_complex():
-    # Conj doesn't have C-op; filter the warning.
-
-    N = 5
+def test_solve_discrete_lyapunov(
+    use_complex, shape: tuple[int], method: Literal["direct", "bilinear"]
+):
     rng = np.random.default_rng(utt.fetch_seed())
-    a = pt.zmatrix()
-    q = pt.zmatrix()
-    f = function([a, q], [solve_discrete_lyapunov(a, q, method="direct")])
+    dtype = config.floatX
+    if use_complex:
+        precision = int(dtype[-2:])  # 64 or 32
+        dtype = f"complex{int(2 * precision)}"
 
-    A = rng.normal(size=(N, N)) + rng.normal(size=(N, N)) * 1j
-    Q = rng.normal(size=(N, N))
-    X = f(A, Q)
-    np.testing.assert_array_less(A @ X @ A.conj().T - X + Q, 1e-12)
+    a = pt.tensor(name="a", shape=shape, dtype=dtype)
+    q = pt.tensor(name="q", shape=shape, dtype=dtype)
 
-    # TODO: the .conj() method currently does not have a gradient; add this test when gradients are implemented.
-    # utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng)
+    f = function([a, q], solve_discrete_lyapunov(a, q, method=method))
 
-
-def test_solve_discrete_lyapunov_via_bilinear():
-    N = 5
-    rng = np.random.default_rng(utt.fetch_seed())
-    a = pt.dmatrix()
-    q = pt.dmatrix()
-    f = function([a, q], [solve_discrete_lyapunov(a, q, method="bilinear")])
-
-    A = rng.normal(size=(N, N))
-    Q = rng.normal(size=(N, N))
+    A = rng.normal(size=shape)
+    Q = rng.normal(size=shape)
 
     X = f(A, Q)
+    Q_recovered = vec_recover_Q(A, X, continuous=False)
+    np.testing.assert_allclose(Q_recovered, Q)
 
-    np.testing.assert_array_less(A @ X @ A.conj().T - X + Q, 1e-12)
-    utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng)
+    utt.verify_grad(
+        functools.partial(solve_discrete_lyapunov, method=method), pt=[A, Q], rng=rng
+    )
 
 
-def test_solve_continuous_lyapunov():
-    N = 5
+@pytest.mark.parametrize("shape", [(5, 5), (5, 5, 5)], ids=["matrix", "batched"])
+def test_solve_continuous_lyapunov(shape: tuple[int]):
     rng = np.random.default_rng(utt.fetch_seed())
-    a = pt.dmatrix()
-    q = pt.dmatrix()
+    a = pt.tensor(name="a", shape=shape)
+    q = pt.tensor(name="q", shape=shape)
     f = function([a, q], [solve_continuous_lyapunov(a, q)])
 
-    A = rng.normal(size=(N, N))
-    Q = rng.normal(size=(N, N))
+    A = rng.normal(size=shape)
+    Q = rng.normal(size=shape)
     X = f(A, Q)
 
-    Q_recovered = A @ X + X @ A.conj().T
+    Q_recovered = vec_recover_Q(A, X, continuous=True)
 
     np.testing.assert_allclose(Q_recovered.squeeze(), Q)
     utt.verify_grad(solve_continuous_lyapunov, pt=[A, Q], rng=rng)
 
 
-def test_solve_discrete_are_forward():
+@pytest.mark.parametrize("add_batch_dim", [False, True])
+def test_solve_discrete_are_forward(add_batch_dim):
     # TEST CASE 4 : darex #1 -- taken from Scipy tests
     a, b, q, r = (
         np.array([[4, 3], [-4.5, -3.5]]),
         np.array([[1], [-1]]),
         np.array([[9, 6], [6, 4]]),
         np.array([[1]]),
     )
-    a, b, q, r = (x.astype(config.floatX) for x in [a, b, q, r])
+    if add_batch_dim:
+        a, b, q, r = (np.stack([x] * 5) for x in [a, b, q, r])
 
-    x = solve_discrete_are(a, b, q, r).eval()
-    res = a.T.dot(x.dot(a)) - x + q
-    res -= (
-        a.conj()
-        .T.dot(x.dot(b))
-        .dot(np.linalg.solve(r + b.conj().T.dot(x.dot(b)), b.T).dot(x.dot(a)))
-    )
+    a, b, q, r = (pt.as_tensor_variable(x).astype(config.floatX) for x in [a, b, q, r])
+
+    x = solve_discrete_are(a, b, q, r)
+
+    # A^TXA - X - (A^TXB)(R + B^TXB)^{-1}(B^TXA) + Q
+    def eval_fun(a, b, q, r, x):
+        term_1 = a.T @ x @ a
+        term_2 = a.T @ x @ b
+        term_3 = pt.linalg.solve(r + b.T @ x @ b, b.T) @ x @ a
+
+        return term_1 - x - term_2 @ term_3 + q
+
+    res = pt.vectorize(eval_fun, "(m,m),(m,n),(m,m),(n,n),(m,m)->(m,m)")(a, b, q, r, x)
+    res_np = res.eval()
 
     atol = 1e-4 if config.floatX == "float32" else 1e-12
-    np.testing.assert_allclose(res, np.zeros_like(res), atol=atol)
+    np.testing.assert_allclose(res_np, np.zeros_like(res_np), atol=atol)
 
 
-def test_solve_discrete_are_grad():
+@pytest.mark.parametrize("add_batch_dim", [False, True])
+def test_solve_discrete_are_grad(add_batch_dim):
     a, b, q, r = (
         np.array([[4, 3], [-4.5, -3.5]]),
         np.array([[1], [-1]]),
         np.array([[9, 6], [6, 4]]),
         np.array([[1]]),
     )
-    a, b, q, r = (x.astype(config.floatX) for x in [a, b, q, r])
+    if add_batch_dim:
+        a, b, q, r = (np.stack([x] * 5) for x in [a, b, q, r])
 
+    a, b, q, r = (x.astype(config.floatX) for x in [a, b, q, r])
     rng = np.random.default_rng(utt.fetch_seed())
 
     # TODO: Is there a "theoretically motivated" value to use here? I pulled 1e-4 out of a hat
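
Throughout these tests, utt.verify_grad checks the symbolic gradient of each (now Blockwise) Op against a numerical estimate. Conceptually it works like the central-difference sketch below (a minimal illustration of the idea, not PyTensor's actual implementation):

import numpy as np

def finite_difference_grad(f, x, eps=1e-7):
    # Central-difference estimate of df/dx for a scalar-valued f;
    # verify_grad compares the Op's symbolic gradient against this.
    g = np.zeros_like(x)
    it = np.nditer(x, flags=["multi_index"])
    for _ in it:
        i = it.multi_index
        x_hi, x_lo = x.copy(), x.copy()
        x_hi[i] += eps
        x_lo[i] -= eps
        g[i] = (f(x_hi) - f(x_lo)) / (2 * eps)
    return g
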
