11 | 11 | import pytensor |
12 | 12 | import pytensor.tensor as pt |
13 | 13 | from pytensor.gradient import DisconnectedType |
14 | | -from pytensor.graph.basic import Apply |
| 14 | +from pytensor.graph.basic import Apply, Variable |
15 | 15 | from pytensor.graph.op import Op |
16 | 16 | from pytensor.tensor import TensorLike, as_tensor_variable |
17 | 17 | from pytensor.tensor import basic as ptb |
@@ -733,6 +733,103 @@ def lu_factor( |
733 | 733 | ) |
734 | 734 |
735 | 735 |
| 736 | +class LUSolve(Op): |
| 737 | + """ |
| 738 | + Solve a system of linear equations given the LU factorization of the matrix. |
| 739 | + """ |
| 740 | + |
| 741 | + __props__ = ("trans", "overwrite_b", "check_finite", "b_ndim") |
| 742 | + |
| 743 | + def __init__(self, b_ndim, trans=False, overwrite_b=False, check_finite=True): |
| 744 | + self.trans = trans |
| 745 | + self.overwrite_b = overwrite_b |
| 746 | + self.check_finite = check_finite |
| 747 | + |
| 748 | + assert b_ndim in (1, 2) |
| 749 | + self.b_ndim = b_ndim |
| 750 | + |
| 751 | + if b_ndim == 1: |
| 752 | + self.gufunc_signature = "(m,m),(m),(m)->(m)" |
| 753 | + else: |
| 754 | + self.gufunc_signature = "(m,m),(m),(m,n)->(m,n)" |
| 755 | + |
| 756 | + if overwrite_b: |
| 757 | + self.destroy_map = {0: [2]} |
| 758 | + |
| 759 | + def make_node(self, LU, pivots, b): |
| 760 | + LU = as_tensor_variable(LU) |
| 761 | + pivots = as_tensor_variable(pivots) |
| 762 | + b = as_tensor_variable(b) |
| 763 | + |
| 764 | + if LU.type.ndim != 2: |
| 765 | + raise TypeError( |
| 766 | + f"LU only allowed on matrix (2-D) inputs, got {LU.type.ndim}-D input" |
| 767 | + ) |
| 768 | + |
| 769 | + x = tensor(name="x", shape=b.type.shape, dtype=b.type.dtype) |
| 770 | + return Apply(self, [LU, pivots, b], [x]) |
| 771 | + |
| 772 | + def infer_shape(self, fgraph, node, shapes): |
| 773 | + LU_shape, pivot_shape, b_shape = shapes |
| 774 | + rows = LU_shape[1] |
| 775 | + if len(b_shape) == 1: |
| 776 | + return [(rows,)] |
| 777 | + else: |
| 778 | + cols = b_shape[1] |
| 779 | + return [(rows, cols)] |
| 780 | + |
| 781 | + def inplace_on_inputs(self, allowed_inplace_inputs: list[int]) -> "Op": |
| 782 | + if 2 in allowed_inplace_inputs: |
| 783 | + new_props = self._props_dict() # type: ignore |
| 784 | + new_props["overwrite_b"] = True |
| 785 | + return type(self)(**new_props) |
| 786 | + else: |
| 787 | + return self |
| 788 | + |
| 789 | + def perform(self, node, inputs, outputs): |
| 790 | + LU, pivots, b = inputs |
| 791 | + |
| 792 | + outputs[0][0] = scipy_linalg.lu_solve( |
| 793 | + lu_and_piv=(LU, pivots), |
| 794 | + b=b, |
| 795 | + check_finite=self.check_finite, |
| 796 | + trans=self.trans, |
| 797 | + overwrite_b=self.overwrite_b, |
| 798 | + ) |
| 799 | + |
| 800 | + def L_op( |
| 801 | + self, |
| 802 | + inputs: Sequence[Variable], |
| 803 | + outputs: Sequence[Variable], |
| 804 | + output_grads: Sequence[Variable], |
| 805 | + ) -> list[Variable]: |
| 806 | + LU, pivots, b = inputs |
| 807 | + [x] = outputs |
| 808 | + [x_bar] = output_grads |
| 809 | + |
| 810 | + p_inv = _pivot_to_permutation(pivots) |
| 811 | + p = pt.argsort(p_inv) |
| 812 | + P = ptb.identity_like(LU)[p] |
| 813 | + |
| 814 | + # We are solving PLUx = b |
| 815 | + # Forward sensitivity will be dX = (LU)^{-1} (P.T @ db - dLU @ x) |
| 816 | + # Backward sensitivities are: |
| 817 | + # B_bar = P @ (LU)^{-T} @ X_bar |
| 818 | + # LU_bar = (-X @ X_bar.T @ (LU)^{-1}).T = -(LU)^{-T} @ X_bar @ X.T = -P.T @ B_bar @ X.T |
| 819 | + |
| 820 | + # Note that (P L U)^{-T} = P (LU)^{-T} (because P is orthogonal), so we can just directly lu_solve for b_bar |
| 821 | + # with trans = not trans |
| 822 | + new_props = self._props_dict() # type: ignore |
| 823 | + new_props["trans"] = not new_props["trans"] |
| 824 | + b_bar = type(self)(**new_props)(LU, pivots, x_bar) |
| 825 | + LU_bar = -P.T @ ptm.outer(b_bar, x) if x.ndim == 1 else -P.T @ b_bar @ x.T |
| 826 | + |
| 827 | +        # The integer pivots carry no useful gradient; return zeros, treating the permutation as locally constant |
| 828 | +        pivots_bar = pt.zeros(pivots.shape, dtype=LU.type.dtype) |
| 829 | + |
| 830 | +        return [LU_bar, pivots_bar, b_bar] |
| 831 | + |
| 832 | + |
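A quick numerical sanity check of the adjoint reasoning in `L_op`, using SciPy directly. This is a sketch under two assumptions: the inline swap loop only illustrates what `_pivot_to_permutation` is taken to compute, and the `trans=1` solve stands in for the `trans = not trans` trick used above.

```python
import numpy as np
from scipy import linalg as scipy_linalg

rng = np.random.default_rng(0)
A = rng.normal(size=(4, 4))
b = rng.normal(size=4)
x_bar = rng.normal(size=4)  # an arbitrary cotangent on x

LU, pivots = scipy_linalg.lu_factor(A)
x = scipy_linalg.lu_solve((LU, pivots), b)

# What _pivot_to_permutation is assumed to compute: apply the recorded row
# swaps to arange(n) to get p_inv with A[p_inv] = L @ U.
p_inv = np.arange(4)
for i, j in enumerate(pivots):
    p_inv[i], p_inv[j] = p_inv[j], p_inv[i]
p = np.argsort(p_inv)

L = np.tril(LU, k=-1) + np.eye(4)
U = np.triu(LU)
P = np.eye(4)[p]
np.testing.assert_allclose(A, P @ L @ U, atol=1e-12)  # A = P @ L @ U, as in the comment

# b_bar = P @ (LU)^{-T} @ x_bar is a plain lu_solve with trans flipped,
# and must match the adjoint solve A^{-T} @ x_bar.
b_bar = scipy_linalg.lu_solve((LU, pivots), x_bar, trans=1)
np.testing.assert_allclose(b_bar, np.linalg.solve(A.T, x_bar))
```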
736 | 833 | def lu_solve( |
737 | 834 | LU_and_pivots: tuple[TensorVariable, TensorVariable], |
738 | 835 | b: TensorVariable, |
|
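For context, typical use goes through the `lu_factor` and `lu_solve` helpers defined in this module rather than instantiating `LUSolve` directly. A minimal sketch, assuming `lu_factor` returns the `(LU, pivots)` pair that `lu_solve` expects (mirroring SciPy):

```python
import numpy as np
import pytensor
import pytensor.tensor as pt
from pytensor.tensor.slinalg import lu_factor, lu_solve

A = pt.dmatrix("A")
b = pt.dvector("b")

# Factor once, then solve with the cached factorization.
LU, pivots = lu_factor(A)
x = lu_solve((LU, pivots), b)

f = pytensor.function([A, b], x)

rng = np.random.default_rng(0)
A_val = rng.normal(size=(5, 5))
b_val = rng.normal(size=5)
np.testing.assert_allclose(A_val @ f(A_val, b_val), b_val, atol=1e-10)
```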