Commit 92db737

Add RootOp, fix gradient tests (they are failing)

1 parent 7b53914 · commit 92db737
2 files changed: +223 -47 lines changed

pytensor/tensor/optimize.py (159 additions, 44 deletions)
@@ -1,55 +1,43 @@
+from collections.abc import Sequence
 from copy import copy
 
 from scipy.optimize import minimize as scipy_minimize
+from scipy.optimize import root as scipy_root
 
-from pytensor import function, graph_replace
-from pytensor.gradient import grad
+from pytensor import Variable, function, graph_replace
+from pytensor.gradient import grad, jacobian
 from pytensor.graph import Apply, Constant, FunctionGraph
 from pytensor.graph.basic import truncated_graph_inputs
 from pytensor.graph.op import ComputeMapType, HasInnerGraph, Op, StorageMapType
 from pytensor.scalar import bool as scalar_bool
+from pytensor.tensor.basic import atleast_2d
+from pytensor.tensor.slinalg import solve
+from pytensor.tensor.variable import TensorVariable
 
 
-class MinimizeOp(Op, HasInnerGraph):
-    def __init__(
-        self,
-        x,
-        *args,
-        output,
-        method="BFGS",
-        jac=True,
-        hess=False,
-        hessp=False,
-        options: dict | None = None,
-        debug: bool = False,
-    ):
-        self.fgraph = FunctionGraph([x, *args], [output])
+class ScipyWrapperOp(Op, HasInnerGraph):
+    """Shared logic for scipy optimization ops"""
 
-        if jac:
-            grad_wrt_x = grad(self.fgraph.outputs[0], self.fgraph.inputs[0])
-            self.fgraph.add_output(grad_wrt_x)
-
-        self.jac = jac
-        self.hess = hess
-        self.hessp = hessp
-
-        self.method = method
-        self.options = options if options is not None else {}
-        self.debug = debug
-        self._fn = None
-        self._fn_wrapped = None
+    __props__ = ("method", "debug")
 
     def build_fn(self):
+        """
+        This is overloaded because scipy converts scalar inputs to lists, changing the return type. The
+        wrapper function logic is there to handle this.
+        """
+        # TODO: Introduce rewrites to change MinimizeOp to MinimizeScalarOp and RootOp to RootScalarOp
+        # when x is scalar. That will remove the need for the wrapper.
+
         outputs = self.inner_outputs
         if len(outputs) == 1:
             outputs = outputs[0]
         self._fn = fn = function(self.inner_inputs, outputs)
-        self.fgraph = (
-            fn.maker.fgraph
-        )  # So we see the compiled graph ater the first call
+
+        # Do this reassignment to see the compiled graph in the dprint
+        self.fgraph = fn.maker.fgraph
 
         if self.inner_inputs[0].type.shape == ():
-            # Work-around for scipy changing the type of x
+
             def fn_wrapper(x, *args):
                 return fn(x.squeeze(), *args)
 
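An aside on the wrapper above: scipy's optimizers hand the objective callback a 1-d ndarray even when x0 is a Python scalar, so an inner function compiled for a 0-d input needs the squeeze. A minimal standalone illustration of that behavior (plain scipy, no pytensor; the names are illustrative only):

import numpy as np
from scipy.optimize import minimize

def objective(x):
    # Even for a scalar problem, x arrives as an ndarray of shape (1,)
    assert isinstance(x, np.ndarray) and x.shape == (1,)
    return (x.squeeze() - 3.0) ** 2

res = minimize(objective, x0=0.0)  # scalar x0 is promoted to shape (1,)
print(res.x)  # ~[3.]
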
@@ -90,21 +78,51 @@ def prepare_node(
         impl: str | None,
     ):
         """Trigger the compilation of the inner fgraph so it shows in the dprint before the first call"""
-        # TODO: Implemet this method
+        self.build_fn()
 
     def make_node(self, *inputs):
         assert len(inputs) == len(self.inner_inputs)
 
         return Apply(
-            self, inputs, [self.inner_outputs[0].type(), scalar_bool("success")]
+            self, inputs, [self.inner_inputs[0].type(), scalar_bool("success")]
         )
 
+
+class MinimizeOp(ScipyWrapperOp):
+    __props__ = ("method", "jac", "hess", "hessp", "debug")
+
+    def __init__(
+        self,
+        x,
+        *args,
+        objective,
+        method="BFGS",
+        jac=True,
+        hess=False,
+        hessp=False,
+        options: dict | None = None,
+        debug: bool = False,
+    ):
+        self.fgraph = FunctionGraph([x, *args], [objective])
+
+        if jac:
+            grad_wrt_x = grad(self.fgraph.outputs[0], self.fgraph.inputs[0])
+            self.fgraph.add_output(grad_wrt_x)
+
+        self.jac = jac
+        self.hess = hess
+        self.hessp = hessp
+
+        self.method = method
+        self.options = options if options is not None else {}
+        self.debug = debug
+        self._fn = None
+        self._fn_wrapped = None
+
     def perform(self, node, inputs, outputs):
         f = self.fn_wrapped
         x0, *args = inputs
 
-        # print(f(*inputs))
-
         res = scipy_minimize(
             fun=f,
             jac=self.jac,
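(Aside on the make_node fix in this hunk: the Op's first output holds res.x, the optimizing point, so its type must be that of the input x, hence self.inner_inputs[0].type(); the previous self.inner_outputs[0].type() was the scalar objective's type and would mismatch whenever x is a vector.)
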
@@ -113,8 +131,10 @@ def perform(self, node, inputs, outputs):
             method=self.method,
             **self.options,
         )
+
         if self.debug:
             print(res)
+
         outputs[0][0] = res.x
         outputs[1][0] = res.success
 
@@ -128,16 +148,12 @@ def L_op(self, inputs, outputs, output_grads):
 
         inner_grads = grad(inner_fx, [inner_x, *inner_args])
 
-        # TODO: Does clone replace do what we want? It might need a merge optimization pass afterwards
         replace = dict(zip(self.fgraph.inputs, (x_star, *args), strict=True))
+
         grad_f_wrt_x_star, *grad_f_wrt_args = graph_replace(
             inner_grads, replace=replace
         )
 
-        # # TODO: If scipy optimizer uses hessian (or hessp), just store it from the inner function
-        # inner_hess = jacobian(inner_fx, inner_args)
-        # hess_f_x = clone_replace(inner_hess, replace=replace)
-
         grad_wrt_args = [
             -grad_f_wrt_arg / grad_f_wrt_x_star * output_grad
             for grad_f_wrt_arg in grad_f_wrt_args
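An aside on the gradient formed above: `-grad_f_wrt_arg / grad_f_wrt_x_star * output_grad` has the shape of implicit differentiation of a root condition $f(x^*, \theta) = 0$,

$$\frac{dx^*}{d\theta} = -\left.\frac{\partial f/\partial \theta}{\partial f/\partial x}\right|_{x = x^*},$$

but the condition defining a minimizer is stationarity, $\partial f/\partial x\,(x^*, \theta) = 0$, whose implicit derivative runs through second derivatives:

$$\frac{dx^*}{d\theta} = -\left(\frac{\partial^2 f}{\partial x^2}\right)^{-1} \left.\frac{\partial^2 f}{\partial x\,\partial \theta}\right|_{x = x^*}.$$

Since $\partial f/\partial x$ vanishes at $x^*$, dividing by it is ill-posed exactly where it is evaluated, which is consistent with the commit message reporting failing gradient tests.
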
@@ -192,9 +208,108 @@ def minimize(
     ]
 
     minimize_op = MinimizeOp(
-        x, *args, output=objective, method=method, jac=jac, debug=debug, options=options
+        x,
+        *args,
+        objective=objective,
+        method=method,
+        jac=jac,
+        debug=debug,
+        options=options,
     )
+
     return minimize_op(x, *args)
 
 
-__all__ = ["minimize"]
+class RootOp(ScipyWrapperOp):
+    __props__ = ("method", "jac", "debug")
+
+    def __init__(
+        self,
+        variables,
+        *args,
+        equations,
+        method="hybr",
+        jac=True,
+        options: dict | None = None,
+        debug: bool = False,
+    ):
+        self.fgraph = FunctionGraph([variables, *args], [equations])
+
+        if jac:
+            jac_wrt_x = jacobian(self.fgraph.outputs[0], self.fgraph.inputs[0])
+            self.fgraph.add_output(atleast_2d(jac_wrt_x))
+
+        self.jac = jac
+
+        self.method = method
+        self.options = options if options is not None else {}
+        self.debug = debug
+        self._fn = None
+        self._fn_wrapped = None
+
+    def perform(self, node, inputs, outputs):
+        f = self.fn_wrapped
+        variables, *args = inputs
+
+        res = scipy_root(
+            fun=f,
+            jac=self.jac,
+            x0=variables,
+            args=tuple(args),
+            method=self.method,
+            **self.options,
+        )
+
+        if self.debug:
+            print(res)
+
+        outputs[0][0] = res.x
+        outputs[1][0] = res.success
+
+    def L_op(
+        self,
+        inputs: Sequence[Variable],
+        outputs: Sequence[Variable],
+        output_grads: Sequence[Variable],
+    ) -> list[Variable]:
+        # TODO: Broken
+        x, *args = inputs
+        x_star, success = outputs
+        output_grad, _ = output_grads
+
+        inner_x, *inner_args = self.fgraph.inputs
+        inner_fx = self.fgraph.outputs[0]
+
+        inner_jac = jacobian(inner_fx, [inner_x, *inner_args])
+
+        replace = dict(zip(self.fgraph.inputs, (x_star, *args), strict=True))
+        jac_f_wrt_x_star, *jac_f_wrt_args = graph_replace(inner_jac, replace=replace)
+
+        jac_wrt_args = solve(-jac_f_wrt_x_star, output_grad)
+
+        return [x.zeros_like(), jac_wrt_args]
+
+
+def root(
+    equations: TensorVariable,
+    variables: TensorVariable,
+    method: str = "hybr",
+    jac: bool = True,
+    debug: bool = False,
+):
+    """Find roots of a system of equations using scipy.optimize.root."""
+
+    args = [
+        arg
+        for arg in truncated_graph_inputs([equations], [variables])
+        if (arg is not variables and not isinstance(arg, Constant))
+    ]
+
+    root_op = RootOp(
+        variables, *args, equations=equations, method=method, jac=jac, debug=debug
+    )
+
+    return root_op(variables, *args)
+
+
+__all__ = ["minimize", "root"]
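An aside on RootOp.L_op, which the code flags as `# TODO: Broken` (matching the commit message): for the root condition $f(x^*(\theta), \theta) = 0$, the implicit function theorem gives $dx^*/d\theta = -J_x^{-1} J_\theta$, so the vector-Jacobian product with an output gradient $g$ should thread through jac_f_wrt_args, roughly `-jac_f_wrt_arg.T @ solve(jac_f_wrt_x_star.T, output_grad)` per argument; the body above computes those Jacobians but never uses jac_f_wrt_args. A self-contained numeric check of the scalar version of that formula, using the same equation as test_root_simple (plain numpy/scipy; variable names are illustrative only):

import numpy as np
from scipy.optimize import root

# f(x, a) = x + 2*a*cos(x); solve for x* at a = 1
a = 1.0
x_star = root(lambda x: x + 2 * a * np.cos(x), x0=0.0).x.item()

# Implicit function theorem: dx*/da = -(df/da) / (df/dx) at (x*, a)
df_da = 2 * np.cos(x_star)
df_dx = 1 - 2 * a * np.sin(x_star)
analytic = -df_da / df_dx

# Finite-difference comparison
eps = 1e-6
x_eps = root(lambda x: x + 2 * (a + eps) * np.cos(x), x0=x_star).x.item()
print(analytic, (x_eps - x_star) / eps)  # the two values should agree closely
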

tests/tensor/test_optimize.py (64 additions, 3 deletions)

@@ -1,8 +1,9 @@
 import numpy as np
 
+import pytensor
 import pytensor.tensor as pt
 from pytensor import config
-from pytensor.tensor.optimize import minimize
+from pytensor.tensor.optimize import minimize, root
 from tests import unittest_tools as utt
 
 
@@ -19,6 +20,7 @@ def test_simple_minimize():
     out = (x - b * c) ** 2
 
     minimized_x, success = minimize(out, x)
+    minimized_x.dprint()
 
     a_val = 2.0
     c_val = 3.0
@@ -43,7 +45,6 @@ def rosenbrock_shifted_scaled(x, a, b):
     b = pt.scalar("b")
 
     objective = rosenbrock_shifted_scaled(x, a, b)
-
     minimized_x, success = minimize(objective, x, method="BFGS")
 
     a_val = 0.5
@@ -56,4 +57,64 @@ def rosenbrock_shifted_scaled(x, a, b):
         x_star_val, np.ones_like(x_star_val), atol=1e-6, rtol=1e-6
     )
 
-    utt.verify_grad(rosenbrock_shifted_scaled, [x0, a_val, b_val], eps=1e-6)
+    def f(x, a, b):
+        objective = rosenbrock_shifted_scaled(x, a, b)
+        out = minimize(objective, x)[0]
+        return out
+
+    utt.verify_grad(f, [x0, a_val, b_val], eps=1e-6)
+
+
+def test_root_simple():
+    x = pt.scalar("x")
+    a = pt.scalar("a")
+
+    def fn(x, a):
+        return x + 2 * a * pt.cos(x)
+
+    f = fn(x, a)
+    root_f, success = root(f, x)
+    func = pytensor.function([x, a], [root_f, success])
+
+    x0 = 0.0
+    a_val = 1.0
+    solution, success = func(x0, a_val)
+
+    assert success
+    np.testing.assert_allclose(solution, -1.02986653, atol=1e-6, rtol=1e-6)
+
+    def root_fn(x, a):
+        f = fn(x, a)
+        return root(f, x)[0]
+
+    utt.verify_grad(root_fn, [x0, a_val], eps=1e-6)
+
+
+def test_root_system_of_equations():
+    x = pt.dvector("x")
+    a = pt.dvector("a")
+    b = pt.dvector("b")
+
+    f = pt.stack([a[0] * x[0] * pt.cos(x[1]) - b[0], x[0] * x[1] - a[1] * x[1] - b[1]])
+
+    root_f, success = root(f, x, debug=True)
+    func = pytensor.function([x, a, b], [root_f, success])
+
+    x0 = np.array([1.0, 1.0])
+    a_val = np.array([1.0, 1.0])
+    b_val = np.array([4.0, 5.0])
+    solution, success = func(x0, a_val, b_val)
+
+    assert success
+
+    np.testing.assert_allclose(
+        solution, np.array([6.50409711, 0.90841421]), atol=1e-6, rtol=1e-6
+    )
+
+    def root_fn(x, a, b):
+        f = pt.stack(
+            [a[0] * x[0] * pt.cos(x[1]) - b[0], x[0] * x[1] - a[1] * x[1] - b[1]]
+        )
+        return root(f, x)[0]
+
+    utt.verify_grad(root_fn, [x0, a_val, b_val], eps=1e-6)
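
As a sanity check, the expected root in test_root_system_of_equations does satisfy the system; a minimal numpy verification:

import numpy as np

x = np.array([6.50409711, 0.90841421])
a = np.array([1.0, 1.0])
b = np.array([4.0, 5.0])

# Residuals of the two equations at the expected solution; both ~0
residuals = np.array([
    a[0] * x[0] * np.cos(x[1]) - b[0],
    x[0] * x[1] - a[1] * x[1] - b[1],
])
print(residuals)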
