@@ -1,5 +1,3 @@
-import functools as ft
-
 import jax
 import numpy as np
 from numpy.testing import assert_array_almost_equal
@@ -11,21 +9,26 @@
 jax.config.update("jax_enable_x64", True)
 
 
-def jax_newton_solver(f, z_init):
-    def f_root(z):
-        return f(z) - z
+def jax_newton_solver(f, x0):
+    def f_root(x):
+        return f(x) - x
+
+    def g(x):
+        return x - jax.numpy.linalg.solve(jax.jacobian(f_root)(x), f_root(x))
+
+    return jax_fwd_solver(g, x0)
 
-    def g(z):
-        return z - jax.numpy.linalg.solve(jax.jacobian(f_root)(z), f_root(z))
 
-    return jax_fwd_solver(g, z_init)
+def jax_fwd_solver(f, x0, tol=1e-5):
+    x_prev, x = x0, f(x0)
+    while jax.numpy.linalg.norm(x_prev - x) > tol:
+        x_prev, x = x, f(x)
+    return x
 
 
-def jax_fwd_solver(f, z_init, tol=1e-5):
-    z_prev, z = z_init, f(z_init)
-    while jax.numpy.linalg.norm(z_prev - z) > tol:
-        z_prev, z = z, f(z)
-    return z
+def jax_fixed_point_solver(solver, f, params, x0, **solver_kwargs):
+    x_star = solver(lambda x: f(x, *params), x0=x0, **solver_kwargs)
+    return x_star
 
 
 def test_fixed_point_forward():
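
An editor's illustration of the new calling convention (not part of the commit): `jax_fixed_point_solver` now takes the parameters as a tuple and forwards them to `f`, rather than binding them up front with `functools.partial`.

# hypothetical usage sketch: Babylonian square-root iteration, fixed point at sqrt(a)
import jax

def damped(x, a):
    return 0.5 * (x + a / x)

x_star = jax_fixed_point_solver(
    jax_fwd_solver,
    damped,
    (2.0,),  # params tuple, here just `a`
    x0=jax.numpy.ones(1),
)
# x_star ≈ [1.41421356]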
@@ -34,7 +37,7 @@ def test_fixed_point_forward():
     def g(x, W, b):
         return pt.tanh(pt.dot(W, x) + b)
 
-    def _jax_g(x, W, b):
+    def jax_g(x, W, b):
         return jax.numpy.tanh(jax.numpy.dot(W, x) + b)
 
     ndim = 10
@@ -43,9 +46,13 @@ def _jax_g(x, W, b):
 
     W, b = np.asarray(W), np.asarray(b)
 
-    jax_g = ft.partial(_jax_g, W=W, b=b)
+    jax_solution = jax_fixed_point_solver(
+        jax_fwd_solver,
+        jax_g,
+        (W, b),
+        x0=jax.numpy.zeros_like(b),
+    )
 
-    jax_solution = jax_fwd_solver(jax_g, jax.numpy.zeros_like(b))
     pytensor_solution, _ = fixed_point_solver(
         g,
         fwd_solver,
@@ -60,7 +67,7 @@ def test_fixed_point_newton():
     def g(x, W, b):
         return pt.tanh(pt.dot(W, x) + b)
 
-    def _jax_g(x, W, b):
+    def jax_g(x, W, b):
         return jax.numpy.tanh(jax.numpy.dot(W, x) + b)
 
     ndim = 10
@@ -69,9 +76,13 @@ def _jax_g(x, W, b):
 
     W, b = np.asarray(W), np.asarray(b)
 
-    jax_g = ft.partial(_jax_g, W=W, b=b)
+    jax_solution = jax_fixed_point_solver(
+        jax_newton_solver,
+        jax_g,
+        (W, b),
+        x0=jax.numpy.zeros_like(b),
+    )
 
-    jax_solution = jax_newton_solver(jax_g, jax.numpy.zeros_like(b))
     pytensor_solution, _ = fixed_point_solver(
         g,
         newton_solver,
@@ -86,3 +97,49 @@ def _jax_g(x, W, b):
 # and adjoint implicit function theorem rewritten grad
 # see the [notes](https://theorashid.github.io/notes/fixed-point-iteration)
 # and the [Deep Implicit Layers workshop](https://implicit-layers-tutorial.org/implicit_functions/)
+
+# %%
+# import jax
+# import numpy as np
+
+# def grad_test_fixed_point_forward():
+#     def jax_g(x, W, b):
+#         return jax.numpy.tanh(jax.numpy.dot(W, x) + b)
+
+#     ndim = 10
+#     W = jax.random.normal(jax.random.PRNGKey(0), (ndim, ndim)) / jax.numpy.sqrt(ndim)
+#     b = jax.random.normal(jax.random.PRNGKey(1), (ndim,))
+
+#     W, b = np.asarray(W), np.asarray(b)  # params
+
+#     # gradient of the sum of the outputs with respect to the parameter matrix
+#     jax_grad = jax.grad(
+#         lambda W: jax_fixed_point_solver(
+#             jax_fwd_solver,
+#             jax_g,
+#             (W, b),  # wrt W
+#             x0=jax.numpy.zeros_like(b),
+#         ).sum()
+#     )(W)
+#     print(jax_grad[0])

+# grad_test_fixed_point_forward()
+
+# # params -> W
+# # z -> x
+# # x -> b
+# # f = lambda W, b, x: jnp.tanh(jnp.dot(W, x) + b)
+# # x_star = solver(lambda x: f(params, b, x), x_init=jnp.zeros_like(b))
+# # x_star = fixed_point_layer(fwd_solver, f, W, b)
+# # g = jax.grad(lambda W: fixed_point_layer(fwd_solver, f, W, b).sum())(W)
+# %%
+# def implicit_gradients_vjp(solver, f, res, x_soln):
+#     params, x, x_star = res
+#     # find the adjoint u^T via the solver:
+#     # u^T = w^T + u^T \partial_{x_star} f(x_star, params)
+#     _, vjp_x = jax.vjp(lambda x: f(x, *params), x_star)  # diff wrt x
+#     _, vjp_par = jax.vjp(lambda *params: f(x_star, *params), *params)  # diff wrt params
+#     u = solver(lambda u: vjp_x(u)[0] + x_soln, x0=jax.numpy.zeros_like(x_soln))
+
+#     # then compute the vjp u^T \partial_{params} f(x_star, params)
+#     return vjp_par(u)
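
To make the trailing sketch concrete, here is one way the commented-out pieces could be wired together with `jax.custom_vjp`, following the Deep Implicit Layers tutorial linked above. This is an editor's sketch against the signatures used in this file (`f(x, *params)`, solvers taking `x0=`), not code from the commit; `fixed_point_layer` is the hypothetical name borrowed from the comments.

import functools

import jax


# sketch: solver and f are static (non-differentiable) arguments
@functools.partial(jax.custom_vjp, nondiff_argnums=(0, 1))
def fixed_point_layer(solver, f, params, x0):
    return solver(lambda x: f(x, *params), x0=x0)


def fixed_point_layer_fwd(solver, f, params, x0):
    x_star = fixed_point_layer(solver, f, params, x0)
    return x_star, (params, x_star)  # residuals for the backward pass


def fixed_point_layer_bwd(solver, f, res, x_star_bar):
    params, x_star = res
    _, vjp_par = jax.vjp(lambda *p: f(x_star, *p), *params)
    _, vjp_x = jax.vjp(lambda x: f(x, *params), x_star)
    # adjoint fixed point u^T = w^T + u^T \partial_{x_star} f, reusing the same solver
    u = solver(lambda u: vjp_x(u)[0] + x_star_bar, x0=jax.numpy.zeros_like(x_star))
    # cotangents for (params, x0); the fixed point does not depend on x0
    return vjp_par(u), jax.numpy.zeros_like(x_star)


fixed_point_layer.defvjp(fixed_point_layer_fwd, fixed_point_layer_bwd)

# e.g. gradient of the summed fixed point with respect to W:
# jax.grad(lambda W: fixed_point_layer(jax_fwd_solver, jax_g, (W, b), jax.numpy.zeros_like(b)).sum())(W)

As written this relies on Python-level loops in the solvers, so it works under `jax.grad` in eager mode but would need `jax.lax.while_loop` inside the solvers to be `jit`-compatible.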