From 1955bd1606d31b50f1cb76681a4bcbacd5b20704 Mon Sep 17 00:00:00 2001 From: Brandon Amos Date: Mon, 30 Jan 2023 15:28:35 -0500 Subject: [PATCH 01/10] add quad example --- scratch/t.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100755 scratch/t.py diff --git a/scratch/t.py b/scratch/t.py new file mode 100755 index 000000000..b96b9c14e --- /dev/null +++ b/scratch/t.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import numpy as np +import torch + +import theseus as th + +torch.manual_seed(0) + +def f(optim_vars, aux_vars): + (x,) = optim_vars + return x.tensor + +n_dim = 2 +x = th.Vector(n_dim, name="x") +optim_vars = [x] +cost_function = th.AutoDiffCostFunction( + optim_vars, + f, + n_dim, + name="cost_fn", +) +objective = th.Objective() +objective.add(cost_function) +optimizer = th.GaussNewton( + objective, + max_iterations=15, + step_size=1.0, +) + +theseus_inputs = { + "x": torch.ones([1, n_dim]).requires_grad_(), +} +theseus_optim = th.TheseusLayer(optimizer) +updated_inputs, info = theseus_optim.forward( + theseus_inputs, +) + +print(updated_inputs) #, info) From 0c98ad82f0005a3d40124b29b0759502a6ea5a5d Mon Sep 17 00:00:00 2001 From: Dishank Bansal Date: Mon, 30 Jan 2023 13:14:13 -0800 Subject: [PATCH 02/10] completed example --- scratch/quadratic.py | 86 ++++++++++++++++++++++++++++++++++++++++++++ scratch/t.py | 44 ----------------------- 2 files changed, 86 insertions(+), 44 deletions(-) create mode 100755 scratch/quadratic.py delete mode 100755 scratch/t.py diff --git a/scratch/quadratic.py b/scratch/quadratic.py new file mode 100755 index 000000000..cb148548c --- /dev/null +++ b/scratch/quadratic.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
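+#
+# Background, as a quick sketch: the residual in the previous commit's
+# scratch/t.py is r(x) = x, whose Jacobian is the identity, so a single
+# Gauss-Newton step x - (J^T J)^{-1} J^T r(x) = x - x = 0 lands exactly on
+# the unconstrained minimizer of ||x||^2. The code below adds the equality
+# constraint g(x) = x - 1 = 0 and enforces it with an augmented Lagrangian
+# outer loop.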
+ + +# import numpy as np +import torch + +import theseus as th +import sys +from IPython.core import ultratb + +sys.excepthook = ultratb.FormattedTB(mode="Plain", color_scheme="Neutral", call_pdb=1) + +torch.manual_seed(0) + + +def g(x): + # return x[..., 0] - 1 + return x - 1 + + +def aug_lagrang(mu, z): + # def g(optim_vars): + # (x,) = optim_vars + # return x.tensor[..., 0] - 1 + + def f(optim_vars, aux_vars): + (x,) = optim_vars + mu, z = aux_vars + augment = torch.sqrt(mu.tensor) * g(x.tensor) + z.tensor / ( + 2.0 * torch.sqrt(mu.tensor) + ) + cost = torch.cat([x.tensor, augment], axis=-1) + return cost + + n_dim = 2 + n_constraints = z.shape[-1] + x = th.Vector(n_dim, name="x") + # mu = th.Variable(torch.ones(1, 1) * 1000, name="mu") + # z = th.Variable(torch.ones(1, 1) * 1, name="z") + optim_vars = [x] + aux_vars = [mu, z] + cost_function = th.AutoDiffCostFunction( + optim_vars, + f, + n_dim + n_constraints, + aux_vars=aux_vars, + name="cost_fn", + ) + objective = th.Objective() + objective.add(cost_function) + optimizer = th.GaussNewton( + objective, + max_iterations=15, + step_size=1.0, + ) + + theseus_inputs = { + "x": torch.ones([1, n_dim]).requires_grad_(), + } + theseus_optim = th.TheseusLayer(optimizer) + updated_inputs, info = theseus_optim.forward( + theseus_inputs, + ) + + # print(updated_inputs) # , info) + return updated_inputs["x"] + + +mu = th.Variable(torch.ones(1, 1), name="mu") +z = th.Variable(torch.zeros(1, 2), name="z") + +prev_g_norm = 0 +# print(x) +for iter in range(10): + x = aug_lagrang(mu, z) + g_x = g(x) + z.tensor = z.tensor + 2 * mu.tensor * g_x + g_norm = g_x.norm() + if g_norm > 0.25 * prev_g_norm: + mu.tensor = 2 * mu.tensor + prev_g_norm = g_norm + print(x, mu, z) diff --git a/scratch/t.py b/scratch/t.py deleted file mode 100755 index b96b9c14e..000000000 --- a/scratch/t.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - - -import numpy as np -import torch - -import theseus as th - -torch.manual_seed(0) - -def f(optim_vars, aux_vars): - (x,) = optim_vars - return x.tensor - -n_dim = 2 -x = th.Vector(n_dim, name="x") -optim_vars = [x] -cost_function = th.AutoDiffCostFunction( - optim_vars, - f, - n_dim, - name="cost_fn", -) -objective = th.Objective() -objective.add(cost_function) -optimizer = th.GaussNewton( - objective, - max_iterations=15, - step_size=1.0, -) - -theseus_inputs = { - "x": torch.ones([1, n_dim]).requires_grad_(), -} -theseus_optim = th.TheseusLayer(optimizer) -updated_inputs, info = theseus_optim.forward( - theseus_inputs, -) - -print(updated_inputs) #, info) From 90f24452b00ce932d81c4cc8fd2a913d30c76d2d Mon Sep 17 00:00:00 2001 From: Brandon Amos Date: Mon, 30 Jan 2023 17:04:30 -0500 Subject: [PATCH 03/10] add dx --- scratch/dx.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100755 scratch/dx.py diff --git a/scratch/dx.py b/scratch/dx.py new file mode 100755 index 000000000..0582f019b --- /dev/null +++ b/scratch/dx.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +import torch + +import matplotlib as mpl +import matplotlib.pyplot as plt +import matplotlib.patches as patches + +x = torch.Tensor([0., 0. 
,0.]) +N = 10 +u = torch.ones(N,2) + +L = 0.1 + +def f(x, u): + h = 0.1 + new_x = x.clone() + new_x[0] += h*u[0]*torch.cos(x[2]) + new_x[1] += h*u[0]*torch.sin(x[2]) + new_x[2] += h*u[0]*torch.tan(u[1] / L) + return new_x + +xs = [x.clone()] +for i in range(N): + x = f(x, u[i]) + xs.append(x.clone()) + print(x) + + + +fig, ax = plt.subplots(figsize=(6,6)) +for x in xs: + p = x[:2] + theta = x[2] + ax.scatter(p[0], p[1], color='k') + width = 0.05 + rect = patches.Rectangle( + p, L, .5*width, linewidth=1, + facecolor='grey', alpha=0.5) + t2 = mpl.transforms.Affine2D().rotate_around(p[0], p[1], theta) + ax.transData + rect.set_transform(t2) + ax.add_patch(rect) + + rect = patches.Rectangle( + p, L, -.5*width, linewidth=1, + facecolor='grey', alpha=0.5) + t2 = mpl.transforms.Affine2D().rotate_around(p[0], p[1], theta) + ax.transData + rect.set_transform(t2) + ax.add_patch(rect) + +# ax.set_xlim(-.5, .5) +# ax.set_ylim(0, 1) +ax.axis('equal') +fig.savefig('t.png') From 3f0ef20d3e13310453f066467675a12ffffa09cd Mon Sep 17 00:00:00 2001 From: Dishank Bansal Date: Mon, 30 Jan 2023 15:11:45 -0800 Subject: [PATCH 04/10] car example added, needs debugging --- scratch/car.py | 123 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100755 scratch/car.py diff --git a/scratch/car.py b/scratch/car.py new file mode 100755 index 000000000..6d2485f21 --- /dev/null +++ b/scratch/car.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +# import numpy as np +import torch + +import theseus as th +import sys +from IPython.core import ultratb + +sys.excepthook = ultratb.FormattedTB(mode="Plain", color_scheme="Neutral", call_pdb=1) + +torch.manual_seed(0) + +N = 10 +L = 0.1 +x_init = torch.zeros(3) +x_final = torch.tensor([0, 0.5, 0]) +h = 0.1 + + +def g(us, xs): + constraints = [dx(x_init, us[0][0]) - xs[0]] + for idx in range(N - 2): + constraints.append(dx(xs[idx][0], us[idx][0]) - xs[idx + 1]) + constraints.append((dx(xs[-1][0], us[-1][0]) - x_final).unsqueeze(0)) + return torch.cat(constraints).ravel() + + +def dx(x, u): + # new_x = x.clone() + new_x = [ + x[0] + (h * u[0] * torch.cos(x[2])), + x[1] + h * u[0] * torch.sin(x[2]), + x[2] + h * u[0] * torch.tan(u[1] / L), + ] + return torch.stack(new_x) + + +def aug_lagrang(mu, z): + # def g(optim_vars): + # (x,) = optim_vars + # return x.tensor[..., 0] - 1 + + def f(optim_vars, aux_vars, lam=torch.tensor(0.1)): + us, xs = optim_vars[:N], optim_vars[N:] + us = [u.tensor for u in us] + xs = [x.tensor for x in xs] + cost_list = us + for idx in range(len(us) - 1): + cost_list.append(torch.sqrt(lam) * (us[idx + 1] - us[idx])) + + mu, z = aux_vars + augment = torch.sqrt(mu.tensor) * g(us, xs) + z.tensor / ( + 2.0 * torch.sqrt(mu.tensor) + ) + # cost = torch.cat([x.tensor, augment], axis=-1) + cost = torch.cat(cost_list + [augment], axis=-1) + # print(cost.shape) + return cost + + n_dim = N * 2 + n_constraints = z.shape[-1] + u = [th.Vector(2, name="u" + str(idx + 1)) for idx in range(N)] + x = [th.Vector(3, name="x" + str(idx + 1)) for idx in range(N - 1)] + # mu = th.Variable(torch.ones(1, 1) * 1000, name="mu") + # z = th.Variable(torch.ones(1, 1) * 1, name="z") + optim_vars = u + x + aux_vars = [mu, z] + cost_function = th.AutoDiffCostFunction( + optim_vars, + f, + 2 * (N + N - 1) + n_constraints, + aux_vars=aux_vars, + name="cost_fn", + ) + 
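+    # Why the augmented residual block above implements the augmented
+    # Lagrangian (a sketch, assuming Theseus minimizes the squared norm of
+    # the stacked residual):
+    #   || sqrt(mu) * g + z / (2 sqrt(mu)) ||^2
+    #       = mu ||g||^2 + z^T g + ||z||^2 / (4 mu),
+    # and the last term is constant in the optimization variables, so the
+    # inner solve minimizes the cost plus z^T g(x) + mu ||g(x)||^2.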
objective = th.Objective() + objective.add(cost_function) + optimizer = th.LevenbergMarquardt( + objective, + max_iterations=15, + step_size=1.0, + ) + + # theseus_inputs = { + # "x": torch.ones([1, n_dim]).requires_grad_(), + # } + # theseus_inputs = {"u" + str(idx + 1): torch.ones([1, 2]) * 10 for idx in range(N)} + theseus_optim = th.TheseusLayer(optimizer) + # TODO: warmstart with previous solution + updated_inputs, info = theseus_optim.forward( + # theseus_inputs, optimizer_kwargs={"verbose": True} + ) + + # print(updated_inputs) # , info) + return updated_inputs + + +mu = th.Variable(torch.ones(1, 1), name="mu") +z = th.Variable(torch.zeros(1, N * 3), name="z") + +prev_g_norm = 0 +# print(x) +for iter in range(10): + output = aug_lagrang(mu, z) + us = [output["u" + str(idx + 1)] for idx in range(N)] + xs = [output["x" + str(idx + 1)] for idx in range(N - 1)] + # us = [u.tensor for u in us] + # xs = [x.tensor for x in xs] + + g_x = g(us, xs) + z.tensor = z.tensor + 2 * mu.tensor * g_x + g_norm = g_x.norm() + if g_norm > 0.25 * prev_g_norm: + mu.tensor = 2 * mu.tensor + prev_g_norm = g_norm + +print(xs) +print(us) From 2f166f4d64787a85e1e0c51672ba87ae3a37319e Mon Sep 17 00:00:00 2001 From: Brandon Amos Date: Tue, 31 Jan 2023 18:20:06 -0500 Subject: [PATCH 05/10] car example mostly working for a simple goal --- scratch/car.py | 144 +++++++++++++++++++++++++++++++++++++------------ scratch/dx.py | 41 +++++++++----- 2 files changed, 137 insertions(+), 48 deletions(-) diff --git a/scratch/car.py b/scratch/car.py index 6d2485f21..146756145 100755 --- a/scratch/car.py +++ b/scratch/car.py @@ -4,10 +4,14 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. - -# import numpy as np +import math +import numpy as np import torch +import matplotlib as mpl +import matplotlib.pyplot as plt +import matplotlib.patches as patches + import theseus as th import sys from IPython.core import ultratb @@ -15,13 +19,61 @@ sys.excepthook = ultratb.FormattedTB(mode="Plain", color_scheme="Neutral", call_pdb=1) torch.manual_seed(0) +torch.set_default_dtype(torch.float64) -N = 10 -L = 0.1 +N = 30 +L = 0.05 x_init = torch.zeros(3) -x_final = torch.tensor([0, 0.5, 0]) -h = 0.1 - +# x_final = torch.tensor([0, 0.5, 0]) +# x_final = torch.tensor([0, 1., 0]) +# x_final = torch.tensor([0.5, 0.3, 0]) +# x_final = torch.tensor([0, 1., 0.5*np.pi]) +# x_final = torch.tensor([0.5, 0.5, np.pi/2.]) +x_final = torch.tensor([0.5, 0.5, 0.]) +h = 1. +gamma = 10. + + +def plot(xs, us): + xs = [x_init] + xs + [x_final] + us = us + [torch.zeros(2)] + + fig, ax = plt.subplots(figsize=(6,6)) + for x, u in zip(xs, us): + x = x.squeeze() + u = u.squeeze() + + p = x[:2] + theta = x[2] + + width = 0.02 + alpha = 0.7 + rect = patches.Rectangle( + (0, -0.5*width), L, width, linewidth=1, + edgecolor='black', facecolor='#DCDCDC', alpha=alpha) + t = mpl.transforms.Affine2D().rotate(theta).translate(*p) + ax.transData + rect.set_transform(t) + ax.add_patch(rect) + + wheel_length = width/2. 
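+        # Each wheel is a zero-height Rectangle (i.e. a line segment) placed
+        # at the front of the body (distance L along the car), rotated first
+        # by the steering angle u[1] about its own mount point and then by
+        # the heading theta.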
+ wheel = patches.Rectangle( + (L-0.5*wheel_length, -0.5*width), wheel_length, 0, linewidth=1, + edgecolor='black', alpha=alpha) + t = mpl.transforms.Affine2D().rotate_around( + L, -0.5*width, u[1]).rotate(theta).translate(*p) + ax.transData + wheel.set_transform(t) + ax.add_patch(wheel) + + wheel = patches.Rectangle( + (L-0.5*wheel_length, +0.5*width), wheel_length, 0, linewidth=1, + edgecolor='black', alpha=alpha) + t = mpl.transforms.Affine2D().rotate_around( + L, +0.5*width, u[1]).rotate(theta).translate(*p) + ax.transData + wheel.set_transform(t) + ax.add_patch(wheel) + + ax.axis('equal') + fig.savefig('t.png') def g(us, xs): constraints = [dx(x_init, us[0][0]) - xs[0]] @@ -34,25 +86,21 @@ def g(us, xs): def dx(x, u): # new_x = x.clone() new_x = [ - x[0] + (h * u[0] * torch.cos(x[2])), + x[0] + h * u[0] * torch.cos(x[2]), x[1] + h * u[0] * torch.sin(x[2]), x[2] + h * u[0] * torch.tan(u[1] / L), ] return torch.stack(new_x) -def aug_lagrang(mu, z): - # def g(optim_vars): - # (x,) = optim_vars - # return x.tensor[..., 0] - 1 - - def f(optim_vars, aux_vars, lam=torch.tensor(0.1)): +def aug_lagrang(mu, z, state_dict): + def f(optim_vars, aux_vars): us, xs = optim_vars[:N], optim_vars[N:] us = [u.tensor for u in us] xs = [x.tensor for x in xs] cost_list = us for idx in range(len(us) - 1): - cost_list.append(torch.sqrt(lam) * (us[idx + 1] - us[idx])) + cost_list.append(math.sqrt(gamma) * (us[idx + 1] - us[idx])) mu, z = aux_vars augment = torch.sqrt(mu.tensor) * g(us, xs) + z.tensor / ( @@ -66,7 +114,7 @@ def f(optim_vars, aux_vars, lam=torch.tensor(0.1)): n_dim = N * 2 n_constraints = z.shape[-1] u = [th.Vector(2, name="u" + str(idx + 1)) for idx in range(N)] - x = [th.Vector(3, name="x" + str(idx + 1)) for idx in range(N - 1)] + x = [th.Vector(3, name="x" + str(idx + 1)) for idx in range(1, N)] # mu = th.Variable(torch.ones(1, 1) * 1000, name="mu") # z = th.Variable(torch.ones(1, 1) * 1, name="z") optim_vars = u + x @@ -80,35 +128,57 @@ def f(optim_vars, aux_vars, lam=torch.tensor(0.1)): ) objective = th.Objective() objective.add(cost_function) - optimizer = th.LevenbergMarquardt( + optimizer = th.GaussNewton( objective, - max_iterations=15, - step_size=1.0, + max_iterations=10000, + step_size=0.1, ) - # theseus_inputs = { - # "x": torch.ones([1, n_dim]).requires_grad_(), - # } - # theseus_inputs = {"u" + str(idx + 1): torch.ones([1, 2]) * 10 for idx in range(N)} theseus_optim = th.TheseusLayer(optimizer) - # TODO: warmstart with previous solution - updated_inputs, info = theseus_optim.forward( - # theseus_inputs, optimizer_kwargs={"verbose": True} - ) + with torch.no_grad(): + state_dict, info = theseus_optim.forward( + state_dict, + optimizer_kwargs={ + "verbose": True, + # 'track_err_history': True, + # 'track_state_history': True + }, + ) + # assert all(info.status == th.NonlinearOptimizerStatus.CONVERGED ) + # import ipdb; ipdb.set_trace() # print(updated_inputs) # , info) - return updated_inputs + # import ipdb; ipdb.set_trace() + return state_dict mu = th.Variable(torch.ones(1, 1), name="mu") z = th.Variable(torch.zeros(1, N * 3), name="z") +state_dict = {} + +def project_states(state_dict): + xt = x_init + for t in range(1, N+1): + # print(t) + ut = state_dict.get(f'u{t}') + # state_dict[f'u{t}'] = ut.unsqueeze(0) + # print('h') + state_dict[f'x{t+1}'] = dx(xt, ut.squeeze()).unsqueeze(0) + # import ipdb; ipdb.set_trace() + prev_g_norm = 0 + +# increase_factor = 2. 
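+#
+# Outer-loop rule used below: after each inner solve, update the multiplier
+# estimate z <- z + 2 * mu * g(x), and double the penalty mu whenever ||g||
+# has not dropped to a quarter of its previous value.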
+ # print(x) -for iter in range(10): - output = aug_lagrang(mu, z) - us = [output["u" + str(idx + 1)] for idx in range(N)] - xs = [output["x" + str(idx + 1)] for idx in range(N - 1)] +for i in range(50): + print(f'=== iter {i}') + # if i > 0: + # project_states(state_dict) + state_dict = aug_lagrang(mu, z, state_dict=state_dict) + us = [state_dict["u" + str(idx + 1)] for idx in range(N)] + xs = [state_dict["x" + str(idx + 1)] for idx in range(1, N)] # us = [u.tensor for u in us] # xs = [x.tensor for x in xs] @@ -119,5 +189,11 @@ def f(optim_vars, aux_vars, lam=torch.tensor(0.1)): mu.tensor = 2 * mu.tensor prev_g_norm = g_norm -print(xs) -print(us) + print(xs) + print(us) + print(mu) + print(z) + + plot(xs, us) + # import ipdb; ipdb.set_trace() + diff --git a/scratch/dx.py b/scratch/dx.py index 0582f019b..a779e1ff3 100755 --- a/scratch/dx.py +++ b/scratch/dx.py @@ -8,7 +8,7 @@ x = torch.Tensor([0., 0. ,0.]) N = 10 -u = torch.ones(N,2) +us = torch.ones(N,2) L = 0.1 @@ -22,31 +22,44 @@ def f(x, u): xs = [x.clone()] for i in range(N): - x = f(x, u[i]) + x = f(x, us[i]) xs.append(x.clone()) print(x) fig, ax = plt.subplots(figsize=(6,6)) -for x in xs: +for x, u in zip(xs, us): p = x[:2] theta = x[2] - ax.scatter(p[0], p[1], color='k') + # print(p, theta) + + # ax.scatter(p[0], p[1], color='k') + width = 0.05 rect = patches.Rectangle( - p, L, .5*width, linewidth=1, - facecolor='grey', alpha=0.5) - t2 = mpl.transforms.Affine2D().rotate_around(p[0], p[1], theta) + ax.transData - rect.set_transform(t2) + (0, -0.5*width), L, width, linewidth=1, + edgecolor='black', facecolor='grey', alpha=0.5) + t = mpl.transforms.Affine2D().rotate(theta).translate(*p) + ax.transData + rect.set_transform(t) ax.add_patch(rect) - rect = patches.Rectangle( - p, L, -.5*width, linewidth=1, - facecolor='grey', alpha=0.5) - t2 = mpl.transforms.Affine2D().rotate_around(p[0], p[1], theta) + ax.transData - rect.set_transform(t2) - ax.add_patch(rect) + wheel_length = width/2. 
+ wheel = patches.Rectangle( + (L-0.5*wheel_length, -0.5*width), wheel_length, 0, linewidth=1, + edgecolor='black', alpha=1) + t = mpl.transforms.Affine2D().rotate_around( + L, -0.5*width, u[1]).rotate(theta).translate(*p) + ax.transData + wheel.set_transform(t) + ax.add_patch(wheel) + + wheel = patches.Rectangle( + (L-0.5*wheel_length, +0.5*width), wheel_length, 0, linewidth=1, + edgecolor='black', alpha=1) + t = mpl.transforms.Affine2D().rotate_around( + L, +0.5*width, u[1]).rotate(theta).translate(*p) + ax.transData + wheel.set_transform(t) + ax.add_patch(wheel) # ax.set_xlim(-.5, .5) # ax.set_ylim(0, 1) From a7da01117f3d75a1d2c0ff2c5a15c63bef8262a5 Mon Sep 17 00:00:00 2001 From: Brandon Amos Date: Thu, 2 Feb 2023 21:52:12 -0500 Subject: [PATCH 06/10] car: tweak hypers, add control plot --- scratch/car.py | 84 ++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/scratch/car.py b/scratch/car.py index 146756145..f9de7aae6 100755 --- a/scratch/car.py +++ b/scratch/car.py @@ -7,10 +7,12 @@ import math import numpy as np import torch +import copy import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.patches as patches +plt.style.use('bmh') import theseus as th import sys @@ -21,20 +23,22 @@ torch.manual_seed(0) torch.set_default_dtype(torch.float64) -N = 30 -L = 0.05 +N = 20 +L = 0.1 x_init = torch.zeros(3) # x_final = torch.tensor([0, 0.5, 0]) # x_final = torch.tensor([0, 1., 0]) # x_final = torch.tensor([0.5, 0.3, 0]) # x_final = torch.tensor([0, 1., 0.5*np.pi]) -# x_final = torch.tensor([0.5, 0.5, np.pi/2.]) -x_final = torch.tensor([0.5, 0.5, 0.]) -h = 1. -gamma = 10. +# x_final = torch.tensor([0.5, 0.5, 0.]) +x_final = torch.tensor([0.5, 0.3, np.pi/2.]) +h = 0.05 +squared_cost_weight = 1e-2 +slew_cost_weight = 1e-1 -def plot(xs, us): + +def plot_car(xs, us): xs = [x_init] + xs + [x_final] us = us + [torch.zeros(2)] @@ -46,8 +50,8 @@ def plot(xs, us): p = x[:2] theta = x[2] - width = 0.02 - alpha = 0.7 + width = 0.5*L + alpha = 0.5 rect = patches.Rectangle( (0, -0.5*width), L, width, linewidth=1, edgecolor='black', facecolor='#DCDCDC', alpha=alpha) @@ -74,17 +78,28 @@ def plot(xs, us): ax.axis('equal') fig.savefig('t.png') + plt.close(fig) + +def plot_actions(us): + us = torch.stack(us).squeeze() + fig, ax = plt.subplots(figsize=(6,4)) + ax.plot(us[:,0], label='speed') + ax.plot(us[:,1], label='steering angle') + fig.legend(ncol=2, loc='upper center') + fig.savefig('actions.png', transparent=True) + plt.close(fig) def g(us, xs): + assert len(us) == N constraints = [dx(x_init, us[0][0]) - xs[0]] for idx in range(N - 2): - constraints.append(dx(xs[idx][0], us[idx][0]) - xs[idx + 1]) + constraints.append(dx(xs[idx][0], us[idx+1][0]) - xs[idx + 1]) constraints.append((dx(xs[-1][0], us[-1][0]) - x_final).unsqueeze(0)) - return torch.cat(constraints).ravel() + constraints = torch.cat(constraints).ravel() + return constraints def dx(x, u): - # new_x = x.clone() new_x = [ x[0] + h * u[0] * torch.cos(x[2]), x[1] + h * u[0] * torch.sin(x[2]), @@ -98,25 +113,23 @@ def f(optim_vars, aux_vars): us, xs = optim_vars[:N], optim_vars[N:] us = [u.tensor for u in us] xs = [x.tensor for x in xs] - cost_list = us + assert len(us) == N + assert len(xs) == N - 1 + cost_list = copy.copy([u*squared_cost_weight for u in us]) for idx in range(len(us) - 1): - cost_list.append(math.sqrt(gamma) * (us[idx + 1] - us[idx])) + cost_list.append(math.sqrt(slew_cost_weight) * (us[idx + 1] - us[idx])) mu, z = aux_vars augment = 
torch.sqrt(mu.tensor) * g(us, xs) + z.tensor / ( 2.0 * torch.sqrt(mu.tensor) ) - # cost = torch.cat([x.tensor, augment], axis=-1) cost = torch.cat(cost_list + [augment], axis=-1) - # print(cost.shape) return cost n_dim = N * 2 n_constraints = z.shape[-1] u = [th.Vector(2, name="u" + str(idx + 1)) for idx in range(N)] x = [th.Vector(3, name="x" + str(idx + 1)) for idx in range(1, N)] - # mu = th.Variable(torch.ones(1, 1) * 1000, name="mu") - # z = th.Variable(torch.ones(1, 1) * 1, name="z") optim_vars = u + x aux_vars = [mu, z] cost_function = th.AutoDiffCostFunction( @@ -128,10 +141,10 @@ def f(optim_vars, aux_vars): ) objective = th.Objective() objective.add(cost_function) - optimizer = th.GaussNewton( + optimizer = th.LevenbergMarquardt( objective, - max_iterations=10000, - step_size=0.1, + max_iterations=100, + step_size=1., ) theseus_optim = th.TheseusLayer(optimizer) @@ -153,47 +166,38 @@ def f(optim_vars, aux_vars): mu = th.Variable(torch.ones(1, 1), name="mu") +# mu = th.Variable(torch.ones(1, 1), name="mu") z = th.Variable(torch.zeros(1, N * 3), name="z") state_dict = {} -def project_states(state_dict): - xt = x_init - for t in range(1, N+1): - # print(t) - ut = state_dict.get(f'u{t}') - # state_dict[f'u{t}'] = ut.unsqueeze(0) - # print('h') - state_dict[f'x{t+1}'] = dx(xt, ut.squeeze()).unsqueeze(0) - # import ipdb; ipdb.set_trace() - prev_g_norm = 0 -# increase_factor = 2. +increase_factor = 2. + +state_dict = {} -# print(x) for i in range(50): print(f'=== iter {i}') - # if i > 0: - # project_states(state_dict) state_dict = aug_lagrang(mu, z, state_dict=state_dict) us = [state_dict["u" + str(idx + 1)] for idx in range(N)] xs = [state_dict["x" + str(idx + 1)] for idx in range(1, N)] - # us = [u.tensor for u in us] - # xs = [x.tensor for x in xs] g_x = g(us, xs) z.tensor = z.tensor + 2 * mu.tensor * g_x g_norm = g_x.norm() if g_norm > 0.25 * prev_g_norm: - mu.tensor = 2 * mu.tensor + mu.tensor = increase_factor * mu.tensor prev_g_norm = g_norm print(xs) print(us) print(mu) print(z) + print(g_x) - plot(xs, us) - # import ipdb; ipdb.set_trace() + plot_car(xs, us) + plot_actions(us) + +# import ipdb; ipdb.set_trace() From b9cab3b3451e7e30d749ceca9ec91bed72a94ce5 Mon Sep 17 00:00:00 2001 From: Brandon Amos Date: Fri, 3 Feb 2023 15:38:36 -0500 Subject: [PATCH 07/10] rm dx --- scratch/dx.py | 67 --------------------------------------------------- 1 file changed, 67 deletions(-) delete mode 100755 scratch/dx.py diff --git a/scratch/dx.py b/scratch/dx.py deleted file mode 100755 index a779e1ff3..000000000 --- a/scratch/dx.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python3 - -import torch - -import matplotlib as mpl -import matplotlib.pyplot as plt -import matplotlib.patches as patches - -x = torch.Tensor([0., 0. ,0.]) -N = 10 -us = torch.ones(N,2) - -L = 0.1 - -def f(x, u): - h = 0.1 - new_x = x.clone() - new_x[0] += h*u[0]*torch.cos(x[2]) - new_x[1] += h*u[0]*torch.sin(x[2]) - new_x[2] += h*u[0]*torch.tan(u[1] / L) - return new_x - -xs = [x.clone()] -for i in range(N): - x = f(x, us[i]) - xs.append(x.clone()) - print(x) - - - -fig, ax = plt.subplots(figsize=(6,6)) -for x, u in zip(xs, us): - p = x[:2] - theta = x[2] - # print(p, theta) - - # ax.scatter(p[0], p[1], color='k') - - width = 0.05 - rect = patches.Rectangle( - (0, -0.5*width), L, width, linewidth=1, - edgecolor='black', facecolor='grey', alpha=0.5) - t = mpl.transforms.Affine2D().rotate(theta).translate(*p) + ax.transData - rect.set_transform(t) - ax.add_patch(rect) - - wheel_length = width/2. 
- wheel = patches.Rectangle( - (L-0.5*wheel_length, -0.5*width), wheel_length, 0, linewidth=1, - edgecolor='black', alpha=1) - t = mpl.transforms.Affine2D().rotate_around( - L, -0.5*width, u[1]).rotate(theta).translate(*p) + ax.transData - wheel.set_transform(t) - ax.add_patch(wheel) - - wheel = patches.Rectangle( - (L-0.5*wheel_length, +0.5*width), wheel_length, 0, linewidth=1, - edgecolor='black', alpha=1) - t = mpl.transforms.Affine2D().rotate_around( - L, +0.5*width, u[1]).rotate(theta).translate(*p) + ax.transData - wheel.set_transform(t) - ax.add_patch(wheel) - -# ax.set_xlim(-.5, .5) -# ax.set_ylim(0, 1) -ax.axis('equal') -fig.savefig('t.png') From 29a76cb062cd5647914318ea3e6ff831cfa1fcf8 Mon Sep 17 00:00:00 2001 From: Brandon Amos Date: Fri, 3 Feb 2023 16:41:29 -0500 Subject: [PATCH 08/10] prototype more general/automatic augmented Lagrangian code --- scratch/.gitignore | 1 + scratch/augmented_lagrangian.py | 74 ++++++++++ scratch/car.py | 248 +++++++++++++++----------------- scratch/quadratic.py | 101 ++++--------- 4 files changed, 218 insertions(+), 206 deletions(-) create mode 100644 scratch/.gitignore create mode 100644 scratch/augmented_lagrangian.py diff --git a/scratch/.gitignore b/scratch/.gitignore new file mode 100644 index 000000000..aab52d906 --- /dev/null +++ b/scratch/.gitignore @@ -0,0 +1 @@ +*.png \ No newline at end of file diff --git a/scratch/augmented_lagrangian.py b/scratch/augmented_lagrangian.py new file mode 100644 index 000000000..532bdfa80 --- /dev/null +++ b/scratch/augmented_lagrangian.py @@ -0,0 +1,74 @@ +# Implements the augmented Lagrangian method for constrained +# nonlinear least squares as described on page 11.21 ofQ +# http://www.seas.ucla.edu/~vandenbe/133B/lectures/nllseq.pdf + +import copy +import torch +import theseus as th + + +def solve_augmented_lagrangian( + err_fn, + optim_vars, + aux_vars, + dim, + equality_constraint_fn, + optimizer_cls, + optimizer_kwargs, + verbose, + initial_state_dict, + num_augmented_lagrangian_iterations=10, + callback=None, +): + def err_fn_augmented(optim_vars, aux_vars): + # TODO: Dangerous to override optim_vars,aux_vars here + original_aux_vars = aux_vars[:-2] + original_err = err_fn(optim_vars, original_aux_vars) + g = equality_constraint_fn(optim_vars, original_aux_vars) + + mu, z = aux_vars[-2:] + sqrt_mu = torch.sqrt(mu.tensor) + err_augmented = sqrt_mu * g + z.tensor / (2.0 * sqrt_mu) + combined_err = torch.cat([original_err, err_augmented], axis=-1) + return combined_err + + g = equality_constraint_fn(optim_vars, aux_vars) + dim_constraints = g.shape[-1] + dim_augmented = dim + dim_constraints + + num_batch = 1 # TODO: Infer this from somewhere else? 
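+    # mu (the penalty weight) and z (the running Lagrange-multiplier
+    # estimate) are exposed as Theseus auxiliary variables so the outer loop
+    # below can update them in place between inner solves.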
+ mu = th.Variable(torch.ones(num_batch, 1), name="mu") + z = th.Variable(torch.zeros(num_batch, dim_constraints), name="z") + aux_vars_augmented = aux_vars + [mu, z] + + cost_fn_augmented = th.AutoDiffCostFunction( + optim_vars=optim_vars, + err_fn=err_fn_augmented, + dim=dim_augmented, + aux_vars=aux_vars_augmented, + name="cost_fn_augmented", + ) + + objective = th.Objective() + objective.add(cost_fn_augmented) + + optimizer = optimizer_cls(objective, **optimizer_kwargs) + theseus_optim = th.TheseusLayer(optimizer) + state_dict = copy.copy(initial_state_dict) + + prev_g_norm = 0 + for i in range(num_augmented_lagrangian_iterations): + state_dict, info = theseus_optim.forward(state_dict) + g_x = equality_constraint_fn(optim_vars, aux_vars) + g_norm = g_x.norm() + if verbose: + print(f"=== [{i}] mu: {mu.tensor.item():.2e}, ||g||: {g_norm:.2e}") + print(state_dict) + if callback is not None: + callback(state_dict) + z.tensor = z.tensor + 2 * mu.tensor * g_x + if i > 0 and g_norm > 0.25 * prev_g_norm: + mu.tensor = 2 * mu.tensor + prev_g_norm = g_norm + + return state_dict diff --git a/scratch/car.py b/scratch/car.py index f9de7aae6..4e66110e6 100755 --- a/scratch/car.py +++ b/scratch/car.py @@ -4,7 +4,6 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -import math import numpy as np import torch import copy @@ -12,37 +11,46 @@ import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.patches as patches -plt.style.use('bmh') import theseus as th -import sys -from IPython.core import ultratb +from augmented_lagrangian import solve_augmented_lagrangian -sys.excepthook = ultratb.FormattedTB(mode="Plain", color_scheme="Neutral", call_pdb=1) +plt.style.use("bmh") -torch.manual_seed(0) -torch.set_default_dtype(torch.float64) +N = 20 # Number of timesteps +L = 0.1 # Length of car +h = 0.05 # Discretization interval +squared_cost_weight = 1e-2 # Weight for the controls +slew_cost_weight = 1.0 # Weight for the slew rate -N = 20 -L = 0.1 -x_init = torch.zeros(3) +x_init = torch.zeros(3) # Initial state +x_final = torch.tensor([0.5, 0.3, np.pi / 2.0]) # Final state + +# Can try these other final states, but isn't very stable. # x_final = torch.tensor([0, 0.5, 0]) # x_final = torch.tensor([0, 1., 0]) # x_final = torch.tensor([0.5, 0.3, 0]) # x_final = torch.tensor([0, 1., 0.5*np.pi]) # x_final = torch.tensor([0.5, 0.5, 0.]) -x_final = torch.tensor([0.5, 0.3, np.pi/2.]) -h = 0.05 -squared_cost_weight = 1e-2 -slew_cost_weight = 1e-1 +# Given a state x and action u, return the +# next state following the discretized dynamics. 
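+# As a sketch, with u = (v, phi) and state (x, y, theta), the update below is
+#   x'     = x     + h * v * cos(theta)
+#   y'     = y     + h * v * sin(theta)
+#   theta' = theta + h * v * tan(phi / L)
+# (the standard kinematic car model usually divides tan(phi) by the
+# wheelbase L rather than dividing phi itself; the code keeps the latter).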
+def car_dynamics(x, u): + new_x = [ + x[0] + h * u[0] * torch.cos(x[2]), + x[1] + h * u[0] * torch.sin(x[2]), + x[2] + h * u[0] * torch.tan(u[1] / L), + ] + return torch.stack(new_x) -def plot_car(xs, us): + +def plot_car_trajectory(xs, us): + # The optimization formulation leaves out these known values from the states xs = [x_init] + xs + [x_final] us = us + [torch.zeros(2)] - fig, ax = plt.subplots(figsize=(6,6)) + fig, ax = plt.subplots(figsize=(6, 6)) for x, u in zip(xs, us): x = x.squeeze() u = u.squeeze() @@ -50,154 +58,122 @@ def plot_car(xs, us): p = x[:2] theta = x[2] - width = 0.5*L + width = 0.5 * L alpha = 0.5 rect = patches.Rectangle( - (0, -0.5*width), L, width, linewidth=1, - edgecolor='black', facecolor='#DCDCDC', alpha=alpha) + (0, -0.5 * width), + L, + width, + linewidth=1, + edgecolor="black", + facecolor="#DCDCDC", + alpha=alpha, + ) t = mpl.transforms.Affine2D().rotate(theta).translate(*p) + ax.transData rect.set_transform(t) ax.add_patch(rect) - wheel_length = width/2. + wheel_length = width / 2.0 wheel = patches.Rectangle( - (L-0.5*wheel_length, -0.5*width), wheel_length, 0, linewidth=1, - edgecolor='black', alpha=alpha) - t = mpl.transforms.Affine2D().rotate_around( - L, -0.5*width, u[1]).rotate(theta).translate(*p) + ax.transData + (L - 0.5 * wheel_length, -0.5 * width), + wheel_length, + 0, + linewidth=1, + edgecolor="black", + alpha=alpha, + ) + t = ( + mpl.transforms.Affine2D() + .rotate_around(L, -0.5 * width, u[1]) + .rotate(theta) + .translate(*p) + + ax.transData + ) wheel.set_transform(t) ax.add_patch(wheel) wheel = patches.Rectangle( - (L-0.5*wheel_length, +0.5*width), wheel_length, 0, linewidth=1, - edgecolor='black', alpha=alpha) - t = mpl.transforms.Affine2D().rotate_around( - L, +0.5*width, u[1]).rotate(theta).translate(*p) + ax.transData + (L - 0.5 * wheel_length, +0.5 * width), + wheel_length, + 0, + linewidth=1, + edgecolor="black", + alpha=alpha, + ) + t = ( + mpl.transforms.Affine2D() + .rotate_around(L, +0.5 * width, u[1]) + .rotate(theta) + .translate(*p) + + ax.transData + ) wheel.set_transform(t) ax.add_patch(wheel) - ax.axis('equal') - fig.savefig('t.png') + ax.axis("equal") + fig.savefig("trajectory.png") plt.close(fig) -def plot_actions(us): + +def plot_action_sequence(us): us = torch.stack(us).squeeze() - fig, ax = plt.subplots(figsize=(6,4)) - ax.plot(us[:,0], label='speed') - ax.plot(us[:,1], label='steering angle') - fig.legend(ncol=2, loc='upper center') - fig.savefig('actions.png', transparent=True) + fig, ax = plt.subplots(figsize=(6, 4)) + ax.plot(us[:, 0], label="speed") + ax.plot(us[:, 1], label="steering angle") + fig.legend(ncol=2, loc="upper center") + fig.savefig("actions.png", transparent=True) plt.close(fig) -def g(us, xs): + +# Ensure the trajectory satisfies the dynamics and reaches the goal state +def equality_constraint_fn(optim_vars, aux_vars): + xs, us = optim_vars[: N - 1], optim_vars[N - 1 :] + us = [u.tensor for u in us] + xs = [x.tensor for x in xs] assert len(us) == N - constraints = [dx(x_init, us[0][0]) - xs[0]] + constraints = [car_dynamics(x_init, us[0][0]) - xs[0]] for idx in range(N - 2): - constraints.append(dx(xs[idx][0], us[idx+1][0]) - xs[idx + 1]) - constraints.append((dx(xs[-1][0], us[-1][0]) - x_final).unsqueeze(0)) + constraints.append(car_dynamics(xs[idx][0], us[idx + 1][0]) - xs[idx + 1]) + constraints.append((car_dynamics(xs[-1][0], us[-1][0]) - x_final).unsqueeze(0)) constraints = torch.cat(constraints).ravel() return constraints -def dx(x, u): - new_x = [ - x[0] + h * u[0] * torch.cos(x[2]), - 
x[1] + h * u[0] * torch.sin(x[2]), - x[2] + h * u[0] * torch.tan(u[1] / L), - ] - return torch.stack(new_x) - - -def aug_lagrang(mu, z, state_dict): - def f(optim_vars, aux_vars): - us, xs = optim_vars[:N], optim_vars[N:] - us = [u.tensor for u in us] - xs = [x.tensor for x in xs] - assert len(us) == N - assert len(xs) == N - 1 - cost_list = copy.copy([u*squared_cost_weight for u in us]) - for idx in range(len(us) - 1): - cost_list.append(math.sqrt(slew_cost_weight) * (us[idx + 1] - us[idx])) - - mu, z = aux_vars - augment = torch.sqrt(mu.tensor) * g(us, xs) + z.tensor / ( - 2.0 * torch.sqrt(mu.tensor) - ) - cost = torch.cat(cost_list + [augment], axis=-1) - return cost - - n_dim = N * 2 - n_constraints = z.shape[-1] - u = [th.Vector(2, name="u" + str(idx + 1)) for idx in range(N)] - x = [th.Vector(3, name="x" + str(idx + 1)) for idx in range(1, N)] - optim_vars = u + x - aux_vars = [mu, z] - cost_function = th.AutoDiffCostFunction( - optim_vars, - f, - 2 * (N + N - 1) + n_constraints, - aux_vars=aux_vars, - name="cost_fn", - ) - objective = th.Objective() - objective.add(cost_function) - optimizer = th.LevenbergMarquardt( - objective, - max_iterations=100, - step_size=1., - ) - - theseus_optim = th.TheseusLayer(optimizer) - with torch.no_grad(): - state_dict, info = theseus_optim.forward( - state_dict, - optimizer_kwargs={ - "verbose": True, - # 'track_err_history': True, - # 'track_state_history': True - }, - ) - # assert all(info.status == th.NonlinearOptimizerStatus.CONVERGED ) - # import ipdb; ipdb.set_trace() - - # print(updated_inputs) # , info) - # import ipdb; ipdb.set_trace() - return state_dict - +# Make the controls and rate of change of controls minimal. +def err_fn(optim_vars, aux_vars): + xs, us = optim_vars[: N - 1], optim_vars[N - 1 :] + us = [u.tensor for u in us] + xs = [x.tensor for x in xs] + err_list = copy.copy([u * squared_cost_weight for u in us]) + for idx in range(len(us) - 1): + err_list.append(slew_cost_weight * (us[idx + 1] - us[idx])) -mu = th.Variable(torch.ones(1, 1), name="mu") -# mu = th.Variable(torch.ones(1, 1), name="mu") -z = th.Variable(torch.zeros(1, N * 3), name="z") + err = torch.cat(err_list, axis=-1) + return err -state_dict = {} -prev_g_norm = 0 +u = [th.Vector(2, name="u" + str(idx + 1)) for idx in range(N)] +x = [th.Vector(3, name="x" + str(idx + 1)) for idx in range(1, N)] +optim_vars = x + u +dim = 2 * (N + N - 1) -increase_factor = 2. 
-state_dict = {} - -for i in range(50): - print(f'=== iter {i}') - state_dict = aug_lagrang(mu, z, state_dict=state_dict) - us = [state_dict["u" + str(idx + 1)] for idx in range(N)] +def plot_callback(state_dict): xs = [state_dict["x" + str(idx + 1)] for idx in range(1, N)] - - g_x = g(us, xs) - z.tensor = z.tensor + 2 * mu.tensor * g_x - g_norm = g_x.norm() - if g_norm > 0.25 * prev_g_norm: - mu.tensor = increase_factor * mu.tensor - prev_g_norm = g_norm - - print(xs) - print(us) - print(mu) - print(z) - print(g_x) - - plot_car(xs, us) - plot_actions(us) - -# import ipdb; ipdb.set_trace() - + us = [state_dict["u" + str(idx + 1)] for idx in range(N)] + plot_car_trajectory(xs, us) + plot_action_sequence(us) + + +solve_augmented_lagrangian( + err_fn=err_fn, + optim_vars=optim_vars, + aux_vars=[], + dim=dim, + equality_constraint_fn=equality_constraint_fn, + optimizer_cls=th.LevenbergMarquardt, + optimizer_kwargs=dict(max_iterations=100, step_size=1.0), + verbose=True, + initial_state_dict={}, + callback=plot_callback, +) diff --git a/scratch/quadratic.py b/scratch/quadratic.py index cb148548c..f688f1b3e 100755 --- a/scratch/quadratic.py +++ b/scratch/quadratic.py @@ -3,84 +3,45 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +# +# This is a small test of the augmented Lagrangian solver for the +# optimization problem: +# minimize ||x||^2 subject to x_0 = 1, +# which has the analytic solution [1, 0]. - -# import numpy as np import torch - import theseus as th -import sys -from IPython.core import ultratb - -sys.excepthook = ultratb.FormattedTB(mode="Plain", color_scheme="Neutral", call_pdb=1) - -torch.manual_seed(0) - - -def g(x): - # return x[..., 0] - 1 - return x - 1 - +from augmented_lagrangian import solve_augmented_lagrangian -def aug_lagrang(mu, z): - # def g(optim_vars): - # (x,) = optim_vars - # return x.tensor[..., 0] - 1 - def f(optim_vars, aux_vars): - (x,) = optim_vars - mu, z = aux_vars - augment = torch.sqrt(mu.tensor) * g(x.tensor) + z.tensor / ( - 2.0 * torch.sqrt(mu.tensor) - ) - cost = torch.cat([x.tensor, augment], axis=-1) - return cost +def err_fn(optim_vars, aux_vars): + (x,) = optim_vars + cost = x.tensor + return cost - n_dim = 2 - n_constraints = z.shape[-1] - x = th.Vector(n_dim, name="x") - # mu = th.Variable(torch.ones(1, 1) * 1000, name="mu") - # z = th.Variable(torch.ones(1, 1) * 1, name="z") - optim_vars = [x] - aux_vars = [mu, z] - cost_function = th.AutoDiffCostFunction( - optim_vars, - f, - n_dim + n_constraints, - aux_vars=aux_vars, - name="cost_fn", - ) - objective = th.Objective() - objective.add(cost_function) - optimizer = th.GaussNewton( - objective, - max_iterations=15, - step_size=1.0, - ) - theseus_inputs = { - "x": torch.ones([1, n_dim]).requires_grad_(), - } - theseus_optim = th.TheseusLayer(optimizer) - updated_inputs, info = theseus_optim.forward( - theseus_inputs, - ) +def equality_constraint_fn(optim_vars, aux_vars): + (x,) = optim_vars + x = x.tensor + return x[..., 0] - 1 - # print(updated_inputs) # , info) - return updated_inputs["x"] +dim = 2 +x = th.Vector(dim, name="x") +optim_vars = [x] -mu = th.Variable(torch.ones(1, 1), name="mu") -z = th.Variable(torch.zeros(1, 2), name="z") +state_dict = solve_augmented_lagrangian( + err_fn=err_fn, + optim_vars=optim_vars, + aux_vars=[], + dim=dim, + equality_constraint_fn=equality_constraint_fn, + optimizer_cls=th.LevenbergMarquardt, + optimizer_kwargs=dict(max_iterations=100, step_size=1.0), + verbose=True, + 
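+    # with an empty dict, the optimization variables keep their default
+    # Theseus initialization on the first outer iteration,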
initial_state_dict={}, +) -prev_g_norm = 0 -# print(x) -for iter in range(10): - x = aug_lagrang(mu, z) - g_x = g(x) - z.tensor = z.tensor + 2 * mu.tensor * g_x - g_norm = g_x.norm() - if g_norm > 0.25 * prev_g_norm: - mu.tensor = 2 * mu.tensor - prev_g_norm = g_norm - print(x, mu, z) +true_solution = torch.tensor([1.0, 0.0]) +threshold = 1e-5 +assert (state_dict["x"].squeeze() - true_solution).norm() <= threshold From 2cd06dac8650eceb04e4167c6e289fa437de41aa Mon Sep 17 00:00:00 2001 From: Brandon Amos Date: Fri, 3 Feb 2023 16:44:16 -0500 Subject: [PATCH 09/10] add comments --- scratch/augmented_lagrangian.py | 2 +- scratch/car.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/scratch/augmented_lagrangian.py b/scratch/augmented_lagrangian.py index 532bdfa80..23aad101c 100644 --- a/scratch/augmented_lagrangian.py +++ b/scratch/augmented_lagrangian.py @@ -1,5 +1,5 @@ # Implements the augmented Lagrangian method for constrained -# nonlinear least squares as described on page 11.21 ofQ +# nonlinear least squares as described on page 11.21 of # http://www.seas.ucla.edu/~vandenbe/133B/lectures/nllseq.pdf import copy diff --git a/scratch/car.py b/scratch/car.py index 4e66110e6..e99b6f2ec 100755 --- a/scratch/car.py +++ b/scratch/car.py @@ -3,6 +3,9 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +# +# This is the car example starting on page 23 of: +# http://www.seas.ucla.edu/~vandenbe/133B/lectures/nllseq.pdf import numpy as np import torch From 80b1b0ce8bf95837ad540e5a3e821046a821056f Mon Sep 17 00:00:00 2001 From: Brandon Amos Date: Fri, 3 Feb 2023 17:09:04 -0500 Subject: [PATCH 10/10] comments, disable transparency --- scratch/.gitignore | 1 + scratch/augmented_lagrangian.py | 5 +++++ scratch/car.py | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/scratch/.gitignore b/scratch/.gitignore index aab52d906..63660c052 100644 --- a/scratch/.gitignore +++ b/scratch/.gitignore @@ -1 +1,2 @@ +# TODO: delete this with scratch *.png \ No newline at end of file diff --git a/scratch/augmented_lagrangian.py b/scratch/augmented_lagrangian.py index 23aad101c..c0663c573 100644 --- a/scratch/augmented_lagrangian.py +++ b/scratch/augmented_lagrangian.py @@ -1,3 +1,8 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +# # Implements the augmented Lagrangian method for constrained # nonlinear least squares as described on page 11.21 of # http://www.seas.ucla.edu/~vandenbe/133B/lectures/nllseq.pdf diff --git a/scratch/car.py b/scratch/car.py index e99b6f2ec..181dbef7d 100755 --- a/scratch/car.py +++ b/scratch/car.py @@ -124,7 +124,7 @@ def plot_action_sequence(us): ax.plot(us[:, 0], label="speed") ax.plot(us[:, 1], label="steering angle") fig.legend(ncol=2, loc="upper center") - fig.savefig("actions.png", transparent=True) + fig.savefig("actions.png") plt.close(fig)
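
A minimal pure-PyTorch cross-check of the augmented Lagrangian loop above (a
sketch, under the assumption that it mirrors scratch/quadratic.py and
scratch/augmented_lagrangian.py; no Theseus dependency). It solves
minimize ||x||^2 subject to x_0 = 1, whose solution is x = (1, 0). Because
the inner problem is an unconstrained quadratic, it is solved in closed form
rather than with an iterative optimizer:

    import torch

    x = torch.zeros(2)
    mu = 1.0            # penalty weight
    z = 0.0             # Lagrange multiplier estimate (true value is -2)
    prev_g_norm = 0.0

    for i in range(10):
        # Inner solve: argmin_x ||x||^2 + z * g(x) + mu * g(x)^2 with
        # g(x) = x[0] - 1. Stationarity in x[0]:
        #   2*x0 + z + 2*mu*(x0 - 1) = 0  =>  x0 = (2*mu - z) / (2 + 2*mu).
        x[0] = (2 * mu - z) / (2 + 2 * mu)
        x[1] = 0.0
        g = x[0].item() - 1.0
        z += 2 * mu * g                      # dual ascent step
        g_norm = abs(g)
        if i > 0 and g_norm > 0.25 * prev_g_norm:
            mu *= 2.0                        # tighten penalty when progress stalls
        prev_g_norm = g_norm

    print(x, z, mu)  # x -> tensor([1., 0.]), z -> approx. -2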