|
1 | | -# %% |
2 | 1 | # Copyright 2023 DeepMind Technologies Limited |
3 | 2 | # |
4 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); |
|
14 | 13 | # limitations under the License. |
15 | 14 |
|
16 | 15 | import matplotlib.pyplot as plt |
| 16 | +import mediapy as media |
17 | 17 | import mujoco |
18 | | -import direct_optimizer |
| 18 | +from mujoco_mpc import direct as direct_lib |
19 | 19 | import numpy as np |
20 | | -# %% |
21 | | -## Example |
22 | 20 |
|
| 21 | + |
| 22 | +# %% |
23 | 23 | # 2D Particle Model |
24 | 24 | xml = """ |
25 | 25 | <mujoco model="Particle"> |
|
40 | 40 |
|
41 | 41 | <option timestep="0.01"></option> |
42 | 42 |
|
| 43 | + <default> |
| 44 | + <joint type="hinge" axis="0 0 1" limited="true" range="-.29 .29" damping="1"/> |
| 45 | + <motor gear=".1" ctrlrange="-1 1" ctrllimited="true"/> |
| 46 | + </default> |
| 47 | +
|
43 | 48 | <worldbody> |
44 | 49 | <light name="light" pos="0 0 1"/> |
45 | 50 | <camera name="fixed" pos="0 0 .75" quat="1 0 0 0"/> |
|
72 | 77 |
|
73 | 78 | model = mujoco.MjModel.from_xml_string(xml) |
74 | 79 | data = mujoco.MjData(model) |
75 | | - |
| 80 | +renderer = mujoco.Renderer(model) |
76 | 81 | # %% |
77 | 82 | # initialization |
78 | | -T = 400 |
| 83 | +T = 100 |
79 | 84 | q0 = np.array([-0.25, -0.25]) |
80 | 85 | qM = np.array([-0.25, 0.25]) |
81 | 86 | qN = np.array([0.25, -0.25]) |
|
92 | 97 |
|
93 | 98 | # time |
94 | 99 | time = [t * model.opt.timestep for t in range(T)] |
95 | | - |
96 | 100 | # %% |
97 | 101 | # plot position |
98 | 102 | fig = plt.figure() |
|
107 | 111 | plt.legend() |
108 | 112 | plt.xlabel("X") |
109 | 113 | plt.ylabel("Y") |
110 | | - |
111 | 114 | # %% |
112 | | -# create optimizer |
113 | | -optimizer = direct_optimizer.DirectOptimizer(model, T) |
114 | | - |
115 | | -# settings |
116 | | -optimizer.max_iterations = 10 |
117 | | -optimizer.max_search_iterations = 10 |
118 | | - |
119 | | -# force weight |
120 | | -fw = 5.0e2 |
121 | | - |
| 115 | +# optimizer model |
| 116 | +model_optimizer = mujoco.MjModel.from_xml_string(xml) |
| 117 | + |
| 118 | +# direct optimizer |
| 119 | +configuration_length = T + 2 |
| 120 | +optimizer = direct_lib.Direct( |
| 121 | + model=model_optimizer, |
| 122 | + configuration_length=configuration_length, |
| 123 | +) |
| 124 | +# %% |
122 | 125 | # set data |
123 | | -for t in range(T): |
| 126 | +for t in range(configuration_length): |
| 127 | + # unpack |
| 128 | + qt = np.zeros(model.nq) |
| 129 | + st = np.zeros(model.nsensordata) |
| 130 | + mt = np.zeros(model.nsensor) |
| 131 | + ft = np.zeros(model.nv) |
| 132 | + ct = np.zeros(model.nu) |
| 133 | + tt = np.array([t * model.opt.timestep]) |
| 134 | + |
124 | 135 | # set initial state |
125 | 136 | if t == 0 or t == 1: |
126 | | - optimizer.pinned[t] = True |
127 | | - optimizer.qpos[:, t] = q0 |
128 | | - if t == 1: |
129 | | - optimizer.weights_force[:, t] = fw |
| 137 | + qt = q0 |
| 138 | + st = q0 |
| 139 | + mt = np.array([1, 1]) |
130 | 140 |
|
131 | 141 | # set goal |
132 | | - elif t >= T - 2: |
133 | | - optimizer.pinned[t] = True |
134 | | - optimizer.qpos[:, t] = qT |
135 | | - if t == T - 2: |
136 | | - optimizer.weights_force[:, t] = fw |
| 142 | + elif t >= configuration_length - 2: |
| 143 | + qt = qT |
| 144 | + st = qT |
| 145 | + mt = np.array([1, 1]) |
137 | 146 |
|
138 | 147 | # set waypoint |
139 | | - elif t == 100: |
140 | | - optimizer.pinned[t] = False |
141 | | - optimizer.qpos[:, t] = qM |
142 | | - optimizer.sensor_target[: model.nq, t] = qM |
143 | | - optimizer.weights_force[:, t] = fw |
144 | | - optimizer.weights_sensor[:, t] = 1.0 |
| 148 | + elif t == 25: |
| 149 | + st = qM |
| 150 | + mt = np.array([1, 1]) |
145 | 151 |
|
146 | 152 | # set waypoint |
147 | | - elif t == 300: |
148 | | - optimizer.pinned[t] = False |
149 | | - optimizer.qpos[:, t] = qN |
150 | | - optimizer.sensor_target[: model.nq, t] = qN |
151 | | - optimizer.weights_force[:, t] = fw |
152 | | - optimizer.weights_sensor[:, t] = 1.0 |
| 153 | + elif t == 75: |
| 154 | + st = qN |
| 155 | + mt = np.array([1, 1]) |
153 | 156 |
|
154 | 157 | # initialize qpos |
155 | 158 | else: |
156 | | - optimizer.pinned[t] = False |
157 | | - optimizer.qpos[:, t] = qinterp[:, t] |
158 | | - optimizer.weights_force[:, t] = fw |
159 | | - optimizer.weights_sensor[:, t] = 0.0 |
| 159 | + qt = qinterp[:, t - 1] |
| 160 | + mt = np.array([0, 0]) |
| 161 | + |
| 162 | + # set data |
| 163 | + data_ = optimizer.data( |
| 164 | + t, |
| 165 | + configuration=qt, |
| 166 | + sensor_measurement=st, |
| 167 | + sensor_mask=mt, |
| 168 | + force_measurement=ft, |
| 169 | + time=tt, |
| 170 | + ) |
| 171 | +# %% |
| 172 | +# set std |
| 173 | +optimizer.noise(process=np.array([1000.0, 1000.0]), sensor=np.array([1.0, 1.0])) |
| 174 | + |
| 175 | +# set settings |
| 176 | +optimizer.settings( |
| 177 | + sensor_flag=True, |
| 178 | + force_flag=True, |
| 179 | + max_smoother_iterations=1000, |
| 180 | + max_search_iterations=1000, |
| 181 | + regularization_initial=1.0e-12, |
| 182 | + gradient_tolerance=1.0e-6, |
| 183 | + search_direction_tolerance=1.0e-6, |
| 184 | + cost_tolerance=1.0e-6, |
| 185 | + first_step_position_sensors=True, |
| 186 | + last_step_position_sensors=True, |
| 187 | + last_step_velocity_sensors=True, |
| 188 | +) |
160 | 189 |
|
161 | 190 | # optimize |
162 | 191 | optimizer.optimize() |
163 | 192 |
|
164 | | -# status |
165 | | -optimizer.status() |
| 193 | +# costs |
| 194 | +optimizer.print_cost() |
166 | 195 |
|
| 196 | +# status |
| 197 | +optimizer.print_status() |
| 198 | +# %% |
| 199 | +# get estimated trajectories |
| 200 | +q_est = np.zeros((model_optimizer.nq, configuration_length)) |
| 201 | +v_est = np.zeros((model_optimizer.nv, configuration_length)) |
| 202 | +s_est = np.zeros((model_optimizer.nsensordata, configuration_length)) |
| 203 | +f_est = np.zeros((model_optimizer.nv, configuration_length)) |
| 204 | +t_est = np.zeros(configuration_length) |
| 205 | +for t in range(configuration_length): |
| 206 | + data_ = optimizer.data(t) |
| 207 | + q_est[:, t] = data_["configuration"] |
| 208 | + v_est[:, t] = data_["velocity"] |
| 209 | + s_est[:, t] = data_["sensor_prediction"] |
| 210 | + f_est[:, t] = data_["force_prediction"] |
| 211 | + t_est[t] = data_["time"] |
167 | 212 | # %% |
168 | 213 | # plot position |
169 | 214 | fig = plt.figure() |
170 | 215 |
|
171 | 216 | plt.plot(qinterp[0, :], qinterp[1, :], label="interpolation", color="black") |
172 | | -plt.plot( |
173 | | - optimizer.qpos[0, :], |
174 | | - optimizer.qpos[1, :], |
175 | | - label="direct trajopt", |
176 | | - color="orange", |
177 | | -) |
| 217 | +plt.plot(q_est[0, :], q_est[1, :], label="direct trajopt", color="orange") |
178 | 218 | plt.plot(q0[0], q0[1], color="magenta", label="waypoint", marker="o") |
179 | 219 | plt.plot(qM[0], qM[1], color="magenta", marker="o") |
180 | 220 | plt.plot(qN[0], qN[1], color="magenta", marker="o") |
|
184 | 224 | plt.xlabel("X") |
185 | 225 | plt.ylabel("Y") |
186 | 226 |
|
187 | | -# %% |
188 | | -# recover ctrl |
189 | | -mujoco.mj_forward(model, data) |
190 | | -ctrl = np.vstack( |
191 | | - [ |
192 | | - np.linalg.pinv(data.actuator_moment.T) @ optimizer.force[:, t] |
193 | | - for t in range(T) |
194 | | - ] |
195 | | -) |
196 | | - |
197 | | -# plot ctrl |
| 227 | +# plot velocity |
198 | 228 | fig = plt.figure() |
199 | | -times = [t * model.opt.timestep for t in range(T)] |
200 | 229 |
|
201 | | -plt.step(times, ctrl[:, 0], label="action 0", color="orange") |
202 | | -plt.step(times, ctrl[:, 1], label="action 1", color="magenta") |
| 230 | +# velocity |
| 231 | +plt.plot(t_est[1:] - model.opt.timestep, v_est[0, 1:], label="v0", color="cyan") |
| 232 | +plt.plot( |
| 233 | + t_est[1:] - model.opt.timestep, v_est[1, 1:], label="v1", color="orange" |
| 234 | +) |
| 235 | + |
203 | 236 | plt.legend() |
204 | 237 | plt.xlabel("Time (s)") |
205 | | -plt.ylabel("ctrl") |
| 238 | +plt.ylabel("Velocity") |
| 239 | +# %% |
| 240 | +# frames optimized |
| 241 | +frames_opt = [] |
| 242 | + |
| 243 | +# simulate |
| 244 | +for t in range(configuration_length - 1): |
| 245 | + # get solution from optimizer |
| 246 | + data_ = optimizer.data(t) |
| 247 | + |
| 248 | + # set configuration |
| 249 | + data.qpos = q_est[:, t] |
| 250 | + data.qvel = v_est[:, t] |
| 251 | + |
| 252 | + mujoco.mj_forward(model, data) |
| 253 | + |
| 254 | + # render and save frames |
| 255 | + renderer.update_scene(data) |
| 256 | + pixels = renderer.render() |
| 257 | + frames_opt.append(pixels) |
| 258 | + |
| 259 | +# display video |
| 260 | +# media.show_video(frames_opt, fps=1.0 / model.opt.timestep, loop=False) |
0 commit comments