
Commit 1413d57

Merge of 48b8b5f
PiperOrigin-RevId: 607807654 Change-Id: I6c545b1d14787526f805997fff1671402151fefc
2 parents e98a151 + 48b8b5f commit 1413d57

15 files changed: +564 -861 lines

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@
 .vs/
 # Exclude temporary folders
 *.egg-info/
+.eggs/
 build/
 build_cmake/
 # Exclude macOS folder attributes

README.md

Lines changed: 1 addition & 1 deletion
@@ -110,7 +110,7 @@ We provide a simple Python API for MJPC. This API is still experimental and expe
 - [direct.py](python/mujoco_mpc/direct.py) for available methods for direct optimization.

 ## Installing via Pip
-The MJPC Python module can be installed with:
+First, build MJPC (see above), then the MJPC Python module can be installed with:
 ```sh
 python "${MUJOCO_MPC_ROOT}/python/${API}.py" install
 ```

mjpc/planners/cross_entropy/planner.cc

Lines changed: 33 additions & 66 deletions
@@ -106,11 +106,9 @@ void CrossEntropyPlanner::Allocate() {
     trajectory[i].Allocate(kMaxTrajectoryHorizon);
     candidate_policy[i].Allocate(model, *task, kMaxTrajectoryHorizon);
   }
-
-  // elite average trajectory
-  elite_avg.Initialize(num_state, model->nu, task->num_residual,
-                       task->num_trace, kMaxTrajectoryHorizon);
-  elite_avg.Allocate(kMaxTrajectoryHorizon);
+  nominal_trajectory.Initialize(num_state, model->nu, task->num_residual,
+                                task->num_trace, kMaxTrajectoryHorizon);
+  nominal_trajectory.Allocate(kMaxTrajectoryHorizon);
 }

 // reset memory to zeros
@@ -143,7 +141,7 @@ void CrossEntropyPlanner::Reset(int horizon,
     trajectory[i].Reset(kMaxTrajectoryHorizon);
     candidate_policy[i].Reset(horizon);
   }
-  elite_avg.Reset(kMaxTrajectoryHorizon);
+  nominal_trajectory.Reset(kMaxTrajectoryHorizon);

   for (const auto& d : data_) {
     mju_zero(d->ctrl, model->nu);
@@ -161,11 +159,6 @@ void CrossEntropyPlanner::SetState(const State& state) {

 // optimize nominal policy using random sampling
 void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
-  // check horizon
-  if (horizon != elite_avg.horizon) {
-    NominalTrajectory(horizon, pool);
-  }
-
   // if num_trajectory_ has changed, use it in this new iteration.
   // num_trajectory_ might change while this function runs. Keep it constant
   // for the duration of this function.
@@ -220,66 +213,29 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
   int num_spline_points = resampled_policy.num_spline_points;
   int num_parameters = resampled_policy.num_parameters;

-  // reset parameters scratch to zero
-  std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0);
-
-  // reset elite average
-  elite_avg.Reset(horizon);
-
-  // set elite average trajectory times
-  for (int tt = 0; tt <= horizon; tt++) {
-    elite_avg.times[tt] = time + tt * model->opt.timestep;
-  }
-
-  // best elite
-  int idx = trajectory_order[0];
+  // averaged return over elites
+  double avg_return = 0.0;

-  // add parameters
-  mju_copy(parameters_scratch.data(), candidate_policy[idx].parameters.data(),
-           num_parameters);
+  // reset parameters scratch
+  std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0);

-  // copy first elite trajectory
-  mju_copy(elite_avg.actions.data(), trajectory[idx].actions.data(),
-           model->nu * (horizon - 1));
-  mju_copy(elite_avg.trace.data(), trajectory[idx].trace.data(),
-           trajectory[idx].dim_trace * horizon);
-  mju_copy(elite_avg.residual.data(), trajectory[idx].residual.data(),
-           elite_avg.dim_residual * horizon);
-  mju_copy(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon);
-  elite_avg.total_return = trajectory[idx].total_return;
-
-  // loop over remaining elites to compute average
-  for (int i = 1; i < n_elite; i++) {
+  // loop over elites to compute average
+  for (int i = 0; i < n_elite; i++) {
     // ordered trajectory index
     int idx = trajectory_order[i];

     // add parameters
     mju_addTo(parameters_scratch.data(),
               candidate_policy[idx].parameters.data(), num_parameters);

-    // add elite trajectory
-    mju_addTo(elite_avg.actions.data(), trajectory[idx].actions.data(),
-              model->nu * (horizon - 1));
-    mju_addTo(elite_avg.trace.data(), trajectory[idx].trace.data(),
-              trajectory[idx].dim_trace * horizon);
-    mju_addTo(elite_avg.residual.data(), trajectory[idx].residual.data(),
-              elite_avg.dim_residual * horizon);
-    mju_addTo(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon);
-    elite_avg.total_return += trajectory[idx].total_return;
+    // add total return
+    avg_return += trajectory[idx].total_return;
   }

   // normalize
   mju_scl(parameters_scratch.data(), parameters_scratch.data(), 1.0 / n_elite,
           num_parameters);
-  mju_scl(elite_avg.actions.data(), elite_avg.actions.data(), 1.0 / n_elite,
-          model->nu * (horizon - 1));
-  mju_scl(elite_avg.trace.data(), elite_avg.trace.data(), 1.0 / n_elite,
-          elite_avg.dim_trace * horizon);
-  mju_scl(elite_avg.residual.data(), elite_avg.residual.data(), 1.0 / n_elite,
-          elite_avg.dim_residual * horizon);
-  mju_scl(elite_avg.costs.data(), elite_avg.costs.data(), 1.0 / n_elite,
-          horizon);
-  elite_avg.total_return /= n_elite;
+  avg_return /= n_elite;

   // loop over elites to compute variance
   std::fill(variance.begin(), variance.end(), 0.0);  // reset variance to zero
@@ -304,25 +260,28 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
   }

   // improvement: compare nominal to elite average
-  improvement = mju_max(
-      elite_avg.total_return - trajectory[trajectory_order[0]].total_return,
-      0.0);
+  improvement =
+      mju_max(avg_return - trajectory[trajectory_order[0]].total_return, 0.0);

   // stop timer
   policy_update_compute_time = GetDuration(policy_update_start);
 }

 // compute trajectory using nominal policy
-void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) {
+void CrossEntropyPlanner::NominalTrajectory(int horizon) {
   // set policy
   auto nominal_policy = [&cp = resampled_policy](
                             double* action, const double* state, double time) {
     cp.Action(action, state, time);
   };

   // rollout nominal policy
-  elite_avg.Rollout(nominal_policy, task, model, data_[0].get(), state.data(),
-                    time, mocap.data(), userdata.data(), horizon);
+  nominal_trajectory.Rollout(nominal_policy, task, model,
+                             data_[ThreadPool::WorkerId()].get(), state.data(),
+                             time, mocap.data(), userdata.data(), horizon);
+}
+void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) {
+  NominalTrajectory(horizon);
 }

 // set action from policy
@@ -363,6 +322,8 @@ void CrossEntropyPlanner::ResamplePolicy(int horizon) {

   LinearRange(resampled_policy.times.data(), time_shift,
               resampled_policy.times[0], num_spline_points);
+
+  resampled_policy.representation = policy.representation;
 }

 // add random noise to nominal policy
@@ -446,12 +407,18 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon,
           state.data(), time, mocap.data(), userdata.data(), horizon);
     });
   }
-  pool.WaitCount(count_before + num_trajectory);
+  // nominal
+  pool.Schedule([&s = *this, horizon]() { s.NominalTrajectory(horizon); });
+
+  // wait
+  pool.WaitCount(count_before + num_trajectory + 1);
   pool.ResetCount();
 }

-// returns the nominal trajectory (this is the purple trace)
-const Trajectory* CrossEntropyPlanner::BestTrajectory() { return &elite_avg; }
+// returns the **nominal** trajectory (this is the purple trace)
+const Trajectory* CrossEntropyPlanner::BestTrajectory() {
+  return &nominal_trajectory;
+}

 // visualize planner-specific traces
 void CrossEntropyPlanner::Traces(mjvScene* scn) {
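Taken together, these hunks drop the full elite-average `Trajectory` (actions, traces, residuals, costs) in favor of averaging only the elite spline parameters and their scalar total returns, while a separate `nominal_trajectory` is rolled out for visualization. Below is a minimal, self-contained sketch of the simplified elite update, using `std::vector` in place of the planner's `mju_*` buffers; the function name `AverageElites` and its signature are illustrative, not from the MJPC source.

```cpp
// Minimal sketch of the simplified elite update (illustrative only; not the
// MJPC source). Candidates are ranked by total return (a cost: lower is
// better); the top n_elite parameter vectors and returns are averaged.
#include <algorithm>
#include <numeric>
#include <vector>

struct EliteUpdate {
  std::vector<double> mean_parameters;  // averaged spline parameters
  double avg_return = 0.0;              // averaged return over elites
  double improvement = 0.0;             // elite average vs. best candidate
};

EliteUpdate AverageElites(const std::vector<std::vector<double>>& parameters,
                          const std::vector<double>& returns, int n_elite) {
  // order candidate indices by total return, best (lowest cost) first
  std::vector<int> order(returns.size());
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(),
            [&](int a, int b) { return returns[a] < returns[b]; });

  // accumulate parameters and returns over the n_elite best candidates
  EliteUpdate result;
  result.mean_parameters.assign(parameters[0].size(), 0.0);
  for (int i = 0; i < n_elite; i++) {
    int idx = order[i];
    for (size_t k = 0; k < result.mean_parameters.size(); k++) {
      result.mean_parameters[k] += parameters[idx][k];
    }
    result.avg_return += returns[idx];
  }

  // normalize
  for (double& p : result.mean_parameters) p /= n_elite;
  result.avg_return /= n_elite;

  // improvement: elite-average return relative to the single best candidate
  result.improvement = std::max(result.avg_return - returns[order[0]], 0.0);
  return result;
}
```

Because returns are costs (lower is better), the elite average can never beat the single best candidate, so the `improvement` value is non-negative by construction, matching the `mju_max(..., 0.0)` in the diff.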

mjpc/planners/cross_entropy/planner.h

Lines changed: 2 additions & 4 deletions
@@ -57,6 +57,7 @@ class CrossEntropyPlanner : public Planner {

   // compute trajectory using nominal policy
   void NominalTrajectory(int horizon, ThreadPool& pool) override;
+  void NominalTrajectory(int horizon);

   // set action from policy
   void ActionFromPolicy(double* action, const double* state, double time,
@@ -111,7 +112,7 @@ class CrossEntropyPlanner : public Planner {

   // trajectories
   Trajectory trajectory[kMaxTrajectory];
-  Trajectory elite_avg;
+  Trajectory nominal_trajectory;

   // order of indices of rolled out trajectories, ordered by total return
   std::vector<int> trajectory_order;
@@ -129,9 +130,6 @@ class CrossEntropyPlanner : public Planner {
   // improvement
   double improvement;

-  // flags
-  int processed_noise_status;
-
   // timing
   std::atomic<double> noise_compute_time;
   double rollouts_compute_time;
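The new single-argument `NominalTrajectory(int horizon)` exists so the nominal rollout can itself be scheduled as a thread-pool task in `Rollouts()` (the `ThreadPool&` override now simply forwards to it), with the caller waiting for `num_trajectory + 1` completions and each task using its own worker's `mjData` via `data_[ThreadPool::WorkerId()]`. A rough sketch of that scheduling pattern, using `std::async` in place of the project's `ThreadPool`; the function name and lambda bodies here are placeholders, not the planner's rollout code.

```cpp
// Rough sketch of the scheduling pattern (not the MJPC ThreadPool API):
// launch num_trajectory candidate rollouts plus one nominal rollout, then
// wait for all num_trajectory + 1 tasks.
#include <future>
#include <vector>

void RolloutsSketch(int num_trajectory, int horizon) {
  std::vector<std::future<void>> tasks;
  tasks.reserve(num_trajectory + 1);

  // candidate rollouts (placeholder bodies)
  for (int i = 0; i < num_trajectory; i++) {
    tasks.push_back(std::async(std::launch::async, [i, horizon]() {
      // roll out noisy candidate policy i for `horizon` steps
    }));
  }

  // nominal rollout, queued like any other task
  tasks.push_back(std::async(std::launch::async, [horizon]() {
    // roll out the resampled nominal policy for `horizon` steps
  }));

  // equivalent of pool.WaitCount(count_before + num_trajectory + 1)
  for (auto& t : tasks) t.get();
}
```

In the actual planner, running each rollout on its own worker's `mjData` is what lets the nominal rollout execute concurrently with the candidate rollouts without sharing simulation state.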

python/mujoco_mpc/agent_test.py

Lines changed: 17 additions & 17 deletions
@@ -46,7 +46,7 @@ class AgentTest(parameterized.TestCase):
   def test_set_task_parameters(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/cartpole/task.xml"
+        / "build/mjpc/tasks/cartpole/task.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     with agent_lib.Agent(task_id="Cartpole", model=model) as agent:
@@ -56,7 +56,7 @@ def test_set_task_parameters(self):
   def test_set_subprocess_working_dir(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/cartpole/task.xml"
+        / "build/mjpc/tasks/cartpole/task.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))

@@ -76,7 +76,7 @@ def test_set_subprocess_working_dir(self):
   def test_step_env_with_planner(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/particle/task_timevarying.xml"
+        / "build/mjpc/tasks/particle/task_timevarying.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     data = mujoco.MjData(model)
@@ -109,7 +109,7 @@ def test_step_env_with_planner(self):
   def test_env_initialized_to_home_keyframe(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/quadruped/task_flat.xml"
+        / "build/mjpc/tasks/quadruped/task_flat.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))

@@ -126,7 +126,7 @@ def test_action_averaging_doesnt_change_state(self, nominal):
     # out physics, but the API should be implemented not to mutate the state
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/cartpole/task.xml"
+        / "build/mjpc/tasks/cartpole/task.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     data = mujoco.MjData(model)
@@ -161,7 +161,7 @@ def test_action_averaging_improves_control(self):
     # expect action averaging to be a bit better
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/cartpole/task.xml"
+        / "build/mjpc/tasks/cartpole/task.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     data = mujoco.MjData(model)
@@ -213,7 +213,7 @@ def test_stepping_on_agent_side(self):
     """Test an alternative way of stepping the physics, on the agent side."""
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/cartpole/task.xml"
+        / "build/mjpc/tasks/cartpole/task.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     data = mujoco.MjData(model)
@@ -246,7 +246,7 @@ def test_stepping_on_agent_side(self):
   def test_set_cost_weights(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/cartpole/task.xml"
+        / "build/mjpc/tasks/cartpole/task.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))

@@ -271,7 +271,7 @@ def test_set_cost_weights(self):
   def test_get_cost_weights(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/cartpole/task.xml"
+        / "build/mjpc/tasks/cartpole/task.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))

@@ -310,7 +310,7 @@ def test_get_cost_weights(self):
   def test_set_state_with_lists(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/particle/task_timevarying.xml"
+        / "build/mjpc/tasks/particle/task_timevarying.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     data = mujoco.MjData(model)
@@ -330,7 +330,7 @@ def test_set_state_with_lists(self):
   def test_get_set_default_mode(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/cartpole/task.xml"
+        / "build/mjpc/tasks/cartpole/task.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     with agent_lib.Agent(task_id="Cartpole", model=model) as agent:
@@ -341,7 +341,7 @@ def test_get_set_default_mode(self):
   def test_get_set_mode(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/quadruped/task_flat.xml"
+        / "build/mjpc/tasks/quadruped/task_flat.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     with agent_lib.Agent(task_id="Quadruped Flat", model=model) as agent:
@@ -352,7 +352,7 @@ def test_get_set_mode(self):
   def test_get_all_modes(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/quadruped/task_flat.xml"
+        / "build/mjpc/tasks/quadruped/task_flat.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     with agent_lib.Agent(task_id="Quadruped Flat", model=model) as agent:
@@ -365,7 +365,7 @@ def test_get_all_modes(self):
   def test_set_mode_error(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/quadruped/task_flat.xml"
+        / "build/mjpc/tasks/quadruped/task_flat.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     with agent_lib.Agent(task_id="Quadruped Flat", model=model) as agent:
@@ -374,7 +374,7 @@ def test_set_mode_error(self):
   def test_set_task_parameters_from_another_agent(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/cartpole/task.xml"
+        / "build/mjpc/tasks/cartpole/task.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     with agent_lib.Agent(task_id="Cartpole", model=model) as agent:
@@ -393,7 +393,7 @@ def test_set_task_parameters_from_another_agent(self):
   def test_best_trajectory(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/particle/task_timevarying.xml"
+        / "build/mjpc/tasks/particle/task_timevarying.xml"
     )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     data = mujoco.MjData(model)
@@ -418,7 +418,7 @@ def test_best_trajectory(self):
   def test_set_mocap(self):
     model_path = (
         pathlib.Path(__file__).parent.parent.parent
-        / "mjpc/tasks/particle/task_timevarying.xml"
+        / "build/mjpc/tasks/particle/task_timevarying.xml"
    )
     model = mujoco.MjModel.from_xml_path(str(model_path))
     with agent_lib.Agent(task_id="ParticleFixed", model=model) as agent:
