@@ -106,11 +106,9 @@ void CrossEntropyPlanner::Allocate() {
     trajectory[i].Allocate(kMaxTrajectoryHorizon);
     candidate_policy[i].Allocate(model, *task, kMaxTrajectoryHorizon);
   }
-
-  // elite average trajectory
-  elite_avg.Initialize(num_state, model->nu, task->num_residual,
-                       task->num_trace, kMaxTrajectoryHorizon);
-  elite_avg.Allocate(kMaxTrajectoryHorizon);
+  nominal_trajectory.Initialize(num_state, model->nu, task->num_residual,
+                                task->num_trace, kMaxTrajectoryHorizon);
+  nominal_trajectory.Allocate(kMaxTrajectoryHorizon);
 }
 
 // reset memory to zeros
@@ -143,7 +141,7 @@ void CrossEntropyPlanner::Reset(int horizon,
     trajectory[i].Reset(kMaxTrajectoryHorizon);
     candidate_policy[i].Reset(horizon);
   }
-  elite_avg.Reset(kMaxTrajectoryHorizon);
+  nominal_trajectory.Reset(kMaxTrajectoryHorizon);
 
   for (const auto& d : data_) {
     mju_zero(d->ctrl, model->nu);
@@ -161,11 +159,6 @@ void CrossEntropyPlanner::SetState(const State& state) {
 
 // optimize nominal policy using random sampling
 void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
-  // check horizon
-  if (horizon != elite_avg.horizon) {
-    NominalTrajectory(horizon, pool);
-  }
-
   // if num_trajectory_ has changed, use it in this new iteration.
   // num_trajectory_ might change while this function runs. Keep it constant
   // for the duration of this function.
@@ -220,66 +213,29 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
   int num_spline_points = resampled_policy.num_spline_points;
   int num_parameters = resampled_policy.num_parameters;
 
-  // reset parameters scratch to zero
-  std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0);
-
-  // reset elite average
-  elite_avg.Reset(horizon);
-
-  // set elite average trajectory times
-  for (int tt = 0; tt <= horizon; tt++) {
-    elite_avg.times[tt] = time + tt * model->opt.timestep;
-  }
-
-  // best elite
-  int idx = trajectory_order[0];
+  // averaged return over elites
+  double avg_return = 0.0;
 
-  // add parameters
-  mju_copy(parameters_scratch.data(), candidate_policy[idx].parameters.data(),
-           num_parameters);
+  // reset parameters scratch
+  std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0);
 
-  // copy first elite trajectory
-  mju_copy(elite_avg.actions.data(), trajectory[idx].actions.data(),
-           model->nu * (horizon - 1));
-  mju_copy(elite_avg.trace.data(), trajectory[idx].trace.data(),
-           trajectory[idx].dim_trace * horizon);
-  mju_copy(elite_avg.residual.data(), trajectory[idx].residual.data(),
-           elite_avg.dim_residual * horizon);
-  mju_copy(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon);
-  elite_avg.total_return = trajectory[idx].total_return;
-
-  // loop over remaining elites to compute average
-  for (int i = 1; i < n_elite; i++) {
+  // loop over elites to compute average
+  for (int i = 0; i < n_elite; i++) {
     // ordered trajectory index
     int idx = trajectory_order[i];
 
     // add parameters
     mju_addTo(parameters_scratch.data(),
               candidate_policy[idx].parameters.data(), num_parameters);
 
-    // add elite trajectory
-    mju_addTo(elite_avg.actions.data(), trajectory[idx].actions.data(),
-              model->nu * (horizon - 1));
-    mju_addTo(elite_avg.trace.data(), trajectory[idx].trace.data(),
-              trajectory[idx].dim_trace * horizon);
-    mju_addTo(elite_avg.residual.data(), trajectory[idx].residual.data(),
-              elite_avg.dim_residual * horizon);
-    mju_addTo(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon);
-    elite_avg.total_return += trajectory[idx].total_return;
+    // add total return
+    avg_return += trajectory[idx].total_return;
   }
 
   // normalize
   mju_scl(parameters_scratch.data(), parameters_scratch.data(), 1.0 / n_elite,
           num_parameters);
-  mju_scl(elite_avg.actions.data(), elite_avg.actions.data(), 1.0 / n_elite,
-          model->nu * (horizon - 1));
-  mju_scl(elite_avg.trace.data(), elite_avg.trace.data(), 1.0 / n_elite,
-          elite_avg.dim_trace * horizon);
-  mju_scl(elite_avg.residual.data(), elite_avg.residual.data(), 1.0 / n_elite,
-          elite_avg.dim_residual * horizon);
-  mju_scl(elite_avg.costs.data(), elite_avg.costs.data(), 1.0 / n_elite,
-          horizon);
-  elite_avg.total_return /= n_elite;
+  avg_return /= n_elite;
 
   // loop over elites to compute variance
   std::fill(variance.begin(), variance.end(), 0.0);  // reset variance to zero
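
Note on the hunk above: the update now keeps only the elite mean of the spline parameters (accumulated in `parameters_scratch`) and the average elite return; the per-timestep elite-average trajectory is no longer materialized. As a rough, self-contained sketch of the mean/variance fit this step performs, the snippet below uses the MuJoCo vector helpers; the function and container names are illustrative, and the `1/n_elite` variance normalization is an assumption rather than necessarily the planner's exact choice.

```cpp
#include <algorithm>
#include <vector>

#include <mujoco/mujoco.h>

// Fit a diagonal Gaussian over spline parameters from the n_elite best
// candidates: mean = average of elite parameters, variance = per-parameter
// spread about that mean. `candidate` holds each candidate's parameters;
// `order` is sorted by total return (best first). Names are illustrative.
void EliteMeanVariance(const std::vector<std::vector<double>>& candidate,
                       const std::vector<int>& order, int n_elite,
                       std::vector<double>& mean,
                       std::vector<double>& variance) {
  int n = mean.size();

  // mean over elites
  mju_zero(mean.data(), n);
  for (int i = 0; i < n_elite; i++) {
    mju_addTo(mean.data(), candidate[order[i]].data(), n);  // sum elites
  }
  mju_scl(mean.data(), mean.data(), 1.0 / n_elite, n);      // normalize

  // per-parameter variance about the mean
  std::fill(variance.begin(), variance.end(), 0.0);
  for (int i = 0; i < n_elite; i++) {
    for (int k = 0; k < n; k++) {
      double diff = candidate[order[i]][k] - mean[k];
      variance[k] += diff * diff / n_elite;  // assumed normalization
    }
  }
}
```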
@@ -304,25 +260,28 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
   }
 
   // improvement: compare nominal to elite average
-  improvement = mju_max(
-      elite_avg.total_return - trajectory[trajectory_order[0]].total_return,
-      0.0);
+  improvement =
+      mju_max(avg_return - trajectory[trajectory_order[0]].total_return, 0.0);
 
   // stop timer
   policy_update_compute_time = GetDuration(policy_update_start);
 }
 
 // compute trajectory using nominal policy
-void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) {
+void CrossEntropyPlanner::NominalTrajectory(int horizon) {
   // set policy
   auto nominal_policy = [&cp = resampled_policy](
                             double* action, const double* state, double time) {
     cp.Action(action, state, time);
   };
 
   // rollout nominal policy
-  elite_avg.Rollout(nominal_policy, task, model, data_[0].get(), state.data(),
-                    time, mocap.data(), userdata.data(), horizon);
+  nominal_trajectory.Rollout(nominal_policy, task, model,
+                             data_[ThreadPool::WorkerId()].get(), state.data(),
+                             time, mocap.data(), userdata.data(), horizon);
+}
+void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) {
+  NominalTrajectory(horizon);
 }
 
 // set action from policy
@@ -363,6 +322,8 @@ void CrossEntropyPlanner::ResamplePolicy(int horizon) {
 
   LinearRange(resampled_policy.times.data(), time_shift,
               resampled_policy.times[0], num_spline_points);
+
+  resampled_policy.representation = policy.representation;
 }
 
 // add random noise to nominal policy
@@ -446,12 +407,18 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon,
           state.data(), time, mocap.data(), userdata.data(), horizon);
     });
   }
-  pool.WaitCount(count_before + num_trajectory);
+  // nominal
+  pool.Schedule([&s = *this, horizon]() { s.NominalTrajectory(horizon); });
+
+  // wait
+  pool.WaitCount(count_before + num_trajectory + 1);
   pool.ResetCount();
 }
 
-// returns the nominal trajectory (this is the purple trace)
-const Trajectory* CrossEntropyPlanner::BestTrajectory() { return &elite_avg; }
+// returns the **nominal** trajectory (this is the purple trace)
+const Trajectory* CrossEntropyPlanner::BestTrajectory() {
+  return &nominal_trajectory;
+}
 
 // visualize planner-specific traces
 void CrossEntropyPlanner::Traces(mjvScene* scn) {
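
For context on the last hunk: the nominal rollout is now scheduled on the same pool as the `num_trajectory` candidate rollouts, and the planner waits for `num_trajectory + 1` completions before resetting the counter. Below is a minimal, self-contained sketch of that fan-out-and-wait pattern using `std::async`; it deliberately avoids the project's `ThreadPool` API, and the rollout bodies and `num_trajectory` value are stand-ins.

```cpp
#include <algorithm>
#include <future>
#include <vector>

int main() {
  const int num_trajectory = 8;  // illustrative count
  std::vector<std::future<double>> candidates;

  // schedule candidate rollouts (stand-ins for perturbed-policy rollouts)
  for (int i = 0; i < num_trajectory; i++) {
    candidates.push_back(
        std::async(std::launch::async, [i] { return 0.1 * i; }));
  }

  // schedule the nominal rollout alongside the candidates (the "+ 1")
  std::future<double> nominal =
      std::async(std::launch::async, [] { return 0.0; });

  // wait for every rollout before reading results, analogous to
  // pool.WaitCount(count_before + num_trajectory + 1); pool.ResetCount();
  double best = nominal.get();
  for (auto& c : candidates) best = std::min(best, c.get());
  return best < 0.0;  // trivially consume the result
}
```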