google-deepmind
diff --git a/‎README.md‎
Lines changed: 3 additions & 3 deletions b/‎README.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎cmake/MujocoLinkOptions.cmake‎
Lines changed: 5 additions & 5 deletions b/‎cmake/MujocoLinkOptions.cmake‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎docs/OVERVIEW.md‎
Lines changed: 5 additions & 2 deletions b/‎docs/OVERVIEW.md‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎mjpc/CMakeLists.txt‎
Lines changed: 6 additions & 0 deletions b/‎mjpc/CMakeLists.txt‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎mjpc/agent.cc‎
Lines changed: 2 additions & 2 deletions b/‎mjpc/agent.cc‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎mjpc/direct/direct.cc‎
Lines changed: 10 additions & 25 deletions b/‎mjpc/direct/direct.cc‎
Lines changed: 10 additions & 25 deletions
diff --git a/‎mjpc/direct/direct.h‎
Lines changed: 0 additions & 1 deletion b/‎mjpc/direct/direct.h‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎mjpc/estimators/batch.cc‎
Lines changed: 0 additions & 12 deletions b/‎mjpc/estimators/batch.cc‎
Lines changed: 0 additions & 12 deletions
diff --git a/‎mjpc/estimators/batch.h‎
Lines changed: 1 addition & 2 deletions b/‎mjpc/estimators/batch.h‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎mjpc/grpc/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion b/‎mjpc/grpc/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion
@@ -16,9 +16,9 @@ real-time predictive control with [MuJoCo](https://mujoco.org/), developed by
 Google DeepMind.
 
 MJPC allows the user to easily author and solve complex robotics tasks, and
-currently supports three shooting-based planners: derivative-based iLQG and
-Gradient Descent, and a simple yet very competitive derivative-free method
-called Predictive Sampling.
+currently supports multiple shooting-based planners. Derivative-based methods include iLQG and
+Gradient Descent, while derivative-free methods include Predictive Sampling, a simple yet very
+competitive planner, and the Cross Entropy Method (with diagonal covariance).
 
 - [Overview](#overview)
 - [Graphical User Interface](#graphical-user-interface)
 
@@ -23,7 +23,7 @@ function(get_mujoco_extra_link_options OUTPUT_VAR)
     set(EXTRA_LINK_OPTIONS)
 
     if(WIN32)
-      set(CMAKE_REQUIRED_FLAGS "-fuse-ld=lld-link")
+      set(CMAKE_REQUIRED_LINK_OPTIONS "-fuse-ld=lld-link")
       check_c_source_compiles("int main() {}" SUPPORTS_LLD)
       if(SUPPORTS_LLD)
         set(EXTRA_LINK_OPTIONS
@@ -34,24 +34,24 @@ function(get_mujoco_extra_link_options OUTPUT_VAR)
         )
       endif()
     else()
-      set(CMAKE_REQUIRED_FLAGS "-fuse-ld=lld")
+      set(CMAKE_REQUIRED_LINK_OPTIONS "-fuse-ld=lld")
       check_c_source_compiles("int main() {}" SUPPORTS_LLD)
       if(SUPPORTS_LLD)
         set(EXTRA_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} -fuse-ld=lld)
       else()
-        set(CMAKE_REQUIRED_FLAGS "-fuse-ld=gold")
+        set(CMAKE_REQUIRED_LINK_OPTIONS "-fuse-ld=gold")
         check_c_source_compiles("int main() {}" SUPPORTS_GOLD)
         if(SUPPORTS_GOLD)
           set(EXTRA_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} -fuse-ld=gold)
         endif()
       endif()
 
-      set(CMAKE_REQUIRED_FLAGS ${EXTRA_LINK_OPTIONS} "-Wl,--gc-sections")
+      set(CMAKE_REQUIRED_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} "-Wl,--gc-sections")
       check_c_source_compiles("int main() {}" SUPPORTS_GC_SECTIONS)
       if(SUPPORTS_GC_SECTIONS)
         set(EXTRA_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} -Wl,--gc-sections)
       else()
-        set(CMAKE_REQUIRED_FLAGS ${EXTRA_LINK_OPTIONS} "-Wl,-dead_strip")
+        set(CMAKE_REQUIRED_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} "-Wl,-dead_strip")
         check_c_source_compiles("int main() {}" SUPPORTS_DEAD_STRIP)
         if(SUPPORTS_DEAD_STRIP)
           set(EXTRA_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} -Wl,-dead_strip)
 
@@ -132,7 +132,7 @@ Values in brackets should be replaced by the designer.
 
 `Agent` settings can be specified by prepending `agent_` for corresponding class members.
 
-`Planner` settings can similarly be specified by prepending the corresponding optimizer name, (e.g., `sampling_`, `gradient_`, `ilqg_`).
+`Planner` settings can similarly be specified by prepending the corresponding optimizer name (e.g., `sampling_`, `cross_entropy_`, `gradient_`, `ilqg_`).
 
 It is also possible to create GUI elements for  parameters that are passed to the residual function.  These are specified by the prefix `residual_`, when the suffix will be the display name of the slider:
 
@@ -280,12 +280,15 @@ Additionally, custom labeled buttons can be added to the GUI by specifying a str
 
 The purpose of `Planner` is to find improved policies using numerical optimization.
 
-This library includes three planners that use different techniques to perform this search:
+This library includes multiple planners that use different techniques to perform this search:
 
 - **Predictive Sampling**
   - random search
   - derivative free
   - spline representation for controls
+- **Cross Entropy Method**
+  - all properties of Predictive Sampling
+  - refits the nominal policy to the mean of the elite samples instead of using the best sample
 - **Gradient Descent**
   - requires gradients
   - spline representation for controls
 
@@ -40,6 +40,8 @@ add_library(
   tasks/acrobot/acrobot.h
   tasks/cartpole/cartpole.cc
   tasks/cartpole/cartpole.h
+  tasks/cube/solve.cc
+  tasks/cube/solve.h
   tasks/fingers/fingers.cc
   tasks/fingers/fingers.h
   tasks/hand/hand.cc
@@ -54,6 +56,8 @@ add_library(
   tasks/manipulation/common.h
   tasks/manipulation/manipulation.cc
   tasks/manipulation/manipulation.h
+  tasks/op3/stand.cc
+  tasks/op3/stand.h
   tasks/panda/panda.cc
   tasks/panda/panda.h
   tasks/particle/particle.cc
@@ -75,6 +79,8 @@ add_library(
   planners/cost_derivatives.h
   planners/model_derivatives.cc
   planners/model_derivatives.h
+  planners/cross_entropy/planner.cc
+  planners/cross_entropy/planner.h
   planners/robust/robust_planner.cc
   planners/robust/robust_planner.h
   planners/sampling/planner.cc
 
@@ -105,7 +105,7 @@ void Agent::Initialize(const mjModel* model) {
   state.Initialize(model);
 
   // initialize estimator
-  if (reset_estimator) {
+  if (reset_estimator && estimator_enabled) {
     for (const auto& estimator : estimators_) {
       estimator->Initialize(model_);
       estimator->Reset();
@@ -169,7 +169,7 @@ void Agent::Reset(const double* initial_repeated_action) {
   state.Reset();
 
   // estimator
-  if (reset_estimator) {
+  if (reset_estimator && estimator_enabled) {
     for (const auto& estimator : estimators_) {
       estimator->Reset();
     }
 
@@ -148,9 +148,6 @@ void Direct::Initialize(const mjModel* model) {
   act.Initialize(na, configuration_length_);
   times.Initialize(1, configuration_length_);
 
-  // ctrl
-  ctrl.Initialize(model->nu, configuration_length_);
-
   // prior
   configuration_previous.Initialize(nq, configuration_length_);
 
@@ -348,7 +345,6 @@ void Direct::Reset(const mjData* data) {
   acceleration.Reset();
   act.Reset();
   times.Reset();
-  ctrl.Reset();
 
   // prior
   configuration_previous.Reset();
@@ -637,8 +633,6 @@ void Direct::SetConfigurationLength(int length) {
   act.SetLength(configuration_length_);
   times.SetLength(configuration_length_);
 
-  ctrl.SetLength(configuration_length_);
-
   configuration_previous.SetLength(configuration_length_);
 
   sensor_measurement.SetLength(configuration_length_);
@@ -1489,8 +1483,7 @@ void Direct::InverseDynamicsPrediction() {
   auto start = std::chrono::steady_clock::now();
 
   // dimension
-  int nq = model->nq, nv = model->nv, na = model->na, nu = model->nu,
-      ns = nsensordata_;
+  int nq = model->nq, nv = model->nv, na = model->na, ns = nsensordata_;
 
   // set parameters
   if (nparam_ > 0) {
@@ -1502,7 +1495,7 @@ void Direct::InverseDynamicsPrediction() {
   int count_before = pool_.GetCount();
 
   // first time step
-  pool_.Schedule([&batch = *this, nq, nv, nu]() {
+  pool_.Schedule([&batch = *this, nq, nv]() {
     // time index
     int t = 0;
 
@@ -1518,7 +1511,6 @@ void Direct::InverseDynamicsPrediction() {
     mju_copy(d->qpos, q0, nq);
     mju_zero(d->qvel, nv);
     mju_zero(d->qacc, nv);
-    mju_zero(d->ctrl, nu);
     d->time = batch.times.Get(t)[0];
 
     // position sensors
@@ -1551,12 +1543,11 @@ void Direct::InverseDynamicsPrediction() {
   // loop over predictions
   for (int t = 1; t < configuration_length_ - 1; t++) {
     // schedule
-    pool_.Schedule([&batch = *this, nq, nv, na, ns, nu, t]() {
+    pool_.Schedule([&batch = *this, nq, nv, na, ns, t]() {
       // terms
       double* qt = batch.configuration.Get(t);
       double* vt = batch.velocity.Get(t);
       double* at = batch.acceleration.Get(t);
-      double* ct = batch.ctrl.Get(t);
 
       // data
       mjData* d = batch.data_[t].get();
@@ -1565,7 +1556,6 @@ void Direct::InverseDynamicsPrediction() {
       mju_copy(d->qpos, qt, nq);
       mju_copy(d->qvel, vt, nv);
       mju_copy(d->qacc, at, nv);
-      mju_copy(d->ctrl, ct, nu);
 
       // inverse dynamics
       mj_inverse(batch.model, d);
@@ -1585,7 +1575,7 @@ void Direct::InverseDynamicsPrediction() {
   }
 
   // last time step
-  pool_.Schedule([&batch = *this, nq, nv, nu]() {
+  pool_.Schedule([&batch = *this, nq, nv]() {
     // time index
     int t = batch.ConfigurationLength() - 1;
 
@@ -1602,7 +1592,6 @@ void Direct::InverseDynamicsPrediction() {
     mju_copy(d->qpos, qT, nq);
     mju_copy(d->qvel, vT, nv);
     mju_zero(d->qacc, nv);
-    mju_zero(d->ctrl, nu);
     d->time = batch.times.Get(t)[0];
 
     // position sensors
@@ -1653,7 +1642,7 @@ void Direct::InverseDynamicsDerivatives() {
   auto start = std::chrono::steady_clock::now();
 
   // dimension
-  int nq = model->nq, nv = model->nv, nu = model->nu;
+  int nq = model->nq, nv = model->nv;
 
   // set parameters
   if (nparam_ > 0) {
@@ -1665,7 +1654,7 @@ void Direct::InverseDynamicsDerivatives() {
   int count_before = pool_.GetCount();
 
   // first time step
-  pool_.Schedule([&batch = *this, nq, nv, nu]() {
+  pool_.Schedule([&batch = *this, nq, nv]() {
     // time index
     int t = 0;
 
@@ -1680,7 +1669,6 @@ void Direct::InverseDynamicsDerivatives() {
     mju_copy(d->qpos, q0, nq);
     mju_zero(d->qvel, nv);
     mju_zero(d->qacc, nv);
-    mju_zero(d->ctrl, nu);
     d->time = batch.times.Get(t)[0];
 
     // finite-difference derivatives
@@ -1725,12 +1713,11 @@ void Direct::InverseDynamicsDerivatives() {
   // loop over predictions
   for (int t = 1; t < configuration_length_ - 1; t++) {
     // schedule
-    pool_.Schedule([&batch = *this, nq, nv, nu, t]() {
+    pool_.Schedule([&batch = *this, nq, nv, t]() {
       // unpack
       double* q = batch.configuration.Get(t);
       double* v = batch.velocity.Get(t);
       double* a = batch.acceleration.Get(t);
-      double* c = batch.ctrl.Get(t);
 
       double* dsdq = batch.block_sensor_configuration_.Get(t);
       double* dsdv = batch.block_sensor_velocity_.Get(t);
@@ -1743,11 +1730,10 @@ void Direct::InverseDynamicsDerivatives() {
       double* dadf = batch.block_force_acceleration_.Get(t);
       mjData* data = batch.data_[t].get();  // TODO(taylor): WorkerID
 
-      // set (state, acceleration) + ctrl
+      // set state, acceleration
       mju_copy(data->qpos, q, nq);
       mju_copy(data->qvel, v, nv);
       mju_copy(data->qacc, a, nv);
-      mju_copy(data->ctrl, c, nu);
 
       // finite-difference derivatives
       mjd_inverseFD(batch.model, data, batch.finite_difference.tolerance,
@@ -1767,7 +1753,7 @@ void Direct::InverseDynamicsDerivatives() {
   }
 
   // last time step
-  pool_.Schedule([&batch = *this, nq, nv, nu]() {
+  pool_.Schedule([&batch = *this, nq, nv]() {
     // time index
     int t = batch.ConfigurationLength() - 1;
 
@@ -1784,7 +1770,6 @@ void Direct::InverseDynamicsDerivatives() {
     mju_copy(d->qpos, qT, nq);
     mju_copy(d->qvel, vT, nv);
     mju_zero(d->qacc, nv);
-    mju_zero(d->ctrl, nu);
     d->time = batch.times.Get(t)[0];
 
     // finite-difference derivatives
@@ -2061,7 +2046,7 @@ double Direct::Cost(double* gradient, double* hessian) {
     // set dense rows in band Hessian
     if (hessian) {
       mju_copy(hessian + nvel_ * nband_, dense_parameter_.data(),
-             nparam_ * ntotal_);
+               nparam_ * ntotal_);
     }
   }
 
 
@@ -161,7 +161,6 @@ class Direct {
   DirectTrajectory<double> acceleration;            // nv x T
   DirectTrajectory<double> act;                     // na x T
   DirectTrajectory<double> times;                   //  1 x T
-  DirectTrajectory<double> ctrl;                    // nu x T
   DirectTrajectory<double> sensor_measurement;      // ns x T
   DirectTrajectory<double> sensor_prediction;       // ns x T
   DirectTrajectory<int> sensor_mask;                // num_sensor x T
 
@@ -119,9 +119,6 @@ void Batch::Initialize(const mjModel* model) {
   act_cache_.Initialize(na, max_history_);
   times_cache_.Initialize(1, max_history_);
 
-  // ctrl
-  ctrl_cache_.Initialize(model->nu, max_history_);
-
   // prior
   configuration_previous_cache_.Initialize(nq, max_history_);
 
@@ -241,7 +238,6 @@ void Batch::Reset(const mjData* data) {
   acceleration_cache_.Reset();
   act_cache_.Reset();
   times_cache_.Reset();
-  ctrl_cache_.Reset();
 
   // prior
   configuration_previous_cache_.Reset();
@@ -325,9 +321,6 @@ void Batch::Update(const double* ctrl, const double* sensor) {
   // set next time
   times.Set(&d->time, t + 1);
 
-  // set ctrl
-  this->ctrl.Set(ctrl, t);
-
   // set sensor
   sensor_measurement.Set(sensor + sensor_start_index_, t);
 
@@ -527,7 +520,6 @@ void Batch::Shift(int shift) {
   acceleration.Shift(shift);
   act.Shift(shift);
   times.Shift(shift);
-  ctrl.Shift(shift);
 
   configuration_previous.Shift(shift);
 
@@ -815,7 +807,6 @@ void Batch::ShiftResizeTrajectory(int new_head, int new_length) {
   acceleration_cache_.Reset();
   act_cache_.Reset();
   times_cache_.Reset();
-  ctrl_cache_.Reset();
   sensor_measurement_cache_.Reset();
   sensor_prediction_cache_.Reset();
   sensor_mask_cache_.Reset();
@@ -831,7 +822,6 @@ void Batch::ShiftResizeTrajectory(int new_head, int new_length) {
   acceleration_cache_.SetLength(length);
   act_cache_.SetLength(length);
   times_cache_.SetLength(length);
-  ctrl_cache_.SetLength(length);
   sensor_measurement_cache_.SetLength(length);
   sensor_prediction_cache_.SetLength(length);
   sensor_mask_cache_.SetLength(length);
@@ -846,7 +836,6 @@ void Batch::ShiftResizeTrajectory(int new_head, int new_length) {
     acceleration_cache_.Set(acceleration.Get(i), i);
     act_cache_.Set(act.Get(i), i);
     times_cache_.Set(times.Get(i), i);
-    ctrl_cache_.Set(ctrl.Get(i), i);
     sensor_measurement_cache_.Set(sensor_measurement.Get(i), i);
     sensor_prediction_cache_.Set(sensor_prediction.Get(i), i);
     sensor_mask_cache_.Set(sensor_mask.Get(i), i);
@@ -867,7 +856,6 @@ void Batch::ShiftResizeTrajectory(int new_head, int new_length) {
     acceleration.Set(acceleration_cache_.Get(new_head + i), i);
     act.Set(act_cache_.Get(new_head + i), i);
     times.Set(times_cache_.Get(new_head + i), i);
-    ctrl.Set(ctrl_cache_.Get(new_head + i), i);
     sensor_measurement.Set(sensor_measurement_cache_.Get(new_head + i), i);
     sensor_prediction.Set(sensor_prediction_cache_.Get(new_head + i), i);
     sensor_mask.Set(sensor_mask_cache_.Get(new_head + i), i);
 
@@ -32,7 +32,7 @@
 namespace mjpc {
 
 // max filter history
-inline constexpr int kMaxFilterHistory = 128;
+inline constexpr int kMaxFilterHistory = 64;
 
 // ----- batch estimator ----- //
 // based on: "Physically-Consistent Sensor Fusion in Contact-Rich Behaviors"
@@ -239,7 +239,6 @@ class Batch : public Direct, public Estimator {
   DirectTrajectory<double> acceleration_cache_;            // nv x T
   DirectTrajectory<double> act_cache_;                     // na x T
   DirectTrajectory<double> times_cache_;                   //  1 x T
-  DirectTrajectory<double> ctrl_cache_;                    // nu x T
   DirectTrajectory<double> sensor_measurement_cache_;      // ns x T
   DirectTrajectory<double> sensor_prediction_cache_;       // ns x T
   DirectTrajectory<int> sensor_mask_cache_;                // num_sensor x T
 
@@ -37,7 +37,7 @@ findorfetch(
 
 find_package(ZLIB REQUIRED)
 set(gRPC_ZLIB_PROVIDER "package" CACHE INTERNAL "")
-
+set(ZLIB_BUILD_EXAMPLES OFF)
 set(_PROTOBUF_LIBPROTOBUF libprotobuf)
 set(_REFLECTION grpc++_reflection)
 set(_PROTOBUF_PROTOC $<TARGET_FILE:protoc>)