diff --git a/README.md b/README.md
index d40873c1b..363a1498a 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ Google DeepMind.
 MJPC allows the user to easily author and solve complex robotics tasks, and
 currently supports multiple shooting-based planners. Derivative-based methods
 include iLQG and Gradient Descent, while derivative-free methods include a
 simple yet very competitive planner
-called Predictive Sampling and the Cross Entropy Method (with diagonal covariance).
+called Predictive Sampling, the Cross Entropy Method (with diagonal covariance), and Model Predictive Path Integral Control (with fixed, diagonal covariance).
 
 - [Overview](#overview)
 - [Graphical User Interface](#graphical-user-interface)
diff --git a/docs/OVERVIEW.md b/docs/OVERVIEW.md
index dd2370178..5e72a7c1d 100644
--- a/docs/OVERVIEW.md
+++ b/docs/OVERVIEW.md
@@ -289,6 +289,9 @@ This library includes multiple planners that use different techniques to perform
 - **Cross Entropy Method**
   - all properties of Predictive Sampling
   - refits a nominal policy to mean of elite samples instead of using the best
+- **Model Predictive Path Integral Control**
+  - all properties of Predictive Sampling
+  - refits the nominal policy to a weighted average of the perturbed policies, centered around the previous nominal policy
 - **Gradient Descent**
   - requires gradients
   - spline representation for controls
diff --git a/mjpc/CMakeLists.txt b/mjpc/CMakeLists.txt
index bd795b9f5..63142cb3b 100644
--- a/mjpc/CMakeLists.txt
+++ b/mjpc/CMakeLists.txt
@@ -79,6 +79,8 @@ add_library(
   planners/model_derivatives.h
   planners/cross_entropy/planner.cc
   planners/cross_entropy/planner.h
+  planners/mppi/planner.cc
+  planners/mppi/planner.h
   planners/robust/robust_planner.cc
   planners/robust/robust_planner.h
   planners/sampling/planner.cc
diff --git a/mjpc/planners/include.cc b/mjpc/planners/include.cc
index 110de59ac..51a105f55 100644
--- a/mjpc/planners/include.cc
+++ b/mjpc/planners/include.cc
@@ -21,6 +21,7 @@
 #include "mjpc/planners/gradient/planner.h"
 #include "mjpc/planners/ilqg/planner.h"
 #include "mjpc/planners/ilqs/planner.h"
+#include "mjpc/planners/mppi/planner.h"
 #include "mjpc/planners/planner.h"
 #include "mjpc/planners/robust/robust_planner.h"
 #include "mjpc/planners/sampling/planner.h"
@@ -32,7 +33,8 @@ const char kPlannerNames[] =
     "iLQG\n"
     "iLQS\n"
     "Robust Sampling\n"
-    "Cross Entropy";
+    "Cross Entropy\n"
+    "MPPI";
 
 // load all available planners
 std::vector<std::unique_ptr<mjpc::Planner>> LoadPlanners() {
@@ -46,6 +48,7 @@ std::vector<std::unique_ptr<mjpc::Planner>> LoadPlanners() {
   planners.emplace_back(
       new RobustPlanner(std::make_unique<SamplingPlanner>()));
   planners.emplace_back(new mjpc::CrossEntropyPlanner);
+  planners.emplace_back(new mjpc::MPPIPlanner);
   return planners;
 }
diff --git a/mjpc/planners/mppi/planner.cc b/mjpc/planners/mppi/planner.cc
new file mode 100644
index 000000000..a9728af23
--- /dev/null
+++ b/mjpc/planners/mppi/planner.cc
@@ -0,0 +1,549 @@
+// Copyright 2022 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "mjpc/planners/mppi/planner.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include "mjpc/array_safety.h" +#include "mjpc/planners/policy.h" +#include "mjpc/states/state.h" +#include "mjpc/trajectory.h" +#include "mjpc/utilities.h" + +namespace mjpc { + +namespace mju = ::mujoco::util_mjpc; + +// initialize data and settings +void MPPIPlanner::Initialize(mjModel* model, const Task& task) { + // delete mjData instances since model might have changed. + data_.clear(); + + // allocate one mjData for nominal. + ResizeMjData(model, 1); + + // model + this->model = model; + + // task + this->task = &task; + + // rollout parameters + timestep_power = 1.0; + + // sampling noise + noise_exploration_ = GetNumberOrDefault(0.1, model, "sampling_exploration"); + + // set number of trajectories to rollout + num_trajectory_ = GetNumberOrDefault(10, model, "sampling_trajectories"); + + // set the temperature of the cost energy distribution + lambda_ = GetNumberOrDefault(0.01, model, "lambda"); + + // initialize weights + std::fill(weight_vec.begin(), weight_vec.end(), 0.0); + denom = 0.0; + temp_weight = 0.0; + + if (num_trajectory_ > kMaxTrajectory) { + mju_error_i("Too many trajectories, %d is the maximum allowed.", + kMaxTrajectory); + } +} + +// allocate memory +void MPPIPlanner::Allocate() { + // initial state + int num_state = model->nq + model->nv + model->na; + + // state + state.resize(num_state); + mocap.resize(7 * model->nmocap); + userdata.resize(model->nuserdata); + + // policy + int num_max_parameter = model->nu * kMaxTrajectoryHorizon; + policy.Allocate(model, *task, kMaxTrajectoryHorizon); + resampled_policy.Allocate(model, *task, kMaxTrajectoryHorizon); + previous_policy.Allocate(model, *task, kMaxTrajectoryHorizon); + + // scratch + parameters_scratch.resize(num_max_parameter); + times_scratch.resize(kMaxTrajectoryHorizon); + + // noise + noise.resize(kMaxTrajectory * (model->nu * kMaxTrajectoryHorizon)); + + // allocating weights for MPPI update + weight_vec.resize(kMaxTrajectory); + + // need to initialize an arbitrary order of the trajectories + trajectory_order.resize(kMaxTrajectory); + for (int i = 0; i < kMaxTrajectory; i++) { + trajectory_order[i] = i; + } + + // trajectories and parameters + for (int i = 0; i < kMaxTrajectory; i++) { + trajectory[i].Initialize(num_state, model->nu, task->num_residual, + task->num_trace, kMaxTrajectoryHorizon); + trajectory[i].Allocate(kMaxTrajectoryHorizon); + candidate_policy[i].Allocate(model, *task, kMaxTrajectoryHorizon); + } + + // nominal trajectory + nominal_trajectory.Initialize(num_state, model->nu, task->num_residual, + task->num_trace, kMaxTrajectoryHorizon); + nominal_trajectory.Allocate(kMaxTrajectoryHorizon); +} + +// reset memory to zeros +void MPPIPlanner::Reset(int horizon, const double* initial_repeated_action) { + // state + std::fill(state.begin(), state.end(), 0.0); + std::fill(mocap.begin(), mocap.end(), 0.0); + std::fill(userdata.begin(), userdata.end(), 0.0); + time = 0.0; + + // policy parameters + policy.Reset(horizon, initial_repeated_action); + resampled_policy.Reset(horizon, initial_repeated_action); + previous_policy.Reset(horizon, initial_repeated_action); + + // scratch + std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0); + std::fill(times_scratch.begin(), times_scratch.end(), 0.0); + + // noise + std::fill(noise.begin(), noise.end(), 0.0); + + // trajectory samples + for (int i = 0; i < kMaxTrajectory; i++) { + 
+    trajectory[i].Reset(kMaxTrajectoryHorizon);
+    candidate_policy[i].Reset(horizon);
+  }
+  nominal_trajectory.Reset(kMaxTrajectoryHorizon);
+
+  for (const auto& d : data_) {
+    mju_zero(d->ctrl, model->nu);
+  }
+
+  // improvement
+  improvement = 0.0;
+}
+
+// set state
+void MPPIPlanner::SetState(const State& state) {
+  state.CopyTo(this->state.data(), this->mocap.data(), this->userdata.data(),
+               &this->time);
+}
+
+// optimize nominal policy using random sampling
+void MPPIPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
+  // check horizon
+  if (horizon != nominal_trajectory.horizon) {
+    NominalTrajectory(horizon, pool);
+  }
+
+  // if num_trajectory_ has changed, use it in this new iteration.
+  // num_trajectory_ might change while this function runs. Keep it constant
+  // for the duration of this function.
+  int num_trajectory = num_trajectory_;
+
+  // resize number of mjData
+  ResizeMjData(model, pool.NumThreads());
+
+  // copy nominal policy
+  policy.num_parameters = model->nu * policy.num_spline_points;
+  {
+    const std::shared_lock<std::shared_mutex> lock(mtx_);
+    resampled_policy.CopyFrom(policy, policy.num_spline_points);
+  }
+
+  // resample nominal policy to current time
+  this->ResamplePolicy(horizon);
+
+  // ----- rollout noisy policies ----- //
+  // start timer
+  auto rollouts_start = std::chrono::steady_clock::now();
+
+  // simulate noisy policies
+  this->Rollouts(num_trajectory, horizon, pool);
+
+  // reset trajectory order
+  for (int i = 0; i < num_trajectory; i++) {
+    trajectory_order[i] = i;
+  }
+
+  // sort trajectory indices by total return (ascending); the best rollout
+  // ends up first and serves as the baseline for the weights below
+  std::partial_sort(
+      trajectory_order.begin(), trajectory_order.begin() + num_trajectory,
+      trajectory_order.begin() + num_trajectory,
+      [&trajectory = trajectory](int a, int b) {
+        return trajectory[a].total_return < trajectory[b].total_return;
+      });
+
+  // stop timer
+  rollouts_compute_time = GetDuration(rollouts_start);
+
+  // ----- update policy ----- //
+  // start timer
+  auto policy_update_start = std::chrono::steady_clock::now();
+
+  // reset parameters scratch to zero
+  std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0);
+
+  // reset nominal trajectory
+  nominal_trajectory.Reset(horizon);
+
+  // set nominal trajectory times
+  for (int tt = 0; tt < horizon; tt++) {
+    nominal_trajectory.times[tt] = time + tt * model->opt.timestep;
+  }
+
+  // best trajectory (lowest total return)
+  int idx = trajectory_order[0];
+
+  // ----- MPPI update ----- //
+  temp_weight = 0.0;  // storage for intermediate weights
+  denom = 0.0;
+  std::fill(weight_vec.begin(), weight_vec.end(), 0.0);
+  double lambda = lambda_;  // fixed in this function
+
+  // (1) compute MPPI weights
+  for (int i = 0; i < num_trajectory; i++) {
+    // subtract the best return as a baseline; this cancels after
+    // normalization but keeps the exponentials numerically stable
+    double diff = trajectory[i].total_return - trajectory[idx].total_return;
+    temp_weight = std::exp(-diff / lambda);
+    denom += temp_weight;
+    weight_vec[i] = temp_weight;
+  }
+
+  // (2) update the distribution parameters.
+  // The vanilla MPPI update is
+  //   mu <- mu + E[w(U) * dU] / E[w(U)],
+  // where U = mu + dU is a sampled sequence of open-loop inputs, dU is the
+  // random perturbation applied to the nominal sequence mu, and
+  // w(U) = exp(-S(U) / lambda) weights each sample by its total cost S(U).
+  // Taking Monte Carlo approximations of the expectations, the update becomes
+  //   mu <- mu + \sum_i{w_i * dU_i},
+  // where the normalized weights satisfy \sum_i{w_i} = 1, so equivalently
+  //   mu <- \sum_i{w_i * (mu + dU_i)} = \sum_i{w_i * U_i}.
+  for (int i = 0; i < num_trajectory; i++) {
+    // add to parameters of nominal policy
+    mju_addToScl(parameters_scratch.data(),
+                 candidate_policy[i].parameters.data(), weight_vec[i] / denom,
+                 policy.num_parameters);
+
+    // add to nominal trajectory
+    mju_addToScl(nominal_trajectory.actions.data(),
+                 trajectory[i].actions.data(), weight_vec[i] / denom,
+                 model->nu * (horizon - 1));
+    mju_addToScl(nominal_trajectory.trace.data(), trajectory[i].trace.data(),
+                 weight_vec[i] / denom, 3 * task->num_trace * horizon);
+    mju_addToScl(nominal_trajectory.residual.data(),
+                 trajectory[i].residual.data(), weight_vec[i] / denom,
+                 nominal_trajectory.dim_residual * horizon);
+    mju_addToScl(nominal_trajectory.costs.data(), trajectory[i].costs.data(),
+                 weight_vec[i] / denom, horizon);
+    nominal_trajectory.total_return +=
+        trajectory[i].total_return * weight_vec[i] / denom;
+  }
+
+  // update nominal policy
+  {
+    const std::shared_lock<std::shared_mutex> lock(mtx_);
+    policy.CopyParametersFrom(parameters_scratch, times_scratch);
+  }
+
+  // improvement: weighted-average (nominal) return relative to the best
+  // sampled return
+  improvement = mju_max(
+      nominal_trajectory.total_return - trajectory[idx].total_return, 0.0);
+
+  // stop timer
+  policy_update_compute_time = GetDuration(policy_update_start);
+}
+
+// compute trajectory using nominal policy
+void MPPIPlanner::NominalTrajectory(int horizon, ThreadPool& pool) {
+  // set policy
+  auto nominal_policy = [&cp = resampled_policy](
+                            double* action, const double* state, double time) {
+    cp.Action(action, state, time);
+  };
+
+  // rollout nominal policy
+  nominal_trajectory.Rollout(nominal_policy, task, model, data_[0].get(),
+                             state.data(), time, mocap.data(), userdata.data(),
+                             horizon);
+}
+
+// set action from policy
+void MPPIPlanner::ActionFromPolicy(double* action, const double* state,
+                                   double time, bool use_previous) {
+  const std::shared_lock<std::shared_mutex> lock(mtx_);
+  if (use_previous) {
+    previous_policy.Action(action, state, time);
+  } else {
+    policy.Action(action, state, time);
+  }
+}
+
+// update policy via resampling at the current time
+void MPPIPlanner::ResamplePolicy(int horizon) {
+  // dimensions
+  int num_parameters = resampled_policy.num_parameters;
+  int num_spline_points = resampled_policy.num_spline_points;
+
+  // time
+  double nominal_time = time;
+  double time_shift = mju_max(
+      (horizon - 1) * model->opt.timestep / (num_spline_points - 1), 1.0e-5);
+
+  // get spline points
+  for (int t = 0; t < num_spline_points; t++) {
+    times_scratch[t] = nominal_time;
+    resampled_policy.Action(DataAt(parameters_scratch, t * model->nu), nullptr,
+                            nominal_time);
+    nominal_time += time_shift;
+  }
+
+  // copy resampled policy parameters
+  mju_copy(resampled_policy.parameters.data(), parameters_scratch.data(),
+           num_parameters);
+  mju_copy(resampled_policy.times.data(), times_scratch.data(),
+           num_spline_points);
+
+  // time step power scaling
+  PowerSequence(resampled_policy.times.data(), time_shift,
+                resampled_policy.times[0],
+                resampled_policy.times[num_spline_points - 1], timestep_power,
+                num_spline_points);
+}
+
+// add random noise to nominal policy
+void MPPIPlanner::AddNoiseToPolicy(int i) {
+  // start timer
+  auto noise_start = std::chrono::steady_clock::now();
+
+  // dimensions
+  int num_spline_points = candidate_policy[i].num_spline_points;
+  int num_parameters = candidate_policy[i].num_parameters;
+
+  // sampling token
+  absl::BitGen gen_;
+
+  // shift index into the noise buffer
+  int shift = i * (model->nu * kMaxTrajectoryHorizon);
+
+  // sample noise
+  double noise_exploration = noise_exploration_;  // fixed for this function
+  for (int k = 0; k < num_parameters; k++) {
+    noise[k + shift] = absl::Gaussian<double>(gen_, 0.0, noise_exploration);
+  }
+
+  // add noise
+  mju_addTo(candidate_policy[i].parameters.data(), DataAt(noise, shift),
+            num_parameters);
+
+  // clamp parameters
+  for (int t = 0; t < num_spline_points; t++) {
+    Clamp(DataAt(candidate_policy[i].parameters, t * model->nu),
+          model->actuator_ctrlrange, model->nu);
+  }
+
+  // end timer
+  IncrementAtomic(noise_compute_time, GetDuration(noise_start));
+}
+
+// compute candidate trajectories
+void MPPIPlanner::Rollouts(int num_trajectory, int horizon, ThreadPool& pool) {
+  // reset noise compute time
+  noise_compute_time = 0.0;
+
+  // random search
+  int count_before = pool.GetCount();
+  for (int i = 0; i < num_trajectory; i++) {
+    pool.Schedule([&s = *this, &model = this->model, &task = this->task,
+                   &state = this->state, &time = this->time,
+                   &mocap = this->mocap, &userdata = this->userdata, horizon,
+                   i]() {
+      // copy nominal policy and sample noise
+      {
+        const std::shared_lock<std::shared_mutex> lock(s.mtx_);
+        s.candidate_policy[i].CopyFrom(s.resampled_policy,
+                                       s.resampled_policy.num_spline_points);
+        s.candidate_policy[i].representation =
+            s.resampled_policy.representation;
+
+        // sample noise
+        s.AddNoiseToPolicy(i);
+      }
+
+      // ----- rollout sample policy ----- //
+
+      // policy
+      auto sample_policy_i = [&candidate_policy = s.candidate_policy, &i](
+                                 double* action, const double* state,
+                                 double time) {
+        candidate_policy[i].Action(action, state, time);
+      };
+
+      // policy rollout
+      s.trajectory[i].Rollout(
+          sample_policy_i, task, model, s.data_[ThreadPool::WorkerId()].get(),
+          state.data(), time, mocap.data(), userdata.data(), horizon);
+    });
+  }
+  pool.WaitCount(count_before + num_trajectory);
+  pool.ResetCount();
+}
+
+// return the nominal trajectory (this is the purple trace)
+const Trajectory* MPPIPlanner::BestTrajectory() { return &nominal_trajectory; }
+
+// visualize planner-specific traces
+void MPPIPlanner::Traces(mjvScene* scn) {
+  // sample color
+  float color[4];
+  color[0] = 1.0;
+  color[1] = 1.0;
+  color[2] = 1.0;
+  color[3] = 1.0;
+
+  // width of a sample trace, in pixels
+  double width = GetNumberOrDefault(3, model, "agent_sample_width");
+
+  // scratch
+  double zero3[3] = {0};
+  double zero9[9] = {0};
+
+  // best
+  auto best = this->BestTrajectory();
+
+  // sample traces
+  for (int k = 0; k < num_trajectory_; k++) {
+    // plot sample
+    for (int i = 0; i < best->horizon - 1; i++) {
+      if (scn->ngeom + task->num_trace > scn->maxgeom) break;
+      for (int j = 0; j < task->num_trace; j++) {
+        // initialize geometry
+        mjv_initGeom(&scn->geoms[scn->ngeom], mjGEOM_LINE, zero3, zero3, zero9,
+                     color);
+
+        // trajectory index, in order sorted by total return
+        int idx = trajectory_order[k];
+
+        // make geometry
+        mjv_makeConnector(
+            &scn->geoms[scn->ngeom], mjGEOM_LINE, width,
+            trajectory[idx].trace[3 * task->num_trace * i + 3 * j],
+            trajectory[idx].trace[3 * task->num_trace * i + 1 + 3 * j],
+            trajectory[idx].trace[3 * task->num_trace * i + 2 + 3 * j],
+            trajectory[idx].trace[3 * task->num_trace * (i + 1) + 3 * j],
+            trajectory[idx].trace[3 * task->num_trace * (i + 1) + 1 + 3 * j],
+            trajectory[idx].trace[3 * task->num_trace * (i + 1) + 2 + 3 * j]);
+
+        // increment number of geometries
+        scn->ngeom += 1;
+      }
+    }
+  }
+}
+
+// planner-specific GUI elements
+void MPPIPlanner::GUI(mjUI& ui) {
+  mjuiDef defMPPI[] = {
+      {mjITEM_SLIDERINT, "Rollouts", 2, &num_trajectory_, "0 1"},
+ {mjITEM_SELECT, "Spline", 2, &policy.representation, + "Zero\nLinear\nCubic"}, + {mjITEM_SLIDERINT, "Spline Pts", 2, &policy.num_spline_points, "0 1"}, + {mjITEM_SLIDERNUM, "Noise Std", 2, &noise_exploration_, "0 1"}, + {mjITEM_SLIDERNUM, "Temperature", 2, &lambda_, "0.01 1.0"}, + {mjITEM_END}}; + + // set number of trajectory slider limits + mju::sprintf_arr(defMPPI[0].other, "%i %i", 1, kMaxTrajectory); + + // set spline point limits + mju::sprintf_arr(defMPPI[2].other, "%i %i", MinSamplingSplinePoints, + MaxSamplingSplinePoints); + + // set noise standard deviation limits + mju::sprintf_arr(defMPPI[3].other, "%f %f", MinNoiseStdDev, MaxNoiseStdDev); + + // add cross entropy planner + mjui_add(&ui, defMPPI); +} + +// planner-specific plots +void MPPIPlanner::Plots(mjvFigure* fig_planner, mjvFigure* fig_timer, + int planner_shift, int timer_shift, int planning, + int* shift) { + // ----- planner ----- // + double planner_bounds[2] = {-6.0, 6.0}; + + // improvement + mjpc::PlotUpdateData(fig_planner, planner_bounds, + fig_planner->linedata[0 + planner_shift][0] + 1, + mju_log10(mju_max(improvement, 1.0e-6)), 100, + 0 + planner_shift, 0, 1, -100); + + // legend + mju::strcpy_arr(fig_planner->linename[0 + planner_shift], "Avg - Best"); + + fig_planner->range[1][0] = planner_bounds[0]; + fig_planner->range[1][1] = planner_bounds[1]; + + // bounds + double timer_bounds[2] = {0.0, 1.0}; + + // ----- timer ----- // + + PlotUpdateData( + fig_timer, timer_bounds, fig_timer->linedata[0 + timer_shift][0] + 1, + 1.0e-3 * noise_compute_time * planning, 100, 0 + timer_shift, 0, 1, -100); + + PlotUpdateData(fig_timer, timer_bounds, + fig_timer->linedata[1 + timer_shift][0] + 1, + 1.0e-3 * rollouts_compute_time * planning, 100, + 1 + timer_shift, 0, 1, -100); + + PlotUpdateData(fig_timer, timer_bounds, + fig_timer->linedata[2 + timer_shift][0] + 1, + 1.0e-3 * policy_update_compute_time * planning, 100, + 2 + timer_shift, 0, 1, -100); + + // legend + mju::strcpy_arr(fig_timer->linename[0 + timer_shift], "Noise"); + mju::strcpy_arr(fig_timer->linename[1 + timer_shift], "Rollout"); + mju::strcpy_arr(fig_timer->linename[2 + timer_shift], "Policy Update"); + + // planner shift + shift[0] += 1; + + // timer shift + shift[1] += 3; +} + +} // namespace mjpc diff --git a/mjpc/planners/mppi/planner.h b/mjpc/planners/mppi/planner.h new file mode 100644 index 000000000..36b0fcce9 --- /dev/null +++ b/mjpc/planners/mppi/planner.h @@ -0,0 +1,145 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#ifndef MJPC_PLANNERS_MPPI_PLANNER_H_
+#define MJPC_PLANNERS_MPPI_PLANNER_H_
+
+#include <mujoco/mujoco.h>
+
+#include <atomic>
+#include <memory>
+#include <shared_mutex>
+#include <vector>
+
+#include "mjpc/planners/planner.h"
+#include "mjpc/planners/sampling/planner.h"
+#include "mjpc/states/state.h"
+#include "mjpc/trajectory.h"
+
+namespace mjpc {
+
+class MPPIPlanner : public Planner {
+ public:
+  // constructor
+  MPPIPlanner() = default;
+
+  // destructor
+  ~MPPIPlanner() override = default;
+
+  // ----- methods ----- //
+
+  // initialize data and settings
+  void Initialize(mjModel* model, const Task& task) override;
+
+  // allocate memory
+  void Allocate() override;
+
+  // reset memory to zeros
+  void Reset(int horizon,
+             const double* initial_repeated_action = nullptr) override;
+
+  // set state
+  void SetState(const State& state) override;
+
+  // optimize nominal policy using random sampling
+  void OptimizePolicy(int horizon, ThreadPool& pool) override;
+
+  // compute trajectory using nominal policy
+  void NominalTrajectory(int horizon, ThreadPool& pool) override;
+
+  // set action from policy
+  void ActionFromPolicy(double* action, const double* state, double time,
+                        bool use_previous = false) override;
+
+  // resample nominal policy
+  void ResamplePolicy(int horizon);
+
+  // add noise to nominal policy
+  void AddNoiseToPolicy(int i);
+
+  // compute candidate trajectories
+  void Rollouts(int num_trajectory, int horizon, ThreadPool& pool);
+
+  // return trajectory with best total return
+  const Trajectory* BestTrajectory() override;
+
+  // visualize planner-specific traces
+  void Traces(mjvScene* scn) override;
+
+  // planner-specific GUI elements
+  void GUI(mjUI& ui) override;
+
+  // planner-specific plots
+  void Plots(mjvFigure* fig_planner, mjvFigure* fig_timer, int planner_shift,
+             int timer_shift, int planning, int* shift) override;
+
+  // ----- members ----- //
+  mjModel* model;
+  const Task* task;
+
+  // state
+  std::vector<double> state;
+  double time;
+  std::vector<double> mocap;
+  std::vector<double> userdata;
+
+  // policy
+  SamplingPolicy policy;  // (Guarded by mtx_)
+  SamplingPolicy candidate_policy[kMaxTrajectory];
+  SamplingPolicy resampled_policy;
+  SamplingPolicy previous_policy;
+
+  // scratch
+  std::vector<double> parameters_scratch;
+  std::vector<double> times_scratch;
+
+  // trajectories
+  Trajectory trajectory[kMaxTrajectory];
+  Trajectory nominal_trajectory;  // weighted average of the rollouts
+
+  // order of indices of rolled-out trajectories, sorted by total return
+  std::vector<int> trajectory_order;
+
+  // rollout parameters
+  double timestep_power;
+
+  // ----- noise ----- //
+  double noise_exploration_;  // standard deviation of Gaussian noise: N(0, std)
+  std::vector<double> noise;
+
+  // improvement
+  double improvement;
+
+  // flags
+  int processed_noise_status;
+
+  // timing
+  std::atomic<double> noise_compute_time;
+  double rollouts_compute_time;
+  double policy_update_compute_time;
+
+  // mppi
+  double lambda_;                  // temperature of the cost energy distribution
+  std::vector<double> weight_vec;  // MPPI weights
+  double denom;                    // sum of weight_vec
+  double temp_weight;              // scratch variable for storing weights
+
+  int num_trajectory_;
+  mutable std::shared_mutex mtx_;
+};
+
+}  // namespace mjpc
+
+#endif  // MJPC_PLANNERS_MPPI_PLANNER_H_
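Note on the update rule: the MPPI step in OptimizePolicy is a softmax over rollout costs followed by a weighted average of the sampled control sequences. The following standalone C++ sketch isolates the weight computation; it assumes, as in the planner, that total returns are costs (lower is better), and the helper name MppiWeights is illustrative only, not part of the MJPC API.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Computes w_i = exp(-(S_i - S_best) / lambda) / Z, where Z normalizes the
// weights to sum to 1. Subtracting the best (lowest) return S_best does not
// change the normalized weights, since the constant factor exp(S_best / lambda)
// cancels in the ratio, but it keeps every exponent <= 0 and so avoids
// overflow when lambda is small.
std::vector<double> MppiWeights(const std::vector<double>& returns,
                                double lambda) {
  const double best = *std::min_element(returns.begin(), returns.end());
  std::vector<double> weights(returns.size());
  double denom = 0.0;
  for (std::size_t i = 0; i < returns.size(); ++i) {
    weights[i] = std::exp(-(returns[i] - best) / lambda);
    denom += weights[i];
  }
  for (double& w : weights) w /= denom;  // normalize: weights now sum to 1
  return weights;
}

The new nominal spline parameters are then mu <- sum_i w_i * U_i with U_i = mu + dU_i, which is the mju_addToScl loop in the planner. As lambda -> 0 the update approaches selecting the single best rollout (all weight on the baseline sample); as lambda grows it approaches an unweighted average, so the Temperature slider interpolates between greedy selection and uniform averaging.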