Skip to content

Commit 6b6b9ff

Browse files
authored
Merge branch 'main' into direct_optimizer_python
2 parents 3b9bb41 + 78e0e31 commit 6b6b9ff

File tree

29 files changed

+234
-240
lines changed

29 files changed

+234
-240
lines changed

cmake/MujocoLinkOptions.cmake

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ function(get_mujoco_extra_link_options OUTPUT_VAR)
2323
set(EXTRA_LINK_OPTIONS)
2424

2525
if(WIN32)
26-
set(CMAKE_REQUIRED_LINK_OPTIONS "-fuse-ld=lld-link")
26+
set(CMAKE_REQUIRED_FLAGS "-fuse-ld=lld-link")
2727
check_c_source_compiles("int main() {}" SUPPORTS_LLD)
2828
if(SUPPORTS_LLD)
2929
set(EXTRA_LINK_OPTIONS
@@ -34,24 +34,24 @@ function(get_mujoco_extra_link_options OUTPUT_VAR)
3434
)
3535
endif()
3636
else()
37-
set(CMAKE_REQUIRED_LINK_OPTIONS "-fuse-ld=lld")
37+
set(CMAKE_REQUIRED_FLAGS "-fuse-ld=lld")
3838
check_c_source_compiles("int main() {}" SUPPORTS_LLD)
3939
if(SUPPORTS_LLD)
4040
set(EXTRA_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} -fuse-ld=lld)
4141
else()
42-
set(CMAKE_REQUIRED_LINK_OPTIONS "-fuse-ld=gold")
42+
set(CMAKE_REQUIRED_FLAGS "-fuse-ld=gold")
4343
check_c_source_compiles("int main() {}" SUPPORTS_GOLD)
4444
if(SUPPORTS_GOLD)
4545
set(EXTRA_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} -fuse-ld=gold)
4646
endif()
4747
endif()
4848

49-
set(CMAKE_REQUIRED_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} "-Wl,--gc-sections")
49+
set(CMAKE_REQUIRED_FLAGS ${EXTRA_LINK_OPTIONS} "-Wl,--gc-sections")
5050
check_c_source_compiles("int main() {}" SUPPORTS_GC_SECTIONS)
5151
if(SUPPORTS_GC_SECTIONS)
5252
set(EXTRA_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} -Wl,--gc-sections)
5353
else()
54-
set(CMAKE_REQUIRED_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} "-Wl,-dead_strip")
54+
set(CMAKE_REQUIRED_FLAGS ${EXTRA_LINK_OPTIONS} "-Wl,-dead_strip")
5555
check_c_source_compiles("int main() {}" SUPPORTS_DEAD_STRIP)
5656
if(SUPPORTS_DEAD_STRIP)
5757
set(EXTRA_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} -Wl,-dead_strip)

mjpc/agent.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ class Agent {
136136
mjpc::Planner& ActivePlanner() const { return *planners_[planner_]; }
137137
mjpc::Estimator& ActiveEstimator() const { return *estimators_[estimator_]; }
138138
int ActiveEstimatorIndex() const { return estimator_; }
139+
double ComputeTime() const { return agent_compute_time_; }
139140
Task* ActiveTask() const { return tasks_[active_task_id_].get(); }
140141
// a residual function that can be used from trajectory rollouts. must only
141142
// be used from trajectory rollout threads (no locking).

mjpc/direct/direct.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#define MJPC_DIRECT_OPTIMIZER_H_
1717

1818
#include <memory>
19-
#include <mutex>
2019
#include <string>
2120
#include <vector>
2221

mjpc/estimators/batch.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
#include "mjpc/array_safety.h"
2525
#include "mjpc/estimators/estimator.h"
2626
#include "mjpc/direct/direct.h"
27-
#include "mjpc/norm.h"
2827
#include "mjpc/threadpool.h"
2928
#include "mjpc/utilities.h"
3029

mjpc/grpc/agent_service.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,11 @@ grpc::Status AgentService::Init(grpc::ServerContext* context,
118118
model = mj_copyModel(nullptr, agent_model);
119119
data_ = mj_makeData(model);
120120
rollout_data_.reset(mj_makeData(model));
121+
int home_id = mj_name2id(model, mjOBJ_KEY, "home");
122+
if (home_id >= 0) {
123+
mj_resetDataKeyframe(model, data_, home_id);
124+
mj_resetDataKeyframe(model, rollout_data_.get(), home_id);
125+
}
121126
mjcb_sensor = residual_sensor_callback;
122127

123128
agent_.SetState(data_);

mjpc/planners/cross_entropy/planner.cc

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,19 @@
1414

1515
#include "mjpc/planners/cross_entropy/planner.h"
1616

17-
#include <absl/random/random.h>
18-
#include <mujoco/mujoco.h>
19-
2017
#include <algorithm>
2118
#include <chrono>
2219
#include <cmath>
23-
#include <mutex>
2420
#include <shared_mutex>
2521

22+
#include <absl/random/random.h>
23+
#include <mujoco/mujoco.h>
2624
#include "mjpc/array_safety.h"
27-
#include "mjpc/planners/policy.h"
25+
#include "mjpc/planners/planner.h"
26+
#include "mjpc/planners/sampling/planner.h"
2827
#include "mjpc/states/state.h"
28+
#include "mjpc/task.h"
29+
#include "mjpc/threadpool.h"
2930
#include "mjpc/trajectory.h"
3031
#include "mjpc/utilities.h"
3132

@@ -47,9 +48,6 @@ void CrossEntropyPlanner::Initialize(mjModel* model, const Task& task) {
4748
// task
4849
this->task = &task;
4950

50-
// rollout parameters
51-
timestep_power = 1.0;
52-
5351
// sampling noise
5452
std_initial_ =
5553
GetNumberOrDefault(0.1, model,
@@ -138,7 +136,7 @@ void CrossEntropyPlanner::Reset(int horizon,
138136

139137
// variance
140138
double var = std_initial_ * std_initial_;
141-
fill(variance.begin(), variance.end(), var);
139+
std::fill(variance.begin(), variance.end(), var);
142140

143141
// trajectory samples
144142
for (int i = 0; i < kMaxTrajectory; i++) {
@@ -362,11 +360,8 @@ void CrossEntropyPlanner::ResamplePolicy(int horizon) {
362360
mju_copy(resampled_policy.times.data(), times_scratch.data(),
363361
num_spline_points);
364362

365-
// time step power scaling
366-
PowerSequence(resampled_policy.times.data(), time_shift,
367-
resampled_policy.times[0],
368-
resampled_policy.times[num_spline_points - 1], timestep_power,
369-
num_spline_points);
363+
LinearRange(resampled_policy.times.data(), time_shift,
364+
resampled_policy.times[0], num_spline_points);
370365
}
371366

372367
// add random noise to nominal policy
@@ -513,8 +508,6 @@ void CrossEntropyPlanner::GUI(mjUI& ui) {
513508
{mjITEM_SELECT, "Spline", 2, &policy.representation,
514509
"Zero\nLinear\nCubic"},
515510
{mjITEM_SLIDERINT, "Spline Pts", 2, &policy.num_spline_points, "0 1"},
516-
// {mjITEM_SLIDERNUM, "Spline Pow. ", 2, &timestep_power, "0 10"},
517-
// {mjITEM_SELECT, "Noise type", 2, &noise_type, "Gaussian\nUniform"},
518511
{mjITEM_SLIDERNUM, "Init. Std", 2, &std_initial_, "0 1"},
519512
{mjITEM_SLIDERNUM, "Min. Std", 2, &std_min_, "0.01 0.5"},
520513
{mjITEM_SLIDERINT, "Elite", 2, &n_elite_, "2 128"},

mjpc/planners/cross_entropy/planner.h

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@
1515
#ifndef MJPC_PLANNERS_CROSS_ENTROPY_PLANNER_H_
1616
#define MJPC_PLANNERS_CROSS_ENTROPY_PLANNER_H_
1717

18-
#include <mujoco/mujoco.h>
19-
2018
#include <atomic>
21-
#include <memory>
2219
#include <shared_mutex>
2320
#include <vector>
2421

22+
#include <mujoco/mujoco.h>
2523
#include "mjpc/planners/planner.h"
26-
#include "mjpc/planners/sampling/planner.h"
24+
#include "mjpc/planners/sampling/policy.h"
2725
#include "mjpc/states/state.h"
26+
#include "mjpc/task.h"
27+
#include "mjpc/threadpool.h"
2828
#include "mjpc/trajectory.h"
2929

3030
namespace mjpc {
@@ -116,9 +116,6 @@ class CrossEntropyPlanner : public Planner {
116116
// order of indices of rolled out trajectories, ordered by total return
117117
std::vector<int> trajectory_order;
118118

119-
// rollout parameters
120-
double timestep_power;
121-
122119
// ----- noise ----- //
123120
double std_initial_; // standard deviation for sampling normal: N(0,
124121
// std)

mjpc/planners/gradient/planner.cc

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,6 @@ void GradientPlanner::Initialize(mjModel* model, const Task& task) {
4949
// task
5050
this->task = &task;
5151

52-
// rollout parameters
53-
timestep_power = 1.0;
54-
5552
// dimensions
5653
dim_state = model->nq + model->nv + model->na; // state dimension
5754
dim_state_derivative =
@@ -374,11 +371,8 @@ void GradientPlanner::ResamplePolicy(int horizon) {
374371
mju_copy(candidate_policy[0].times.data(), times_scratch.data(),
375372
num_spline_points);
376373

377-
// time step power scaling
378-
PowerSequence(candidate_policy[0].times.data(), time_shift,
379-
candidate_policy[0].times[0],
380-
candidate_policy[0].times[num_spline_points - 1],
381-
timestep_power, num_spline_points);
374+
LinearRange(candidate_policy[0].times.data(), time_shift,
375+
candidate_policy[0].times[0], num_spline_points);
382376
}
383377

384378
// compute candidate trajectories
@@ -474,7 +468,6 @@ void GradientPlanner::GUI(mjUI& ui) {
474468
{mjITEM_SELECT, "Spline", 2, &policy.representation,
475469
"Zero\nLinear\nCubic"},
476470
{mjITEM_SLIDERINT, "Spline Pts", 2, &policy.num_spline_points, "0 1"},
477-
// {mjITEM_SLIDERNUM, "Spline Pow. ", 2, &timestep_power, "0 10"},
478471
{mjITEM_END}};
479472

480473
// set number of trajectory slider limits

mjpc/planners/gradient/planner.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,6 @@ class GradientPlanner : public Planner {
123123
Trajectory trajectory[kMaxTrajectory];
124124
int num_trajectory;
125125

126-
// rollout parameters
127-
double timestep_power;
128-
129126
// model derivatives
130127
ModelDerivatives model_derivative;
131128

mjpc/planners/gradient/spline_mapping.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,27 @@ namespace mjpc {
2525
// ----- spline constants ----- //
2626
inline constexpr int kMinGradientSplinePoints = 1;
2727
inline constexpr int kMaxGradientSplinePoints = 25;
28-
inline constexpr int kMinGradientSplinePower = 1;
29-
inline constexpr int kMaxGradientSplinePower = 5;
3028

3129
// matrix representation for mapping between spline points and interpolated time
32-
// series
30+
// series.
31+
// A spline is made of num_input points, and each has one associated time, and
32+
// `dim` associated parameters.
33+
// The time series has num_output entries, each associated with a time and with
34+
// dim associated values.
35+
//
36+
// For sampling policies, we have
37+
// dim = model->nu
38+
// num_input = num_spline_points
39+
// num_output = trajectory_length
40+
//
41+
// The mapping is a matrix, A, of shape (dim*num_output) x (dim*num_input),
42+
// which can be used to go from spline parameters to the values of the sampled
43+
// time series, assuming a fixed set of times.
44+
//
45+
// Given a vector of containing spline parameters, v (length=dim*num_input),
46+
// flattened so that the parameters for each spline point are next to each
47+
// other, A*v gives the corresponding interpolated values, sampled at
48+
// output_times.
3349
class SplineMapping {
3450
public:
3551
// constructor

0 commit comments

Comments
 (0)