Skip to content

Commit 0819b85

Browse files
committed
merge
2 parents 6d2c04c + 189c25c commit 0819b85

33 files changed

+505
-116
lines changed

mjpc/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ add_library(
3838
tasks/tasks.h
3939
tasks/acrobot/acrobot.cc
4040
tasks/acrobot/acrobot.h
41+
tasks/allegro/allegro.cc
42+
tasks/allegro/allegro.h
4143
tasks/bimanual/bimanual.cc
4244
tasks/bimanual/bimanual.h
4345
tasks/cartpole/cartpole.cc

mjpc/grpc/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ set(BUILD_SHARED_LIBS
2222

2323
find_package(ZLIB REQUIRED)
2424
set(gRPC_ZLIB_PROVIDER "package" CACHE INTERNAL "")
25+
set(gRPC_BUILD_GRPC_CSHARP_PLUGIN OFF)
26+
set(gRPC_BUILD_GRPC_NODE_PLUGIN OFF)
27+
set(gRPC_BUILD_GRPC_OBJECTIVE_C_PLUGIN OFF)
28+
set(gRPC_BUILD_GRPC_PHP_PLUGIN OFF)
29+
set(gRPC_BUILD_GRPC_RUBY_PLUGIN OFF)
30+
set(RE2_BUILD_TESTING OFF)
2531
set(ZLIB_BUILD_EXAMPLES OFF)
2632

2733
findorfetch(

mjpc/planners/gradient/planner.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@ void GradientPlanner::Reset(int horizon,
144144
expected = 0.0;
145145
improvement = 0.0;
146146
surprise = 0.0;
147+
148+
// derivative skip
149+
derivative_skip_ = GetNumberOrDefault(0, model, "derivative_skip");
147150
}
148151

149152
// set state
@@ -191,6 +194,7 @@ void GradientPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
191194

192195
// update policy
193196
double c_best = c_prev;
197+
int skip = derivative_skip_;
194198
for (int i = 0; i < settings.max_rollout; i++) {
195199
// ----- model derivatives ----- //
196200
// start timer
@@ -200,7 +204,8 @@ void GradientPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
200204
model_derivative.Compute(
201205
model, data_, trajectory[0].states.data(), trajectory[0].actions.data(),
202206
trajectory[0].times.data(), dim_state, dim_state_derivative, dim_action,
203-
dim_sensor, horizon, settings.fd_tolerance, settings.fd_mode, pool);
207+
dim_sensor, horizon, settings.fd_tolerance, settings.fd_mode, pool,
208+
skip);
204209

205210
// stop timer
206211
model_derivative_time += GetDuration(model_derivative_start);
@@ -468,6 +473,7 @@ void GradientPlanner::GUI(mjUI& ui) {
468473
{mjITEM_SELECT, "Spline", 2, &policy.representation,
469474
"Zero\nLinear\nCubic"},
470475
{mjITEM_SLIDERINT, "Spline Pts", 2, &policy.num_spline_points, "0 1"},
476+
{mjITEM_SLIDERINT, "Deriv. Skip", 2, &derivative_skip_, "0 16"},
471477
{mjITEM_END}};
472478

473479
// set number of trajectory slider limits

mjpc/planners/gradient/planner.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ class GradientPlanner : public Planner {
160160

161161
private:
162162
mutable std::shared_mutex mtx_;
163+
int derivative_skip_ = 0;
163164
};
164165

165166
} // namespace mjpc

mjpc/planners/ilqg/planner.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ void iLQGPlanner::Reset(int horizon, const double* initial_repeated_action) {
137137
improvement = 0.0;
138138
expected = 0.0;
139139
surprise = 0.0;
140+
141+
// derivative skip
142+
derivative_skip_ = GetNumberOrDefault(0, model, "derivative_skip");
140143
}
141144

142145
// set state
@@ -248,6 +251,7 @@ void iLQGPlanner::GUI(mjUI& ui) {
248251
"Zero\nLinear\nCubic"},
249252
{mjITEM_SELECT, "Reg. Type", 2, &settings.regularization_type,
250253
"Control\nFeedback\nValue\nNone"},
254+
{mjITEM_SLIDERINT, "Deriv. Skip", 2, &derivative_skip_, "0 16"},
251255
{mjITEM_CHECKINT, "Terminal Print", 2, &settings.verbose, ""},
252256
{mjITEM_END}};
253257

@@ -393,7 +397,7 @@ void iLQGPlanner::Iteration(int horizon, ThreadPool& pool) {
393397
candidate_policy[0].trajectory.actions.data(),
394398
candidate_policy[0].trajectory.times.data(), dim_state,
395399
dim_state_derivative, dim_action, dim_sensor, horizon,
396-
settings.fd_tolerance, settings.fd_mode, pool);
400+
settings.fd_tolerance, settings.fd_mode, pool, derivative_skip_);
397401

398402
// stop timer
399403
double model_derivative_time = GetDuration(model_derivative_start);

mjpc/planners/ilqg/planner.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ class iLQGPlanner : public Planner {
157157
private:
158158
int num_trajectory_ = 1;
159159
int num_rollouts_gui_ = 1;
160+
int derivative_skip_ = 0;
160161
};
161162

162163
} // namespace mjpc

mjpc/planners/model_derivatives.cc

Lines changed: 110 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -48,40 +48,119 @@ void ModelDerivatives::Compute(const mjModel* m,
4848
const double* h, int dim_state,
4949
int dim_state_derivative, int dim_action,
5050
int dim_sensor, int T, double tol, int mode,
51-
ThreadPool& pool) {
52-
{
53-
int count_before = pool.GetCount();
54-
for (int t = 0; t < T; t++) {
55-
pool.Schedule([&m, &data, &A = A, &B = B, &C = C, &D = D, &x, &u, &h,
56-
dim_state, dim_state_derivative, dim_action, dim_sensor,
57-
tol, mode, t, T]() {
58-
mjData* d = data[ThreadPool::WorkerId()].get();
59-
// set state
60-
SetState(m, d, x + t * dim_state);
61-
d->time = h[t];
62-
63-
// set action
64-
mju_copy(d->ctrl, u + t * dim_action, dim_action);
51+
ThreadPool& pool, int skip) {
52+
// reset indices
53+
evaluate_.clear();
54+
interpolate_.clear();
6555

66-
// Jacobians
67-
if (t == T - 1) {
68-
// Jacobians
69-
mjd_transitionFD(m, d, tol, mode, nullptr, nullptr,
70-
DataAt(C, t * (dim_sensor * dim_state_derivative)),
71-
nullptr);
72-
} else {
73-
// derivatives
74-
mjd_transitionFD(
75-
m, d, tol, mode,
76-
DataAt(A, t * (dim_state_derivative * dim_state_derivative)),
77-
DataAt(B, t * (dim_state_derivative * dim_action)),
78-
DataAt(C, t * (dim_sensor * dim_state_derivative)),
79-
DataAt(D, t * (dim_sensor * dim_action)));
80-
}
81-
});
56+
// evaluate indices
57+
int s = skip + 1;
58+
evaluate_.push_back(0);
59+
for (int t = s; t < T - s; t += s) {
60+
evaluate_.push_back(t);
61+
}
62+
evaluate_.push_back(T - 2);
63+
evaluate_.push_back(T - 1);
64+
65+
// interpolate indices
66+
for (int t = 0, e = 0; t < T; t++) {
67+
if (e == evaluate_.size() || evaluate_[e] > t) {
68+
interpolate_.push_back(t);
69+
} else {
70+
e++;
8271
}
83-
pool.WaitCount(count_before + T);
8472
}
73+
74+
// evaluate derivatives
75+
int count_before = pool.GetCount();
76+
for (int t : evaluate_) {
77+
pool.Schedule([&m, &data, &A = A, &B = B, &C = C, &D = D, &x, &u, &h,
78+
dim_state, dim_state_derivative, dim_action, dim_sensor, tol,
79+
mode, t, T]() {
80+
mjData* d = data[ThreadPool::WorkerId()].get();
81+
// set state
82+
SetState(m, d, x + t * dim_state);
83+
d->time = h[t];
84+
85+
// set action
86+
mju_copy(d->ctrl, u + t * dim_action, dim_action);
87+
88+
// Jacobians
89+
if (t == T - 1) {
90+
// Jacobians
91+
mjd_transitionFD(m, d, tol, mode, nullptr, nullptr,
92+
DataAt(C, t * (dim_sensor * dim_state_derivative)),
93+
nullptr);
94+
} else {
95+
// derivatives
96+
mjd_transitionFD(
97+
m, d, tol, mode,
98+
DataAt(A, t * (dim_state_derivative * dim_state_derivative)),
99+
DataAt(B, t * (dim_state_derivative * dim_action)),
100+
DataAt(C, t * (dim_sensor * dim_state_derivative)),
101+
DataAt(D, t * (dim_sensor * dim_action)));
102+
}
103+
});
104+
}
105+
pool.WaitCount(count_before + evaluate_.size());
106+
pool.ResetCount();
107+
108+
// interpolate derivatives
109+
count_before = pool.GetCount();
110+
for (int t : interpolate_) {
111+
pool.Schedule([&A = A, &B = B, &C = C, &D = D, &evaluate_ = this->evaluate_,
112+
dim_state_derivative, dim_action, dim_sensor, t]() {
113+
// find interval
114+
int bounds[2];
115+
FindInterval(bounds, evaluate_, t, evaluate_.size());
116+
int e0 = evaluate_[bounds[0]];
117+
int e1 = evaluate_[bounds[1]];
118+
119+
// normalized input
120+
double tt = double(t - e0) / double(e1 - e0);
121+
if (bounds[0] == bounds[1]) {
122+
tt = 0.0;
123+
}
124+
125+
// A
126+
int nA = dim_state_derivative * dim_state_derivative;
127+
double* Ai = DataAt(A, t * nA);
128+
const double* AL = DataAt(A, e0 * nA);
129+
const double* AU = DataAt(A, e1 * nA);
130+
131+
mju_scl(Ai, AL, 1.0 - tt, nA);
132+
mju_addToScl(Ai, AU, tt, nA);
133+
134+
// B
135+
int nB = dim_state_derivative * dim_action;
136+
double* Bi = DataAt(B, t * nB);
137+
const double* BL = DataAt(B, e0 * nB);
138+
const double* BU = DataAt(B, e1 * nB);
139+
140+
mju_scl(Bi, BL, 1.0 - tt, nB);
141+
mju_addToScl(Bi, BU, tt, nB);
142+
143+
// C
144+
int nC = dim_sensor * dim_state_derivative;
145+
double* Ci = DataAt(C, t * nC);
146+
const double* CL = DataAt(C, e0 * nC);
147+
const double* CU = DataAt(C, e1 * nC);
148+
149+
mju_scl(Ci, CL, 1.0 - tt, nC);
150+
mju_addToScl(Ci, CU, tt, nC);
151+
152+
// D
153+
int nD = dim_sensor * dim_action;
154+
double* Di = DataAt(D, t * nD);
155+
const double* DL = DataAt(D, e0 * nD);
156+
const double* DU = DataAt(D, e1 * nD);
157+
158+
mju_scl(Di, DL, 1.0 - tt, nD);
159+
mju_addToScl(Di, DU, tt, nD);
160+
});
161+
}
162+
163+
pool.WaitCount(count_before + interpolate_.size());
85164
pool.ResetCount();
86165
}
87166

mjpc/planners/model_derivatives.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class ModelDerivatives {
4545
void Compute(const mjModel* m, const std::vector<UniqueMjData>& data,
4646
const double* x, const double* u, const double* h, int dim_state,
4747
int dim_state_derivative, int dim_action, int dim_sensor, int T,
48-
double tol, int mode, ThreadPool& pool);
48+
double tol, int mode, ThreadPool& pool, int skip = 0);
4949

5050
// Jacobians
5151
std::vector<double> A; // model Jacobians wrt state
@@ -56,6 +56,10 @@ class ModelDerivatives {
5656
// (T * dim_sensor * dim_state_derivative)
5757
std::vector<double> D; // output Jacobians wrt action
5858
// (T * dim_sensor * dim_action)
59+
60+
// indices
61+
std::vector<int> evaluate_;
62+
std::vector<int> interpolate_;
5963
};
6064

6165
} // namespace mjpc

mjpc/planners/sample_gradient/planner.cc

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,6 @@ void SampleGradientPlanner::Initialize(mjModel* model, const Task& task) {
4949
// task
5050
this->task = &task;
5151

52-
// rollout parameters
53-
timestep_power = 1.0;
54-
5552
// exploration noise
5653
noise_exploration = GetNumberOrDefault(0.1, model, "sampling_exploration");
5754

mjpc/planners/sample_gradient/planner.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,6 @@ class SampleGradientPlanner : public Planner {
122122
// order of indices of rolled out trajectories, ordered by total return
123123
std::vector<int> trajectory_order;
124124

125-
// rollout parameters
126-
double timestep_power;
127-
128125
// zero-mean Gaussian noise standard deviation
129126
double noise_exploration;
130127
std::vector<double> noise;

0 commit comments

Comments
 (0)