
Commit 8ef5ded

return shaping
1 parent 825b372 commit 8ef5ded

2 files changed: +41 -14 lines changed

mjpc/planners/sample_gradient/planner.cc

Lines changed: 38 additions & 14 deletions
@@ -431,25 +431,51 @@ void SampleGradientPlanner::GradientCandidates(int num_trajectory,
   // -- compute approximate gradient -- //
   // average return
   int num_noisy = num_trajectory - num_gradient;
-  double avg_return = 0.0;
-  for (int i = 0; i < num_noisy; i++) {
-    avg_return += trajectory[i].total_return;
+
+  // fitness shaping
+  // https://www.jmlr.org/papers/volume15/wierstra14a/wierstra14a.pdf
+  if (return_weight_.size() != num_noisy) {
+    // resize number of weights
+    return_weight_.resize(num_noisy);
+
+    // -- sort noisy samples only (exclude gradient samples) -- //
+    // initial order for partial sort
+    for (int i = 0; i < num_noisy; i++) {
+      trajectory_order[i] = i;
+    }
+
+    // sort lowest to highest total return
+    std::partial_sort(
+        trajectory_order.begin(), trajectory_order.begin() + num_noisy,
+        trajectory_order.begin() + num_noisy,
+        [&trajectory = trajectory](int a, int b) {
+          return trajectory[a].total_return < trajectory[b].total_return;
+        });
+
+    // compute normalization
+    double f0 = std::log(0.5 * num_noisy + 1.0);
+    double den = 0.0;
+    for (int i = 0; i < num_noisy; i++) {
+      den += std::max(0.0, f0 - std::log(trajectory_order[i] + 1));
+    }
+
+    // compute weights
+    for (int i = 0; i < num_noisy; i++) {
+      return_weight_[i] =
+          std::max(0.0, f0 - std::log(trajectory_order[i] + 1)) / den -
+          1.0 / num_noisy;
+    }
   }
-  avg_return /= num_noisy;
 
   // gradient
   std::fill(gradient.begin(), gradient.end(), 0.0);
-  for (int i = 1; i < num_noisy; i++) {
-    double* noisei = noise.data() + i * (model->nu * kMaxTrajectoryHorizon);
-    mju_addToScl(gradient.data(), noisei,
-                 (trajectory[i].total_return - avg_return) / (num_noisy - 1),
+  for (int i = 0; i < num_noisy; i++) {
+    double* noisei = noise.data() +
+                     trajectory_order[i] * (model->nu * kMaxTrajectoryHorizon);
+    mju_addToScl(gradient.data(), noisei, return_weight_[i] / num_noisy,
                  num_parameters);
   }
 
-  // normalize gradient
-  // TODO(taylor): should we normalize?
-  // mju_normalize(gradient.data(), num_parameters);
-
   // compute step sizes along gradient
   std::vector<double> step_size(num_gradient);
   LogScale(step_size.data(), gradient_max_step_size, gradient_min_step_size,
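
The added weights are the rank-based utilities from the cited Wierstra et al. (2014) paper on Natural Evolution Strategies: with n noisy rollouts ranked from best (k = 0) to worst, u_k = max(0, log(n/2 + 1) - log(k + 1)) / sum_j max(0, log(n/2 + 1) - log(j + 1)) - 1/n. Below is a minimal standalone sketch of that formula; the helper name is illustrative, not part of mjpc, and it assigns utilities purely by rank, assuming lower total return is better:

#include <algorithm>
#include <cmath>
#include <vector>

// rank-based NES utility weights; rank k = 0 is the best sample
std::vector<double> UtilityWeights(int n) {
  double f0 = std::log(0.5 * n + 1.0);  // log(n/2 + 1)
  std::vector<double> u(n);
  double den = 0.0;
  for (int k = 0; k < n; k++) {
    u[k] = std::max(0.0, f0 - std::log(k + 1.0));  // clipped logarithmic decay
    den += u[k];
  }
  // normalize, then subtract 1/n so the weights sum to zero
  for (int k = 0; k < n; k++) u[k] = u[k] / den - 1.0 / n;
  return u;
}

Because the weights depend only on rank and sum to zero, the gradient estimate becomes invariant to monotone transformations of the total return, which is what replaces the earlier baseline-subtracted average.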
@@ -466,8 +492,6 @@ void SampleGradientPlanner::GradientCandidates(int num_trajectory,
     candidate_policy[i].representation = resampled_policy.representation;
 
     // scaling
-    // TODO(taylor): scale by num_parameters?
-    // TODO(taylor): divide by reward std dev?
     double scaling = step_size[i - num_noisy] / noise_exploration;
 
     // gradient step
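
Putting the two pieces together, the shaped gradient has the form g = (1/n) * sum_k u_k * eps_{order[k]}, where eps_i is the noise applied to rollout i. A simplified, self-contained sketch of this estimate follows; plain std::vector arithmetic stands in for mjpc's preallocated buffers and mju_addToScl, and all names here are illustrative:

#include <algorithm>
#include <numeric>
#include <vector>

std::vector<double> ShapedGradient(
    const std::vector<std::vector<double>>& noise,  // noise[i]: perturbation of rollout i
    const std::vector<double>& total_return,        // rollout costs, lower is better
    const std::vector<double>& u) {                 // rank-based utility weights
  int n = static_cast<int>(noise.size());
  int dim = static_cast<int>(noise[0].size());

  // rank rollouts from lowest to highest total return
  std::vector<int> order(n);
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(),
            [&](int a, int b) { return total_return[a] < total_return[b]; });

  // accumulate g += (u[k] / n) * noise[order[k]]
  std::vector<double> g(dim, 0.0);
  for (int k = 0; k < n; k++) {
    for (int j = 0; j < dim; j++) g[j] += u[k] / n * noise[order[k]][j];
  }
  return g;
}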

mjpc/planners/sample_gradient/planner.h

Lines changed: 3 additions & 0 deletions
@@ -155,6 +155,9 @@ class SampleGradientPlanner : public Planner {
   double gradient_max_step_size = 2.0;
   double gradient_min_step_size = 1.0e-3;
 
+  // return weight
+  std::vector<double> return_weight_;
+
   // nominal index
   const int idx_nominal = 0;
