@@ -431,25 +431,51 @@ void SampleGradientPlanner::GradientCandidates(int num_trajectory,
   // -- compute approximate gradient -- //
   // average return
   int num_noisy = num_trajectory - num_gradient;
-  double avg_return = 0.0;
-  for (int i = 0; i < num_noisy; i++) {
-    avg_return += trajectory[i].total_return;
+
+  // fitness shaping
+  // https://www.jmlr.org/papers/volume15/wierstra14a/wierstra14a.pdf
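+  // rank-based utilities, following the cited NES fitness-shaping rule:
+  //   u_k = max(0, log(n/2 + 1) - log(k)) / sum_j max(0, log(n/2 + 1) - log(j))
+  //         - 1/n,  for rank k = 1..n (n = num_noisy)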
+  if (return_weight_.size() != num_noisy) {
+    // resize number of weights
+    return_weight_.resize(num_noisy);
+
+    // -- sort noisy samples only (exclude gradient samples) -- //
+    // initial order for partial sort
+    for (int i = 0; i < num_noisy; i++) {
+      trajectory_order[i] = i;
+    }
+
+    // sort lowest to highest total return
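+    // (middle == last here, so std::partial_sort fully sorts the first
+    // num_noisy indices of trajectory_order)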
+    std::partial_sort(
+        trajectory_order.begin(), trajectory_order.begin() + num_noisy,
+        trajectory_order.begin() + num_noisy,
+        [&trajectory = trajectory](int a, int b) {
+          return trajectory[a].total_return < trajectory[b].total_return;
+        });
+
+    // compute normalization
+    double f0 = std::log(0.5 * num_noisy + 1.0);
+    double den = 0.0;
+    for (int i = 0; i < num_noisy; i++) {
+      den += std::max(0.0, f0 - std::log(trajectory_order[i] + 1));
+    }
+
+    // compute weights
+    for (int i = 0; i < num_noisy; i++) {
+      return_weight_[i] =
+          std::max(0.0, f0 - std::log(trajectory_order[i] + 1)) / den -
+          1.0 / num_noisy;
+    }
   }
-  avg_return /= num_noisy;
 
   // gradient
   std::fill(gradient.begin(), gradient.end(), 0.0);
-  for (int i = 1; i < num_noisy; i++) {
-    double* noisei = noise.data() + i * (model->nu * kMaxTrajectoryHorizon);
-    mju_addToScl(gradient.data(), noisei,
-                 (trajectory[i].total_return - avg_return) / (num_noisy - 1),
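+  // fitness-shaped estimate:
+  //   gradient = (1/num_noisy) * sum_i return_weight_[i] * noise(trajectory_order[i])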
+  for (int i = 0; i < num_noisy; i++) {
+    double* noisei = noise.data() +
+                     trajectory_order[i] * (model->nu * kMaxTrajectoryHorizon);
+    mju_addToScl(gradient.data(), noisei, return_weight_[i] / num_noisy,
                  num_parameters);
   }
 
-  // normalize gradient
-  // TODO(taylor): should we normalize?
-  // mju_normalize(gradient.data(), num_parameters);
-
   // compute step sizes along gradient
   std::vector<double> step_size(num_gradient);
   LogScale(step_size.data(), gradient_max_step_size, gradient_min_step_size,
@@ -466,8 +492,6 @@ void SampleGradientPlanner::GradientCandidates(int num_trajectory,
     candidate_policy[i].representation = resampled_policy.representation;
 
     // scaling
-    // TODO(taylor): scale by num_parameters?
-    // TODO(taylor): divide by reward std dev?
     double scaling = step_size[i - num_noisy] / noise_exploration;
 
     // gradient step
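Note: below is a minimal standalone sketch of the rank-based fitness shaping used above, per Wierstra et al. (2014). Names (`order`, `weight`, scalar `noise`) are illustrative only, not the mjpc API; the planner's per-rollout noise is a `num_parameters`-long vector accumulated with `mju_addToScl`.

```cpp
// Sketch only: rank-based fitness shaping for a search-gradient estimate.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  // toy population: total returns (costs) for n noisy rollouts; lower is better
  std::vector<double> total_return = {3.0, 1.0, 4.0, 1.5, 5.0, 2.0};
  const int n = static_cast<int>(total_return.size());

  // order[i] = index of the i-th lowest-cost rollout
  std::vector<int> order(n);
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(),
            [&](int a, int b) { return total_return[a] < total_return[b]; });

  // utilities: u_k = max(0, log(n/2 + 1) - log(k)) / den - 1/n, rank k = 1..n
  double f0 = std::log(0.5 * n + 1.0);
  double den = 0.0;
  for (int k = 1; k <= n; k++) {
    den += std::max(0.0, f0 - std::log(static_cast<double>(k)));
  }
  std::vector<double> weight(n);
  for (int i = 0; i < n; i++) {
    weight[i] = std::max(0.0, f0 - std::log(i + 1.0)) / den - 1.0 / n;
  }

  // shaped gradient: g = (1/n) * sum_i weight[i] * noise[order[i]]
  // (scalar noise per rollout for brevity)
  std::vector<double> noise = {0.3, -0.1, 0.9, -0.4, 0.2, -0.7};
  double gradient = 0.0;
  for (int i = 0; i < n; i++) {
    gradient += weight[i] * noise[order[i]] / n;
  }
  std::printf("gradient estimate: %g\n", gradient);
  return 0;
}
```

The utilities depend only on ranks and sum to zero by construction, so the estimate is invariant to shifting all returns by a constant; this plays the role of the subtracted `avg_return` baseline removed in this change.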