fix LogScale, adjust sample gradient planner step size

thowell · thowell · commit 7fdb52bc96b7 · 2024-02-02T15:49:09.000-07:00
diff --git a/mjpc/planners/sample_gradient/planner.cc b/mjpc/planners/sample_gradient/planner.cc
@@ -476,10 +476,12 @@ void SampleGradientPlanner::GradientCandidates(int num_trajectory,
                  num_parameters);
   }
 
-  // compute step sizes along gradient
-  std::vector<double> step_size(num_gradient);
-  LogScale(step_size.data(), gradient_max_step_size, gradient_min_step_size,
-           num_gradient);
+  // compute step sizes for gradient direction
+  if (step_size_.size() != num_gradient) {
+    step_size_.resize(num_gradient);
+    LogScale(step_size_.data(), gradient_max_step_size, gradient_min_step_size,
+             num_gradient);
+  }
 
   // gradient filter gf * grad + (1 - gf) * grad_prev
   double gradient_filter = gradient_filter_;
@@ -492,7 +494,7 @@ void SampleGradientPlanner::GradientCandidates(int num_trajectory,
     candidate_policy[i].representation = resampled_policy.representation;
 
     // scaling
-    double scaling = step_size[i - num_noisy] / noise_exploration;
+    double scaling = step_size_[i - num_noisy] / noise_exploration;
 
     // gradient step
     mju_addToScl(candidate_policy[i].parameters.data(), gradient.data(),
@@ -620,17 +622,23 @@ void SampleGradientPlanner::Plots(mjvFigure* fig_planner, mjvFigure* fig_timer,
                        mju_log10(mju_max(improvement, 1.0e-6)), 100,
                        0 + planner_shift, 0, 1, -100);
 
-  // winner type
-  double winner_type =
-      winner_type_ == kPerturb ? -6.0 : (winner_type_ == kGradient ? 6.0 : 0.0);
+  // winner plot value
+  double winner_plot_val = -6.0;  // nominal
+  if (winner_type_ == kPerturb) {
+    winner_plot_val = 0.0;
+  } else if (winner_type_ == kGradient) {
+    int num_noisy = num_trajectory_ - num_gradient_;
+    winner_plot_val = 6.0 * (winner - num_noisy) / num_gradient_;
+  }
+
   mjpc::PlotUpdateData(fig_planner, planner_bounds,
                        fig_planner->linedata[1 + planner_shift][0] + 1,
-                       winner_type, 100, 1 + planner_shift, 0, 1, -100);
+                       winner_plot_val, 100, 1 + planner_shift, 0, 1, -100);
 
   // legend
   mju::strcpy_arr(fig_planner->linename[0 + planner_shift], "Improvement");
   mju::strcpy_arr(fig_planner->linename[1 + planner_shift],
-                  "Perturb|Nominal|Gradient");
+                  "Nominal|Perturb|Gradient");
 
   fig_planner->range[1][0] = planner_bounds[0];
   fig_planner->range[1][1] = planner_bounds[1];
diff --git a/mjpc/planners/sample_gradient/planner.h b/mjpc/planners/sample_gradient/planner.h
@@ -152,6 +152,9 @@ class SampleGradientPlanner : public Planner {
   std::vector<double> gradient;
   std::vector<double> gradient_previous;
   double gradient_filter_ = 1.0;
+
+  // gradient step size
+  std::vector<double> step_size_;
   double gradient_max_step_size = 2.0;
   double gradient_min_step_size = 1.0e-3;
 
diff --git a/mjpc/utilities.cc b/mjpc/utilities.cc
@@ -839,9 +839,9 @@ bool CheckWarnings(mjData* data) {
 // compute vector with log-based scaling between min and max values
 void LogScale(double* values, double max_value, double min_value, int steps) {
   double step =
-      mju_log(max_value) - mju_log(min_value) / mju_max((steps - 1), 1);
+      (std::log(max_value) - std::log(min_value)) / std::max((steps - 1), 1);
   for (int i = 0; i < steps; i++) {
-    values[i] = mju_exp(mju_log(min_value) + i * step);
+    values[i] = std::exp(std::log(min_value) + i * step);
   }
 }
 

Original file line number	Diff line number	Diff line change
`@@ -839,9 +839,9 @@ bool CheckWarnings(mjData* data) {`
`839`	`839`	`// compute vector with log-based scaling between min and max values`
`840`	`840`	`void LogScale(double* values, double max_value, double min_value, int steps) {`
`841`	`841`	`double step =`
`842`		`- mju_log(max_value) - mju_log(min_value) / mju_max((steps - 1), 1);`
	`842`	`+ (std::log(max_value) - std::log(min_value)) / std::max((steps - 1), 1);`
`843`	`843`	`for (int i = 0; i < steps; i++) {`
`844`		`- values[i] = mju_exp(mju_log(min_value) + i * step);`
	`844`	`+ values[i] = std::exp(std::log(min_value) + i * step);`
`845`	`845`	`}`
`846`	`846`	`}`
`847`	`847`