Add option to save best model in ES (#476)

boomanaiden154 · web-flow · commit 4c590d6d3083 · 2025-03-20T20:27:22.000-07:00
This patch adds a config option to BlackboxLearnerConfig that enables
saving the best model whenever a new best reward is observed. This patch
also adds support in BlackboxLearner for actually saving the model to the
save directory.
diff --git a/compiler_opt/es/blackbox_learner.py b/compiler_opt/es/blackbox_learner.py
@@ -76,6 +76,9 @@ class BlackboxLearnerConfig:
   # Learning rate
   step_size: float
 
+  # Whether or not to save a policy if it has the greatest reward seen so far.
+  save_best_policy: bool
+
 
 def _prune_skipped_perturbations(perturbations: list[npt.NDArray[np.float32]],
                                  rewards: list[float | None]):
@@ -152,6 +155,7 @@ def __init__(self,
     self._step = initial_step
     self._deadline = deadline
     self._seed = seed
+    self._global_max_reward = 0.0
 
     self._summary_writer = tf.summary.create_file_writer(output_dir)
 
@@ -270,6 +274,18 @@ def run_step(self, pool: FixedWorkerPool) -> None:
     self._log_rewards(rewards)
     self._log_tf_summary(rewards)
 
+    if self._config.save_best_policy and np.max(
+        rewards) > self._global_max_reward:
+      # Record the new best so later steps only save on a real improvement.
+      self._global_max_reward = np.max(rewards)
+      logging.info('Found new best model with reward %f at step '
+                   '%d, saving.', self._global_max_reward, self._step)
+      best_perturbation = initial_perturbations[np.argmax(rewards)]
+      self._policy_saver_fn(
+          parameters=self._model_weights + best_perturbation,
+          policy_name=(f'best_policy_{self._global_max_reward}'
+                       f'_step_{self._step}'))
+
     self._save_model()
 
     self._step += 1
diff --git a/compiler_opt/es/blackbox_learner_test.py b/compiler_opt/es/blackbox_learner_test.py
@@ -65,7 +65,8 @@ def setUp(self):
         evaluator=blackbox_evaluator.SamplingBlackboxEvaluator,
         total_num_perturbations=3,
         precision_parameter=1,
-        step_size=1.0)
+        step_size=1.0,
+        save_best_policy=False)
 
     self._cps = corpus.create_corpus_for_testing(
         location=tempfile.gettempdir(),