@@ -114,9 +114,6 @@ def __init__(
114
114
self ._checkpointer .initialize_or_restore ()
115
115
116
116
self ._start_time = time .time ()
117
- self ._last_checkpoint_step = 0
118
- self ._last_log_step = 0
119
- self ._summary_last_log_step = 0
120
117
121
118
def _initialize_metrics (self ):
122
119
"""Initializes metrics."""
@@ -126,8 +123,7 @@ def _initialize_metrics(self):
126
123
127
124
def _update_metrics (self , experience , monitor_dict ):
128
125
"""Updates metrics and exports to Tensorboard."""
129
- if (self ._global_step .numpy () >=
130
- self ._summary_last_log_step + self ._summary_log_interval ):
126
+ if tf .math .equal (self ._global_step % self ._summary_log_interval , 0 ):
131
127
is_action = ~ experience .is_boundary ()
132
128
133
129
self ._data_action_mean .update_state (
@@ -136,6 +132,10 @@ def _update_metrics(self, experience, monitor_dict):
136
132
experience .reward , sample_weight = is_action )
137
133
self ._num_trajectories .update_state (experience .is_first ())
138
134
135
+ # Check earlier rather than later if we should record summaries.
136
+ # TF also checks it, but much later. Needed to avoid looping through
137
+ # the dict so gave the if a bigger scope
138
+ if tf .summary .should_record_summaries ():
139
139
with tf .name_scope ('default/' ):
140
140
tf .summary .scalar (
141
141
name = 'data_action_mean' ,
@@ -158,28 +158,23 @@ def _update_metrics(self, experience, monitor_dict):
158
158
tf .summary .histogram (
159
159
name = 'reward' , data = experience .reward , step = self ._global_step )
160
160
161
- self ._summary_last_log_step = self ._global_step .numpy ()
162
-
163
161
def _reset_metrics (self ):
164
162
"""Reset num_trajectories."""
165
163
self ._num_trajectories .reset_states ()
166
164
167
165
def _log_experiment (self , loss ):
168
166
"""Log training info."""
169
- global_step_val = self ._global_step . numpy ()
170
- if global_step_val - self . _last_log_step > = self ._log_interval :
167
+ if tf . math . equal ( self ._global_step % self . _log_interval , 0 ):
168
+ global_step_val = self ._global_step . numpy ()
171
169
logging .info ('step = %d, loss = %g' , global_step_val , loss )
172
170
time_acc = time .time () - self ._start_time
173
- steps_per_sec = ( global_step_val - self ._last_log_step ) / time_acc
171
+ steps_per_sec = self ._log_interval / time_acc
174
172
logging .info ('%.3f steps/sec' , steps_per_sec )
175
- self ._last_log_step = global_step_val
176
173
self ._start_time = time .time ()
177
174
178
175
def _save_checkpoint (self ):
179
- if (self ._global_step .numpy () - self ._last_checkpoint_step >=
180
- self ._checkpoint_interval ):
176
+ if tf .math .equal (self ._global_step % self ._checkpoint_interval , 0 ):
181
177
self ._checkpointer .save (global_step = self ._global_step )
182
- self ._last_checkpoint_step = self ._global_step .numpy ()
183
178
184
179
def global_step_numpy(self):
  """Return the current global training step as a NumPy scalar."""
  return self._global_step.numpy()
0 commit comments