@@ -50,7 +50,8 @@ def __init__(
       # Params for summaries and logging
       checkpoint_interval=10000,
       log_interval=100,
-      summary_interval=1000,
+      summary_log_interval=100,
+      summary_export_interval=1000,
       summaries_flush_secs=10):
     """Initialize the Trainer object.
 
@@ -62,16 +63,19 @@ def __init__(
       checkpoint_interval: int, the training step interval for saving
         checkpoint.
       log_interval: int, the training step interval for logging.
-      summary_interval: int, the training step interval for exporting to
-        tensorboard.
+      summary_log_interval: int, the training step interval for logging
+        metrics to tensorboard.
+      summary_export_interval: int, the training step interval for exporting
+        to tensorboard.
       summaries_flush_secs: int, the seconds for flushing to tensorboard.
     """
     self._root_dir = root_dir
     self._agent = agent
     self._random_network_distillation = random_network_distillation
     self._checkpoint_interval = checkpoint_interval
     self._log_interval = log_interval
-    self._summary_interval = summary_interval
+    self._summary_log_interval = summary_log_interval
+    self._summary_export_interval = summary_export_interval
 
     self._summary_writer = tf.summary.create_file_writer(
         self._root_dir, flush_millis=summaries_flush_secs * 1000)
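
Note on the split: the old single summary_interval was doing two jobs, so it becomes two knobs. summary_log_interval throttles how often _update_metrics recomputes and writes the data metrics (see the hunks below), while summary_export_interval feeds the record_if predicate that gates summary export inside train. A construction sketch with hypothetical values, eliding the Trainer arguments this diff does not show:

    trainer = Trainer(
        root_dir='/tmp/logs',          # assumed path
        agent=agent,                   # defined elsewhere
        checkpoint_interval=10000,     # checkpoint every 10k steps
        log_interval=100,              # console logging every 100 steps
        summary_log_interval=100,      # metric updates every 100 steps
        summary_export_interval=1000,  # tensorboard export every 1000 steps
        summaries_flush_secs=10)
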
@@ -108,6 +112,7 @@ def __init__(
     self._start_time = time.time()
     self._last_checkpoint_step = 0
     self._last_log_step = 0
+    self._summary_last_log_step = 0
 
   def _initialize_metrics(self):
     """Initializes metrics."""
@@ -117,35 +122,39 @@ def _initialize_metrics(self):
 
   def _update_metrics(self, experience, monitor_dict):
     """Updates metrics and exports to Tensorboard."""
-    is_action = ~experience.is_boundary()
-
-    self._data_action_mean.update_state(
-        experience.action, sample_weight=is_action)
-    self._data_reward_mean.update_state(
-        experience.reward, sample_weight=is_action)
-    self._num_trajectories.update_state(experience.is_first())
-
-    with tf.name_scope('default/'):
-      tf.summary.scalar(
-          name='data_action_mean',
-          data=self._data_action_mean.result(),
-          step=self._global_step)
-      tf.summary.scalar(
-          name='data_reward_mean',
-          data=self._data_reward_mean.result(),
-          step=self._global_step)
-      tf.summary.scalar(
-          name='num_trajectories',
-          data=self._num_trajectories.result(),
-          step=self._global_step)
-
-    for name_scope, d in monitor_dict.items():
-      with tf.name_scope(name_scope + '/'):
-        for key, value in d.items():
-          tf.summary.scalar(name=key, data=value, step=self._global_step)
-
-    tf.summary.histogram(
-        name='reward', data=experience.reward, step=self._global_step)
+    if (self._global_step.numpy() >=
+        self._summary_last_log_step + self._summary_log_interval):
+      is_action = ~experience.is_boundary()
+
+      self._data_action_mean.update_state(
+          experience.action, sample_weight=is_action)
+      self._data_reward_mean.update_state(
+          experience.reward, sample_weight=is_action)
+      self._num_trajectories.update_state(experience.is_first())
+
+      with tf.name_scope('default/'):
+        tf.summary.scalar(
+            name='data_action_mean',
+            data=self._data_action_mean.result(),
+            step=self._global_step)
+        tf.summary.scalar(
+            name='data_reward_mean',
+            data=self._data_reward_mean.result(),
+            step=self._global_step)
+        tf.summary.scalar(
+            name='num_trajectories',
+            data=self._num_trajectories.result(),
+            step=self._global_step)
+
+      for name_scope, d in monitor_dict.items():
+        with tf.name_scope(name_scope + '/'):
+          for key, value in d.items():
+            tf.summary.scalar(name=key, data=value, step=self._global_step)
+
+      tf.summary.histogram(
+          name='reward', data=experience.reward, step=self._global_step)
+
+      self._summary_last_log_step = self._global_step.numpy()
 
   def _reset_metrics(self):
     """Reset num_trajectories."""
@@ -176,8 +185,8 @@ def train(self, dataset_iter, monitor_dict, num_iterations):
     self._reset_metrics()
     # context management is implemented in decorator
     # pylint: disable=not-context-manager
-    with tf.summary.record_if(
-        lambda: tf.math.equal(self._global_step % self._summary_interval, 0)):
+    with tf.summary.record_if(lambda: tf.math.equal(
+        self._global_step % self._summary_export_interval, 0)):
       for _ in range(num_iterations):
         # When the data is not enough to fill in a batch, next(dataset_iter)
         # will throw StopIteration exception, logging a warning message instead
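
For context on the gate this hunk renames: tf.summary.record_if accepts a constant, a tensor, or a callable, and every summary op inside the context evaluates it before writing, so with the modulo predicate only steps divisible by summary_export_interval reach the event file. An isolated sketch of that behaviour (path and values are illustrative):

    import tensorflow as tf

    step = tf.Variable(0, dtype=tf.int64)
    writer = tf.summary.create_file_writer('/tmp/record_if_demo')
    with writer.as_default():
      with tf.summary.record_if(lambda: tf.math.equal(step % 1000, 0)):
        for _ in range(3000):
          # Only steps 0, 1000 and 2000 are actually written.
          tf.summary.scalar('loss', 0.5, step=step)
          step.assign_add(1)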