Add a test for trainer metrics (#399)

boomanaiden154 · web-flow · commit fffde334b2ba · 2024-12-18T22:04:54.000-08:00
This patch adds a test for the metrics in the trainer class to ensure that they are actually set. The test directly inspects the metric state rather than also verifying that the values are logged to TensorBoard, but this should be good enough, and it is definitely better than what we had before (nothing).
diff --git a/compiler_opt/rl/trainer_test.py b/compiler_opt/rl/trainer_test.py
@@ -28,8 +28,10 @@
 
 
 def _create_test_data(batch_size, sequence_length):
+  # Use the value zero, which signals the beginning of a sequence; this
+  # allows us to test the num_trajectories metric.
   test_trajectory = trajectory.Trajectory(
-      step_type=tf.fill([batch_size, sequence_length], 1),
+      step_type=tf.fill([batch_size, sequence_length], 0),
       observation={
           'callee_users':
               tf.fill([batch_size, sequence_length],
@@ -131,6 +133,27 @@ def test_training_with_multiple_times(self):
     test_trainer.train(dataset_iter, monitor_dict, num_iterations=10)
     self.assertEqual(20, test_trainer._global_step.numpy())
 
+  def test_training_metrics(self):
+    test_agent = behavioral_cloning_agent.BehavioralCloningAgent(
+        self._time_step_spec,
+        self._action_spec,
+        self._network,
+        tf.compat.v1.train.AdadeltaOptimizer(),
+        num_outer_dims=2)
+    test_trainer = trainer.Trainer(
+        root_dir=self.get_temp_dir(), agent=test_agent, summary_log_interval=1)
+    self.assertEqual(0, test_trainer._data_action_mean.result().numpy())
+    self.assertEqual(0, test_trainer._data_reward_mean.result().numpy())
+    self.assertEqual(0, test_trainer._num_trajectories.result().numpy())
+
+    dataset_iter = _create_test_data(batch_size=3, sequence_length=3)
+    monitor_dict = {'default': {'test': 1}}
+    test_trainer.train(dataset_iter, monitor_dict, num_iterations=10)
+
+    self.assertEqual(1, test_trainer._data_action_mean.result().numpy())
+    self.assertEqual(2, test_trainer._data_reward_mean.result().numpy())
+    self.assertEqual(90, test_trainer._num_trajectories.result().numpy())
+
   def test_inference(self):
     test_agent = behavioral_cloning_agent.BehavioralCloningAgent(
         self._time_step_spec,