Make calculate_reward publicly exposed in compilation_runner

boomanaiden154 · boomanaiden154 · commit 8e066405f3e4 · 2025-03-09T06:29:01.000Z
This patch renames _calculate_reward to calculate_reward, turning it from
a module-private helper into a public function of compilation_runner. This
is necessary because we want to reuse it in TraceBlackboxEvaluator.
diff --git a/compiler_opt/rl/compilation_runner.py b/compiler_opt/rl/compilation_runner.py
@@ -43,7 +43,7 @@
     'Put temporary files into given directory and keep them past exit.')
 
 
-def _calculate_reward(policy: float, baseline: float) -> float:
+def calculate_reward(policy: float, baseline: float) -> float:
   # This assumption allows us to imply baseline + constant.DELTA > 0.
   assert baseline >= 0
   return 1 - (policy + constant.DELTA) / (baseline + constant.DELTA)
@@ -465,14 +465,14 @@ def collect_data(self,
       moving_average_reward = reward_stat[k].moving_average_reward
       sequence_example = _overwrite_trajectory_reward(
           sequence_example=sequence_example,
-          reward=_calculate_reward(
+          reward=calculate_reward(
               policy=policy_reward, baseline=moving_average_reward))
       sequence_example_list.append(sequence_example)
       reward_stat[k].moving_average_reward = (
           moving_average_reward * self._moving_average_decay_rate +
           policy_reward * (1 - self._moving_average_decay_rate))
       rewards.append(
-          _calculate_reward(policy=policy_reward, baseline=default_reward))
+          calculate_reward(policy=policy_reward, baseline=default_reward))
       policy_rewards.append(policy_reward)
       keys.append(k)
 
diff --git a/compiler_opt/rl/compilation_runner_test.py b/compiler_opt/rl/compilation_runner_test.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 """Tests for compiler_opt.rl.compilation_runner."""
 
+import math
 import os
 import string
 import subprocess
@@ -254,6 +255,14 @@ def stop_and_start():
     # should be at least 1 second due to the pause.
     self.assertGreater(time.time() - start_time, 1)
 
+  def test_calculate_reward_zero_delta(self):
+    reward = compilation_runner.calculate_reward(3, 0)
+    self.assertTrue(math.isfinite(reward))
+
+  def test_calculate_reward(self):
+    reward = compilation_runner.calculate_reward(1, 2)
+    self.assertAlmostEqual(reward, 0.5, 2)
+
 
 if __name__ == '__main__':
   tf.test.main()