[BC] weighted_bc_trainer_lib.py bug fixes (#445)

tvmarino · web-flow · commit 547903f7bc19 · 2025-02-19T21:31:31.000-05:00
Fixed ```metric.reset_states() -> metric.reset_state()```, removed a
debugging line in ```weighted_bc_trainer_lib```, and made slight naming
changes for ```generate_bc_trajectories```.
diff --git a/compiler_opt/rl/imitation_learning/weighted_bc_trainer_lib.py b/compiler_opt/rl/imitation_learning/weighted_bc_trainer_lib.py
@@ -392,7 +392,7 @@ def train(self, filepaths: list[str]):
     for epoch in range(self._epochs):
       logging.info('Epoch %s', epoch)
       for metric in self._metrics:
-        metric.reset_states()
+        metric.reset_state()
       for step, (x_batch_train, y_batch_train) in enumerate(dataset):
         weight_labels = [y_batch_train[:, 1]]
         weights_arr = [self._trainig_weights.get_weights()]
@@ -411,8 +411,6 @@ def train(self, filepaths: list[str]):
                          (step + 1) * self._batch_size)
             for metric in self._metrics:
               logging.info('%s: %s', metric.name, metric.result())
-        if step > 1000:  # debugging
-          break
 
     if self._save_model_dir:
       keras.models.save_model(self._model,
diff --git a/compiler_opt/rl/inlining/imitation_learning_config.py b/compiler_opt/rl/inlining/imitation_learning_config.py
@@ -81,7 +81,7 @@ def get_task_type() -> type[env.InliningForSizeTask]:
 
 
 @gin.register
-def greedy_policy(state: time_step.TimeStep):
+def default_policy(state: time_step.TimeStep):
   """Greedy policy playing the inlining_default action."""
   return np.array(state.observation['inlining_default'])
 
diff --git a/compiler_opt/rl/inlining/imitation_learning_runner.py b/compiler_opt/rl/inlining/imitation_learning_runner.py
@@ -37,7 +37,9 @@ def main(_):
   logging.info(gin.config_str())
 
   generate_bc_trajectories_lib.gen_trajectories(
-      callable_policies=[imitation_learning_config.greedy_policy],
+      # Set callable policies here directly since callables can not be
+      # gin configured when they are pickled.
+      callable_policies=[imitation_learning_config.default_policy],
       explore_on_features={
           'is_callee_avail_external':
               imitation_learning_config.explore_on_avail_external