[BC] Trainer fixes (google#444)

tvmarino · web-flow · commit 935f5fab2bfa · 2025-02-18T15:09:26.000-05:00
Changes weighted_bc_trainer_lib and weighted_bc_trainer so that only
`_train_step` executes in Graph mode; eager execution is enabled for
everything else. Also fixes a possible division by 0 in weights creation.
diff --git a/compiler_opt/rl/imitation_learning/generate_bc_trajectories_lib.py b/compiler_opt/rl/imitation_learning/generate_bc_trajectories_lib.py
@@ -825,7 +825,7 @@ def select_best_exploration(
       loaded_module_spec: corpus.LoadedModuleSpec,
   ) -> tuple[tuple[int, ProfilingDictValueType, ProfilingDictValueType],
              tf.train.SequenceExample]:
-
+    logging.set_verbosity('info')
     num_calls = len(self._tf_policy_action)
     time_call_compiler = 0
     logging.info('Processing module: %s', loaded_module_spec.name)
diff --git a/compiler_opt/rl/imitation_learning/weighted_bc_trainer.py b/compiler_opt/rl/imitation_learning/weighted_bc_trainer.py
@@ -13,20 +13,20 @@
 # limitations under the License.
 """Module for training an inlining policy with imitation learning."""
 
-from absl import app
-from absl import flags
-from absl import logging
+import json
 
 import gin
-import json
-from compiler_opt.rl import policy_saver
+import tensorflow as tf
+from absl import app, flags, logging
 
+from compiler_opt.rl import policy_saver
+from compiler_opt.rl.imitation_learning.weighted_bc_trainer_lib import (
+    ImitationLearningTrainer,
+    TrainingWeights,
+    WrapKerasModel,
+)
 from compiler_opt.rl.inlining import imitation_learning_config as config
 
-from compiler_opt.rl.imitation_learning.weighted_bc_trainer_lib import TrainingWeights
-from compiler_opt.rl.imitation_learning.weighted_bc_trainer_lib import ImitationLearningTrainer
-from compiler_opt.rl.imitation_learning.weighted_bc_trainer_lib import WrapKerasModel
-
 _TRAINING_DATA = flags.DEFINE_multi_string(
     'training_data', None, 'Training data for one step of BC-Max')
 _PROFILING_DATA = flags.DEFINE_multi_string(
@@ -78,6 +78,8 @@ def main(_):
       _GIN_FILES.value, _GIN_BINDINGS.value, skip_unknown=False)
   logging.info(gin.config_str())
 
+  tf.compat.v1.enable_eager_execution()  # pytype: disable=module-attr
+
   train()
 
 
diff --git a/compiler_opt/rl/imitation_learning/weighted_bc_trainer_lib.py b/compiler_opt/rl/imitation_learning/weighted_bc_trainer_lib.py
@@ -177,8 +177,8 @@ def update_weights(
         bucket_loss += np.maximum(prof[SequenceExampleFeatureNames.regret], 0)
       losses_per_bucket.append(bucket_loss)
     logging.info('Losses per bucket: %s', losses_per_bucket)
-    losses_per_bucket_normalized = losses_per_bucket / np.max(
-        np.abs(losses_per_bucket))
+    losses_per_bucket_normalized = losses_per_bucket / (
+        np.max(np.abs(losses_per_bucket)) + 1e-6)
     probs_t = self._get_exp_gradient_step(losses_per_bucket_normalized, 1.0)
     self._round += 1
     self._probs = (self._probs * (self._round - 1) + probs_t) / self._round
@@ -228,6 +228,7 @@ def __init__(
       self._trainig_weights = TrainingWeights()
     self._features_to_remove = features_to_remove
     self._global_step = 0
+    self._is_model_init = False
 
     observation_spec, action_spec = config.get_inlining_signature_spec()
     sequence_features = {
@@ -322,13 +323,12 @@ def load_dataset(self, filepaths: list[str]) -> tf.data.TFRecordDataset:
                 self._make_feature_label, num_processors=self._num_processors))
     dataset = dataset.unbatch().shuffle(self._shuffle_size).batch(
         self._batch_size, drop_remainder=True)  # 4194304
-    dataset = dataset.apply(tf.data.experimental.ignore_errors())
 
     return dataset
 
   def _create_weights(self, labels, weights_arr):
-    p_norm = min(weights_arr)  # check that this should be min
-    weights_arr = tf.map_fn(lambda x: p_norm / x, tf.constant(weights_arr))
+    p_norm = tf.reduce_min(weights_arr)
+    weights_arr = tf.math.divide(p_norm, weights_arr)
     int_labels = tf.cast(labels, tf.int32)
     return tf.gather(weights_arr, int_labels)
 
@@ -365,6 +365,7 @@ def _update_metrics(self, y_true, y_pred, loss, weights):
           tf.summary.scalar(
               name=metric.name, data=metric.result(), step=self._global_step)
 
+  @tf.function
   def _train_step(self, example, label, weight_labels, weights_arr):
     y_true = label[:, 0]
     y_true = tf.reshape(y_true, [self._batch_size, 1])
@@ -381,10 +382,15 @@ def train(self, filepaths: list[str]):
     """Train the model for number of the specified number of epochs."""
     dataset = self.load_dataset(filepaths)
     logging.info('Datasets loaded from %s', str(filepaths))
-    input_shape = dataset.element_spec[0].shape[-1]
-    self._initialize_model(input_shape=input_shape)
-    self._initialize_metrics()
-    for _ in range(self._epochs):
+    input_shape = int(dataset.element_spec[0].shape[-1])
+    if not self._is_model_init:
+      self._initialize_model(input_shape=input_shape)
+      self._initialize_metrics()
+      self._is_model_init = True
+      self._global_step = 0
+    logging.info('Training started')
+    for epoch in range(self._epochs):
+      logging.info('Epoch %s', epoch)
       for metric in self._metrics:
         metric.reset_states()
       for step, (x_batch_train, y_batch_train) in enumerate(dataset):