Move TFLite Policy Generation to ES Workers

boomanaiden154 · web-flow · commit a7e3fdf5d9e6 · 2025-03-03T21:55:33.000-08:00
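This patch moves TFLite policy generation for ES from the main node to the workers. The conversion is reasonably expensive, and doing it serially on the main node takes quite some time with a reasonable number of perturbations; performing it on the workers parallelizes it. The learner now sends each perturbation as raw float32 bytes, and each worker converts those bytes into a TFLite policy using a base policy that it saves when it is constructed. Reviewers: mtrofin Reviewed By: mtrofin Pull Request: #448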
diff --git a/compiler_opt/es/blackbox_evaluator.py b/compiler_opt/es/blackbox_evaluator.py
@@ -23,7 +23,6 @@
 from compiler_opt.rl import corpus
 from compiler_opt.es import blackbox_optimizers
 from compiler_opt.distributed import buffered_scheduler
-from compiler_opt.rl import policy_saver
 
 
 class BlackboxEvaluator(metaclass=abc.ABCMeta):
@@ -35,8 +34,8 @@ def __init__(self, train_corpus: corpus.Corpus):
 
   @abc.abstractmethod
   def get_results(
-      self, pool: FixedWorkerPool, perturbations: list[policy_saver.Policy]
-  ) -> list[concurrent.futures.Future]:
+      self, pool: FixedWorkerPool,
+      perturbations: list[bytes]) -> list[concurrent.futures.Future]:
     raise NotImplementedError()
 
   @abc.abstractmethod
@@ -73,8 +72,8 @@ def __init__(self, train_corpus: corpus.Corpus,
     super().__init__(train_corpus)
 
   def get_results(
-      self, pool: FixedWorkerPool, perturbations: list[policy_saver.Policy]
-  ) -> list[concurrent.futures.Future]:
+      self, pool: FixedWorkerPool,
+      perturbations: list[bytes]) -> list[concurrent.futures.Future]:
     if not self._samples:
       for _ in range(self._total_num_perturbations):
         sample = self._train_corpus.sample(self._num_ir_repeats_within_worker)
@@ -120,13 +119,13 @@ def __init__(self, train_corpus: corpus.Corpus,
     self._baseline: float | None = None
 
   def get_results(
-      self, pool: FixedWorkerPool, perturbations: list[policy_saver.Policy]
-  ) -> list[concurrent.futures.Future]:
+      self, pool: FixedWorkerPool,
+      perturbations: list[bytes]) -> list[concurrent.futures.Future]:
     job_args = [{
         'modules': self._train_corpus.module_specs,
         'function_index_path': self._function_index_path,
         'bb_trace_path': self._bb_trace_path,
-        'tflite_policy': perturbation
+        'policy_as_bytes': perturbation,
     } for perturbation in perturbations]
 
     _, futures = buffered_scheduler.schedule_on_worker_pool(
@@ -145,7 +144,7 @@ def set_baseline(self, pool: FixedWorkerPool) -> None:
         'modules': self._train_corpus.module_specs,
         'function_index_path': self._function_index_path,
         'bb_trace_path': self._bb_trace_path,
-        'tflite_policy': None,
+        'policy_as_bytes': None,
     }]
 
     _, futures = buffered_scheduler.schedule_on_worker_pool(
diff --git a/compiler_opt/es/blackbox_learner.py b/compiler_opt/es/blackbox_learner.py
@@ -13,22 +13,18 @@
 # limitations under the License.
 """Class for coordinating blackbox optimization."""
 
-import os
 from absl import logging
 import dataclasses
 import gin
 import math
 import numpy as np
 import numpy.typing as npt
-import tempfile
 import tensorflow as tf
 from typing import Protocol
 
 from compiler_opt.distributed.worker import FixedWorkerPool
 from compiler_opt.es import blackbox_optimizers
-from compiler_opt.es import policy_utils
 from compiler_opt.rl import corpus
-from compiler_opt.rl import policy_saver
 from compiler_opt.es import blackbox_evaluator  # pylint: disable=unused-import
 
 # Pytype cannot pick up the pyi file for tensorflow.summary. Disable the error
@@ -128,7 +124,6 @@ class BlackboxLearner:
   def __init__(self,
                blackbox_opt: blackbox_optimizers.BlackboxOptimizer,
                train_corpus: corpus.Corpus,
-               tf_policy_path: str,
                output_dir: str,
                policy_saver_fn: PolicySaverCallableType,
                model_weights: npt.NDArray[np.float32],
@@ -141,7 +136,6 @@ def __init__(self,
     Args:
       blackbox_opt: the blackbox optimizer to use
       train_corpus: the training corpus to utiilize
-      tf_policy_path: where to write the tf policy
       output_dir: the directory to write all outputs
       policy_saver_fn: function to save a policy to cns
       model_weights: the weights of the current model
@@ -151,7 +145,6 @@ def __init__(self,
     """
     self._blackbox_opt = blackbox_opt
     self._train_corpus = train_corpus
-    self._tf_policy_path = tf_policy_path
     self._output_dir = output_dir
     self._policy_saver_fn = policy_saver_fn
     self._model_weights = model_weights
@@ -237,29 +230,6 @@ def _save_model(self) -> None:
   def get_model_weights(self) -> npt.NDArray[np.float32]:
     return self._model_weights
 
-  # TODO: The current conversion is inefficient (performance-wise). We should
-  # consider doing this on the worker side.
-  def _get_policy_from_perturbation(
-      self, perturbation: npt.NDArray[np.float32]) -> policy_saver.Policy:
-    sm = tf.saved_model.load(self._tf_policy_path)
-    # devectorize the perturbation
-    policy_utils.set_vectorized_parameters_for_policy(sm, perturbation)
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-      sm_dir = os.path.join(tmpdir, 'sm')
-      tf.saved_model.save(sm, sm_dir, signatures=sm.signatures)
-      src = os.path.join(self._tf_policy_path, policy_saver.OUTPUT_SIGNATURE)
-      dst = os.path.join(sm_dir, policy_saver.OUTPUT_SIGNATURE)
-      tf.io.gfile.copy(src, dst)
-
-      # convert to tflite
-      tfl_dir = os.path.join(tmpdir, 'tfl')
-      policy_saver.convert_mlgo_model(sm_dir, tfl_dir)
-
-      # create and return policy
-      policy_obj = policy_saver.Policy.from_filesystem(tfl_dir)
-      return policy_obj
-
   def run_step(self, pool: FixedWorkerPool) -> None:
     """Run a single step of blackbox learning.
     This does not instantaneously return due to several I/O
@@ -275,12 +245,16 @@ def run_step(self, pool: FixedWorkerPool) -> None:
           p for p in initial_perturbations for p in (p, -p)
       ]
 
-    perturbations_as_policies = [
-        self._get_policy_from_perturbation(perturbation)
+    # TODO(boomanaiden154): This should be adding the perturbation to
+    # the existing model weights. That currently results in the model
+    # weights all being NaN, presumably due to rewards not being scaled for
+    # the regalloc_trace problem.
+    perturbations_as_bytes = [
+        perturbation.astype(np.float32).tobytes()
         for perturbation in initial_perturbations
     ]
 
-    results = self._evaluator.get_results(pool, perturbations_as_policies)
+    results = self._evaluator.get_results(pool, perturbations_as_bytes)
     rewards = self._evaluator.get_rewards(results)
 
     num_pruned = _prune_skipped_perturbations(initial_perturbations, rewards)
diff --git a/compiler_opt/es/blackbox_learner_test.py b/compiler_opt/es/blackbox_learner_test.py
@@ -24,6 +24,10 @@
 from tf_agents.networks import actor_distribution_network
 from tf_agents.policies import actor_policy
 
+# Pytype cannot pick up the pyi file for tensorflow.summary. Disable the error
+# here as these errors are false positives.
+# pytype: disable=pyi-error
+
 from compiler_opt.distributed.local import local_worker_manager
 from compiler_opt.es import blackbox_learner
 from compiler_opt.es import policy_utils
@@ -124,7 +128,6 @@ def _policy_saver_fn(parameters: npt.NDArray[np.float32],
             extra_params=None,
             step_size=1),
         train_corpus=self._cps,
-        tf_policy_path=os.path.join(policy_save_path, policy_name),
         output_dir=output_dir,
         policy_saver_fn=_policy_saver_fn,
         model_weights=init_params,
diff --git a/compiler_opt/es/blackbox_test_utils.py b/compiler_opt/es/blackbox_test_utils.py
@@ -54,10 +54,10 @@ def __init__(self, arg, *, kwarg):
     del kwarg  # Unused.
     self._function_value = 0.0
 
-  def compile_corpus_and_evaluate(
-      self, modules: Collection[corpus.ModuleSpec], function_index_path: str,
-      bb_trace_path: str, tflite_policy: policy_saver.Policy | None) -> float:
-    if modules and function_index_path and bb_trace_path and tflite_policy:
+  def compile_corpus_and_evaluate(self, modules: Collection[corpus.ModuleSpec],
+                                  function_index_path: str, bb_trace_path: str,
+                                  policy_as_bytes: bytes | None) -> float:
+    if modules and function_index_path and bb_trace_path and policy_as_bytes:
       self._function_value += 1
       return self._function_value
     else:
diff --git a/compiler_opt/es/es_trainer_lib.py b/compiler_opt/es/es_trainer_lib.py
@@ -20,16 +20,17 @@
 import tensorflow as tf
 import os
 
+# Pytype cannot pick up the pyi file for tensorflow.summary. Disable the error
+# here as these errors are false positives.
+# pytype: disable=pyi-error
+
 from compiler_opt.distributed.local import local_worker_manager
 from compiler_opt.es import blackbox_optimizers
 from compiler_opt.es import gradient_ascent_optimization_algorithms
 from compiler_opt.es import blackbox_learner
 from compiler_opt.es import policy_utils
-from compiler_opt.rl import policy_saver
 from compiler_opt.rl import corpus
 
-POLICY_NAME = "policy"
-
 FLAGS = flags.FLAGS
 
 _GRAD_REG_ALPHA = flags.DEFINE_float(
@@ -79,11 +80,6 @@ def train(additional_compilation_flags=(),
 
   # Construct the policy and upload it
   policy = policy_utils.create_actor_policy()
-  saver = policy_saver.PolicySaver({POLICY_NAME: policy})
-
-  # Save the policy
-  policy_save_path = os.path.join(_OUTPUT_PATH.value, "policy")
-  saver.save(policy_save_path)
 
   # Get initial parameter
   if not _PRETRAINED_POLICY_PATH.value:
@@ -201,7 +197,6 @@ def train(additional_compilation_flags=(),
   learner = blackbox_learner.BlackboxLearner(
       blackbox_opt=blackbox_optimizer,
       train_corpus=cps,
-      tf_policy_path=os.path.join(policy_save_path, POLICY_NAME),
       output_dir=_OUTPUT_PATH.value,
       policy_saver_fn=policy_saver_function,
       model_weights=init_current_input,
@@ -216,8 +211,10 @@ def train(additional_compilation_flags=(),
   logging.info("Ready to train: running for %d steps.",
                learner_config.total_steps)
 
-  with worker_manager_class(worker_class,
-                            learner_config.total_num_perturbations) as pool:
+  with worker_manager_class(
+      worker_class,
+      learner_config.total_num_perturbations,
+      worker_kwargs=dict(gin_config=gin.operative_config_str())) as pool:
     for _ in range(learner_config.total_steps):
       learner.run_step(pool)
 
diff --git a/compiler_opt/es/regalloc_trace/regalloc_trace_worker.py b/compiler_opt/es/regalloc_trace/regalloc_trace_worker.py
@@ -26,12 +26,14 @@
 import json
 import concurrent.futures
 import tempfile
+import shutil
 
 import gin
 
 from compiler_opt.rl import corpus
 from compiler_opt.distributed import worker
 from compiler_opt.rl import policy_saver
+from compiler_opt.es import policy_utils
 
 
 @gin.configurable
@@ -44,8 +46,16 @@ class RegallocTraceWorker(worker.Worker):
   segments.
   """
 
-  def __init__(self, clang_path: str, basic_block_trace_model_path: str,
-               thread_count: int, corpus_path: str):
+  def _setup_base_policy(self):
+    self._tf_base_temp_dir = tempfile.mkdtemp()
+    policy = policy_utils.create_actor_policy()
+    saver = policy_saver.PolicySaver({"policy": policy})
+    saver.save(self._tf_base_temp_dir)
+    self._tf_base_policy_path = os.path.join(self._tf_base_temp_dir, "policy")
+
+  def __init__(self, *, gin_config: str, clang_path: str,
+               basic_block_trace_model_path: str, thread_count: int,
+               corpus_path: str):
     """Initializes the RegallocTraceWorker class.
 
     Args:
@@ -64,6 +74,16 @@ def __init__(self, clang_path: str, basic_block_trace_model_path: str,
     self._thread_count = thread_count
     self._corpus_path = corpus_path
 
+    gin.parse_config(gin_config)
+    self._setup_base_policy()
+
+  # Deletion here is best effort as it occurs at GC time. If the shutdown is
+  # forced, cleanup might not happen as expected. This does not matter too
+  # much though as resource leakage will be small, and any cloud setups will
+  # have tempdirs wiped periodically.
+  def __del__(self):
+    shutil.rmtree(self._tf_base_temp_dir)
+
   def _compile_module(self, module_to_compile: corpus.ModuleSpec,
                       output_directory: str, tflite_policy_path: str | None):
     command_vector = [self._clang_path]
@@ -97,20 +117,13 @@ def _compile_module(self, module_to_compile: corpus.ModuleSpec,
     subprocess.run(command_vector, check=True, capture_output=True)
 
   def _build_corpus(self, modules: Collection[corpus.ModuleSpec],
-                    output_directory: str,
-                    tflite_policy: policy_saver.Policy | None):
-    with tempfile.TemporaryDirectory() as tflite_policy_dir:
-      if tflite_policy:
-        tflite_policy.to_filesystem(tflite_policy_dir)
-      else:
-        tflite_policy_dir = None
-
-      with concurrent.futures.ThreadPoolExecutor(
-          max_workers=self._thread_count) as thread_pool:
-        compile_futures = [
-            thread_pool.submit(self._compile_module, module, output_directory,
-                               tflite_policy_dir) for module in modules
-        ]
+                    output_directory: str, tflite_policy_path: str | None):
+    with concurrent.futures.ThreadPoolExecutor(
+        max_workers=self._thread_count) as thread_pool:
+      compile_futures = [
+          thread_pool.submit(self._compile_module, module, output_directory,
+                             tflite_policy_path) for module in modules
+      ]
 
     for future in compile_futures:
       if future.exception() is not None:
@@ -158,11 +171,16 @@ def _evaluate_corpus(self, module_directory: str, function_index_path: str,
 
     return segment_costs
 
-  def compile_corpus_and_evaluate(
-      self, modules: Collection[corpus.ModuleSpec], function_index_path: str,
-      bb_trace_path: str, tflite_policy: policy_saver.Policy | None) -> float:
+  def compile_corpus_and_evaluate(self, modules: Collection[corpus.ModuleSpec],
+                                  function_index_path: str, bb_trace_path: str,
+                                  policy_as_bytes: bytes | None) -> float:
     with tempfile.TemporaryDirectory() as compilation_dir:
-      self._build_corpus(modules, compilation_dir, tflite_policy)
+      tflite_policy_path = None
+      if policy_as_bytes is not None:
+        tflite_policy_path = policy_utils.convert_to_tflite(
+            policy_as_bytes, compilation_dir, self._tf_base_policy_path)
+
+      self._build_corpus(modules, compilation_dir, tflite_policy_path)
 
       segment_costs = self._evaluate_corpus(compilation_dir,
                                             function_index_path, bb_trace_path)
diff --git a/compiler_opt/es/regalloc_trace/regalloc_trace_worker_test.py b/compiler_opt/es/regalloc_trace/regalloc_trace_worker_test.py