Add base class for WorkerPoolManagers (#470)

boomanaiden154 · web-flow · commit f943179cb1e6 · 2025-03-10T17:48:51.000-07:00
This patch introduces a base class that worker pool managers can inherit
from. This enforces the interface, particularly for instantiation, which
has caused us a bit of trouble following some recent refactorings. I've
validated that this patch would have caught the issues that have already
been fixed.
diff --git a/compiler_opt/distributed/local/local_worker_manager.py b/compiler_opt/distributed/local/local_worker_manager.py
@@ -38,8 +38,8 @@
 from absl import flags, logging
 # pylint: disable=unused-import
 from compiler_opt.distributed import worker
+from compiler_opt.distributed import worker_manager
 
-from contextlib import AbstractContextManager
 from multiprocessing import connection
 from typing import Any
 from collections.abc import Callable
@@ -281,7 +281,7 @@ def close_local_worker_pool(pool: worker.FixedWorkerPool):
     stub.join()
 
 
-class LocalWorkerPoolManager(AbstractContextManager):
+class LocalWorkerPoolManager(worker_manager.WorkerManager):
   """A pool of workers hosted on the local machines, each in its own process."""
 
   def __init__(self,
diff --git a/compiler_opt/distributed/worker_manager.py b/compiler_opt/distributed/worker_manager.py
@@ -0,0 +1,42 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The interface for WorkerManager."""
+
+import abc
+from contextlib import AbstractContextManager
+import pickle
+
+from compiler_opt.distributed import worker
+
+
+class WorkerManager(AbstractContextManager, metaclass=abc.ABCMeta):
+  """An interface that implementations should derive from."""
+
+  @abc.abstractmethod
+  def __init__(self,
+               worker_class: type[worker.Worker],
+               pickle_func=pickle.dumps,
+               *,
+               count: int | None,
+               worker_args: tuple = (),
+               worker_kwargs: dict | None = None):
+    raise ValueError("Not Implemented")
+
+  @abc.abstractmethod
+  def __enter__(self) -> worker.FixedWorkerPool:
+    raise ValueError("Not Implemented")
+
+  @abc.abstractmethod
+  def __exit__(self, *args):
+    raise ValueError("Not Implemented")
diff --git a/compiler_opt/es/es_trainer_lib.py b/compiler_opt/es/es_trainer_lib.py
@@ -24,6 +24,7 @@
 # here as these errors are false positives.
 # pytype: disable=pyi-error
 
+from compiler_opt.distributed import worker_manager
 from compiler_opt.distributed.local import local_worker_manager
 from compiler_opt.es import blackbox_optimizers
 from compiler_opt.es import gradient_ascent_optimization_algorithms
@@ -68,7 +69,9 @@ def train(additional_compilation_flags=(),
           beta2=0.999,
           momentum=0.0,
           gradient_ascent_optimizer_type=GradientAscentOptimizerType.ADAM,
-          worker_manager_class=local_worker_manager.LocalWorkerPoolManager):
+          worker_manager_class: type[
+              worker_manager.WorkerManager] = local_worker_manager
+          .LocalWorkerPoolManager):
   """Train with ES."""
 
   if not _TRAIN_CORPORA.value:
diff --git a/compiler_opt/rl/distributed/ppo_collect_lib.py b/compiler_opt/rl/distributed/ppo_collect_lib.py
@@ -33,6 +33,7 @@
 from tf_agents.utils import common
 from tf_agents.trajectories import trajectory
 
+from compiler_opt.distributed import worker_manager
 from compiler_opt.rl import gin_external_configurables  # pylint: disable=unused-import
 from compiler_opt.rl import local_data_collector
 from compiler_opt.rl import corpus
@@ -102,7 +103,8 @@ def observe(self, result: compilation_runner.CompilationResult) -> None:
 
 def collect(corpus_path: str, replay_buffer_server_address: str,
             variable_container_server_address: str, num_workers: int | None,
-            worker_manager_class, sequence_length: int) -> None:
+            worker_manager_class: type[worker_manager.WorkerManager],
+            sequence_length: int) -> None:
   """Collects experience using a policy updated after every episode.
 
   Args:
diff --git a/compiler_opt/rl/distributed/ppo_eval_lib.py b/compiler_opt/rl/distributed/ppo_eval_lib.py
@@ -27,6 +27,7 @@
 from tf_agents.train.utils import train_utils
 from tf_agents.utils import common
 
+from compiler_opt.distributed import worker_manager
 from compiler_opt.rl import data_reader
 from compiler_opt.rl import local_data_collector
 from compiler_opt.rl import gin_external_configurables  # pylint: disable=unused-import
@@ -39,7 +40,7 @@
 
 def evaluate(root_dir: str, corpus_path: str,
              variable_container_server_address: str, num_workers: int | None,
-             worker_manager_class):
+             worker_manager_class: type[worker_manager.WorkerManager]):
   """Evaluate a given policy on the given corpus.
 
   Args:
diff --git a/compiler_opt/rl/imitation_learning/generate_bc_trajectories_lib.py b/compiler_opt/rl/imitation_learning/generate_bc_trajectories_lib.py
@@ -43,6 +43,7 @@
 from compiler_opt.rl import env
 
 from compiler_opt.distributed import worker
+from compiler_opt.distributed import worker_manager
 from compiler_opt.distributed import buffered_scheduler
 from compiler_opt.distributed.local import local_worker_manager
 
@@ -900,7 +901,9 @@ def gen_trajectories(
     profiling_file_path: str | None = None,
     worker_wait_sec: float | None = None,
     worker_class_type=ModuleWorker,
-    worker_manager_class=local_worker_manager.LocalWorkerPoolManager,
+    worker_manager_class: type[
+        worker_manager.WorkerManager] = local_worker_manager
+    .LocalWorkerPoolManager,
 ):
   """Generates all trajectories for imitation learning training.
 
diff --git a/compiler_opt/rl/train_locally.py b/compiler_opt/rl/train_locally.py
@@ -27,6 +27,7 @@
 from tf_agents.agents import tf_agent
 from tf_agents.system import system_multiprocessing as multiprocessing
 
+from compiler_opt.distributed import worker_manager
 from compiler_opt.distributed.local.local_worker_manager import LocalWorkerPoolManager
 from compiler_opt.rl import agent_config
 from compiler_opt.rl import best_trajectory
@@ -58,7 +59,8 @@
 
 
 @gin.configurable
-def train_eval(worker_manager_class=LocalWorkerPoolManager,
+def train_eval(worker_manager_class: type[
+    worker_manager.WorkerManager] = LocalWorkerPoolManager,
                agent_config_type=agent_config.PPOAgentConfig,
                warmstart_policy_dir=None,
                num_policy_iterations=0,
diff --git a/compiler_opt/tools/generate_default_trace.py b/compiler_opt/tools/generate_default_trace.py
@@ -26,6 +26,7 @@
 import tensorflow as tf
 
 from compiler_opt.distributed import worker
+from compiler_opt.distributed import worker_manager
 from compiler_opt.distributed import buffered_scheduler
 from compiler_opt.distributed.local import local_worker_manager
 
@@ -112,8 +113,9 @@ def main(_):
   generate_trace()
 
 
-def generate_trace(
-    worker_manager_class=local_worker_manager.LocalWorkerPoolManager):
+def generate_trace(worker_manager_class: type[
+    worker_manager.WorkerManager] = local_worker_manager.LocalWorkerPoolManager
+                  ):
 
   config = registry.get_configuration()