google
diff --git a/compiler_opt/rl/env.py b/compiler_opt/rl/env.py
Lines changed: 362 additions & 0 deletions
@@ -0,0 +1,362 @@
+# coding=utf-8
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Gymlike environment definition for MLGO."""
+
+from __future__ import annotations
+
+import math
+import subprocess
+import abc
+import contextlib
+import io
+import os
+import tempfile
+from typing import Any, Generator, List, Optional, Tuple, Type
+
+import numpy as np
+
+from compiler_opt.rl import corpus
+from compiler_opt.rl import log_reader
+
+# Type alias for an observation. Observations produced in this module are
+# plain dicts keyed by the *_KEY constants below.
+OBS_T = Any
+
+# Keys of the observation dictionary.
+OBS_KEY = 'obs'
+REWARD_KEY = 'reward'
+SCORE_POLICY_KEY = 'score_policy'
+SCORE_DEFAULT_KEY = 'score_default'
+CONTEXT_KEY = 'context'
+MODULE_NAME_KEY = 'module_name'
+OBS_ID_KEY = 'obs_id'
+STEP_TYPE_KEY = 'step_type'
+
+# Values stored under STEP_TYPE_KEY.
+FIRST_STEP_STR = 'first'
+MID_STEP_STR = 'mid'
+LAST_STEP_STR = 'last'
+
+# Template for the observation returned once the clang process has exited or
+# its observation stream is exhausted. This object is module-level state:
+# anything that mutates it in place is visible to every other user.
+_TERMINAL_OBS = {
+    OBS_KEY: {},
+    REWARD_KEY: 0.0,
+    SCORE_POLICY_KEY: 0.0,
+    SCORE_DEFAULT_KEY: 0.0,
+    CONTEXT_KEY: '',
+    MODULE_NAME_KEY: '',
+    OBS_ID_KEY: -1,
+    STEP_TYPE_KEY: LAST_STEP_STR,
+}
+
+# Base file name of the interactive-mode pipes; clang_session appends '.in' and
+# '.out' to it to name the two FIFOs it creates in the temp directory.
+_INTERACTIVE_PIPE_FILE_BASE = 'interactive-pipe-base'
+
+
+class MLGOTask(metaclass=abc.ABCMeta):
+  """Interface every MLGO task has to implement.
+
+  A Task is a learning problem in LLVM. Examples:
+   - inlining-for-size
+   - inlining-for-speed
+   - register allocation (for speed)
+
+  A concrete Task type knows how to build a module for its problem (either
+  interactively or non-interactively) and how to score the build result.
+  """
+
+  @abc.abstractmethod
+  def get_cmdline(self, clang_path: str, base_args: List[str],
+                  interactive_base_path: Optional[str],
+                  working_dir: str) -> List[str]:
+    """Builds the clang command line for this task.
+
+    The returned list[str] is meant to be handed to subprocess.run (or an
+    equivalent) to execute clang.
+
+    Args:
+      clang_path: path to the clang executable.
+      base_args: base arguments for building the module. These should generally
+        be left untouched and simply included in the result.
+      interactive_base_path: path to the interactive pipe base, or None when
+        clang should not be run interactively.
+      working_dir: directory into which all compilation artifacts should be
+        written. It is a temp directory whose lifetime is managed outside of
+        the Task.
+
+    Returns:
+      The constructed command line.
+    """
+
+  @abc.abstractmethod
+  def get_module_scores(self, working_dir: str) -> dict[str, float]:
+    """Computes the score of every context in the built module.
+
+    Implementations should not depend on whether the module was built with the
+    default heuristic or with a ML policy.
+
+    Args:
+      working_dir: the directory that was passed as working_dir to get_cmdline;
+        used to recover binaries/artifacts from the build.
+
+    Returns:
+      A dictionary mapping [context name] -> [score].
+    """
+
+
+class ClangProcess:
+  """Thin wrapper pairing a clang process with its scoring callback.
+
+  It bundles the running process together with the function that returns the
+  scores of the binary that process produces.
+  """
+
+  def __init__(self, proc, get_scores_fn, module_name):
+    """Stores the process handle, the scoring callback and the module name.
+
+    Args:
+      proc: the process handle; it must provide wait(timeout=...).
+      get_scores_fn: zero-argument callable returning the scores.
+      module_name: name of the module being compiled.
+    """
+    self._module_name = module_name
+    self._get_scores_fn = get_scores_fn
+    self._proc = proc
+
+  def get_scores(self, timeout: Optional[int] = None):
+    """Waits for the process to finish, then returns the scores.
+
+    Args:
+      timeout: forwarded to the process' wait(); None waits indefinitely.
+
+    Returns:
+      Whatever the scoring callback returns.
+    """
+    self._proc.wait(timeout=timeout)
+    scores = self._get_scores_fn()
+    return scores
+
+
+class InteractiveClang(ClangProcess):
+  """Wrapper around clang's interactive mode.
+
+  Observations are decoded from `reader_pipe` through
+  log_reader.read_log_from_file, and actions are written to `writer_pipe` as
+  the raw bytes of a numpy array.
+  """
+
+  def __init__(
+      self,
+      proc,
+      get_scores_fn,
+      module_name: str,
+      reader_pipe: io.BufferedReader,
+      writer_pipe: io.BufferedWriter,
+  ):
+    """Sets up the observation stream and the per-instance terminal observation.
+
+    Args:
+      proc: the clang process handle (must provide poll() and wait()).
+      get_scores_fn: zero-argument callable returning the module scores.
+      module_name: name of the module being compiled.
+      reader_pipe: pipe from which clang's observations are read.
+      writer_pipe: pipe to which actions are written.
+    """
+    super().__init__(proc, get_scores_fn, module_name)
+    self._reader_pipe = reader_pipe
+    self._writer_pipe = writer_pipe
+    self._obs_gen = log_reader.read_log_from_file(self._reader_pipe)
+
+    self._is_first_obs = True
+
+    # Build a private copy of the template. Aliasing the module-level
+    # _TERMINAL_OBS and writing into it would leak this module's name (and any
+    # later in-place update made by a consumer) into every other instance.
+    self._terminal_obs = {
+        **_TERMINAL_OBS,
+        OBS_KEY: {},
+        MODULE_NAME_KEY: module_name,
+    }
+
+  def _running(self) -> bool:
+    """Returns True while the clang process has not exited."""
+    return self._proc.poll() is None
+
+  def get_observation(self) -> OBS_T:
+    """Reads the next observation emitted by clang.
+
+    Returns:
+      A dict keyed by the *_KEY constants. When the process has already exited
+      or the observation stream is exhausted, this instance's terminal
+      observation (step type LAST_STEP_STR) is returned instead.
+    """
+    if not self._running():
+      return self._terminal_obs
+
+    def _get_step_type():
+      # Only the first successfully read observation is tagged as 'first'.
+      step_type = FIRST_STEP_STR if self._is_first_obs else MID_STEP_STR
+      self._is_first_obs = False
+      return step_type
+
+    try:
+      obs: log_reader.ObservationRecord = next(self._obs_gen)
+    except StopIteration:
+      return self._terminal_obs
+
+    tv_dict = {}
+    for fv in obs.feature_values:
+      # The shape is passed positionally: the `newshape` keyword of np.reshape
+      # is deprecated in recent NumPy releases.
+      tv_dict[fv.spec.name] = np.reshape(fv.to_numpy(), fv.spec.shape)
+    return {
+        OBS_KEY: tv_dict,
+        REWARD_KEY: obs.score if obs.score else 0.0,
+        SCORE_POLICY_KEY: 0.0,
+        SCORE_DEFAULT_KEY: 0.0,
+        CONTEXT_KEY: obs.context,
+        MODULE_NAME_KEY: self._module_name,
+        OBS_ID_KEY: obs.observation_id,
+        STEP_TYPE_KEY: _get_step_type(),
+    }
+
+  def send_action(self, action: np.ndarray) -> None:
+    """Writes `action` to clang as raw bytes and flushes the pipe.
+
+    Args:
+      action: the action to send; its in-memory bytes are written verbatim.
+    """
+    assert self._running()
+    data = action.tobytes()
+    bytes_sent = self._writer_pipe.write(data)
+    # Here we use the fact that for common types, the np.dtype and ctype should
+    # behave the same
+    assert bytes_sent == action.dtype.itemsize * math.prod(action.shape)
+    try:
+      self._writer_pipe.flush()
+    except BrokenPipeError:
+      # The pipe can break after we send the last action
+      pass
+
+
+# Added to both numerator and denominator of the reward ratio.
+_EPS = 1e-4
+
+
+def compute_relative_rewards(score_a: dict[str, float],
+                             score_b: dict[str, float]) -> dict[str, float]:
+  """Computes, per key, the reward of score_a relative to score_b.
+
+  The reward for a key is 1 - (a + _EPS) / (b + _EPS), where a and b are the
+  values stored under that key in score_a and score_b respectively.
+
+  Args:
+    score_a: scores being evaluated.
+    score_b: scores used as the reference.
+
+  Returns:
+    A dict with the same keys (in score_a's order) mapping to the rewards.
+  """
+  assert score_a.keys() == score_b.keys()
+
+  rewards = {}
+  for name, value in score_a.items():
+    reference = score_b[name]
+    rewards[name] = 1.0 - (value + _EPS) / (reference + _EPS)
+  return rewards
+
+
+@contextlib.contextmanager
+def clang_session(
+    clang_path: str,
+    module: corpus.LoadedModuleSpec,
+    task_type: Type[MLGOTask],
+    *,
+    interactive: bool,
+):
+  """Context manager for clang session.
+
+  We need to manage the context so resources like tempfiles and pipes have
+  their lifetimes managed appropriately: the temp directory, the FIFOs and the
+  clang process all live exactly as long as the `with` block of the caller,
+  and the process is killed when the block is left.
+
+  Args:
+    clang_path: The clang binary to use for the session.
+    module: The module to compile with clang.
+    task_type: Type of the MLGOTask to use.
+    interactive: Whether to use an interactive or default clang instance
+
+  Yields:
+    An InteractiveClang when `interactive` is True, otherwise a ClangProcess.
+  """
+  with tempfile.TemporaryDirectory() as td:
+    task_working_dir = os.path.join(td, '__task_working_dir__')
+    os.mkdir(task_working_dir)
+    task = task_type()
+
+    base_args = list(module.build_command_line(td))
+    interactive_base = os.path.join(
+        td, _INTERACTIVE_PIPE_FILE_BASE) if interactive else None
+    cmdline = task.get_cmdline(clang_path, base_args, interactive_base,
+                               task_working_dir)
+
+    def _get_scores() -> dict[str, float]:
+      return task.get_module_scores(task_working_dir)
+
+    writer_name = os.path.join(td, _INTERACTIVE_PIPE_FILE_BASE + '.in')
+    reader_name = os.path.join(td, _INTERACTIVE_PIPE_FILE_BASE + '.out')
+    if interactive:
+      # The FIFOs must exist before clang is started.
+      os.mkfifo(reader_name, 0o666)
+      os.mkfifo(writer_name, 0o666)
+    # Nothing in this module reads clang's stdout/stderr. Leaving them as
+    # unread PIPEs can block clang (and therefore proc.wait() in get_scores)
+    # once the OS pipe buffer fills up, so the output is discarded instead.
+    with subprocess.Popen(
+        cmdline,
+        stderr=subprocess.DEVNULL,
+        stdout=subprocess.DEVNULL,
+    ) as proc:
+      try:
+        if interactive:
+          with io.BufferedWriter(io.FileIO(writer_name, 'wb')) as writer_pipe:
+            with io.BufferedReader(io.FileIO(reader_name, 'rb')) as reader_pipe:
+              yield InteractiveClang(
+                  proc,
+                  _get_scores,
+                  module.name,
+                  reader_pipe,
+                  writer_pipe,
+              )
+        else:
+          yield ClangProcess(
+              proc,
+              _get_scores,
+              module.name,
+          )
+
+      finally:
+        # Make sure clang does not outlive the session, whatever happened.
+        proc.kill()
+
+
+def _get_clang_generator(
+    clang_path: str,
+    task_type: Type[MLGOTask],
+) -> Generator[Optional[Tuple[InteractiveClang, ClangProcess]],
+               Optional[corpus.LoadedModuleSpec], None]:
+  """Returns a coroutine-style generator creating pairs of clang sessions.
+
+  The generator alternates between two states:
+    1. It yields None and waits to be sent a module.
+    2. It yields an (InteractiveClang, ClangProcess) pair for that module, both
+       created through clang_session. Resuming the generator from this state
+       leaves both sessions (ending the two processes) and goes back to 1.
+
+  Args:
+    clang_path: Path to the clang binary to use for both sessions.
+    task_type: Type of the MLGO task to use.
+
+  Returns:
+    The generator described above. It never terminates on its own.
+  """
+  while True:
+    # The following line should be type-hinted as follows:
+    #   module: corpus.LoadedModuleSpec = yield
+    # However, this triggers a yapf crash. See:
+    #   https://github.com/google/yapf/issues/1092
+    module = yield
+    with clang_session(
+        clang_path, module, task_type, interactive=True) as iclang:
+      with clang_session(
+          clang_path, module, task_type, interactive=False) as clang:
+        # Interactive session first, default (non-interactive) session second.
+        yield iclang, clang
+
+
+class MLGOEnvironmentBase:
+  """Base implementation for all MLGO environments.
+
+  Depending on the RL framework, one may want different implementations of an
+  environment (tf_agents: PyEnvironment, jax: dm-env, etc). This class
+  implements the core methods that are needed to then implement any of these
+  other environments as well.
+  """
+
+  def __init__(
+      self,
+      *,
+      clang_path: str,
+      task_type: Type[MLGOTask],
+      obs_spec,
+      action_spec,
+  ):
+    """Creates the environment; no clang process is started until reset().
+
+    Args:
+      clang_path: path to the clang binary used for every episode.
+      task_type: type of the MLGOTask to use.
+      obs_spec: observation spec, exposed unchanged through `obs_spec`.
+      action_spec: action spec, exposed unchanged through `action_spec`.
+    """
+    self._clang_generator = _get_clang_generator(clang_path, task_type)
+    self._obs_spec = obs_spec
+    self._action_spec = action_spec
+
+    self._iclang: Optional[InteractiveClang] = None
+    self._clang: Optional[ClangProcess] = None
+    # Initialized here so that observation() is usable before the first reset.
+    self._last_obs: Optional[OBS_T] = None
+
+  @property
+  def obs_spec(self):
+    return self._obs_spec
+
+  @property
+  def action_spec(self):
+    return self._action_spec
+
+  def observation(self):
+    """Returns the most recent observation, or None before the first reset."""
+    return self._last_obs
+
+  def _get_observation(self) -> OBS_T:
+    """Fetches the next observation and, on the last step, attaches scores.
+
+    On the terminal step the policy and default scores are collected and the
+    reward entry is replaced by the per-context relative rewards (a dict).
+    """
+    self._last_obs = self._iclang.get_observation()
+    if self._last_obs[STEP_TYPE_KEY] == LAST_STEP_STR:
+      policy_scores = self._iclang.get_scores()
+      default_scores = self._clang.get_scores()
+      # Build a new dict rather than writing into the one we were handed: the
+      # terminal observation object may be shared with its producer.
+      self._last_obs = {
+          **self._last_obs,
+          SCORE_POLICY_KEY: policy_scores,
+          SCORE_DEFAULT_KEY: default_scores,
+          REWARD_KEY: compute_relative_rewards(policy_scores, default_scores),
+      }
+    return self.observation()
+
+  def reset(self, module: corpus.LoadedModuleSpec):
+    """Starts a new episode on `module` and returns its first observation."""
+    # On the first call to reset(...), sending None starts the coroutine.
+    # On subsequent calls, it resumes execution after the point where the clang
+    # pair was yielded, which terminates the previous sessions. In both cases
+    # the coroutine then pauses where it awaits a module.
+    self._clang_generator.send(None)
+    # pytype: disable=attribute-error
+    self._iclang, self._clang = self._clang_generator.send(module)
+    return self._get_observation()
+
+  def step(self, action: np.ndarray):
+    """Sends `action` to clang and returns the resulting observation."""
+    self._iclang.send_action(action)
+    return self._get_observation()