|
| 1 | +# coding=utf-8 |
| 2 | +# Copyright 2020 Google LLC |
| 3 | +# |
| 4 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +# you may not use this file except in compliance with the License. |
| 6 | +# You may obtain a copy of the License at |
| 7 | +# |
| 8 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +# |
| 10 | +# Unless required by applicable law or agreed to in writing, software |
| 11 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +# See the License for the specific language governing permissions and |
| 14 | +# limitations under the License. |
| 15 | +"""Gymlike environment definition for MLGO.""" |
| 16 | + |
| 17 | +from __future__ import annotations |
| 18 | + |
| 19 | +import math |
| 20 | +import subprocess |
| 21 | +import abc |
| 22 | +import contextlib |
| 23 | +import io |
| 24 | +import os |
| 25 | +import tempfile |
| 26 | +from typing import Any, Generator, List, Optional, Tuple, Type |
| 27 | + |
| 28 | +import numpy as np |
| 29 | + |
| 30 | +from compiler_opt.rl import corpus |
| 31 | +from compiler_opt.rl import log_reader |
| 32 | + |
# Type alias for an observation. Left as Any: this module does not enforce a
# concrete type for the observation dictionaries it builds.
OBS_T = Any

# Keys of the observation dictionary.
OBS_KEY = 'obs'
REWARD_KEY = 'reward'
SCORE_POLICY_KEY = 'score_policy'
SCORE_DEFAULT_KEY = 'score_default'
CONTEXT_KEY = 'context'
MODULE_NAME_KEY = 'module_name'
OBS_ID_KEY = 'obs_id'
STEP_TYPE_KEY = 'step_type'

# Values stored under STEP_TYPE_KEY.
FIRST_STEP_STR = 'first'
MID_STEP_STR = 'mid'
LAST_STEP_STR = 'last'

# Template of the observation reported once there is nothing left to observe:
# no features, zeroed reward and scores, and a 'last' step type.
_TERMINAL_OBS = {
    OBS_KEY: {},
    REWARD_KEY: 0.0,
    SCORE_POLICY_KEY: 0.0,
    SCORE_DEFAULT_KEY: 0.0,
    CONTEXT_KEY: '',
    MODULE_NAME_KEY: '',
    OBS_ID_KEY: -1,
    STEP_TYPE_KEY: LAST_STEP_STR,
}

# Base file name of the interactive pipes; the '.in' and '.out' suffixes are
# appended to it when the named pipes are created.
_INTERACTIVE_PIPE_FILE_BASE = 'interactive-pipe-base'
| 60 | + |
| 61 | + |
class MLGOTask(metaclass=abc.ABCMeta):
  """Interface that every MLGO task has to implement.

  A task corresponds to one learning problem in LLVM, such as:
    - inlining-for-size
    - inlining-for-speed
    - register allocation (for speed)

  A concrete task type describes how modules are built and how they are scored
  for its problem, in both the interactive and the non-interactive setting.
  """

  @abc.abstractmethod
  def get_cmdline(self, clang_path: str, base_args: List[str],
                  interactive_base_path: Optional[str],
                  working_dir: str) -> List[str]:
    """Builds the clang command line used to compile with this task.

    The returned list must be directly usable as the argument list of
    subprocess.run (or an equivalent) to execute clang.

    Args:
      clang_path: location of the clang executable.
      base_args: the module's base build arguments. These are normally left
        untouched and simply included in the result.
      interactive_base_path: base path of the interactive pipes, or None when
        clang must not be run interactively.
      working_dir: directory that receives every artifact of the compilation.
        It is a temporary directory whose lifetime is handled outside of the
        task.

    Returns:
      The full command line, as a list of strings.
    """

  @abc.abstractmethod
  def get_module_scores(self, working_dir: str) -> dict[str, float]:
    """Computes the score of every context in the module.

    Implementations should not depend on whether the module was compiled with
    the default heuristic or with an ML policy.

    Args:
      working_dir: the directory that was given to get_cmdline as working_dir;
        the binaries/artifacts of the build are recovered from it.

    Returns:
      A dictionary mapping [context name] -> [score].
    """
| 113 | + |
| 114 | + |
class ClangProcess:
  """Bundles a clang process with the callable that scores its output.

  Keeping both together lets a caller wait for the compilation to finish and
  fetch the resulting scores through a single object.
  """

  def __init__(self, proc, get_scores_fn, module_name):
    """Stores the process handle, the scoring callable and the module name.

    Args:
      proc: the process object; it must provide wait(timeout=...).
      get_scores_fn: zero-argument callable returning the scores.
      module_name: name of the module being compiled.
    """
    self._module_name = module_name
    self._get_scores_fn = get_scores_fn
    self._proc = proc

  def get_scores(self, timeout: Optional[int] = None):
    """Waits for the process to finish and returns the scores.

    Args:
      timeout: passed through as the `timeout` argument of the process' wait().

    Returns:
      Whatever the scoring callable returns.
    """
    self._proc.wait(timeout=timeout)
    scores = self._get_scores_fn()
    return scores
| 130 | + |
| 131 | + |
class InteractiveClang(ClangProcess):
  """Wrapper around clang's interactive mode.

  Observations are decoded from `reader_pipe` with
  log_reader.read_log_from_file; actions are written to `writer_pipe` as the
  raw bytes of the action array.
  """

  def __init__(
      self,
      proc,
      get_scores_fn,
      module_name: str,
      reader_pipe: io.BufferedReader,
      writer_pipe: io.BufferedWriter,
  ):
    """Sets up the observation stream and the terminal observation.

    Args:
      proc: the clang process object.
      get_scores_fn: zero-argument callable returning the module scores.
      module_name: name of the module being compiled.
      reader_pipe: pipe from which observations are read.
      writer_pipe: pipe to which actions are written.
    """
    super().__init__(proc, get_scores_fn, module_name)
    self._reader_pipe = reader_pipe
    self._writer_pipe = writer_pipe
    self._obs_gen = log_reader.read_log_from_file(self._reader_pipe)

    self._is_first_obs = True

    # Build a per-instance terminal observation. Aliasing the module-level
    # _TERMINAL_OBS template would make every session share (and overwrite)
    # the same dictionary, leaking the module name and any value later written
    # into the observation from one session to the next.
    self._terminal_obs = {
        **_TERMINAL_OBS,
        OBS_KEY: {},
        MODULE_NAME_KEY: module_name,
    }

  def _running(self) -> bool:
    """Returns True while the clang process has not exited."""
    return self._proc.poll() is None

  def get_observation(self) -> OBS_T:
    """Returns the next observation, or the terminal one when none is left.

    The terminal observation is returned when the process has already exited
    or when the observation stream is exhausted.

    Returns:
      A dictionary keyed by OBS_KEY, REWARD_KEY, SCORE_POLICY_KEY,
      SCORE_DEFAULT_KEY, CONTEXT_KEY, MODULE_NAME_KEY, OBS_ID_KEY and
      STEP_TYPE_KEY.
    """
    if not self._running():
      return self._terminal_obs

    # Only the read from the stream is expected to signal exhaustion, so keep
    # the try body limited to it.
    try:
      obs: log_reader.ObservationRecord = next(self._obs_gen)
    except StopIteration:
      return self._terminal_obs

    tv_dict = {}
    for fv in obs.feature_values:
      array = fv.to_numpy()
      # The shape is passed positionally: the `newshape` keyword is deprecated
      # in recent NumPy releases, while the positional form works everywhere.
      tv_dict[fv.spec.name] = np.reshape(array, fv.spec.shape)

    # The very first observation of a session is tagged 'first', all the
    # following ones 'mid'.
    step_type = FIRST_STEP_STR if self._is_first_obs else MID_STEP_STR
    self._is_first_obs = False

    return {
        OBS_KEY: tv_dict,
        REWARD_KEY: obs.score if obs.score else 0.0,
        SCORE_POLICY_KEY: 0.0,
        SCORE_DEFAULT_KEY: 0.0,
        CONTEXT_KEY: obs.context,
        MODULE_NAME_KEY: self._module_name,
        OBS_ID_KEY: obs.observation_id,
        STEP_TYPE_KEY: step_type,
    }

  def send_action(self, action: np.ndarray) -> None:
    """Writes the raw bytes of `action` to the writer pipe and flushes it.

    Args:
      action: the action to send; its in-memory bytes are written as is.
    """
    assert self._running()
    data = action.tobytes()
    bytes_sent = self._writer_pipe.write(data)
    # Here we use the fact that for common types, the np.dtype and ctype should
    # behave the same
    assert bytes_sent == action.dtype.itemsize * math.prod(action.shape)
    try:
      self._writer_pipe.flush()
    except BrokenPipeError:
      # The pipe can break after we send the last action
      pass
| 197 | + |
| 198 | + |
# Added to both scores so the ratio stays defined when a score is zero.
_EPS = 1e-4


def compute_relative_rewards(score_a: dict[str, float],
                             score_b: dict[str, float]) -> dict[str, float]:
  """Computes, for every key, the reward 1 - (a + _EPS) / (b + _EPS).

  Args:
    score_a: scores appearing in the numerator of the ratio.
    score_b: scores appearing in the denominator; must have the same keys as
      score_a.

  Returns:
    A dictionary with the keys of score_a mapped to the relative reward.
  """
  assert score_a.keys() == score_b.keys()

  rewards = {}
  for name, a_val in score_a.items():
    b_val = score_b[name]
    rewards[name] = 1.0 - (a_val + _EPS) / (b_val + _EPS)
  return rewards
| 210 | + |
| 211 | + |
@contextlib.contextmanager
def clang_session(
    clang_path: str,
    module: corpus.LoadedModuleSpec,
    task_type: Type[MLGOTask],
    *,
    interactive: bool,
):
  """Runs clang on a module for the duration of the `with` block.

  A context manager is used so that the temporary directory, the named pipes
  and the clang process are all released when the session ends.

  Args:
    clang_path: The clang binary to run.
    module: The module to compile with clang.
    task_type: Type of the MLGOTask to use; it is instantiated with no
      arguments.
    interactive: Whether to run clang interactively (through named pipes) or
      as a plain process.

  Yields:
    An InteractiveClang when `interactive` is true, a ClangProcess otherwise.
  """
  with tempfile.TemporaryDirectory() as td:
    task_working_dir = os.path.join(td, '__task_working_dir__')
    os.mkdir(task_working_dir)
    task = task_type()

    pipe_base = os.path.join(td, _INTERACTIVE_PIPE_FILE_BASE)
    writer_name = pipe_base + '.in'
    reader_name = pipe_base + '.out'

    cmdline = task.get_cmdline(
        clang_path,
        list(module.build_command_line(td)),
        pipe_base if interactive else None,
        task_working_dir,
    )

    def _score_module() -> dict[str, float]:
      return task.get_module_scores(task_working_dir)

    if interactive:
      # The pipes have to exist before clang is started.
      os.mkfifo(reader_name, 0o666)
      os.mkfifo(writer_name, 0o666)

    # NOTE(review): stdout/stderr are piped but nothing visible here drains
    # them -- confirm the compiler output is small enough not to block.
    with subprocess.Popen(
        cmdline, stderr=subprocess.PIPE, stdout=subprocess.PIPE) as proc:
      try:
        if not interactive:
          yield ClangProcess(proc, _score_module, module.name)
        else:
          with io.BufferedWriter(io.FileIO(writer_name, 'wb')) as writer_pipe:
            with io.BufferedReader(io.FileIO(reader_name,
                                             'rb')) as reader_pipe:
              yield InteractiveClang(
                  proc,
                  _score_module,
                  module.name,
                  reader_pipe,
                  writer_pipe,
              )
      finally:
        # Always stop clang, whether the session ended normally or not.
        proc.kill()
| 275 | + |
| 276 | + |
def _get_clang_generator(
    clang_path: str,
    task_type: Type[MLGOTask],
) -> Generator[Optional[Tuple[InteractiveClang, ClangProcess]],
               Optional[corpus.LoadedModuleSpec], None]:
  """Returns a coroutine-style generator producing pairs of clang sessions.

  The generator is driven with send() and alternates between two states:
    1. Waiting for a module: it yields None and expects the next send() to
       carry the module to compile.
    2. Holding sessions: after receiving a module it yields the tuple
       (InteractiveClang, ClangProcess), both created for that module. Resuming
       the generator from this state closes both sessions and moves back to
       state 1.

  Args:
    clang_path: Path to the clang binary used for both sessions.
    task_type: Type of the MLGO task to use.

  Returns:
    The generator described above; it never terminates on its own.
  """
  while True:
    # The following line should be type-hinted as follows:
    #   module: corpus.LoadedModuleSpec = yield
    # However, this triggers a yapf crash. See:
    #   https://github.com/google/yapf/issues/1092
    module = yield
    with clang_session(
        clang_path, module, task_type, interactive=True) as iclang:
      with clang_session(
          clang_path, module, task_type, interactive=False) as clang:
        # Both sessions stay open until the generator is resumed.
        yield iclang, clang
| 304 | + |
| 305 | + |
class MLGOEnvironmentBase:
  """Base implementation for all MLGO environments.

  Depending on the RL framework, one may want different implementations of an
  environment (tf_agents: PyEnvironment, jax: dm-env, etc). This class
  implements the core methods that are needed to then implement any of these
  other environments as well.
  """

  def __init__(
      self,
      *,
      clang_path: str,
      task_type: Type[MLGOTask],
      obs_spec,
      action_spec,
  ):
    """Creates the environment; no clang process is started until reset().

    Args:
      clang_path: path to the clang binary to use.
      task_type: type of the MLGO task to use.
      obs_spec: observation spec, exposed unchanged through `obs_spec`.
      action_spec: action spec, exposed unchanged through `action_spec`.
    """
    self._clang_generator = _get_clang_generator(clang_path, task_type)
    self._obs_spec = obs_spec
    self._action_spec = action_spec

    self._iclang: Optional[InteractiveClang] = None
    self._clang: Optional[ClangProcess] = None
    # Defined up front so that observation() called before the first reset()
    # returns None instead of raising AttributeError.
    self._last_obs: Optional[OBS_T] = None

  @property
  def obs_spec(self):
    return self._obs_spec

  @property
  def action_spec(self):
    return self._action_spec

  def observation(self):
    """Returns the most recent observation, or None before the first reset."""
    return self._last_obs

  def _get_observation(self) -> OBS_T:
    """Fetches the next observation and fills in scores on the last step."""
    # Work on a shallow copy: the score and reward fields written below must
    # not modify the dictionary owned by the interactive session.
    self._last_obs = dict(self._iclang.get_observation())
    if self._last_obs[STEP_TYPE_KEY] == LAST_STEP_STR:
      self._last_obs[SCORE_POLICY_KEY] = self._iclang.get_scores()
      self._last_obs[SCORE_DEFAULT_KEY] = self._clang.get_scores()
      self._last_obs[REWARD_KEY] = compute_relative_rewards(
          self._last_obs[SCORE_POLICY_KEY], self._last_obs[SCORE_DEFAULT_KEY])
    return self.observation()

  def reset(self, module: corpus.LoadedModuleSpec):
    """Starts a new pair of clang sessions for `module`.

    Args:
      module: the module to compile.

    Returns:
      The first observation for the module.
    """
    # On the first call to reset(...), sending None starts the coroutine.
    # On subsequent calls, this resumes execution after yielding the clang
    # pair, which terminates the previous sessions and pauses execution in the
    # coroutine where it awaits a module.
    self._clang_generator.send(None)
    # pytype: disable=attribute-error
    self._iclang, self._clang = self._clang_generator.send(module)
    return self._get_observation()

  def step(self, action: np.ndarray):
    """Sends `action` to the interactive clang and returns the next observation.

    Args:
      action: the action to send.

    Returns:
      The observation following the action.
    """
    self._iclang.send_action(action)
    return self._get_observation()
0 commit comments