Skip to content

Commit ce66ac0

Browse files
authored
Add MLGO environments. (#228)
Add MLGO environment abstractions. This commit also contains an implementation of the inlining-for-size environment.
1 parent e08d84d commit ce66ac0

File tree

7 files changed

+655
-0
lines changed

7 files changed

+655
-0
lines changed

compiler_opt/rl/env.py

Lines changed: 362 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,362 @@
1+
# coding=utf-8
2+
# Copyright 2020 Google LLC
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""Gymlike environment definition for MLGO."""
16+
17+
from __future__ import annotations
18+
19+
import math
20+
import subprocess
21+
import abc
22+
import contextlib
23+
import io
24+
import os
25+
import tempfile
26+
from typing import Any, Generator, List, Optional, Tuple, Type
27+
28+
import numpy as np
29+
30+
from compiler_opt.rl import corpus
31+
from compiler_opt.rl import log_reader
32+
33+
# Observations are passed around as plain dictionaries, so no stricter type is
# imposed on them here.
OBS_T = Any

# Keys of the dictionary that describes a single observation.
OBS_KEY = 'obs'
REWARD_KEY = 'reward'
SCORE_POLICY_KEY = 'score_policy'
SCORE_DEFAULT_KEY = 'score_default'
CONTEXT_KEY = 'context'
MODULE_NAME_KEY = 'module_name'
OBS_ID_KEY = 'obs_id'
STEP_TYPE_KEY = 'step_type'

# Values stored under STEP_TYPE_KEY.
FIRST_STEP_STR = 'first'
MID_STEP_STR = 'mid'
LAST_STEP_STR = 'last'

# Template for the observation handed out once clang has nothing more to
# report (the process exited or the observation stream is exhausted).
_TERMINAL_OBS = {
    OBS_KEY: {},
    REWARD_KEY: 0.0,
    SCORE_POLICY_KEY: 0.0,
    SCORE_DEFAULT_KEY: 0.0,
    CONTEXT_KEY: '',
    MODULE_NAME_KEY: '',
    OBS_ID_KEY: -1,
    STEP_TYPE_KEY: LAST_STEP_STR,
}

# Base name of the interactive pipe files; '.in' and '.out' are appended to it
# when the pipes are created.
_INTERACTIVE_PIPE_FILE_BASE = 'interactive-pipe-base'
60+
61+
62+
class MLGOTask(metaclass=abc.ABCMeta):
  """Interface that every MLGO task has to implement.

  A task is a learning problem in LLVM, such as:
    - inlining-for-size
    - inlining-for-speed
    - register allocation (for speed)

  A concrete task knows how to build a module for its problem (interactively
  or not) and how to score the result of that build.
  """

  @abc.abstractmethod
  def get_cmdline(self, clang_path: str, base_args: List[str],
                  interactive_base_path: Optional[str],
                  working_dir: str) -> List[str]:
    """Builds the clang command line for this task.

    The returned list must be suitable for passing to subprocess.run in order
    to execute clang.

    Args:
      clang_path: path to the clang executable.
      base_args: base arguments for building the module. These should
        generally be appended to the result unmodified.
      interactive_base_path: path to the interactive pipe base, or None when
        clang must not be run interactively.
      working_dir: directory into which all compilation artifacts should be
        written. It is a temp directory whose lifetime is managed outside of
        the task.

    Returns:
      The constructed command line.
    """

  @abc.abstractmethod
  def get_module_scores(self, working_dir: str) -> dict[str, float]:
    """Computes the score of each context in the built module.

    Implementations should not care whether the module was built with the
    default heuristic or with an ML policy.

    Args:
      working_dir: the directory that was passed as working_dir to
        get_cmdline; used to recover binaries/artifacts from the build.

    Returns:
      A dictionary mapping [context name] -> [score].
    """
113+
114+
115+
class ClangProcess:
  """Pairs a clang process with the callback that scores its output.

  Bundling the two lets a caller wait for the compilation and fetch the scores
  of the resulting binary through a single object.
  """

  def __init__(self, proc, get_scores_fn, module_name):
    """Stores the process handle, the scoring callback and the module name.

    Args:
      proc: process handle; it must provide wait(timeout=...).
      get_scores_fn: zero-argument callable returning the scores.
      module_name: name of the module being compiled.
    """
    self._module_name = module_name
    self._get_scores_fn = get_scores_fn
    self._proc = proc

  def get_scores(self, timeout: Optional[int] = None):
    """Waits for clang to finish and returns the scores of its output.

    Args:
      timeout: forwarded unchanged to the process's wait().

    Returns:
      Whatever the scoring callback returns.
    """
    self._proc.wait(timeout=timeout)
    scores = self._get_scores_fn()
    return scores
131+
132+
class InteractiveClang(ClangProcess):
  """Wrapper around clang's interactive mode.

  Observations are decoded from `reader_pipe` via log_reader, and actions are
  written to `writer_pipe` as raw bytes.
  """

  def __init__(
      self,
      proc,
      get_scores_fn,
      module_name: str,
      reader_pipe: io.BufferedReader,
      writer_pipe: io.BufferedWriter,
  ):
    """Sets up the observation stream and the terminal observation.

    Args:
      proc: handle of the running clang process.
      get_scores_fn: zero-argument callable returning the module scores.
      module_name: name of the module being compiled.
      reader_pipe: pipe from which observations are read.
      writer_pipe: pipe to which actions are written.
    """
    super().__init__(proc, get_scores_fn, module_name)
    self._reader_pipe = reader_pipe
    self._writer_pipe = writer_pipe
    self._obs_gen = log_reader.read_log_from_file(self._reader_pipe)

    self._is_first_obs = True

    # Work on a private copy of the template: writing the module name (or
    # anything else) into the module-level dict would leak into every other
    # InteractiveClang instance. The nested observation dict is replaced too,
    # since a shallow copy would still share it.
    self._terminal_obs = dict(_TERMINAL_OBS)
    self._terminal_obs[OBS_KEY] = {}
    self._terminal_obs[MODULE_NAME_KEY] = module_name

  def _running(self) -> bool:
    """Returns True while the clang process has not exited."""
    return self._proc.poll() is None

  def _next_step_type(self) -> str:
    """Returns 'first' for the first observation and 'mid' afterwards."""
    step_type = FIRST_STEP_STR if self._is_first_obs else MID_STEP_STR
    self._is_first_obs = False
    return step_type

  def get_observation(self) -> OBS_T:
    """Reads the next observation emitted by clang.

    Returns:
      A dictionary keyed by the *_KEY constants. The terminal observation is
      returned when clang is no longer running or when the observation stream
      is exhausted.
    """
    if not self._running():
      return self._terminal_obs

    try:
      obs: log_reader.ObservationRecord = next(self._obs_gen)
    except StopIteration:
      return self._terminal_obs

    # The shape is passed positionally: the `newshape` keyword is deprecated in
    # recent NumPy releases, while the positional form works everywhere.
    tv_dict = {
        fv.spec.name: np.reshape(fv.to_numpy(), fv.spec.shape)
        for fv in obs.feature_values
    }
    return {
        OBS_KEY: tv_dict,
        REWARD_KEY: obs.score if obs.score else 0.0,
        SCORE_POLICY_KEY: 0.0,
        SCORE_DEFAULT_KEY: 0.0,
        CONTEXT_KEY: obs.context,
        MODULE_NAME_KEY: self._module_name,
        OBS_ID_KEY: obs.observation_id,
        STEP_TYPE_KEY: self._next_step_type(),
    }

  def send_action(self, action: np.ndarray) -> None:
    """Writes `action` to clang as raw bytes and flushes the pipe.

    Args:
      action: the action to send; its in-memory bytes are written as is.
    """
    assert self._running()
    data = action.tobytes()
    bytes_sent = self._writer_pipe.write(data)
    # Here we use the fact that for common types, the np.dtype and ctype should
    # behave the same. `nbytes` is itemsize times the number of elements.
    assert bytes_sent == action.nbytes
    try:
      self._writer_pipe.flush()
    except BrokenPipeError:
      # The pipe can break after we send the last action
      pass
197+
198+
199+
# Added to both numerator and denominator of the score ratio below.
_EPS = 1e-4


def compute_relative_rewards(score_a: dict[str, float],
                             score_b: dict[str, float]) -> dict[str, float]:
  """Computes, per key, the reward of score_a relative to score_b.

  The reward for a key is 1 - (a + _EPS) / (b + _EPS), where a and b are the
  scores stored under that key in score_a and score_b respectively.

  Args:
    score_a: scores being evaluated.
    score_b: scores used as the reference; must have the same keys as score_a.

  Returns:
    A dictionary mapping every key of score_a to its relative reward.
  """
  assert score_a.keys() == score_b.keys()

  rewards = {}
  for key, a in score_a.items():
    b = score_b[key]
    rewards[key] = 1.0 - (a + _EPS) / (b + _EPS)
  return rewards
210+
211+
212+
@contextlib.contextmanager
def clang_session(
    clang_path: str,
    module: corpus.LoadedModuleSpec,
    task_type: Type[MLGOTask],
    *,
    interactive: bool,
):
  """Context manager running one clang compilation of `module`.

  Everything the compilation needs (temp directory, named pipes, the clang
  process itself) is created on entry and torn down on exit.

  Args:
    clang_path: The clang binary to launch.
    module: The module to compile with clang.
    task_type: Type of the MLGOTask to use; it is instantiated without
      arguments.
    interactive: Whether to run clang in interactive mode.

  Yields:
    An InteractiveClang when `interactive` is True, otherwise a ClangProcess.
  """
  with tempfile.TemporaryDirectory() as td:
    task_working_dir = os.path.join(td, '__task_working_dir__')
    os.mkdir(task_working_dir)
    task = task_type()

    pipe_base = os.path.join(td, _INTERACTIVE_PIPE_FILE_BASE)
    cmdline = task.get_cmdline(
        clang_path,
        list(module.build_command_line(td)),
        pipe_base if interactive else None,
        task_working_dir,
    )

    def _scores_from_working_dir() -> dict[str, float]:
      return task.get_module_scores(task_working_dir)

    writer_name = pipe_base + '.in'
    reader_name = pipe_base + '.out'
    if interactive:
      # The pipes have to exist before clang is launched.
      os.mkfifo(reader_name, 0o666)
      os.mkfifo(writer_name, 0o666)

    # NOTE(review): stdout/stderr are captured but never read here; confirm
    # clang cannot emit enough output to fill the OS pipe buffer and stall.
    with subprocess.Popen(
        cmdline, stderr=subprocess.PIPE, stdout=subprocess.PIPE) as proc:
      try:
        if not interactive:
          yield ClangProcess(proc, _scores_from_working_dir, module.name)
        else:
          # The writer is opened before the reader and closed after it.
          with io.BufferedWriter(io.FileIO(writer_name, 'wb')) as writer_pipe, \
              io.BufferedReader(io.FileIO(reader_name, 'rb')) as reader_pipe:
            yield InteractiveClang(
                proc,
                _scores_from_working_dir,
                module.name,
                reader_pipe,
                writer_pipe,
            )
      finally:
        # Make sure clang never outlives the session.
        proc.kill()
275+
276+
277+
def _get_clang_generator(
    clang_path: str,
    task_type: Type[MLGOTask],
) -> Generator[Optional[Tuple[InteractiveClang, ClangProcess]],
               Optional[corpus.LoadedModuleSpec], None]:
  """Returns a coroutine-style generator producing pairs of clang sessions.

  The generator loops forever over two steps:
    1. It yields None and waits to be sent the module to compile.
    2. It opens an interactive and a non-interactive clang session for that
       module and yields them as an (InteractiveClang, ClangProcess) pair.
  Resuming the generator after step 2 leaves both session contexts, which
  tears the sessions down, and brings it back to step 1.

  Args:
    clang_path: Path to the clang binary used for both sessions.
    task_type: Type of the MLGO task to use.

  Returns:
    The generator described above.
  """
  while True:
    # The following line should be type-hinted as follows:
    #   module: corpus.LoadedModuleSpec = yield
    # However, this triggers a yapf crash. See:
    #   https://github.com/google/yapf/issues/1092
    module = yield
    with clang_session(
        clang_path, module, task_type, interactive=True) as iclang:
      with clang_session(
          clang_path, module, task_type, interactive=False) as clang:
        yield iclang, clang
304+
305+
306+
class MLGOEnvironmentBase:
  """Base implementation for all MLGO environments.

  Depending on the RL framework, one may want different implementations of an
  environment (tf_agents: PyEnvironment, jax: dm-env, etc). This class
  implements the core methods that are needed to then implement any of these
  other environments as well.
  """

  def __init__(
      self,
      *,
      clang_path: str,
      task_type: Type[MLGOTask],
      obs_spec,
      action_spec,
  ):
    """Creates the environment; no clang process is started until reset().

    Args:
      clang_path: path to the clang binary to run.
      task_type: type of the MLGO task to use.
      obs_spec: observation spec, exposed unchanged through `obs_spec`.
      action_spec: action spec, exposed unchanged through `action_spec`.
    """
    self._clang_generator = _get_clang_generator(clang_path, task_type)
    self._obs_spec = obs_spec
    self._action_spec = action_spec

    self._iclang: Optional[InteractiveClang] = None
    self._clang: Optional[ClangProcess] = None
    # Initialised so that observation() is usable before the first reset().
    self._last_obs: Optional[OBS_T] = None

  @property
  def obs_spec(self):
    return self._obs_spec

  @property
  def action_spec(self):
    return self._action_spec

  def observation(self):
    """Returns the most recent observation, or None before the first reset."""
    return self._last_obs

  def _get_observation(self) -> OBS_T:
    """Fetches the next observation and completes it if it is the last one.

    For a terminal observation, the policy and default scores are collected
    and the relative reward is computed from them.
    """
    # Shallow-copy before writing into it: the dictionary handed out for
    # terminal steps is owned by the clang wrapper and must not be modified.
    self._last_obs = dict(self._iclang.get_observation())
    if self._last_obs[STEP_TYPE_KEY] == LAST_STEP_STR:
      self._last_obs[SCORE_POLICY_KEY] = self._iclang.get_scores()
      self._last_obs[SCORE_DEFAULT_KEY] = self._clang.get_scores()
      self._last_obs[REWARD_KEY] = compute_relative_rewards(
          self._last_obs[SCORE_POLICY_KEY], self._last_obs[SCORE_DEFAULT_KEY])
    return self.observation()

  def reset(self, module: corpus.LoadedModuleSpec):
    """Starts compiling `module` and returns its first observation."""
    # On the first call to reset(...), sending None starts the coroutine.
    # On subsequent calls, it resumes execution after the point where the
    # clang pair was yielded, which terminates the previous sessions and
    # pauses the coroutine where it awaits a module.
    self._clang_generator.send(None)
    # pytype: disable=attribute-error
    self._iclang, self._clang = self._clang_generator.send(module)
    return self._get_observation()

  def step(self, action: np.ndarray):
    """Sends `action` to clang and returns the resulting observation."""
    self._iclang.send_action(action)
    return self._get_observation()

0 commit comments

Comments
 (0)