Skip to content

Commit f86feef

Browse files
Add support for copying auxiliary files
This patch adds support for copying auxiliary files, such as profiles, to the local machine. These files need handling similar to remote corpora, but they must also go through the flag replacement process. This allows the user to put a generic name surrounded by braces, like {profile}, in the corpus replacement flags and then have the worker fill in the replacement with the actual local file path. Reviewers: mtrofin. Reviewed By: mtrofin. Pull Request: #469
1 parent f943179 commit f86feef

File tree

2 files changed

+98
-15
lines changed

2 files changed

+98
-15
lines changed

compiler_opt/es/regalloc_trace/regalloc_trace_worker.py

Lines changed: 51 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,11 @@ def _setup_base_policy(self):
6060
saver.save(self._tf_base_temp_dir)
6161
self._tf_base_policy_path = os.path.join(self._tf_base_temp_dir, "policy")
6262

63-
def _copy_corpus(self, corpus_path: str,
64-
copy_corpus_locally_path: str | None) -> None:
63+
# TODO(issues/471): aux_file_replacement_flags should be refactored out of
64+
# regalloc_trace_worker as it will need to be used in other places
65+
# eventually.
66+
def _copy_corpus(self, corpus_path: str, copy_corpus_locally_path: str | None,
67+
aux_file_replacement_flags: dict[str, str]) -> None:
6568
"""Makes a local copy of the corpus if requested.
6669
6770
This function makes a local copy of the corpus by copying the remote
@@ -70,6 +73,8 @@ def _copy_corpus(self, corpus_path: str,
7073
Args:
7174
corpus_path: The path to the remote corpus.
7275
copy_corpus_locally_path: The local path to copy the corpus to.
76+
aux_file_replacement_flags: Additional files to copy over that are
77+
passed in through flags, like profiles.
7378
"""
7479
# We use the tensorflow APIs below rather than the standard Python file
7580
# APIs for compatibility with more filesystems.
@@ -97,18 +102,30 @@ def _copy_corpus(self, corpus_path: str,
97102
copy_thread_pool.submit(_make_dirs_and_copy, current_path,
98103
new_path))
99104

105+
if aux_file_replacement_flags is not None:
106+
for flag_name in aux_file_replacement_flags:
107+
aux_replacement_file = aux_file_replacement_flags[flag_name]
108+
new_path = os.path.join(copy_corpus_locally_path,
109+
os.path.basename(aux_replacement_file))
110+
copy_futures.append(
111+
copy_thread_pool.submit(_make_dirs_and_copy, aux_replacement_file,
112+
new_path))
113+
100114
for copy_future in copy_futures:
101115
if copy_future.exception() is not None:
102116
raise copy_future.exception()
103117

104-
def __init__(self,
105-
*,
106-
gin_config: str,
107-
clang_path: str,
108-
basic_block_trace_model_path: str,
109-
thread_count: int,
110-
corpus_path: str,
111-
copy_corpus_locally_path: str | None = None):
118+
def __init__(
119+
self,
120+
*,
121+
gin_config: str,
122+
clang_path: str,
123+
basic_block_trace_model_path: str,
124+
thread_count: int,
125+
corpus_path: str,
126+
copy_corpus_locally_path: str | None = None,
127+
aux_file_replacement_flags: dict[str, str] | None = None,
128+
):
112129
"""Initializes the RegallocTraceWorker class.
113130
114131
Args:
@@ -124,17 +141,39 @@ def __init__(self,
124141
copy_corpus_locally_path: If set, specifies the path that the corpus
125142
should be copied to before utilizing the modules for evaluation.
126143
Setting this to None signifies that no copying is desired.
144+
aux_file_replacement_flags: A dictionary mapping sentinel values intended
145+
to be set using the corpus replace_flags feature to actual file paths
146+
local to the worker. This is intended to be used in distributed
147+
training setups where training corpora and auxiliary files need to be
148+
copied locally before being compiled.
127149
"""
128150
self._clang_path = clang_path
129151
self._basic_block_trace_model_path = basic_block_trace_model_path
130152
self._thread_count = thread_count
153+
131154
self._has_local_corpus = False
132155
self._corpus_path = corpus_path
133156
if copy_corpus_locally_path is not None:
134-
self._copy_corpus(corpus_path, copy_corpus_locally_path)
157+
self._copy_corpus(corpus_path, copy_corpus_locally_path,
158+
aux_file_replacement_flags)
135159
self._corpus_path = copy_corpus_locally_path
136160
self._has_local_corpus = True
137161

162+
if (copy_corpus_locally_path is None and
163+
aux_file_replacement_flags is not None):
164+
raise ValueError(
165+
"additional_replacement_flags is incompatible with fully local "
166+
"corpus setups. Please directly replace the flag with the correct "
167+
"value.")
168+
self._aux_file_replacement_flags = aux_file_replacement_flags
169+
self._aux_file_replacement_context = {}
170+
if aux_file_replacement_flags is not None:
171+
for flag_name in self._aux_file_replacement_flags:
172+
self._aux_file_replacement_context[flag_name] = os.path.join(
173+
self._corpus_path,
174+
os.path.basename(self._aux_file_replacement_flags[flag_name]),
175+
)
176+
138177
gin.parse_config(gin_config)
139178
self._setup_base_policy()
140179

@@ -156,7 +195,7 @@ def _compile_module(self, module_to_compile: corpus.ModuleSpec,
156195
# using ThinLTO, we will just never end up replacing anything.
157196
os.path.join(self._corpus_path, module_to_compile.name) + ".thinlto.bc")
158197
command_vector.extend([
159-
option.format(context=context)
198+
option.format(context=context, **self._aux_file_replacement_context)
160199
for option in module_to_compile.command_line
161200
])
162201

compiler_opt/es/regalloc_trace/regalloc_trace_worker_test.py

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,11 @@
2828

2929

3030
def _setup_corpus(corpus_dir: str,
31-
has_thinlto: bool = False) -> list[corpus.ModuleSpec]:
31+
has_thinlto: bool = False,
32+
cli_flags: tuple = ()) -> list[corpus.ModuleSpec]:
3233
modules = [
33-
corpus.ModuleSpec("module_a.o", 1, ("-fmodule-a",), True),
34-
corpus.ModuleSpec("module_b.o", 1, ("-fmodule-b",), True)
34+
corpus.ModuleSpec("module_a.o", 1, ("-fmodule-a", *cli_flags), True),
35+
corpus.ModuleSpec("module_b.o", 1, ("-fmodule-b", *cli_flags), True)
3536
]
3637

3738
corpus_description = {
@@ -208,3 +209,46 @@ def test_copy_corpus_locally_thinlto(self):
208209
os.path.exists(os.path.join(corpus_copy_dir, "module_a.o.thinlto.bc")))
209210
self.assertTrue(
210211
os.path.exists(os.path.join(corpus_copy_dir, "module_b.o.thinlto.bc")))
212+
213+
def test_remote_corpus_replacement_flags(self):
214+
corpus_copy_base_dir = self.create_tempdir("corpus_copy")
215+
corpus_copy_dir = os.path.join(corpus_copy_base_dir.full_path,
216+
"corpus_copy")
217+
corpus_dir = self.create_tempdir("corpus")
218+
profile_path = os.path.join(corpus_dir, "profile.prof")
219+
Path(profile_path).touch()
220+
corpus_modules = _setup_corpus(corpus_dir.full_path, False,
221+
("-fprofile-instr-use={prof}",))
222+
223+
fake_clang_binary = self.create_tempfile("fake_clang")
224+
fake_clang_invocations = self.create_tempfile("fake_clang_invocations")
225+
_create_test_binary(fake_clang_binary.full_path,
226+
fake_clang_invocations.full_path)
227+
fake_bb_trace_model_binary = self.create_tempfile(
228+
"fake_basic_block_trace_model")
229+
fake_bb_trace_model_invocations = self.create_tempfile(
230+
"fake_basic_block_trace_model_invocations")
231+
_create_test_binary(fake_bb_trace_model_binary.full_path,
232+
fake_bb_trace_model_invocations.full_path)
233+
234+
worker = regalloc_trace_worker.RegallocTraceWorker(
235+
gin_config="",
236+
clang_path=fake_clang_binary.full_path,
237+
basic_block_trace_model_path=fake_bb_trace_model_binary.full_path,
238+
thread_count=1,
239+
corpus_path=corpus_dir.full_path,
240+
copy_corpus_locally_path=corpus_copy_dir,
241+
aux_file_replacement_flags={"prof": profile_path})
242+
243+
copied_profile_path = os.path.join(corpus_copy_dir, "profile.prof")
244+
self.assertTrue(os.path.exists(copied_profile_path))
245+
_ = worker.compile_corpus_and_evaluate(corpus_modules,
246+
"function_index_path.pb",
247+
"bb_trace_path.pb", None)
248+
clang_command_lines = fake_clang_invocations.read_text().split("\n")
249+
clang_command_lines.remove("")
250+
self.assertLen(clang_command_lines, 2)
251+
self.assertTrue(
252+
f"-fprofile-instr-use={copied_profile_path}" in clang_command_lines[0])
253+
self.assertTrue(
254+
f"-fprofile-instr-use={copied_profile_path}" in clang_command_lines[1])

0 commit comments

Comments
 (0)