Skip to content

Commit fc26cfc

Browse files
authored
Explicitly model 'replace' flags (#139)
* Explicitly model 'replace' flags Using 'delete' and 'add' for flags that must actually be present - such as -fthinlto-index, or, later, profiles - is a bug farm. This patch explicitly models such flags as something that cannot be 'deleted' or 'added', and must be somehow converted (contextualized to the location of the files(presumably) used as values of those flags). The conversion rule is a formatting string which uses a 'context' value, which will have fields populate at the time of the conversion. Some tests had to have the order of some flags adjusted a bit as a NFC side-effect. * Rebase and fix
1 parent bff702d commit fc26cfc

File tree

2 files changed

+136
-26
lines changed

2 files changed

+136
-26
lines changed

compiler_opt/rl/corpus.py

Lines changed: 73 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
from absl import logging
2121
from dataclasses import dataclass
22-
from typing import Iterable, List, Dict, Tuple, Any
22+
from typing import Any, Dict, Iterable, List, Optional, Tuple
2323

2424
import json
2525
import os
@@ -82,16 +82,36 @@ def __call__(self,
8282
class Corpus:
8383
"""Represents a corpus. Comes along with some utility functions."""
8484

85+
@dataclass(frozen=True)
86+
class ReplaceContext:
87+
"""Context for 'replace' rules."""
88+
full_path_prefix: str
89+
8590
def __init__(self,
8691
data_path: str,
8792
additional_flags: Tuple[str, ...] = (),
88-
delete_flags: Tuple[str, ...] = ()):
93+
delete_flags: Tuple[str, ...] = (),
94+
replace_flags: Optional[Dict[str, str]] = None):
95+
"""
96+
Args:
97+
data_path: corpus directory.
98+
additional_flags: list of flags to append to the command line
99+
delete_flags: list of flags to remove (both `-flag=<value` and
100+
`-flag <value>` are supported).
101+
replace_flags: list of flags to be replaced. The key in the dictionary
102+
is the flag. The value is a string that will be `format`-ed with a
103+
`context` object - see `ReplaceContext`.
104+
We verify that flags in replace_flags are present, and do not appear
105+
in the additional_flags nor delete_flags.
106+
Thinlto index is handled this way, too.
107+
"""
89108
self._module_specs = tuple(
90109
sorted(
91110
_build_modulespecs_from_datapath(
92111
data_path=data_path,
93112
additional_flags=additional_flags,
94-
delete_flags=delete_flags),
113+
delete_flags=delete_flags,
114+
replace_flags=replace_flags),
95115
key=lambda m: m.size,
96116
reverse=True))
97117
self._root_dir = data_path
@@ -137,8 +157,8 @@ def __len__(self):
137157
def _build_modulespecs_from_datapath(
138158
data_path: str,
139159
additional_flags: Tuple[str, ...] = (),
140-
delete_flags: Tuple[str, ...] = ()
141-
) -> List[ModuleSpec]:
160+
delete_flags: Tuple[str, ...] = (),
161+
replace_flags: Optional[Dict[str, str]] = None) -> List[ModuleSpec]:
142162
# TODO: (b/233935329) Per-corpus *fdo profile paths can be read into
143163
# {additional|delete}_flags here
144164
with open(
@@ -174,6 +194,7 @@ def _build_modulespecs_from_datapath(
174194
has_thinlto=has_thinlto,
175195
additional_flags=additional_flags,
176196
delete_flags=delete_flags,
197+
replace_flags=replace_flags,
177198
cmd_override=cmd_override)
178199
size = os.path.getsize(full_module_path + '.bc')
179200
module_specs.append(
@@ -187,7 +208,8 @@ def _load_and_parse_command(
187208
has_thinlto: bool,
188209
additional_flags: Tuple[str, ...] = (),
189210
delete_flags: Tuple[str, ...] = (),
190-
cmd_override: Tuple[str, ...] = ()
211+
replace_flags: Optional[Dict[str, str]] = None,
212+
cmd_override: Tuple[str, ...] = (),
191213
) -> List[str]:
192214
"""Cleans up base command line.
193215
@@ -211,23 +233,59 @@ def _load_and_parse_command(
211233
else:
212234
with open(module_path + '.cmd', encoding='utf-8') as f:
213235
option_iterator = iter(f.read().split('\0'))
236+
237+
context = Corpus.ReplaceContext(full_path_prefix=module_path)
238+
replace_flags = replace_flags.copy() if replace_flags else {}
239+
if has_thinlto:
240+
additional_flags = ('-mllvm', '-thinlto-assume-merged') + additional_flags
241+
if cmd_override:
242+
additional_flags = (
243+
f'-fthinlto-index={context.full_path_prefix}.thinlto.bc',
244+
) + additional_flags
245+
else:
246+
fthinlto_index = '-fthinlto-index'
247+
if fthinlto_index in replace_flags:
248+
raise ValueError(
249+
'-fthinlto-index must be handled by the infrastructure')
250+
replace_flags[fthinlto_index] = '{context.full_path_prefix}.thinlto.bc'
251+
252+
additional_flags = ('-x', 'ir', module_path + '.bc') + additional_flags
253+
254+
matched_replace_flags = set()
255+
# don't user add/remove for replace
256+
if set(additional_flags).intersection(
257+
set(replace_flags)) or set(delete_flags).intersection(set(replace_flags)):
258+
raise ValueError('do not use add/delete flags to replace')
259+
214260
option = next(option_iterator, None)
215261

216262
while option is not None:
217263
if any(option.startswith(flag) for flag in delete_flags):
218264
if '=' not in option:
219265
next(option_iterator, None)
220266
else:
221-
cmdline.append(option)
222-
option = next(option_iterator, None)
223-
cmdline.extend(['-x', 'ir', module_path + '.bc'])
224-
225-
if has_thinlto:
226-
cmdline.extend([
227-
f'-fthinlto-index={module_path}.thinlto.bc', '-mllvm',
228-
'-thinlto-assume-merged'
229-
])
267+
matching_replace = [
268+
flag for flag in replace_flags if option.startswith(flag)
269+
]
270+
if not matching_replace:
271+
cmdline.append(option)
272+
else:
273+
assert len(matching_replace) == 1
274+
flag = matching_replace[0]
275+
if flag in matched_replace_flags:
276+
raise ValueError(f'{flag} was matched twice')
277+
matched_replace_flags.add(flag)
278+
279+
if '=' not in option:
280+
next(option_iterator, None)
281+
cmdline.extend([option, replace_flags[flag].format(context=context)])
282+
else:
283+
cmdline.append(flag + '=' +
284+
replace_flags[flag].format(context=context))
230285

286+
option = next(option_iterator, None)
287+
if len(matched_replace_flags) != len(replace_flags):
288+
raise ValueError('flags that were expected to be replaced were not found')
231289
cmdline.extend(additional_flags)
232290

233291
# The options read from a .cmd file must be run with -cc1

compiler_opt/rl/corpus_test.py

Lines changed: 63 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,14 @@ def test_thinlto_file(self):
3333
corpus._load_and_parse_command(
3434
module_path=module_path, has_thinlto=False),
3535
['-cc1', '-foo', '-bar=baz', '-x', 'ir', module_path + '.bc'])
36+
data = ['-cc1', '-foo', '-bar=baz', '-fthinlto-index=some/path/here']
37+
argfile = self.create_tempfile(content='\0'.join(data), file_path='hi.cmd')
3638
self.assertEqual(
3739
corpus._load_and_parse_command(
3840
module_path=module_path, has_thinlto=True), [
39-
'-cc1', '-foo', '-bar=baz', '-x', 'ir', module_path + '.bc',
40-
'-fthinlto-index=' + module_path + '.thinlto.bc', '-mllvm',
41-
'-thinlto-assume-merged'
41+
'-cc1', '-foo', '-bar=baz',
42+
'-fthinlto-index=' + module_path + '.thinlto.bc', '-x', 'ir',
43+
module_path + '.bc', '-mllvm', '-thinlto-assume-merged'
4244
])
4345

4446
def test_deletion(self):
@@ -70,6 +72,54 @@ def test_addition(self):
7072
additional_flags=additional_flags),
7173
['-cc1', '-x', 'ir', module_path + '.bc', '-fix-all-bugs'])
7274

75+
def test_replacement(self):
76+
replace_flags = {'-replace-me': '{context.full_path_prefix}.replaced'}
77+
data = ['-cc1']
78+
argfile = self.create_tempfile(content='\0'.join(data), file_path='hi.cmd')
79+
module_path = argfile.full_path[:-4]
80+
# if we expect to be able to replace a flag, and it's not in the original
81+
# cmdline, raise.
82+
self.assertRaises(
83+
ValueError,
84+
corpus._load_and_parse_command,
85+
module_path=module_path,
86+
has_thinlto=False,
87+
replace_flags=replace_flags)
88+
data = ['-cc1', '-replace-me=some_value']
89+
argfile = self.create_tempfile(content='\0'.join(data), file_path='hi.cmd')
90+
# the flag can be replaced in non-thinlto cases; and in the case of thinlto
91+
# it does not interfere with the thinlto one
92+
self.assertEqual(
93+
corpus._load_and_parse_command(
94+
module_path=module_path,
95+
has_thinlto=False,
96+
replace_flags=replace_flags), [
97+
'-cc1', '-replace-me=' + module_path + '.replaced', '-x', 'ir',
98+
module_path + '.bc'
99+
])
100+
# variant without '='
101+
data = ['-cc1', '-replace-me', 'some_value']
102+
argfile = self.create_tempfile(content='\0'.join(data), file_path='hi.cmd')
103+
self.assertEqual(
104+
corpus._load_and_parse_command(
105+
module_path=module_path,
106+
has_thinlto=False,
107+
replace_flags=replace_flags), [
108+
'-cc1', '-replace-me', module_path + '.replaced', '-x', 'ir',
109+
module_path + '.bc'
110+
])
111+
data = ['-cc1', '-replace-me', 'some_value', '-fthinlto-index=blah']
112+
argfile = self.create_tempfile(content='\0'.join(data), file_path='hi.cmd')
113+
self.assertEqual(
114+
corpus._load_and_parse_command(
115+
module_path=module_path,
116+
has_thinlto=True,
117+
replace_flags=replace_flags), [
118+
'-cc1', '-replace-me', module_path + '.replaced',
119+
'-fthinlto-index=' + module_path + '.thinlto.bc', '-x', 'ir',
120+
module_path + '.bc', '-mllvm', '-thinlto-assume-merged'
121+
])
122+
73123
def test_modification(self):
74124
delete_compilation_flags = ('-split-dwarf-file', '-split-dwarf-output',
75125
'-fthinlto-index', '-fprofile-sample-use',
@@ -166,23 +216,27 @@ def test_get_with_thinlto(self):
166216
tempdir.create_file(
167217
'2.cmd', content='\0'.join(['-cc1', '-fthinlto-index=abc']))
168218

169-
ms_list = corpus._build_modulespecs_from_datapath(
219+
self.assertRaises(
220+
ValueError,
221+
corpus._build_modulespecs_from_datapath,
170222
tempdir.full_path,
171223
additional_flags=('-add',),
172224
delete_flags=('-fthinlto-index',))
225+
ms_list = corpus._build_modulespecs_from_datapath(
226+
tempdir.full_path, additional_flags=('-add',))
173227
self.assertEqual(len(ms_list), 2)
174228
ms1 = ms_list[0]
175229
ms2 = ms_list[1]
176230
self.assertEqual(ms1.name, '1')
177231
self.assertEqual(ms1.exec_cmd,
178-
('-cc1', '-x', 'ir', tempdir.full_path + '/1.bc',
179-
'-fthinlto-index=' + tempdir.full_path + '/1.thinlto.bc',
232+
('-cc1', '-fthinlto-index=' + tempdir.full_path +
233+
'/1.thinlto.bc', '-x', 'ir', tempdir.full_path + '/1.bc',
180234
'-mllvm', '-thinlto-assume-merged', '-add'))
181235

182236
self.assertEqual(ms2.name, '2')
183237
self.assertEqual(ms2.exec_cmd,
184-
('-cc1', '-x', 'ir', tempdir.full_path + '/2.bc',
185-
'-fthinlto-index=' + tempdir.full_path + '/2.thinlto.bc',
238+
('-cc1', '-fthinlto-index=' + tempdir.full_path +
239+
'/2.thinlto.bc', '-x', 'ir', tempdir.full_path + '/2.bc',
186240
'-mllvm', '-thinlto-assume-merged', '-add'))
187241

188242
def test_get_with_override(self):
@@ -203,9 +257,7 @@ def test_get_with_override(self):
203257
tempdir.create_file('2.cmd', content='\0'.join(['-fthinlto-index=abc']))
204258

205259
ms_list = corpus._build_modulespecs_from_datapath(
206-
tempdir.full_path,
207-
additional_flags=('-add',),
208-
delete_flags=('-fthinlto-index',))
260+
tempdir.full_path, additional_flags=('-add',))
209261
self.assertEqual(len(ms_list), 2)
210262
ms1 = ms_list[0]
211263
ms2 = ms_list[1]

0 commit comments

Comments
 (0)