Skip to content

Commit 8ff4f65

Browse files
authored
Move corpus related functions to corpus.py (google#63)
1 parent ac094ca commit 8ff4f65

10 files changed

+301
-142
lines changed

compiler_opt/rl/compilation_runner.py

Lines changed: 1 addition & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -75,47 +75,6 @@ def _overwrite_trajectory_reward(sequence_example: tf.train.SequenceExample,
7575
return sequence_example
7676

7777

78-
def get_command_line_for_bundle(
79-
cmd_file: str,
80-
ir_file: str,
81-
thinlto: Optional[str] = None,
82-
additional_flags: Tuple[str, ...] = (),
83-
delete_flags: Tuple[str, ...] = ()
84-
) -> List[str]:
85-
"""Cleans up base command line.
86-
87-
Remove certain unnecessary flags, and add the .bc file to compile and, if
88-
given, the thinlto index.
89-
90-
Args:
91-
cmd_file: Path to a .cmd file (from corpus).
92-
ir_file: The path to the ir file to compile.
93-
thinlto: The path to the thinlto index, or None.
94-
additional_flags: Tuple of clang flags to add.
95-
delete_flags: Tuple of clang flags to remove.
96-
97-
Returns:
98-
The argument list to pass to the compiler process.
99-
"""
100-
cmdline = []
101-
102-
with open(cmd_file, encoding='utf-8') as f:
103-
option_iterator = iter(f.read().split('\0'))
104-
option = next(option_iterator, None)
105-
while option:
106-
if any(option.startswith(flag) for flag in delete_flags):
107-
if '=' not in option:
108-
next(option_iterator, None)
109-
else:
110-
cmdline.append(option)
111-
option = next(option_iterator, None)
112-
cmdline.extend(['-x', 'ir', ir_file])
113-
if thinlto:
114-
cmdline.append('-fthinlto-index=' + thinlto)
115-
cmdline.extend(additional_flags)
116-
return cmdline
117-
118-
11978
class ProcessKilledError(Exception):
12079

12180
def __init__(self):
@@ -286,23 +245,17 @@ def is_priority_method(cls, method_name: str) -> bool:
286245
def __init__(self,
287246
clang_path: Optional[str] = None,
288247
launcher_path: Optional[str] = None,
289-
moving_average_decay_rate: float = 1,
290-
additional_flags: Tuple[str, ...] = (),
291-
delete_flags: Tuple[str, ...] = ()):
248+
moving_average_decay_rate: float = 1):
292249
"""Initialization of CompilationRunner class.
293250
294251
Args:
295252
clang_path: path to the clang binary.
296253
launcher_path: path to the launcher binary.
297254
moving_average_decay_rate: moving average decay rate during training.
298-
additional_flags: tuple of clang flags to add.
299-
delete_flags: tuple of clang flags to remove.
300255
"""
301256
self._clang_path = clang_path
302257
self._launcher_path = launcher_path
303258
self._moving_average_decay_rate = moving_average_decay_rate
304-
self._additional_flags = additional_flags
305-
self._delete_flags = delete_flags
306259
self._compilation_timeout = _COMPILATION_TIMEOUT.value
307260
self._cancellation_manager = WorkerCancellationManager()
308261

compiler_opt/rl/compilation_runner_test.py

Lines changed: 0 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -212,43 +212,6 @@ def test_exception_handling(self, mock_compile_fn):
212212
reward_stat=None)
213213
self.assertEqual(1, mock_compile_fn.call_count)
214214

215-
def test_command_line_file(self):
216-
data = ['-cc1', '-foo', '-bar=baz']
217-
argfile = self.create_tempfile(content='\0'.join(data))
218-
self.assertEqual(
219-
compilation_runner.get_command_line_for_bundle(argfile.full_path,
220-
'my_file.bc'),
221-
['-cc1', '-foo', '-bar=baz', '-x', 'ir', 'my_file.bc'])
222-
self.assertEqual(
223-
compilation_runner.get_command_line_for_bundle(argfile.full_path,
224-
'my_file.bc',
225-
'the_index.bc'),
226-
[
227-
'-cc1', '-foo', '-bar=baz', '-x', 'ir', 'my_file.bc',
228-
'-fthinlto-index=the_index.bc'
229-
])
230-
231-
def test_command_line_correction(self):
232-
delete_compilation_flags = ('-split-dwarf-file', '-split-dwarf-output',
233-
'-fthinlto-index', '-fprofile-sample-use',
234-
'-fprofile-remapping-file')
235-
data = [
236-
'-cc1', '-fthinlto-index=bad', '-split-dwarf-file', '/tmp/foo.dwo',
237-
'-split-dwarf-output', 'somepath/some.dwo'
238-
]
239-
argfile = self.create_tempfile(content='\0'.join(data))
240-
self.assertEqual(
241-
compilation_runner.get_command_line_for_bundle(
242-
argfile.full_path, 'hi.bc', delete_flags=delete_compilation_flags),
243-
['-cc1', '-x', 'ir', 'hi.bc'])
244-
self.assertEqual(
245-
compilation_runner.get_command_line_for_bundle(
246-
argfile.full_path,
247-
'hi.bc',
248-
'index.bc',
249-
delete_flags=delete_compilation_flags),
250-
['-cc1', '-x', 'ir', 'hi.bc', '-fthinlto-index=index.bc'])
251-
252215
def test_start_subprocess_output(self):
253216
ct = compilation_runner.WorkerCancellationManager()
254217
output = compilation_runner.start_cancellable_process(

compiler_opt/rl/corpus.py

Lines changed: 87 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,97 @@
1515
"""ModuleSpec definition and utility command line parsing functions."""
1616

1717
from dataclasses import dataclass
18+
from typing import Iterable, Optional, Tuple, List
19+
20+
import os
21+
import tensorflow as tf
1822

1923

2024
@dataclass(frozen=True)
2125
class ModuleSpec:
2226
"""Dataclass describing an input module and its compilation command options.
2327
"""
2428
name: str
25-
has_thinlto: bool = False
29+
exec_cmd: Tuple[str, ...] = ()
30+
31+
32+
def build_modulespecs_from_datapath(
    data_path: str,
    additional_flags: Tuple[str, ...] = (),
    delete_flags: Tuple[str, ...] = ()
) -> List[ModuleSpec]:
  """Builds one ModuleSpec for every module listed under a corpus directory.

  The module list is obtained from `_load_module_paths(data_path)`. For each
  module path `p`, the compile command is assembled by
  `_load_and_parse_command` from `p + '.cmd'` and `p + '.bc'`, plus
  `p + '.thinlto.bc'` when the corpus is detected as having thinlto indices.

  Args:
    data_path: Directory of the corpus to load.
    additional_flags: Tuple of clang flags to add to every command line.
    delete_flags: Tuple of clang flags to remove from every command line.

  Returns:
    A list of ModuleSpec, in the same order as the loaded module paths, each
    named after its module path and carrying its command line as a tuple.
  """
  paths = _load_module_paths(data_path)

  # Thinlto presence is decided once (via _has_thinlto_index) and then applied
  # uniformly to every module of the corpus.
  with_thinlto = _has_thinlto_index(paths)

  # Note kept from the original implementation: this takes ~7s for 30k modules.
  return [
      ModuleSpec(
          name=path,
          exec_cmd=tuple(
              _load_and_parse_command(
                  ir_file=f'{path}.bc',
                  cmd_file=f'{path}.cmd',
                  thinlto_file=f'{path}.thinlto.bc' if with_thinlto else None,
                  additional_flags=additional_flags,
                  delete_flags=delete_flags)))
      for path in paths
  ]
54+
55+
56+
def _has_thinlto_index(module_paths: Iterable[str]) -> bool:
  """Reports whether the corpus ships thinlto index files.

  Only the first module path is probed: the answer is whether
  `<first path>.thinlto.bc` exists according to `tf.io.gfile.exists`.

  Args:
    module_paths: Module paths (without file extension) of the corpus.

  Returns:
    True if the thinlto index of the first module exists; False if it does not
    or if `module_paths` yields no element.
  """
  # Use a default so an empty iterable does not leak a bare StopIteration to
  # the caller (which would turn into a RuntimeError inside a generator).
  first_path = next(iter(module_paths), None)
  if first_path is None:
    return False
  return tf.io.gfile.exists(first_path + '.thinlto.bc')
58+
59+
60+
def _load_module_paths(data_path: str) -> List[str]:
  """Reads the list of module paths of a corpus.

  The file `<data_path>/module_paths` is read as UTF-8 text with one module
  name per line; every name is joined onto `data_path`.

  Args:
    data_path: Directory containing the `module_paths` file.

  Returns:
    The module paths, in file order, each prefixed with `data_path`.

  Raises:
    ValueError: If the file lists no module at all.
  """
  module_paths_path = os.path.join(data_path, 'module_paths')
  with open(module_paths_path, 'r', encoding='utf-8') as f:
    names = [line.rstrip('\n') for line in f]
  # Blank lines (e.g. a trailing empty line) are skipped: joining an empty
  # name would yield `data_path/`, which is not a module.
  ret = [os.path.join(data_path, name) for name in names if name]
  if not ret:
    raise ValueError(f'{module_paths_path} is empty.')
  return ret
67+
68+
69+
def _load_and_parse_command(
    ir_file: str,
    cmd_file: str,
    thinlto_file: Optional[str] = None,
    additional_flags: Tuple[str, ...] = (),
    delete_flags: Tuple[str, ...] = ()
) -> List[str]:
  """Cleans up base command line.

  Remove certain unnecessary flags, and add the .bc file to compile and, if
  given, the thinlto index.

  The .cmd file is read as UTF-8 text holding NUL-separated options. An option
  is dropped when it starts with any entry of `delete_flags`; if the dropped
  option contains no '=', the option following it is dropped too (it is taken
  to be the flag's separate value). Empty options are kept as they are.

  Args:
    ir_file: The path to the ir file to compile.
    cmd_file: Path to a .cmd file (from corpus).
    thinlto_file: The path to the thinlto index, or None.
    additional_flags: Tuple of clang flags to add.
    delete_flags: Tuple of clang flags to remove.

  Returns:
    The argument list to pass to the compiler process: the retained options,
    then `-x ir <ir_file>`, then (when `thinlto_file` is given) the thinlto
    index flag followed by `-mllvm -thinlto-assume-merged`, then
    `additional_flags`.
  """
  with open(cmd_file, encoding='utf-8') as f:
    options = iter(f.read().split('\0'))

  # str.startswith accepts a tuple of prefixes; build it once, not per option.
  prefixes_to_drop = tuple(delete_flags)

  cmdline = []
  for option in options:
    if option.startswith(prefixes_to_drop):
      if '=' not in option:
        # Flag given as `-flag value`: consume the value from the same
        # iterator so the loop never sees it.
        next(options, None)
      continue
    cmdline.append(option)

  cmdline.extend(['-x', 'ir', ir_file])

  if thinlto_file:
    cmdline.extend(
        [f'-fthinlto-index={thinlto_file}', '-mllvm', '-thinlto-assume-merged'])

  cmdline.extend(additional_flags)

  return cmdline

0 commit comments

Comments
 (0)