Skip to content

Commit fca414f

Browse files
authored
Replace module_paths with corpus_description.json (#65)
* Replace module_paths with metadata.json
  - This adds 'global_command_override' functionality for lld-thinlto corpora, exposed in metadata.json
  - Setting the above in the metadata.json file will default the runners to using that as the base command, rather than looking for a .cmd file
* Rename to corpus_description.json
* Fix pytype error
* Make UNSPECIFIED a module constant
* Move UNSPECIFIED to constant.py. Resolved comments
1 parent 6332087 commit fca414f

File tree

6 files changed

+180
-120
lines changed

6 files changed

+180
-120
lines changed

compiler_opt/rl/constant.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
# Delta to add when computing reward.
2424
DELTA = 0.01
2525

26+
# Default of global_command_override in corpus_description.json
27+
UNSPECIFIED_OVERRIDE = ['<UNSPECIFIED>']
28+
2629

2730
@gin.constants_from_enum
2831
class AgentName(enum.Enum):

compiler_opt/rl/corpus.py

Lines changed: 60 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,14 @@
1414
# limitations under the License.
1515
"""ModuleSpec definition and utility command line parsing functions."""
1616

17+
from absl import logging
1718
from dataclasses import dataclass
18-
from typing import Iterable, Optional, Tuple, List
19+
from typing import List, Dict, Tuple, Any
1920

21+
import json
2022
import os
21-
import tensorflow as tf
23+
24+
from compiler_opt.rl import constant
2225

2326

2427
@dataclass(frozen=True)
@@ -34,77 +37,91 @@ def build_modulespecs_from_datapath(
3437
additional_flags: Tuple[str, ...] = (),
3538
delete_flags: Tuple[str, ...] = ()
3639
) -> List[ModuleSpec]:
37-
module_paths: List[str] = _load_module_paths(data_path)
38-
39-
has_thinlto: bool = _has_thinlto_index(module_paths)
40+
# TODO: (b/233935329) Per-corpus *fdo profile paths can be read into
41+
# {additional|delete}_flags here
42+
with open(
43+
os.path.join(data_path, 'corpus_description.json'), 'r',
44+
encoding='utf-8') as f:
45+
corpus_description: Dict[str, Any] = json.load(f)
46+
47+
module_paths = corpus_description['modules']
48+
if len(module_paths) == 0:
49+
raise ValueError(f'{data_path}\'s corpus_description contains no modules.')
50+
51+
has_thinlto: bool = corpus_description['has_thinlto']
52+
53+
cmd_override = ()
54+
if 'global_command_override' in corpus_description:
55+
if corpus_description[
56+
'global_command_override'] == constant.UNSPECIFIED_OVERRIDE:
57+
raise ValueError(
58+
'global_command_override in corpus_description.json not filled.')
59+
cmd_override = tuple(corpus_description['global_command_override'])
60+
if len(additional_flags) > 0:
61+
logging.warning('Additional flags are specified together with override.')
62+
if len(delete_flags) > 0:
63+
logging.warning('Delete flags are specified together with override.')
4064

4165
module_specs: List[ModuleSpec] = []
4266

4367
# This takes ~7s for 30k modules
4468
for module_path in module_paths:
4569
exec_cmd = _load_and_parse_command(
46-
ir_file=module_path + '.bc',
47-
cmd_file=(module_path + '.cmd'),
48-
thinlto_file=(module_path + '.thinlto.bc') if has_thinlto else None,
70+
module_path=os.path.join(data_path, module_path),
71+
has_thinlto=has_thinlto,
4972
additional_flags=additional_flags,
50-
delete_flags=delete_flags)
73+
delete_flags=delete_flags,
74+
cmd_override=cmd_override)
5175
module_specs.append(ModuleSpec(name=module_path, exec_cmd=tuple(exec_cmd)))
5276

5377
return module_specs
5478

5579

56-
def _has_thinlto_index(module_paths: Iterable[str]) -> bool:
57-
return tf.io.gfile.exists(next(iter(module_paths)) + '.thinlto.bc')
58-
59-
60-
def _load_module_paths(data_path) -> List[str]:
61-
module_paths_path = os.path.join(data_path, 'module_paths')
62-
with open(module_paths_path, 'r', encoding='utf-8') as f:
63-
ret = [os.path.join(data_path, name.rstrip('\n')) for name in f]
64-
if len(ret) == 0:
65-
raise ValueError(f'{module_paths_path} is empty.')
66-
return ret
67-
68-
6980
def _load_and_parse_command(
70-
ir_file: str,
71-
cmd_file: str,
72-
thinlto_file: Optional[str] = None,
81+
module_path: str,
82+
has_thinlto: bool,
7383
additional_flags: Tuple[str, ...] = (),
74-
delete_flags: Tuple[str, ...] = ()
84+
delete_flags: Tuple[str, ...] = (),
85+
cmd_override: Tuple[str, ...] = ()
7586
) -> List[str]:
7687
"""Cleans up base command line.
7788
7889
Remove certain unnecessary flags, and add the .bc file to compile and, if
7990
has_thinlto is set, the thinlto index.
8091
8192
Args:
82-
cmd_file: Path to a .cmd file (from corpus).
83-
ir_file: The path to the ir file to compile.
84-
thinlto_file: The path to the thinlto index, or None.
93+
module_path: Absolute path to the module without extension (from corpus).
94+
has_thinlto: Whether to add thinlto flags.
8595
additional_flags: Tuple of clang flags to add.
8696
delete_flags: Tuple of clang flags to remove.
97+
cmd_override: Tuple of strings to use as the base command line.
8798
8899
Returns:
89100
The argument list to pass to the compiler process.
90101
"""
91102
cmdline = []
92103

93-
with open(cmd_file, encoding='utf-8') as f:
94-
option_iterator = iter(f.read().split('\0'))
104+
if cmd_override:
105+
option_iterator = iter(cmd_override)
106+
else:
107+
with open(module_path + '.cmd', encoding='utf-8') as f:
108+
option_iterator = iter(f.read().split('\0'))
109+
option = next(option_iterator, None)
110+
111+
while option is not None:
112+
if any(option.startswith(flag) for flag in delete_flags):
113+
if '=' not in option:
114+
next(option_iterator, None)
115+
else:
116+
cmdline.append(option)
95117
option = next(option_iterator, None)
96-
while option is not None:
97-
if any(option.startswith(flag) for flag in delete_flags):
98-
if '=' not in option:
99-
next(option_iterator, None)
100-
else:
101-
cmdline.append(option)
102-
option = next(option_iterator, None)
103-
cmdline.extend(['-x', 'ir', ir_file])
104-
105-
if thinlto_file:
106-
cmdline.extend(
107-
[f'-fthinlto-index={thinlto_file}', '-mllvm', '-thinlto-assume-merged'])
118+
cmdline.extend(['-x', 'ir', module_path + '.bc'])
119+
120+
if has_thinlto:
121+
cmdline.extend([
122+
f'-fthinlto-index={module_path}.thinlto.bc', '-mllvm',
123+
'-thinlto-assume-merged'
124+
])
108125

109126
cmdline.extend(additional_flags)
110127

0 commit comments

Comments
 (0)