Skip to content

Commit 53255a4

Browse files
authored
Minor fixes (#67)
* Minor fixes - the '-cc1' flag is required for commands extracted from .cmd files- Sometimes it's not in the .cmd file (Not sure how I managed it, but it happened) - disabled yapf formatting on data_reader's dataset builder - gracefully handle when a compile_command doesn't have the -o option
1 parent d5f076d commit 53255a4

File tree

7 files changed

+70
-30
lines changed

7 files changed

+70
-30
lines changed

check-license.sh

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#!/bin/bash
12
# Copyright 2020 Google LLC
23
#
34
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,27 +13,33 @@
1213
# See the License for the specific language governing permissions and
1314
# limitations under the License.
1415

15-
#!/bin/bash
1616

1717
exitcode=0
18-
for fname in `find . -name "*.py"`;
19-
do
20-
diff -q <(head -14 $fname) <(cat license-header.txt) > /dev/null
21-
if [ $? -ne 0 ]
18+
19+
check_license () {
20+
HEADER=$(head -1 $1)
21+
if [[ $HEADER == \#!* ]] # Ignore shebang line
22+
then
23+
HEADER=$(head -$3 $1 | sed 1d)
24+
else
25+
HEADER=$(head -$(($3 - 1)) $fname)
26+
fi
27+
if [[ "$HEADER" != "$2" ]]
2228
then
23-
echo "$fname does not have license header. Please copy and paste ./license-header.txt"
29+
echo "$1 does not have license header. Please copy and paste ./license-header.txt"
2430
exitcode=1
2531
fi
32+
}
33+
34+
for fname in `find . -name "*.py"`;
35+
do
36+
check_license $fname "$(cat license-header.txt)" 15
2637
done
2738

28-
for fname in `find . -name "*.sh"`;
39+
for fname in `find . -name "*.sh"`;
2940
do
30-
diff -q <(head -13 $fname) <(tail -13 license-header.txt) > /dev/null
31-
if [ $? -ne 0 ]
32-
then
33-
echo "$fname does not have license header. Please copy and paste ./license-header.txt"
34-
exitcode=1
35-
fi
41+
check_license $fname "$(tail -13 license-header.txt)" 14
3642
done
3743

3844
exit ${exitcode}
45+

compiler_opt/rl/corpus.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,4 +125,8 @@ def _load_and_parse_command(
125125

126126
cmdline.extend(additional_flags)
127127

128+
# The options read from a .cmd file must be run with -cc1
129+
if not cmd_override and cmdline[0] != '-cc1':
130+
raise ValueError('-cc1 flag not present in .cmd file.')
131+
128132
return cmdline

compiler_opt/rl/corpus_test.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,16 @@ def test_override(self):
109109
cmd_override=cmd_override),
110110
['-fix-all-bugs', '-x', 'ir', 'this!path#cant$exist/hi.bc'])
111111

112+
def test_cc1_exists(self):
113+
data = ['-fix-all-bugs', '-xyz']
114+
argfile = self.create_tempfile(content='\0'.join(data), file_path='hi.cmd')
115+
module_path = argfile.full_path[:-4]
116+
self.assertRaises(
117+
ValueError,
118+
corpus._load_and_parse_command,
119+
module_path=module_path,
120+
has_thinlto=False)
121+
112122

113123
class ModuleSpecTest(tf.test.TestCase):
114124

@@ -125,7 +135,7 @@ def test_get_without_thinlto(self):
125135
tempdir.create_file('1.bc')
126136
tempdir.create_file('1.cmd', content='\0'.join(['-cc1']))
127137
tempdir.create_file('2.bc')
128-
tempdir.create_file('2.cmd', content='\0'.join(['-O3']))
138+
tempdir.create_file('2.cmd', content='\0'.join(['-cc1', '-O3']))
129139

130140
ms_list = corpus.build_modulespecs_from_datapath(
131141
tempdir.full_path, additional_flags=('-add',))
@@ -137,8 +147,9 @@ def test_get_without_thinlto(self):
137147
('-cc1', '-x', 'ir', tempdir.full_path + '/1.bc', '-add'))
138148

139149
self.assertEqual(ms2.name, '2')
140-
self.assertEqual(ms2.exec_cmd,
141-
('-O3', '-x', 'ir', tempdir.full_path + '/2.bc', '-add'))
150+
self.assertEqual(
151+
ms2.exec_cmd,
152+
('-cc1', '-O3', '-x', 'ir', tempdir.full_path + '/2.bc', '-add'))
142153

143154
def test_get_with_thinlto(self):
144155
corpus_description = {'modules': ['1', '2'], 'has_thinlto': True}
@@ -151,7 +162,8 @@ def test_get_with_thinlto(self):
151162
'1.cmd', content='\0'.join(['-cc1', '-fthinlto-index=xyz']))
152163
tempdir.create_file('2.bc')
153164
tempdir.create_file('2.thinlto.bc')
154-
tempdir.create_file('2.cmd', content='\0'.join(['-fthinlto-index=abc']))
165+
tempdir.create_file(
166+
'2.cmd', content='\0'.join(['-cc1', '-fthinlto-index=abc']))
155167

156168
ms_list = corpus.build_modulespecs_from_datapath(
157169
tempdir.full_path,
@@ -168,7 +180,7 @@ def test_get_with_thinlto(self):
168180

169181
self.assertEqual(ms2.name, '2')
170182
self.assertEqual(ms2.exec_cmd,
171-
('-x', 'ir', tempdir.full_path + '/2.bc',
183+
('-cc1', '-x', 'ir', tempdir.full_path + '/2.bc',
172184
'-fthinlto-index=' + tempdir.full_path + '/2.thinlto.bc',
173185
'-mllvm', '-thinlto-assume-merged', '-add'))
174186

compiler_opt/rl/data_reader.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -165,13 +165,19 @@ def create_sequence_example_dataset_fn(
165165
def _sequence_example_dataset_fn(sequence_examples):
166166
# Data collector returns empty strings for corner cases, filter them out
167167
# here.
168-
dataset = (
169-
tf.data.Dataset.from_tensor_slices(sequence_examples).filter(
170-
lambda string: tf.strings.length(string) > 0).map(parser_fn).filter(
171-
lambda traj: tf.size(traj.reward) > 2).unbatch().batch(
172-
train_sequence_length, drop_remainder=True).cache().shuffle(
173-
trajectory_shuffle_buffer_size).batch(
174-
batch_size, drop_remainder=True))
168+
# yapf: disable - Looks better hand formatted
169+
dataset = (tf.data.Dataset
170+
.from_tensor_slices(sequence_examples)
171+
.filter(lambda string: tf.strings.length(string) > 0)
172+
.map(parser_fn)
173+
.filter(lambda traj: tf.size(traj.reward) > 2)
174+
.unbatch()
175+
.batch(train_sequence_length, drop_remainder=True)
176+
.cache()
177+
.shuffle(trajectory_shuffle_buffer_size)
178+
.batch(batch_size, drop_remainder=True)
179+
)
180+
# yapf: enable
175181
return dataset
176182

177183
return _sequence_example_dataset_fn

compiler_opt/tools/extract_ir.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -235,13 +235,18 @@ def extract(self,
235235
is_thinlto=thinlto_build == 'distributed')
236236

237237

238-
def convert_compile_command_to_objectfile(command: Dict[str, str],
239-
output_dir: str):
238+
def convert_compile_command_to_objectfile(
239+
command: Dict[str, str], output_dir: str) -> Optional[TrainingIRExtractor]:
240240
obj_base_dir = command['directory']
241241
cmd = command['command']
242242

243243
cmd_parts = cmd.split()
244-
obj_index = cmd_parts.index('-o') + 1
244+
try:
245+
obj_index = cmd_parts.index('-o') + 1
246+
except ValueError:
247+
# This could happen if there are non-clang commands in compile_commands.json
248+
logging.info('Command has no -o option: %s', cmd)
249+
return None
245250
obj_rel_path = cmd_parts[obj_index]
246251
# TODO(mtrofin): is the obj_base_dir correct for thinlto index bc files?
247252
return TrainingIRExtractor(
@@ -252,10 +257,12 @@ def convert_compile_command_to_objectfile(command: Dict[str, str],
252257

253258
def load_from_compile_commands(json_array: List[Dict[str, str]],
254259
output_dir: str) -> List[TrainingIRExtractor]:
255-
return [
260+
objs = [
256261
convert_compile_command_to_objectfile(cmd, output_dir)
257262
for cmd in json_array
258263
]
264+
# Filter out None, in case there were non-clang commands in the .json
265+
return [obj for obj in objs if obj is not None]
259266

260267

261268
def load_from_lld_params(params_array: List[str], obj_base_dir: str,

compiler_opt/tools/extract_ir_test.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,16 @@ def test_one_conversion(self):
3434
'command': '-cc1 -c /some/path/lib/foo/bar.cc -o lib/bar.o',
3535
'file': '/some/path/lib/foo/bar.cc'
3636
}, '/corpus/destination/path')
37+
self.assertIsNotNone(obj)
38+
# pytype: disable=attribute-error
39+
# Pytype complains about obj being None
3740
self.assertEqual(obj.input_obj(), '/output/directory/lib/bar.o')
3841
self.assertEqual(obj.relative_output_path(), 'lib/bar.o')
3942
self.assertEqual(obj.cmd_file(), '/corpus/destination/path/lib/bar.o.cmd')
4043
self.assertEqual(obj.bc_file(), '/corpus/destination/path/lib/bar.o.bc')
4144
self.assertEqual(obj.thinlto_index_file(),
4245
'/corpus/destination/path/lib/bar.o.thinlto.bc')
46+
# pytype: enable=attribute-error
4347

4448
def test_arr_conversion(self):
4549
res = extract_ir.load_from_compile_commands([{

compiler_opt/tools/generate_default_trace_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def test_api(self, mock_get_runner):
7777

7878
with tf.io.gfile.GFile(
7979
os.path.join(tmp_dir.full_path, module_name + '.cmd'), 'w') as f:
80-
f.write(module_name)
80+
f.write('-cc1')
8181

8282
mock_compilation_runner = MockCompilationRunner()
8383
mock_get_runner.return_value = mock_compilation_runner

0 commit comments

Comments
 (0)