Skip to content

Commit ec3f4bc

Browse files
Make make_corpus functions consistent with extract_ir functions (#283)
Currently, the functions in make_corpus_lib assume they are working with full file paths, whereas the tooling in extract_ir strips the .bc/.cmd extensions and keeps only the object file path. Because of this discrepancy, tooling written for corpora from extract_ir (and their downstream products) does not work unmodified with corpora from make_corpus (setting aside the missing .cmd files). This patch makes make_corpus match the behavior of extract_ir.
1 parent 34a8851 commit ec3f4bc

File tree

2 files changed

+14
-7
lines changed

2 files changed

+14
-7
lines changed

compiler_opt/tools/make_corpus_lib.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
from typing import List, Optional
2323

24+
BITCODE_EXTENSION = '.bc'
25+
2426

2527
def load_bitcode_from_directory(bitcode_base_dir: str) -> List[str]:
2628
"""Finds bitcode files to extract from a given directory.
@@ -33,7 +35,10 @@ def load_bitcode_from_directory(bitcode_base_dir: str) -> List[str]:
3335
Returns an array of paths representing the relative path to the bitcode
3436
file from the base directory.
3537
"""
36-
paths = [str(p) for p in pathlib.Path(bitcode_base_dir).glob('**/*.bc')]
38+
paths = [
39+
str(p)[:-len(BITCODE_EXTENSION)]
40+
for p in pathlib.Path(bitcode_base_dir).glob('**/*' + BITCODE_EXTENSION)
41+
]
3742

3843
return [
3944
os.path.relpath(full_path, start=bitcode_base_dir) for full_path in paths
@@ -51,8 +56,10 @@ def copy_bitcode(relative_paths: List[str], bitcode_base_dir: str,
5156
output_dir: The output directory to place the bitcode in.
5257
"""
5358
for relative_path in relative_paths:
54-
base_path = os.path.join(bitcode_base_dir, relative_path)
55-
destination_path = os.path.join(output_dir, relative_path)
59+
base_path = os.path.join(bitcode_base_dir,
60+
relative_path + BITCODE_EXTENSION)
61+
destination_path = os.path.join(output_dir,
62+
relative_path + BITCODE_EXTENSION)
5663
os.makedirs(os.path.dirname(destination_path), exist_ok=True)
5764
shutil.copy(base_path, destination_path)
5865

compiler_opt/tools/make_corpus_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,23 +31,23 @@ def test_load_bitcode_from_directory(self):
3131
tempdir.create_file('test2.bc')
3232
relative_paths = make_corpus_lib.load_bitcode_from_directory(outer)
3333
relative_paths = sorted(relative_paths)
34-
self.assertEqual(relative_paths[0], 'nested/test1.bc')
35-
self.assertEqual(relative_paths[1], 'nested/test2.bc')
34+
self.assertEqual(relative_paths[0], 'nested/test1')
35+
self.assertEqual(relative_paths[1], 'nested/test2')
3636

3737
def test_copy_bitcode(self):
3838
build_dir = self.create_tempdir()
3939
nested_dir = build_dir.mkdir(dir_path='nested')
4040
nested_dir.create_file('test1.bc')
4141
nested_dir.create_file('test2.bc')
42-
relative_paths = ['nested/test1.bc', 'nested/test2.bc']
42+
relative_paths = ['nested/test1', 'nested/test2']
4343
corpus_dir = self.create_tempdir()
4444
make_corpus_lib.copy_bitcode(relative_paths, build_dir, corpus_dir)
4545
output_files = sorted(os.listdir(os.path.join(corpus_dir, './nested')))
4646
self.assertEqual(output_files[0], 'test1.bc')
4747
self.assertEqual(output_files[1], 'test2.bc')
4848

4949
def test_write_corpus_manifest(self):
50-
relative_output_paths = ['test/test1.bc', 'test/test2.bc']
50+
relative_output_paths = ['test/test1', 'test/test2']
5151
output_dir = self.create_tempdir()
5252
default_args = ['-O3', '-c']
5353
make_corpus_lib.write_corpus_manifest(relative_output_paths, output_dir,

0 commit comments

Comments
 (0)