23
23
The compilation is assumed to have been performed with clang, using
24
24
-fembed-bitcode=all passed to cc1 (i.e. pass clang -Xclang=-fembed-bitcode=all)
25
25
26
- In a ThinLTO case, the compilation is assumed to have been performed specifying
27
- -mllvm -lto-embed-bitcode=post-merge-pre-opt.
26
+ In a distributed ThinLTO case, the compilation is assumed to have been performed
27
+ specifying -mllvm -lto-embed-bitcode=post-merge-pre-opt.
28
+
29
+ In a local ThinLTO case, the compilation is assumed to have been performed
30
+ specifying -Wl,--save-temps=import -Wl,--thinlto-emit-index-files.
28
31
"""
29
32
30
33
import json
31
34
import multiprocessing
32
35
import os
36
+ import pathlib
33
37
import re
34
38
import shutil
35
39
import subprocess
59
63
'Include only those modules with a command line matching this regexp. '
60
64
'Setting it to None for not filtering. Note that the regexp is applied '
61
65
'independently for each separate command line option. For example, ^-Oz$ '
62
- 'will match Oz - built binaries.' )
63
- flags .DEFINE_bool (
64
- 'thinlto_build' , False , 'Set if the build was ThinLTO, to '
65
- 'ensure index files are also copied. The build is assumed to have had'
66
- '-mllvm -lto-embed-bitcode=post-merge-pre-opt passed to clang.' )
66
+ 'will match Oz - built binaries. Does not work with thinlto_build=local.' )
67
+ flags .DEFINE_enum (
68
+ 'thinlto_build' , None , ['distributed' , 'local' ],
69
+ 'Set if the build was performed with either \' distributed\' or '
70
+ '\' local\' ThinLTO. This ensures the thinlto.bc files are also copied. '
71
+ 'The build is assumed to have had '
72
+ '-mllvm -lto-embed-bitcode=post-merge-pre-opt passed in the distributed '
73
+ 'case, or -Wl,--save-temps=import and -Wl,--thinlto-emit-index-files '
74
+ 'passed in the local case.' )
67
75
68
76
FLAGS = flags .FLAGS
69
77
@@ -118,6 +126,16 @@ def relative_output_path(self):
118
126
def input_obj(self):
  """Return the object file path: the relative path under the object base dir."""
  base_dir = self.obj_base_dir()
  return os.path.join(base_dir, self._obj_relative_path)
120
128
129
def lld_src_bc(self):
  """Return the path of the post-import bitcode lld saved for this object."""
  # '.3.import.bc' is the suffix lld attaches to the post-merge-pre-opt
  # ('postimport') IR bitcode it saves. The suffix is hardcoded into lld.
  bitcode_name = self._obj_relative_path + '.3.import.bc'
  return os.path.join(self._obj_base_dir, bitcode_name)
134
+
135
def lld_src_thinlto(self):
  """Return the path of the '.thinlto.bc' index file for this object."""
  index_name = self._obj_relative_path + '.thinlto.bc'
  return os.path.join(self._obj_base_dir, index_name)
138
+
121
139
def dest_dir(self):
  """Return the output directory: output base dir plus the object's parent dir."""
  out_base = self.output_base_dir()
  relative_parent = os.path.dirname(self._obj_relative_path)
  return os.path.join(out_base, relative_parent)
@@ -148,8 +166,8 @@ def _get_extraction_bc_command(self, llvm_objcopy_path):
148
166
self .input_obj (), '/dev/null'
149
167
]
150
168
151
- def extract (self , llvm_objcopy_path : str , cmd_filter : str ,
152
- is_thinlto : bool ) -> Optional [str ]:
169
+ def _extract_clang_artifacts (self , llvm_objcopy_path : str , cmd_filter : str ,
170
+ is_thinlto : bool ) -> Optional [str ]:
153
171
"""Run llvm-objcopy to extract the .bc and command line."""
154
172
if not os .path .exists (self .input_obj ()):
155
173
logging .info ('%s does not exist.' , self .input_obj ())
@@ -184,6 +202,36 @@ def extract(self, llvm_objcopy_path: str, cmd_filter: str,
184
202
(not is_thinlto or os .path .exists (self .thinlto_index_file ())))
185
203
return self .relative_output_path ()
186
204
205
def _extract_lld_artifacts(self) -> Optional[str]:
  """Copy the lld-saved bitcode and ThinLTO index file to the output tree.

  Both source files must exist. If either is missing, the missing path is
  logged and nothing is created or copied.

  Returns:
    The object's relative path on success, or None when a source file is
    missing.

  Raises:
    OSError: if creating the destination directory or copying a file fails.
  """
  bc_src = self.lld_src_bc()
  index_src = self.lld_src_thinlto()
  # Check both inputs before touching the output tree.
  for src in (bc_src, index_src):
    if not os.path.exists(src):
      logging.info('%s does not exist.', src)
      return None
  os.makedirs(self.dest_dir(), exist_ok=True)

  # shutil.copy raises on failure, so no assert-based existence check is
  # needed afterwards (asserts would also be stripped under `python -O`).
  shutil.copy(bc_src, self.bc_file())
  shutil.copy(index_src, self.thinlto_index_file())
  return self._obj_relative_path
223
+
224
def extract(self,
            llvm_objcopy_path: Optional[str] = None,
            cmd_filter: Optional[str] = None,
            thinlto_build: Optional[str] = None) -> Optional[str]:
  """Dispatch extraction to the lld or the clang path.

  Args:
    llvm_objcopy_path: forwarded to the clang extraction path.
    cmd_filter: forwarded to the clang extraction path.
    thinlto_build: 'local' selects the lld artifacts path. Any other value
      selects the clang path, with is_thinlto set only for 'distributed'.

  Returns:
    Whatever the selected extraction method returns.
  """
  if thinlto_build != 'local':
    use_thinlto = thinlto_build == 'distributed'
    return self._extract_clang_artifacts(
        llvm_objcopy_path=llvm_objcopy_path,
        cmd_filter=cmd_filter,
        is_thinlto=use_thinlto)
  # The lld path takes no objcopy path or command-line filter.
  return self._extract_lld_artifacts()
234
+
187
235
188
236
def convert_compile_command_to_objectfile (command : Dict [str , str ],
189
237
output_dir : str ):
@@ -232,6 +280,24 @@ def make_obj(obj_file: str) -> TrainingIRExtractor:
232
280
return [make_obj (obj_file ) for obj_file in just_obj_paths ]
233
281
234
282
283
def load_for_lld_thinlto(obj_base_dir: str,
                         output_dir: str) -> List[TrainingIRExtractor]:
  """Build extractors for every lld post-import bitcode file under a directory.

  Args:
    obj_base_dir: directory searched recursively for '*.3.import.bc' files.
    output_dir: passed to each extractor as its output base directory.

  Returns:
    One TrainingIRExtractor per bitcode file found. Each one's relative
    object path is the file's path relative to obj_base_dir with the
    '.3.import.bc' suffix removed.
  """
  # .3.import.bc is the suffix attached to post-merge-pre-opt ('postimport')
  # IR bitcode saved by lld. It is hardcoded into lld. ThinLTO index files
  # are also emitted next to the postimport bitcode, with the suffix
  # .thinlto.bc instead.
  suffix = '.3.import.bc'

  def make_spec(bc_path: str):
    relative_bc = os.path.relpath(bc_path, start=obj_base_dir)
    return TrainingIRExtractor(
        # Trim exactly the suffix the glob matched on, rather than a
        # hand-counted number of characters.
        obj_relative_path=relative_bc[:-len(suffix)],
        output_base_dir=output_dir,
        obj_base_dir=obj_base_dir)

  matches = pathlib.Path(obj_base_dir).glob('**/*' + suffix)
  return [make_spec(str(match)) for match in matches]
299
+
300
+
235
301
# This is here just for readability, lint complains if the pooling expression is
236
302
# over 3 lines; and it needs to be a non-local so it may be pickled.
237
303
def extract_artifacts (obj : TrainingIRExtractor ) -> Optional [str ]:
@@ -242,9 +308,14 @@ def extract_artifacts(obj: TrainingIRExtractor) -> Optional[str]:
242
308
def main (argv ):
243
309
if len (argv ) > 1 :
244
310
raise app .UsageError ('Too many command-line arguments.' )
245
- flags .mark_flags_as_required (['output_dir' , 'input' ])
311
+ flags .mark_flags_as_required (['output_dir' ])
312
+
246
313
objs = []
247
- if FLAGS .input_type == 'json' :
314
+ if FLAGS .input is None :
315
+ if FLAGS .thinlto_build != 'local' :
316
+ raise ValueError ('--input or --thinlto_build=local must be provided' )
317
+ objs = load_for_lld_thinlto (FLAGS .obj_base_dir , FLAGS .output_dir )
318
+ elif FLAGS .input_type == 'json' :
248
319
with open (FLAGS .input , encoding = 'utf-8' ) as f :
249
320
objs = load_from_compile_commands (json .load (f ), FLAGS .output_dir )
250
321
elif FLAGS .input_type == 'params' :
0 commit comments