@@ -166,68 +166,18 @@ const __llvm_initialized = Ref(false)
166
166
runtime_fns = LLVM. name .(defs (runtime))
167
167
end
168
168
169
- @timeit_debug to " LLVM middle-end" begin
170
- # target-specific libraries
169
+ @timeit_debug to " Library linking" begin
171
170
if libraries
171
+ # target-specific libraries
172
172
undefined_fns = LLVM. name .(decls (ir))
173
173
@timeit_debug to " target libraries" link_libraries! (job, ir, undefined_fns)
174
174
end
175
-
176
- if optimize
177
- @timeit_debug to " optimization" optimize! (job, ir)
178
-
179
- # optimization may have replaced functions, so look the entry point up again
180
- entry = functions (ir)[entry_fn]
181
- end
182
-
183
- if libraries
184
- undefined_fns = LLVM. name .(decls (ir))
185
- if any (fn -> fn in runtime_fns, undefined_fns)
186
- @timeit_debug to " runtime library" link_library! (ir, runtime)
187
- end
188
- end
189
-
190
- if ccall (:jl_is_debugbuild , Cint, ()) == 1
191
- @timeit_debug to " verification" verify (ir)
192
- end
193
-
194
- if only_entry
195
- # replace non-entry function definitions with a declaration
196
- for f in functions (ir)
197
- f == entry && continue
198
- isdeclaration (f) && continue
199
- LLVM. isintrinsic (f) && continue
200
- empty! (f)
201
- end
202
- end
203
-
204
- # remove everything except for the entry and any exported global variables
205
- @timeit_debug to " clean-up" begin
206
- exports = String[entry_fn]
207
- for gvar in globals (ir)
208
- push! (exports, LLVM. name (gvar))
209
- end
210
-
211
- ModulePassManager () do pm
212
- internalize! (pm, exports)
213
-
214
- # eliminate all unused internal functions
215
- global_optimizer! (pm)
216
- global_dce! (pm)
217
- strip_dead_prototypes! (pm)
218
-
219
- # merge constants (such as exception messages) from the runtime
220
- constant_merge! (pm)
221
-
222
- run! (pm, ir)
223
- end
224
- end
225
175
end
226
176
227
- entry = finish_module! (job, ir, entry)
228
-
229
177
# deferred code generation
230
- if ! only_entry && deferred_codegen && haskey (functions (ir), " deferred_codegen" )
178
+ do_deferred_codegen = ! only_entry && deferred_codegen &&
179
+ haskey (functions (ir), " deferred_codegen" )
180
+ if do_deferred_codegen
231
181
dyn_marker = functions (ir)[" deferred_codegen" ]
232
182
233
183
cache = Dict {CompilerJob, String} (job => entry_fn)
@@ -257,7 +207,7 @@ const __llvm_initialized = Ref(false)
257
207
for dyn_job in keys (worklist)
258
208
# cached compilation
259
209
dyn_entry_fn = get! (cache, dyn_job) do
260
- dyn_ir, dyn_meta = codegen (:llvm , dyn_job; optimize,
210
+ dyn_ir, dyn_meta = codegen (:llvm , dyn_job; optimize= false ,
261
211
deferred_codegen= false , parent_job= job)
262
212
dyn_entry_fn = LLVM. name (dyn_meta. entry)
263
213
merge! (compiled, dyn_meta. compiled)
@@ -279,32 +229,91 @@ const __llvm_initialized = Ref(false)
279
229
unsafe_delete! (LLVM. parent (call), call)
280
230
end
281
231
end
282
- end
283
-
284
- ModulePassManager () do pm
285
- # inline and optimize the call to the deferred code. in particular we want to
286
- # remove unnecessary alloca's that are created by pass-by-ref semantics.
287
- instruction_combining! (pm)
288
- always_inliner! (pm)
289
- scalar_repl_aggregates_ssa! (pm)
290
- promote_memory_to_register! (pm)
291
- gvn! (pm)
292
232
293
- # merge constants (such as exception messages) from each entry
294
- constant_merge! (pm)
295
-
296
- # merge duplicate functions, since each compilation invocation emits everything
297
- # XXX : ideally we want to avoid emitting these in the first place
298
- merge_functions! (pm)
233
+ # clean-up
234
+ ModulePassManager () do pm
235
+ # inline and optimize the call to the deferred code. in particular we want to
236
+ # remove unnecessary alloca's that are created by pass-by-ref semantics.
237
+ instruction_combining! (pm)
238
+ always_inliner! (pm)
239
+ scalar_repl_aggregates_ssa! (pm)
240
+ promote_memory_to_register! (pm)
241
+ gvn! (pm)
242
+
243
+ # merge duplicate functions, since each compilation invocation emits everything
244
+ # XXX : ideally we want to avoid emitting these in the first place
245
+ merge_functions! (pm)
299
246
300
- run! (pm, ir)
247
+ run! (pm, ir)
248
+ end
301
249
end
302
250
303
251
# all deferred compilations should have been resolved
304
252
@compiler_assert isempty (uses (dyn_marker)) job
305
253
unsafe_delete! (ir, dyn_marker)
306
254
end
307
255
256
+ @timeit_debug to " IR post-processing" begin
257
+ entry = finish_module! (job, ir, entry)
258
+
259
+ if optimize
260
+ @timeit_debug to " optimization" optimize! (job, ir)
261
+
262
+ # optimization may have replaced functions, so look the entry point up again
263
+ entry = functions (ir)[entry_fn]
264
+ end
265
+
266
+ if libraries
267
+ # GPU run-time library
268
+ #
269
+ # we do this late for multiple reasons:
270
+ # - the runtime library is already optimized, so we don't want to re-optimize
271
+ # - if `malloc(...) = 0`, the consequent stores are reduced to a trap, which
272
+ # results in e.g. every `box` function just trapping. this breaks our test
273
+ # suite, which runs without malloc, but expects actual code being generated.
274
+ undefined_fns = LLVM. name .(decls (ir))
275
+ if any (fn -> fn in runtime_fns, undefined_fns)
276
+ @timeit_debug to " runtime library" link_library! (ir, runtime)
277
+ end
278
+ end
279
+
280
+ if ccall (:jl_is_debugbuild , Cint, ()) == 1
281
+ @timeit_debug to " verification" verify (ir)
282
+ end
283
+
284
+ @timeit_debug to " clean-up" begin
285
+ # replace non-entry function definitions with a declaration
286
+ if only_entry
287
+ for f in functions (ir)
288
+ f == entry && continue
289
+ isdeclaration (f) && continue
290
+ LLVM. isintrinsic (f) && continue
291
+ empty! (f)
292
+ end
293
+ end
294
+
295
+ # remove everything except for the entry and any exported global variables
296
+ exports = String[entry_fn]
297
+ for gvar in globals (ir)
298
+ push! (exports, LLVM. name (gvar))
299
+ end
300
+
301
+ ModulePassManager () do pm
302
+ internalize! (pm, exports)
303
+
304
+ # eliminate all unused internal functions
305
+ global_optimizer! (pm)
306
+ global_dce! (pm)
307
+ strip_dead_prototypes! (pm)
308
+
309
+ # merge constants (such as exception messages)
310
+ constant_merge! (pm)
311
+
312
+ run! (pm, ir)
313
+ end
314
+ end
315
+ end
316
+
308
317
return ir, (; entry, compiled)
309
318
end
310
319
0 commit comments