Skip to content

Commit 725443d

Browse files
committed
Perform late optimization, after deferred codegen and byval lowering.
1 parent 7dfb27f commit 725443d

File tree

1 file changed

+82
-73
lines changed

1 file changed

+82
-73
lines changed

src/driver.jl

Lines changed: 82 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -166,68 +166,18 @@ const __llvm_initialized = Ref(false)
166166
runtime_fns = LLVM.name.(defs(runtime))
167167
end
168168

169-
@timeit_debug to "LLVM middle-end" begin
170-
# target-specific libraries
169+
@timeit_debug to "Library linking" begin
171170
if libraries
171+
# target-specific libraries
172172
undefined_fns = LLVM.name.(decls(ir))
173173
@timeit_debug to "target libraries" link_libraries!(job, ir, undefined_fns)
174174
end
175-
176-
if optimize
177-
@timeit_debug to "optimization" optimize!(job, ir)
178-
179-
# optimization may have replaced functions, so look the entry point up again
180-
entry = functions(ir)[entry_fn]
181-
end
182-
183-
if libraries
184-
undefined_fns = LLVM.name.(decls(ir))
185-
if any(fn -> fn in runtime_fns, undefined_fns)
186-
@timeit_debug to "runtime library" link_library!(ir, runtime)
187-
end
188-
end
189-
190-
if ccall(:jl_is_debugbuild, Cint, ()) == 1
191-
@timeit_debug to "verification" verify(ir)
192-
end
193-
194-
if only_entry
195-
# replace non-entry function definitions with a declaration
196-
for f in functions(ir)
197-
f == entry && continue
198-
isdeclaration(f) && continue
199-
LLVM.isintrinsic(f) && continue
200-
empty!(f)
201-
end
202-
end
203-
204-
# remove everything except for the entry and any exported global variables
205-
@timeit_debug to "clean-up" begin
206-
exports = String[entry_fn]
207-
for gvar in globals(ir)
208-
push!(exports, LLVM.name(gvar))
209-
end
210-
211-
ModulePassManager() do pm
212-
internalize!(pm, exports)
213-
214-
# eliminate all unused internal functions
215-
global_optimizer!(pm)
216-
global_dce!(pm)
217-
strip_dead_prototypes!(pm)
218-
219-
# merge constants (such as exception messages) from the runtime
220-
constant_merge!(pm)
221-
222-
run!(pm, ir)
223-
end
224-
end
225175
end
226176

227-
entry = finish_module!(job, ir, entry)
228-
229177
# deferred code generation
230-
if !only_entry && deferred_codegen && haskey(functions(ir), "deferred_codegen")
178+
do_deferred_codegen = !only_entry && deferred_codegen &&
179+
haskey(functions(ir), "deferred_codegen")
180+
if do_deferred_codegen
231181
dyn_marker = functions(ir)["deferred_codegen"]
232182

233183
cache = Dict{CompilerJob, String}(job => entry_fn)
@@ -257,7 +207,7 @@ const __llvm_initialized = Ref(false)
257207
for dyn_job in keys(worklist)
258208
# cached compilation
259209
dyn_entry_fn = get!(cache, dyn_job) do
260-
dyn_ir, dyn_meta = codegen(:llvm, dyn_job; optimize,
210+
dyn_ir, dyn_meta = codegen(:llvm, dyn_job; optimize=false,
261211
deferred_codegen=false, parent_job=job)
262212
dyn_entry_fn = LLVM.name(dyn_meta.entry)
263213
merge!(compiled, dyn_meta.compiled)
@@ -279,32 +229,91 @@ const __llvm_initialized = Ref(false)
279229
unsafe_delete!(LLVM.parent(call), call)
280230
end
281231
end
282-
end
283-
284-
ModulePassManager() do pm
285-
# inline and optimize the call to the deferred code. in particular we want to
286-
# remove unnecessary alloca's that are created by pass-by-ref semantics.
287-
instruction_combining!(pm)
288-
always_inliner!(pm)
289-
scalar_repl_aggregates_ssa!(pm)
290-
promote_memory_to_register!(pm)
291-
gvn!(pm)
292232

293-
# merge constants (such as exception messages) from each entry
294-
constant_merge!(pm)
295-
296-
# merge duplicate functions, since each compilation invocation emits everything
297-
# XXX: ideally we want to avoid emitting these in the first place
298-
merge_functions!(pm)
233+
# clean-up
234+
ModulePassManager() do pm
235+
# inline and optimize the call to the deferred code. in particular we want to
236+
# remove unnecessary alloca's that are created by pass-by-ref semantics.
237+
instruction_combining!(pm)
238+
always_inliner!(pm)
239+
scalar_repl_aggregates_ssa!(pm)
240+
promote_memory_to_register!(pm)
241+
gvn!(pm)
242+
243+
# merge duplicate functions, since each compilation invocation emits everything
244+
# XXX: ideally we want to avoid emitting these in the first place
245+
merge_functions!(pm)
299246

300-
run!(pm, ir)
247+
run!(pm, ir)
248+
end
301249
end
302250

303251
# all deferred compilations should have been resolved
304252
@compiler_assert isempty(uses(dyn_marker)) job
305253
unsafe_delete!(ir, dyn_marker)
306254
end
307255

256+
@timeit_debug to "IR post-processing" begin
257+
entry = finish_module!(job, ir, entry)
258+
259+
if optimize
260+
@timeit_debug to "optimization" optimize!(job, ir)
261+
262+
# optimization may have replaced functions, so look the entry point up again
263+
entry = functions(ir)[entry_fn]
264+
end
265+
266+
if libraries
267+
# GPU run-time library
268+
#
269+
# we do this late for multiple reasons:
270+
# - the runtime library is already optimized, so we don't want to re-optimize
271+
# - if `malloc(...) = 0`, the subsequent stores are reduced to a trap, which
272+
# results in e.g. every `box` function just trapping. this breaks our test
273+
# suite, which runs without malloc, but expects actual code to be generated.
274+
undefined_fns = LLVM.name.(decls(ir))
275+
if any(fn -> fn in runtime_fns, undefined_fns)
276+
@timeit_debug to "runtime library" link_library!(ir, runtime)
277+
end
278+
end
279+
280+
if ccall(:jl_is_debugbuild, Cint, ()) == 1
281+
@timeit_debug to "verification" verify(ir)
282+
end
283+
284+
@timeit_debug to "clean-up" begin
285+
# replace non-entry function definitions with a declaration
286+
if only_entry
287+
for f in functions(ir)
288+
f == entry && continue
289+
isdeclaration(f) && continue
290+
LLVM.isintrinsic(f) && continue
291+
empty!(f)
292+
end
293+
end
294+
295+
# remove everything except for the entry and any exported global variables
296+
exports = String[entry_fn]
297+
for gvar in globals(ir)
298+
push!(exports, LLVM.name(gvar))
299+
end
300+
301+
ModulePassManager() do pm
302+
internalize!(pm, exports)
303+
304+
# eliminate all unused internal functions
305+
global_optimizer!(pm)
306+
global_dce!(pm)
307+
strip_dead_prototypes!(pm)
308+
309+
# merge constants (such as exception messages)
310+
constant_merge!(pm)
311+
312+
run!(pm, ir)
313+
end
314+
end
315+
end
316+
308317
return ir, (; entry, compiled)
309318
end
310319

0 commit comments

Comments
 (0)