Skip to content

Commit ac1898b

Browse files
committed
Fix optimization pipeline.
1 parent 5924f46 commit ac1898b

File tree

1 file changed

+47
-13
lines changed

1 file changed

+47
-13
lines changed

src/optim.jl

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,36 @@ function addTargetPasses!(pm, tm, triple)
66
end
77

88
# Based on Julia's optimization pipeline, minus the SLP and loop vectorizers.
9-
function addOptimizationPasses!(pm)
9+
function addOptimizationPasses!(pm, opt_level=2)
10+
# compare with using Julia's optimization pipeline directly:
11+
#ccall(:jl_add_optimization_passes, Cvoid,
12+
# (LLVM.API.LLVMPassManagerRef, Cint, Cint),
13+
# pm, opt_level, #=lower_intrinsics=# 0)
14+
#return
15+
1016
constant_merge!(pm)
1117

18+
if opt_level < 2
19+
cfgsimplification!(pm)
20+
if opt_level == 1
21+
scalar_repl_aggregates!(pm)
22+
instruction_combining!(pm)
23+
early_cse!(pm)
24+
# maybe add GVN?
25+
# also try GVNHoist and GVNSink
26+
end
27+
mem_cpy_opt!(pm)
28+
always_inliner!(pm) # Respect always_inline
29+
lower_simdloop!(pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop
30+
return
31+
end
32+
1233
propagate_julia_addrsp!(pm)
1334
scoped_no_alias_aa!(pm)
1435
type_based_alias_analysis!(pm)
15-
basic_alias_analysis!(pm)
36+
if opt_level >= 3
37+
basic_alias_analysis!(pm)
38+
end
1639
cfgsimplification!(pm)
1740
dce!(pm)
1841
scalar_repl_aggregates!(pm)
@@ -27,12 +50,12 @@ function addOptimizationPasses!(pm)
2750

2851
alloc_opt!(pm)
2952
# consider AggressiveInstCombinePass at optlevel > 2
30-
3153
instruction_combining!(pm)
3254
cfgsimplification!(pm)
3355
scalar_repl_aggregates!(pm)
34-
instruction_combining!(pm) # TODO: createInstSimplifyLegacy
56+
instruction_simplify!(pm)
3557
jump_threading!(pm)
58+
correlated_value_propagation!(pm)
3659

3760
reassociate!(pm)
3861

@@ -49,8 +72,11 @@ function addOptimizationPasses!(pm)
4972
lower_simdloop!(pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop
5073
licm!(pm)
5174
julia_licm!(pm)
75+
loop_unswitch!(pm)
76+
licm!(pm)
77+
julia_licm!(pm)
5278
# Subsequent passes not stripping metadata from terminator
53-
instruction_combining!(pm) # TODO: createInstSimplifyLegacy
79+
instruction_simplify!(pm)
5480
ind_var_simplify!(pm)
5581
loop_deletion!(pm)
5682
loop_unroll!(pm) # TODO: in Julia createSimpleLoopUnroll
@@ -60,7 +86,8 @@ function addOptimizationPasses!(pm)
6086
# Re-run SROA after loop-unrolling (useful for small loops that operate
6187
# over the structure of an aggregate)
6288
scalar_repl_aggregates!(pm)
63-
instruction_combining!(pm) # TODO: createInstSimplifyLegacy
89+
# might not be necessary:
90+
instruction_simplify!(pm)
6491

6592
gvn!(pm)
6693
mem_cpy_opt!(pm)
@@ -72,21 +99,23 @@ function addOptimizationPasses!(pm)
7299
# loops over Union-typed arrays to vectorize.
73100
instruction_combining!(pm)
74101
jump_threading!(pm)
102+
correlated_value_propagation!(pm)
75103
dead_store_elimination!(pm)
76104

77105
# More dead allocation (store) deletion before loop optimization
78106
# consider removing this:
79107
alloc_opt!(pm)
80-
81108
# see if all of the constant folding has exposed more loops
82109
# to simplification and deletion
83110
# this helps significantly with cleaning up iteration
84-
cfgsimplification!(pm)
111+
cfgsimplification!(pm) # See note above, don't hoist instructions before LV
85112
loop_deletion!(pm)
86113
instruction_combining!(pm)
87114
loop_vectorize!(pm)
88-
# TODO: createLoopLoadEliminationPass
115+
loop_load_elimination!(pm)
116+
# Cleanup after LV pass
89117
cfgsimplification!(pm)
118+
# TODO: aggressive CFG simplification options
90119

91120
aggressive_dce!(pm)
92121
end
@@ -108,28 +137,33 @@ function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
108137
# intrinsics should never get lowered before Julia has optimized them.
109138
# XXX: why doesn't the barrier noop pass work here?
110139

140+
# lower intrinsics
111141
ModulePassManager() do pm
112142
addTargetPasses!(pm, tm, triple)
113143

114-
# lower intrinsics
115144
add!(pm, FunctionPass("LowerGCFrame", lower_gc_frame!))
116-
aggressive_dce!(pm) # remove dead uses of ptls
145+
146+
# remove dead uses of ptls
147+
aggressive_dce!(pm)
117148
add!(pm, ModulePass("LowerPTLS", lower_ptls!))
118149

119150
# the Julia GC lowering pass also has some clean-up that is required
120151
late_lower_gc_frame!(pm)
121152

153+
remove_ni!(pm)
122154
remove_julia_addrspaces!(pm)
123155

124156
# Julia's operand bundles confuse the inliner, so repeat here now they are gone.
125157
# FIXME: we should fix the inliner so that inlined code gets optimized early-on
126158
always_inliner!(pm)
127159

160+
# some of Julia's optimization passes happen _after_ lowering intrinsics
161+
combine_mul_add!(pm)
162+
div_rem_pairs!(pm)
163+
128164
run!(pm, mod)
129165
end
130166

131-
# TODO: combine_mul_add and create_div_rem_pairs from addMachinePasses
132-
133167
# target-specific optimizations
134168
optimize_module!(job, mod)
135169

0 commit comments

Comments
 (0)