@@ -6,13 +6,36 @@ function addTargetPasses!(pm, tm, triple)
6
6
end
7
7
8
8
# Based on Julia's optimization pipeline, minus the SLP and loop vectorizers.
9
- function addOptimizationPasses! (pm)
9
+ function addOptimizationPasses! (pm, opt_level= 2 )
10
+ # compare with using Julia's optimization pipeline directly:
11
+ # ccall(:jl_add_optimization_passes, Cvoid,
12
+ # (LLVM.API.LLVMPassManagerRef, Cint, Cint),
13
+ # pm, opt_level, #=lower_intrinsics=# 0)
14
+ # return
15
+
10
16
constant_merge! (pm)
11
17
18
+ if opt_level < 2
19
+ cfgsimplification! (pm)
20
+ if opt_level == 1
21
+ scalar_repl_aggregates! (pm)
22
+ instruction_combining! (pm)
23
+ early_cse! (pm)
24
+ # maybe add GVN?
25
+ # also try GVNHoist and GVNSink
26
+ end
27
+ mem_cpy_opt! (pm)
28
+ always_inliner! (pm) # Respect always_inline
29
+ lower_simdloop! (pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop
30
+ return
31
+ end
32
+
12
33
propagate_julia_addrsp! (pm)
13
34
scoped_no_alias_aa! (pm)
14
35
type_based_alias_analysis! (pm)
15
- basic_alias_analysis! (pm)
36
+ if opt_level >= 3
37
+ basic_alias_analysis! (pm)
38
+ end
16
39
cfgsimplification! (pm)
17
40
dce! (pm)
18
41
scalar_repl_aggregates! (pm)
@@ -27,12 +50,12 @@ function addOptimizationPasses!(pm)
27
50
28
51
alloc_opt! (pm)
29
52
# consider AggressiveInstCombinePass at optlevel > 2
30
-
31
53
instruction_combining! (pm)
32
54
cfgsimplification! (pm)
33
55
scalar_repl_aggregates! (pm)
34
- instruction_combining ! (pm) # TODO : createInstSimplifyLegacy
56
+ instruction_simplify ! (pm)
35
57
jump_threading! (pm)
58
+ correlated_value_propagation! (pm)
36
59
37
60
reassociate! (pm)
38
61
@@ -49,8 +72,11 @@ function addOptimizationPasses!(pm)
49
72
lower_simdloop! (pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop
50
73
licm! (pm)
51
74
julia_licm! (pm)
75
+ loop_unswitch! (pm)
76
+ licm! (pm)
77
+ julia_licm! (pm)
52
78
# Subsequent passes not stripping metadata from terminator
53
- instruction_combining ! (pm) # TODO : createInstSimplifyLegacy
79
+ instruction_simplify ! (pm)
54
80
ind_var_simplify! (pm)
55
81
loop_deletion! (pm)
56
82
loop_unroll! (pm) # TODO : in Julia createSimpleLoopUnroll
@@ -60,7 +86,8 @@ function addOptimizationPasses!(pm)
60
86
# Re-run SROA after loop-unrolling (useful for small loops that operate,
61
87
# over the structure of an aggregate)
62
88
scalar_repl_aggregates! (pm)
63
- instruction_combining! (pm) # TODO : createInstSimplifyLegacy
89
+ # might not be necessary:
90
+ instruction_simplify! (pm)
64
91
65
92
gvn! (pm)
66
93
mem_cpy_opt! (pm)
@@ -72,21 +99,23 @@ function addOptimizationPasses!(pm)
72
99
# loops over Union-typed arrays to vectorize.
73
100
instruction_combining! (pm)
74
101
jump_threading! (pm)
102
+ correlated_value_propagation! (pm)
75
103
dead_store_elimination! (pm)
76
104
77
105
# More dead allocation (store) deletion before loop optimization
78
106
# consider removing this:
79
107
alloc_opt! (pm)
80
-
81
108
# see if all of the constant folding has exposed more loops
82
109
# to simplification and deletion
83
110
# this helps significantly with cleaning up iteration
84
- cfgsimplification! (pm)
111
+ cfgsimplification! (pm) # See note above, don't hoist instructions before LV
85
112
loop_deletion! (pm)
86
113
instruction_combining! (pm)
87
114
loop_vectorize! (pm)
88
- # TODO : createLoopLoadEliminationPass
115
+ loop_load_elimination! (pm)
116
+ # Cleanup after LV pass
89
117
cfgsimplification! (pm)
118
+ # TODO : aggressive CFG simplification options
90
119
91
120
aggressive_dce! (pm)
92
121
end
@@ -108,28 +137,33 @@ function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
108
137
# intrinsics should never get lowered before Julia has optimized them.
109
138
# XXX : why doesn't the barrier noop pass work here?
110
139
140
+ # lower intrinsics
111
141
ModulePassManager () do pm
112
142
addTargetPasses! (pm, tm, triple)
113
143
114
- # lower intrinsics
115
144
add! (pm, FunctionPass (" LowerGCFrame" , lower_gc_frame!))
116
- aggressive_dce! (pm) # remove dead uses of ptls
145
+
146
+ # remove dead uses of ptls
147
+ aggressive_dce! (pm)
117
148
add! (pm, ModulePass (" LowerPTLS" , lower_ptls!))
118
149
119
150
# the Julia GC lowering pass also has some clean-up that is required
120
151
late_lower_gc_frame! (pm)
121
152
153
+ remove_ni! (pm)
122
154
remove_julia_addrspaces! (pm)
123
155
124
156
# Julia's operand bundles confuse the inliner, so repeat here now they are gone.
125
157
# FIXME : we should fix the inliner so that inlined code gets optimized early-on
126
158
always_inliner! (pm)
127
159
160
+ # some of Julia's optimization passes happen _after_ lowering intrinsics
161
+ combine_mul_add! (pm)
162
+ div_rem_pairs! (pm)
163
+
128
164
run! (pm, mod)
129
165
end
130
166
131
- # TODO : combine_mul_add and create_div_rem_pairs from addMachinePasses
132
-
133
167
# target-specific optimizations
134
168
optimize_module! (job, mod)
135
169
0 commit comments