1
1
# LLVM IR optimization
2
2
3
- function optimize! (@nospecialize (job:: CompilerJob ), mod:: LLVM.Module )
4
- tm = llvm_machine (job. target)
3
+ function addTargetPasses! (pm, tm, triple)
4
+ add_library_info! (pm, triple)
5
+ add_transform_info! (pm, tm)
6
+ end
7
+
8
+ # Based on Julia's optimization pipeline, minus the SLP and loop vectorizers.
9
+ function addOptimizationPasses! (pm, opt_level= 2 )
10
+ # compare with using Julia's optimization pipeline directly:
11
+ # ccall(:jl_add_optimization_passes, Cvoid,
12
+ # (LLVM.API.LLVMPassManagerRef, Cint, Cint),
13
+ # pm, opt_level, #=lower_intrinsics=# 0)
14
+ # return
15
+
16
+ constant_merge! (pm)
17
+
18
+ if opt_level < 2
19
+ cfgsimplification! (pm)
20
+ if opt_level == 1
21
+ scalar_repl_aggregates! (pm)
22
+ instruction_combining! (pm)
23
+ early_cse! (pm)
24
+ # maybe add GVN?
25
+ # also try GVNHoist and GVNSink
26
+ end
27
+ mem_cpy_opt! (pm)
28
+ always_inliner! (pm) # Respect always_inline
29
+ lower_simdloop! (pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop
30
+ return
31
+ end
5
32
6
- function initialize! (pm)
7
- add_library_info! (pm, triple (mod))
8
- add_transform_info! (pm, tm)
33
+ propagate_julia_addrsp! (pm)
34
+ scoped_no_alias_aa! (pm)
35
+ type_based_alias_analysis! (pm)
36
+ if opt_level >= 3
37
+ basic_alias_analysis! (pm)
9
38
end
39
+ cfgsimplification! (pm)
40
+ dce! (pm)
41
+ scalar_repl_aggregates! (pm)
42
+
43
+ # mem_cpy_opt!(pm)
44
+
45
+ always_inliner! (pm) # Respect always_inline
46
+
47
+ # Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard
48
+ # time merging the `alloca` for the unboxed data and the `alloca` created by
49
+ # the `alloc_opt` pass.
50
+
51
+ alloc_opt! (pm)
52
+ # consider AggressiveInstCombinePass at optlevel > 2
53
+ instruction_combining! (pm)
54
+ cfgsimplification! (pm)
55
+ scalar_repl_aggregates! (pm)
56
+ instruction_simplify! (pm)
57
+ jump_threading! (pm)
58
+ correlated_value_propagation! (pm)
59
+
60
+ reassociate! (pm)
61
+
62
+ early_cse! (pm)
63
+
64
+ # Load forwarding above can expose allocations that aren't actually used;
65
+ # remove those before optimizing loops.
66
+ alloc_opt! (pm)
67
+ loop_rotate! (pm)
68
+ # moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1)
69
+ loop_idiom! (pm)
70
+
71
+ # LoopRotate strips metadata from terminator, so run LowerSIMD afterwards
72
+ lower_simdloop! (pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop
73
+ licm! (pm)
74
+ julia_licm! (pm)
75
+ loop_unswitch! (pm)
76
+ licm! (pm)
77
+ julia_licm! (pm)
78
+ # Subsequent passes not stripping metadata from terminator
79
+ instruction_simplify! (pm)
80
+ ind_var_simplify! (pm)
81
+ loop_deletion! (pm)
82
+ loop_unroll! (pm) # TODO : in Julia createSimpleLoopUnroll
83
+
84
+ # Run our own SROA on heap objects before LLVM's
85
+ alloc_opt! (pm)
86
+ # Re-run SROA after loop-unrolling (useful for small loops that operate
87
+ # over the structure of an aggregate)
88
+ scalar_repl_aggregates! (pm)
89
+ # might not be necessary:
90
+ instruction_simplify! (pm)
91
+
92
+ gvn! (pm)
93
+ mem_cpy_opt! (pm)
94
+ sccp! (pm)
95
+
96
+ # Run instcombine after redundancy elimination to exploit opportunities
97
+ # opened up by it.
98
+ # This needs to be InstCombine instead of InstSimplify to allow
99
+ # loops over Union-typed arrays to vectorize.
100
+ instruction_combining! (pm)
101
+ jump_threading! (pm)
102
+ correlated_value_propagation! (pm)
103
+ dead_store_elimination! (pm)
104
+
105
+ # More dead allocation (store) deletion before loop optimization
106
+ # consider removing this:
107
+ alloc_opt! (pm)
108
+ # see if all of the constant folding has exposed more loops
109
+ # to simplification and deletion
110
+ # this helps significantly with cleaning up iteration
111
+ cfgsimplification! (pm) # See note above, don't hoist instructions before LV
112
+ loop_deletion! (pm)
113
+ instruction_combining! (pm)
114
+ loop_vectorize! (pm)
115
+ loop_load_elimination! (pm)
116
+ # Cleanup after LV pass
117
+ cfgsimplification! (pm)
118
+ # TODO : aggressive CFG simplification options
119
+
120
+ aggressive_dce! (pm)
121
+ end
122
+
123
+ function optimize! (@nospecialize (job:: CompilerJob ), mod:: LLVM.Module )
124
+ triple = llvm_triple (job. target)
125
+ tm = llvm_machine (job. target)
10
126
11
127
global current_job
12
128
current_job = job
13
129
14
- # Julia-specific optimizations
15
- #
16
- # NOTE: we need to use multiple distinct pass managers to force pass ordering;
17
- # intrinsics should never get lowered before Julia has optimized them.
18
-
19
130
ModulePassManager () do pm
20
- initialize! (pm)
21
- ccall (:jl_add_optimization_passes , Cvoid,
22
- (LLVM. API. LLVMPassManagerRef, Cint, Cint),
23
- pm, Base. JLOptions (). opt_level, #= lower_intrinsics=# 0 )
131
+ addTargetPasses! (pm, tm, triple)
132
+ addOptimizationPasses! (pm)
24
133
run! (pm, mod)
25
134
end
26
135
136
+ # NOTE: we need to use multiple distinct pass managers to force pass ordering;
137
+ # intrinsics should never get lowered before Julia has optimized them.
138
+ # XXX : why doesn't the barrier noop pass work here?
139
+
140
+ # lower intrinsics
27
141
ModulePassManager () do pm
28
- initialize ! (pm)
142
+ addTargetPasses ! (pm, tm, triple )
29
143
30
- # lower intrinsics
31
144
add! (pm, FunctionPass (" LowerGCFrame" , lower_gc_frame!))
32
- aggressive_dce! (pm) # remove dead uses of ptls
145
+
146
+ # remove dead uses of ptls
147
+ aggressive_dce! (pm)
33
148
add! (pm, ModulePass (" LowerPTLS" , lower_ptls!))
34
149
35
150
# the Julia GC lowering pass also has some clean-up that is required
36
151
late_lower_gc_frame! (pm)
37
152
153
+ remove_ni! (pm)
38
154
remove_julia_addrspaces! (pm)
39
155
40
156
# Julia's operand bundles confuse the inliner, so repeat it here now that they are gone.
41
157
# FIXME : we should fix the inliner so that inlined code gets optimized early-on
42
158
always_inliner! (pm)
43
159
160
+ # some of Julia's optimization passes happen _after_ lowering intrinsics
161
+ combine_mul_add! (pm)
162
+ div_rem_pairs! (pm)
163
+
44
164
run! (pm, mod)
45
165
end
46
166
@@ -56,7 +176,7 @@ function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
56
176
#
57
177
# these might not always be safe, as Julia's IR metadata isn't designed for IPO.
58
178
ModulePassManager () do pm
59
- initialize ! (pm)
179
+ addTargetPasses ! (pm, tm, triple )
60
180
61
181
dead_arg_elimination! (pm) # parent doesn't use return value --> ret void
62
182
0 commit comments