1
1
# LLVM IR optimization
2
2
3
- Base . @kwdef struct GPUOptimizationParams
4
- julia :: Bool = true
5
- intrinsics :: Bool = true
6
- ipo :: Bool = true
3
# Seed the pass manager with target-specific analysis info so subsequent
# optimization passes can query the target library and transform legality.
function addTargetPasses!(pm, tm, triple)
    add_library_info!(pm, triple)   # TargetLibraryInfo for `triple`
    add_transform_info!(pm, tm)     # TargetTransformInfo from the machine
end
7
7
8
- optlevel:: Int = Base. JLOptions (). opt_level
8
# Based on Julia's optimization pipeline, minus the SLP vectorizer.
# NOTE: unlike the comment previously claimed, the loop vectorizer IS run
#       (see `loop_vectorize!` below); only SLP vectorization is omitted.
function addOptimizationPasses!(pm)
    constant_merge!(pm)

    propagate_julia_addrsp!(pm)
    scoped_no_alias_aa!(pm)
    type_based_alias_analysis!(pm)
    basic_alias_analysis!(pm)
    cfgsimplification!(pm)
    dce!(pm)
    scalar_repl_aggregates!(pm)

    # mem_cpy_opt!(pm)

    always_inliner!(pm) # Respect always_inline

    # Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard
    # time merging the `alloca` for the unboxed data and the `alloca` created by
    # the `alloc_opt` pass.

    alloc_opt!(pm)
    # consider AggressiveInstCombinePass at optlevel > 2

    instruction_combining!(pm)
    cfgsimplification!(pm)
    scalar_repl_aggregates!(pm)
    instruction_combining!(pm) # TODO: createInstSimplifyLegacy
    jump_threading!(pm)

    reassociate!(pm)

    early_cse!(pm)

    # Load forwarding above can expose allocations that aren't actually used
    # remove those before optimizing loops.
    alloc_opt!(pm)
    loop_rotate!(pm)
    # moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1)
    loop_idiom!(pm)

    # LoopRotate strips metadata from terminator, so run LowerSIMD afterwards
    lower_simdloop!(pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop
    licm!(pm)
    julia_licm!(pm)
    # Subsequent passes not stripping metadata from terminator
    instruction_combining!(pm) # TODO: createInstSimplifyLegacy
    ind_var_simplify!(pm)
    loop_deletion!(pm)
    loop_unroll!(pm) # TODO: in Julia createSimpleLoopUnroll

    # Run our own SROA on heap objects before LLVM's
    alloc_opt!(pm)
    # Re-run SROA after loop-unrolling (useful for small loops that operate
    # over the structure of an aggregate)
    scalar_repl_aggregates!(pm)
    instruction_combining!(pm) # TODO: createInstSimplifyLegacy

    gvn!(pm)
    mem_cpy_opt!(pm)
    sccp!(pm)

    # Run instcombine after redundancy elimination to exploit opportunities
    # opened up by them.
    # This needs to be InstCombine instead of InstSimplify to allow
    # loops over Union-typed arrays to vectorize.
    instruction_combining!(pm)
    jump_threading!(pm)
    dead_store_elimination!(pm)

    # More dead allocation (store) deletion before loop optimization
    # consider removing this:
    alloc_opt!(pm)

    # see if all of the constant folding has exposed more loops
    # to simplification and deletion
    # this helps significantly with cleaning up iteration
    cfgsimplification!(pm)
    loop_deletion!(pm)
    instruction_combining!(pm)
    loop_vectorize!(pm)
    # TODO: createLoopLoadEliminationPass
    cfgsimplification!(pm)

    aggressive_dce!(pm)
end
10
93
11
94
function optimize! (@nospecialize (job:: CompilerJob ), mod:: LLVM.Module )
12
95
triple = llvm_triple (job. target)
13
96
tm = llvm_machine (job. target)
14
97
15
- function initialize! (pm)
16
- add_library_info! (pm, triple)
17
- add_transform_info! (pm, tm)
18
- end
19
-
20
98
global current_job
21
99
current_job = job
22
100
23
- params = optimization_params (job)
101
+ ModulePassManager () do pm
102
+ addTargetPasses! (pm, tm, triple)
103
+ addOptimizationPasses! (pm)
104
+ run! (pm, mod)
105
+ end
24
106
25
- # Julia-specific optimizations
26
- #
27
107
# NOTE: we need to use multiple distinct pass managers to force pass ordering;
28
108
# intrinsics should never get lowered before Julia has optimized them.
109
+ # XXX : why doesn't the barrier noop pass work here?
29
110
30
111
ModulePassManager () do pm
31
- initialize! (pm)
32
- if params. julia
33
- ccall (:jl_add_optimization_passes , Cvoid,
34
- (LLVM. API. LLVMPassManagerRef, Cint, Cint),
35
- pm, params. optlevel, #= lower_intrinsics=# 0 )
36
- end
37
- if params. optlevel < 2
38
- # Julia doesn't run the alloc optimizer on lower optimization levels,
39
- # but the pass is crucial to remove possibly unsupported malloc calls.
40
- alloc_opt! (pm)
41
- end
42
- run! (pm, mod)
43
- end
44
-
45
- params. intrinsics && ModulePassManager () do pm
46
- initialize! (pm)
112
+ addTargetPasses! (pm, tm, triple)
47
113
48
114
# lower intrinsics
49
115
add! (pm, FunctionPass (" LowerGCFrame" , lower_gc_frame!))
@@ -62,6 +128,8 @@ function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
62
128
run! (pm, mod)
63
129
end
64
130
131
+ # TODO : combine_mul_add and create_div_rem_pairs from addMachinePasses
132
+
65
133
# target-specific optimizations
66
134
optimize_module! (job, mod)
67
135
@@ -73,8 +141,8 @@ function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
73
141
# part of the LateLowerGCFrame pass) aren't collected properly.
74
142
#
75
143
# these might not always be safe, as Julia's IR metadata isn't designed for IPO.
76
- params . ipo && ModulePassManager () do pm
77
- initialize ! (pm)
144
+ ModulePassManager () do pm
145
+ addTargetPasses ! (pm, tm, triple )
78
146
79
147
dead_arg_elimination! (pm) # parent doesn't use return value --> ret void
80
148
0 commit comments