Skip to content

Commit 0cfb52f

Browse files
authored
Merge pull request #230 from JuliaGPU/tb/spirv
More fixes for the SPIR-V backend.
2 parents 5bbffc4 + ac01295 commit 0cfb52f

File tree

2 files changed

+210
-44
lines changed

2 files changed

+210
-44
lines changed

src/optim.jl

Lines changed: 138 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,166 @@
11
# LLVM IR optimization
22

3-
function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
4-
tm = llvm_machine(job.target)
3+
# Prepare pass manager `pm` for a specific target: first calls
# `add_library_info!` with the target `triple`, then `add_transform_info!` with
# the target machine `tm`. The value of the latter call is returned.
function addTargetPasses!(pm, tm, triple)
    add_library_info!(pm, triple)
    return add_transform_info!(pm, tm)
end
7+
8+
# Populate pass manager `pm` with a hand-written optimization pipeline that is
# based on Julia's own, selected by `opt_level` (defaults to 2). Levels below 2
# get a short pipeline and return early; level 3 and up additionally schedules
# `basic_alias_analysis!`.
#
# NOTE(review): this pipeline was described as "minus the SLP and loop
# vectorizers", yet `loop_vectorize!` is scheduled near the end -- confirm which
# of the two is intended.
function addOptimizationPasses!(pm, opt_level=2)
    # For comparison, Julia's stock pipeline can be installed directly with:
    #ccall(:jl_add_optimization_passes, Cvoid,
    #      (LLVM.API.LLVMPassManagerRef, Cint, Cint),
    #      pm, opt_level, #=lower_intrinsics=# 0)
    #return

    # local helper: add each of the given passes to `pm`, in argument order
    schedule!(passes...) = foreach(pass! -> pass!(pm), passes)

    constant_merge!(pm)

    # short pipeline for optimization levels below 2
    if opt_level < 2
        cfgsimplification!(pm)
        if opt_level == 1
            schedule!(scalar_repl_aggregates!, instruction_combining!, early_cse!)
            # maybe add GVN?
            # also try GVNHoist and GVNSink
        end
        mem_cpy_opt!(pm)
        always_inliner!(pm)     # Respect always_inline
        lower_simdloop!(pm)     # Annotate loop marked with "loopinfo" as LLVM parallel loop
        return
    end

    schedule!(propagate_julia_addrsp!, scoped_no_alias_aa!, type_based_alias_analysis!)
    opt_level >= 3 && basic_alias_analysis!(pm)
    schedule!(cfgsimplification!, dce!, scalar_repl_aggregates!)

    #mem_cpy_opt!(pm)

    always_inliner!(pm)         # Respect always_inline

    # Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard
    # time merging the `alloca` for the unboxed data and the `alloca` created by
    # the `alloc_opt` pass.

    alloc_opt!(pm)
    # consider AggressiveInstCombinePass at optlevel > 2
    schedule!(instruction_combining!, cfgsimplification!, scalar_repl_aggregates!,
              instruction_simplify!, jump_threading!, correlated_value_propagation!,
              reassociate!, early_cse!)

    # Load forwarding above can expose allocations that aren't actually used;
    # remove those before optimizing loops.
    alloc_opt!(pm)
    loop_rotate!(pm)
    # moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1)
    loop_idiom!(pm)

    # LoopRotate strips metadata from terminator, so run LowerSIMD afterwards
    lower_simdloop!(pm)         # Annotate loop marked with "loopinfo" as LLVM parallel loop
    schedule!(licm!, julia_licm!, loop_unswitch!, licm!, julia_licm!)
    # Subsequent passes not stripping metadata from terminator
    schedule!(instruction_simplify!, ind_var_simplify!, loop_deletion!)
    loop_unroll!(pm)            # TODO: in Julia createSimpleLoopUnroll

    # Run our own SROA on heap objects before LLVM's
    alloc_opt!(pm)
    # Re-run SROA after loop-unrolling (useful for small loops that operate
    # over the structure of an aggregate)
    scalar_repl_aggregates!(pm)
    # might not be necessary:
    instruction_simplify!(pm)

    schedule!(gvn!, mem_cpy_opt!, sccp!)

    # Run instcombine after redundancy elimination to exploit opportunities
    # opened up by them.
    # This needs to be InstCombine instead of InstSimplify to allow
    # loops over Union-typed arrays to vectorize.
    schedule!(instruction_combining!, jump_threading!, correlated_value_propagation!,
              dead_store_elimination!)

    # More dead allocation (store) deletion before loop optimization
    # consider removing this:
    alloc_opt!(pm)
    # see if all of the constant folding has exposed more loops
    # to simplification and deletion
    # this helps significantly with cleaning up iteration
    cfgsimplification!(pm)      # See note above, don't hoist instructions before LV
    schedule!(loop_deletion!, instruction_combining!, loop_vectorize!,
              loop_load_elimination!)
    # Cleanup after LV pass
    cfgsimplification!(pm)
    # TODO: aggressive CFG simplification options

    return aggressive_dce!(pm)
end
122+
123+
function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
124+
triple = llvm_triple(job.target)
125+
tm = llvm_machine(job.target)
10126

11127
global current_job
12128
current_job = job
13129

14-
# Julia-specific optimizations
15-
#
16-
# NOTE: we need to use multiple distinct pass managers to force pass ordering;
17-
# intrinsics should never get lowered before Julia has optimized them.
18-
19130
ModulePassManager() do pm
20-
initialize!(pm)
21-
ccall(:jl_add_optimization_passes, Cvoid,
22-
(LLVM.API.LLVMPassManagerRef, Cint, Cint),
23-
pm, Base.JLOptions().opt_level, #=lower_intrinsics=# 0)
131+
addTargetPasses!(pm, tm, triple)
132+
addOptimizationPasses!(pm)
24133
run!(pm, mod)
25134
end
26135

136+
# NOTE: we need to use multiple distinct pass managers to force pass ordering;
137+
# intrinsics should never get lowered before Julia has optimized them.
138+
# XXX: why doesn't the barrier noop pass work here?
139+
140+
# lower intrinsics
27141
ModulePassManager() do pm
28-
initialize!(pm)
142+
addTargetPasses!(pm, tm, triple)
29143

30-
# lower intrinsics
31144
add!(pm, FunctionPass("LowerGCFrame", lower_gc_frame!))
32-
aggressive_dce!(pm) # remove dead uses of ptls
145+
146+
# remove dead uses of ptls
147+
aggressive_dce!(pm)
33148
add!(pm, ModulePass("LowerPTLS", lower_ptls!))
34149

35150
# the Julia GC lowering pass also has some clean-up that is required
36151
late_lower_gc_frame!(pm)
37152

153+
remove_ni!(pm)
38154
remove_julia_addrspaces!(pm)
39155

40156
# Julia's operand bundles confuse the inliner, so repeat here now they are gone.
41157
# FIXME: we should fix the inliner so that inlined code gets optimized early-on
42158
always_inliner!(pm)
43159

160+
# some of Julia's optimization passes happen _after_ lowering intrinsics
161+
combine_mul_add!(pm)
162+
div_rem_pairs!(pm)
163+
44164
run!(pm, mod)
45165
end
46166

@@ -56,7 +176,7 @@ function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
56176
#
57177
# these might not always be safe, as Julia's IR metadata isn't designed for IPO.
58178
ModulePassManager() do pm
59-
initialize!(pm)
179+
addTargetPasses!(pm, tm, triple)
60180

61181
dead_arg_elimination!(pm) # parent doesn't use return value --> ret void
62182

src/spirv.jl

Lines changed: 72 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -57,48 +57,73 @@ function finish_module!(job::CompilerJob{SPIRVCompilerTarget}, mod::LLVM.Module)
5757
# (OpKill is only available in fragment execution mode)
5858
ModulePassManager() do pm
5959
add!(pm, ModulePass("RemoveTrap", rm_trap!))
60+
add!(pm, ModulePass("RemoveFreeze", rm_freeze!))
6061
run!(pm, mod)
6162
end
6263
end
6364

6465
# Generate SPIR-V for `mod` by writing it to a temporary bitcode file and running
# the external `llvm-spirv` translator on it.
#
# Returns the raw bytes of the SPIR-V binary when `format` is
# `LLVM.API.LLVMObjectFile`; for any other `format` (the default is
# `LLVM.API.LLVMAssemblyFile`) returns the `spirv-dis` disassembly as a `String`.
#
# Debug info is stripped from `mod` as a side effect. When an external tool
# fails, an error is raised and the temporary files are left on disk so that
# they can be attached to a bug report, as the error messages request.
@unlocked function mcgen(job::CompilerJob{SPIRVCompilerTarget}, mod::LLVM.Module,
                         format=LLVM.API.LLVMAssemblyFile)
    # The SPIRV Tools don't handle Julia's debug info, rejecting DW_LANG_Julia...
    strip_debuginfo!(mod)

    # translate to SPIR-V
    input = tempname(cleanup=false) * ".bc"
    translated = tempname(cleanup=false) * ".spv"
    write(input, mod)
    SPIRV_LLVM_Translator_jll.llvm_spirv() do translator
        proc = run(ignorestatus(`$translator --spirv-debug-info-version=ocl-100 -o $translated $input`))
        if !success(proc)
            error("""Failed to translate LLVM code to SPIR-V.
                     If you think this is a bug, please file an issue and attach $(input).""")
        end
    end

    # validate
    # XXX: parameterize this on the `validate` driver argument
    # XXX: our code currently doesn't pass the validator
    if Base.JLOptions().debug_level >= 2 && false
        SPIRV_Tools_jll.spirv_val() do validator
            proc = run(ignorestatus(`$validator $translated`))
            if !success(proc)
                error("""Failed to validate generated SPIR-V.
                         If you think this is a bug, please file an issue and attach $(input) and $(translated).""")
            end
        end
    end

    # optimize
    # XXX: parameterize this on the `optimize` driver argument
    # XXX: the optimizer segfaults on some of our code
    #
    # `result` is the file that holds the final SPIR-V. While the optimizer is
    # disabled that is the translator's output; previously the disassembler was
    # always pointed at the optimizer's output file, which is never written in
    # that case, so requesting assembly output could not work.
    result = if false
        optimized = tempname(cleanup=false) * ".spv"
        SPIRV_Tools_jll.spirv_opt() do optimizer
            proc = run(ignorestatus(`$optimizer -O --skip-validation $translated -o $optimized`))
            if !success(proc)
                error("""Failed to optimize generated SPIR-V.
                         If you think this is a bug, please file an issue and attach $(input) and $(translated).""")
            end
        end
        optimized
    else
        translated
    end

    output = if format == LLVM.API.LLVMObjectFile
        read(result)
    else
        # disassemble
        SPIRV_Tools_jll.spirv_dis() do disassembler
            read(`$disassembler $result`, String)
        end
    end

    # clean up the temporary files (only reached when everything succeeded)
    rm(input)
    rm(translated)
    result == translated || rm(result)

    return output
end
91124

92125
# reimplementation that uses `spirv-dis`, giving much more pleasant output
93126
function code_native(io::IO, job::CompilerJob{SPIRVCompilerTarget}; raw::Bool=false, dump_module::Bool=false)
94-
if raw
95-
# The SPIRV Tools don't handle Julia's debug info, rejecting DW_LANG_Julia...
96-
# so just return what LLVM gives us in that case (which is also more faithful).
97-
asm, _ = codegen(:asm, job; strip=false, only_entry=!dump_module, validate=false)
98-
print(io, asm)
99-
return
100-
end
101-
102127
obj, _ = codegen(:obj, job; strip=!raw, only_entry=!dump_module, validate=false)
103128
mktemp() do input_path, input_io
104129
write(input_io, obj)
@@ -146,6 +171,27 @@ function rm_trap!(mod::LLVM.Module)
146171
return changed
147172
end
148173

174+
# Module pass: strip every `freeze` instruction from `mod`, redirecting its
# users to the instruction's first operand (the value that was being frozen).
# Returns `true` when at least one instruction was removed, `false` otherwise.
# (KhronosGroup/SPIRV-LLVM-Translator#1140)
function rm_freeze!(mod::LLVM.Module)
    job = current_job::CompilerJob
    changed = false
    @timeit_debug to "remove freeze" begin

    for fn in functions(mod)
        for block in blocks(fn)
            for inst in instructions(block)
                # only `freeze` instructions are of interest
                inst isa LLVM.FreezeInst || continue

                # forward the frozen value to all users, after which the
                # instruction must be unused and can be deleted
                # NOTE(review): this deletes from `block` while iterating over
                # its instructions; presumably the iterator tolerates removal
                # of the current element -- confirm against LLVM.jl.
                frozen = first(operands(inst))
                replace_uses!(inst, frozen)
                @compiler_assert isempty(uses(inst)) job
                unsafe_delete!(block, inst)
                changed = true
            end
        end
    end

    end
    return changed
end
194+
149195
# wrap byval pointers in a single-value struct
150196
function wrap_byval(@nospecialize(job::CompilerJob), mod::LLVM.Module, entry_f::LLVM.Function)
151197
ctx = context(mod)

0 commit comments

Comments
 (0)