Skip to content

Commit 5136798

Browse files
authored
Add option for backends to run vectorization passes (#716)
Disabled by default, only enabled for AMDGPU, CUDA, and native.
1 parent a239ae0 commit 5136798

File tree

5 files changed

+9
-4
lines changed

5 files changed

+9
-4
lines changed

src/gcn.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,5 @@ function emit_trap!(job::CompilerJob{GCNCompilerTarget}, builder, mod, inst)
124124
end
125125
call!(builder, trap_ft, trap)
126126
end
127+
128+
# GCN jobs opt in to the vectorization passes.
function can_vectorize(job::CompilerJob{GCNCompilerTarget})
    return true
end

src/interface.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,9 @@ end
222222
# Has the runtime available and does not require special handling
223223
function uses_julia_runtime(@nospecialize(job::CompilerJob))
    # Generic fallback: `false` unless a target defines a more specific method.
    return false
end
224224

225+
# Is it legal to run vectorization passes on this target?
226+
function can_vectorize(@nospecialize(job::CompilerJob))
    # Disabled by default; a back-end opts in by defining a more specific method.
    return false
end
227+
225228
# Should emit PTLS lookup that can be relocated
226229
function dump_native(@nospecialize(job::CompilerJob))
    # Generic fallback: `false` for any job without a more specific method.
    return false
end
227230

src/native.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,4 @@ end
3535

3636
# Build the runtime slug for a native job from the target's CPU name, a hash of its
# feature string, and a "-jlrt" suffix when the target has `jlruntime` set.
function runtime_slug(job::CompilerJob{NativeCompilerTarget})
    target = job.config.target
    suffix = target.jlruntime ? "-jlrt" : ""
    return "native_$(target.cpu)-$(hash(target.features))$(suffix)"
end
3737
# For native jobs the answer is whatever the target's `jlruntime` field says.
function uses_julia_runtime(job::CompilerJob{NativeCompilerTarget})
    target = job.config.target
    return target.jlruntime
end
38+
# Native jobs opt in to the vectorization passes.
function can_vectorize(job::CompilerJob{NativeCompilerTarget})
    return true
end

src/optim.jl

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,7 @@ function buildNewPMPipeline!(mpm, @nospecialize(job::CompilerJob), opt_level)
3131
add!(mpm, NewPMFunctionPassManager()) do fpm
3232
buildLoopOptimizerPipeline(fpm, job, opt_level)
3333
buildScalarOptimizerPipeline(fpm, job, opt_level)
34-
if uses_julia_runtime(job) && opt_level >= 2
35-
# XXX: we disable vectorization, as this generally isn't useful for GPU targets
36-
# and actually causes issues with some back-end compilers (like Metal).
37-
# TODO: Make this not dependent on `uses_julia_runtime` (likely CPU), but its own control
34+
if (can_vectorize(job)) && opt_level >= 2
3835
buildVectorPipeline(fpm, job, opt_level)
3936
end
4037
if isdebug(:optim)

src/ptx.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ have_fma(@nospecialize(target::PTXCompilerTarget), T::Type) = true
7878

7979
# CUDA only supports DWARF v2, so PTX targets always report version 2.
function dwarf_version(target::PTXCompilerTarget)
    return Int32(2)
end
8080

81+
# PTX jobs opt in to the vectorization passes.
function can_vectorize(job::CompilerJob{PTXCompilerTarget})
    return true
end
82+
8183
## job
8284

8385
function Base.show(io::IO, @nospecialize(job::CompilerJob{PTXCompilerTarget}))

0 commit comments

Comments
 (0)