Skip to content

Commit 6c11dab

Browse files
committed
Emit CUDA version info in the LLVM module.
1 parent e5658e5 commit 6c11dab

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

src/ptx.jl

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,13 +85,31 @@ runtime_slug(@nospecialize(job::CompilerJob{PTXCompilerTarget})) =
8585
"-exitable=$(job.target.exitable)"
8686

8787
# Module-level processing for PTX compiler jobs; `mod` is modified in place.
#
# Two things happen here:
#   1. a (currently disabled) pass over every function that is meant to set the PTX device
#      calling convention, see JuliaGPU/GPUCompiler.jl#97;
#   2. for each of the property globals `sm_major`, `sm_minor`, `ptx_major` and `ptx_minor`
#      that already exists in `mod`, the global is given an `i32` initializer taken from
#      `job.target` and its linkage is switched to private.
#
# Returns `nothing`.
function process_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}), mod::LLVM.Module)
    ctx = context(mod)

    # calling convention
    if LLVM.version() >= v"8"
        for f in functions(mod)
            # JuliaGPU/GPUCompiler.jl#97
            #callconv!(f, LLVM.API.LLVMPTXDeviceCallConv)
        end
    end

    # Expose the device capability and the PTX ISA version as constants in the module, so
    # that device code can 'query' them and LLVM can fold the checks into static code.
    # Only globals that are actually present (i.e. used) get a value: unconditionally
    # creating them would result in duplicate declarations.
    target = job.target
    properties = ["sm_major"  => target.cap.major,
                  "sm_minor"  => target.cap.minor,
                  "ptx_major" => target.ptx.major,
                  "ptx_minor" => target.ptx.minor]
    module_globals = globals(mod)
    for (property, number) in properties
        # skip properties the module never refers to
        haskey(module_globals, property) || continue

        gvar = module_globals[property]
        initializer!(gvar, ConstantInt(LLVM.Int32Type(ctx), number))
        # private linkage is what allows the value to be inlined
        linkage!(gvar, LLVM.API.LLVMPrivateLinkage)
    end

    return nothing
end
96114

97115
function process_entry!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
@@ -161,6 +179,10 @@ function add_lowering_passes!(@nospecialize(job::CompilerJob{PTXCompilerTarget})
161179

162180
# even if we support `unreachable`, we still prefer `exit` to `trap`
163181
add!(pm, ModulePass("HideTrap", hide_trap!))
182+
183+
# we emit properties (of the device and ptx isa) as private global constants,
184+
# so run the global optimizer here to inline them before the rest of the optimizer runs.
185+
global_optimizer!(pm)
164186
end
165187

166188
function optimize_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),

0 commit comments

Comments
 (0)