-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Enable .ptr .global .align attributes for kernel attributes for CUDA #114874
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 15 commits
a0d23db
7dbc3b7
1814093
608202f
58dc372
3f9b0fb
7eb7dbb
955d72a
4973c30
22914f7
21da083
118c10c
c8c51a0
504c119
aaade68
78ea5e5
0ef9acb
f6f64a1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1600,29 +1600,37 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { | |
|
|
||
| if (isKernelFunc) { | ||
| if (PTy) { | ||
| // Special handling for pointer arguments to kernel | ||
| O << "\t.param .u" << PTySizeInBits << " "; | ||
|
|
||
| if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() != | ||
| NVPTX::CUDA) { | ||
| int addrSpace = PTy->getAddressSpace(); | ||
| switch (addrSpace) { | ||
| default: | ||
| O << ".ptr "; | ||
| break; | ||
| case ADDRESS_SPACE_CONST: | ||
| O << ".ptr .const "; | ||
| break; | ||
| case ADDRESS_SPACE_SHARED: | ||
| O << ".ptr .shared "; | ||
| break; | ||
| case ADDRESS_SPACE_GLOBAL: | ||
| O << ".ptr .global "; | ||
| break; | ||
| } | ||
| Align ParamAlign = I->getParamAlign().valueOrOne(); | ||
| O << ".align " << ParamAlign.value() << " "; | ||
| int addrSpace = PTy->getAddressSpace(); | ||
| const bool IsCUDA = | ||
| static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == | ||
| NVPTX::CUDA; | ||
|
|
||
| O << ".ptr "; | ||
LewisCrawford marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| switch (addrSpace) { | ||
| default: | ||
| // Special handling for pointer arguments to kernel | ||
| // CUDA kernels assume that pointers are in global address space | ||
| // See: | ||
| // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space | ||
| if (IsCUDA) | ||
| O << " .global "; | ||
|
||
| break; | ||
| case ADDRESS_SPACE_CONST: | ||
| O << " .const "; | ||
| break; | ||
| case ADDRESS_SPACE_SHARED: | ||
| O << " .shared "; | ||
| break; | ||
| case ADDRESS_SPACE_GLOBAL: | ||
| O << " .global "; | ||
| break; | ||
| } | ||
|
|
||
| Align ParamAlign = I->getParamAlign().valueOrOne(); | ||
| if (ParamAlign != 1 || !IsCUDA) | ||
|
||
| O << ".align " << ParamAlign.value() << " "; | ||
| O << TLI->getParamName(F, paramIndex); | ||
| continue; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| ; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s | ||
| ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify %} | ||
|
|
||
| %struct.Large = type { [16 x double] } | ||
|
|
||
| ; CHECK-LABEL: .entry func_align( | ||
| ; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0 | ||
| ; CHECK: .param .u64 .ptr .global .align 16 func_align_param_1 | ||
| ; CHECK: .param .u64 .ptr .global .align 16 func_align_param_2 | ||
| ; CHECK: .param .u64 .ptr .shared .align 16 func_align_param_3 | ||
| ; CHECK: .param .u64 .ptr .const .align 16 func_align_param_4 | ||
| define void @func_align(ptr nocapture readonly align 16 %input, | ||
| ptr nocapture align 16 %out, | ||
| ptr addrspace(1) align 16 %global, | ||
| ptr addrspace(3) align 16 %shared, | ||
| ptr addrspace(4) align 16 %const) { | ||
| entry: | ||
| %0 = addrspacecast ptr %out to ptr addrspace(1) | ||
LewisCrawford marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| %1 = addrspacecast ptr %input to ptr addrspace(1) | ||
| %getElem = getelementptr inbounds %struct.Large, ptr addrspace(1) %1, i64 0, i32 0, i64 5 | ||
| %tmp2 = load i32, ptr addrspace(1) %getElem, align 8 | ||
| store i32 %tmp2, ptr addrspace(1) %0, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| ; CHECK-LABEL: .entry func_noalign( | ||
| ; CHECK: .param .u64 .ptr .global func_noalign_param_0 | ||
| ; CHECK: .param .u64 .ptr .global func_noalign_param_1 | ||
| ; CHECK: .param .u64 .ptr .global func_noalign_param_2 | ||
| ; CHECK: .param .u64 .ptr .shared func_noalign_param_3 | ||
| ; CHECK: .param .u64 .ptr .const func_noalign_param_4 | ||
| define void @func_noalign(ptr nocapture readonly %input, | ||
| ptr nocapture %out, | ||
| ptr addrspace(1) %global, | ||
| ptr addrspace(3) %shared, | ||
| ptr addrspace(4) %const) { | ||
| entry: | ||
| %0 = addrspacecast ptr %out to ptr addrspace(1) | ||
| %1 = addrspacecast ptr %input to ptr addrspace(1) | ||
| %getElem = getelementptr inbounds %struct.Large, ptr addrspace(1) %1, i64 0, i32 0, i64 5 | ||
| %tmp2 = load i32, ptr addrspace(1) %getElem, align 8 | ||
| store i32 %tmp2, ptr addrspace(1) %0, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| !nvvm.annotations = !{!0, !1} | ||
| !0 = !{ptr @func_align, !"kernel", i32 1} | ||
| !1 = !{ptr @func_noalign, !"kernel", i32 1} | ||
Uh oh!
There was an error while loading. Please reload this page.