Skip to content

Commit c67fc1c

Browse files
committed
[NVPTX] Add errors for incorrect CUDA addrspaces
The CUDA API only accepts kernel params in the global and generic address spaces, so display an error message when attempting to emit pointers outside those address spaces from CUDA (but still allow them for OpenCL).
1 parent 2dd296a commit c67fc1c

File tree

5 files changed

+41
-25
lines changed

5 files changed

+41
-25
lines changed

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1399,19 +1399,27 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
13991399
if (PTy) {
14001400
O << "\t.param .u" << PTySizeInBits << " .ptr";
14011401

1402+
bool IsCUDA = static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
1403+
NVPTX::CUDA;
14021404
switch (PTy->getAddressSpace()) {
14031405
default:
14041406
break;
14051407
case ADDRESS_SPACE_GLOBAL:
14061408
O << " .global";
14071409
break;
14081410
case ADDRESS_SPACE_SHARED:
1411+
if (IsCUDA)
1412+
report_fatal_error(".shared ptr kernel args unsupported in CUDA.");
14091413
O << " .shared";
14101414
break;
14111415
case ADDRESS_SPACE_CONST:
1416+
if (IsCUDA)
1417+
report_fatal_error(".const ptr kernel args unsupported in CUDA.");
14121418
O << " .const";
14131419
break;
14141420
case ADDRESS_SPACE_LOCAL:
1421+
if (IsCUDA)
1422+
report_fatal_error(".local ptr kernel args unsupported in CUDA.");
14151423
O << " .local";
14161424
break;
14171425
}

llvm/test/CodeGen/NVPTX/kernel-param-align.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
2-
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -arch=sm_60 - %}
1+
; RUN: llc < %s -mcpu=sm_60 | FileCheck %s
2+
; RUN: %if ptxas %{ llc < %s -mcpu=sm_60 | %ptxas -arch=sm_60 - %}
3+
target triple = "nvptx64-nvidia-nvcl"
34

45
%struct.Large = type { [16 x double] }
56

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
; RUN: not --crash llc < %s -mcpu=sm_75 -o /dev/null 2>&1 | FileCheck %s
2+
3+
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
4+
target triple = "nvptx64-nvidia-cuda"
5+
6+
; Make sure we exit with an error message for this input, as pointers to the
7+
; shared address-space are only supported as kernel args in NVCL, not CUDA.
8+
; CHECK: .shared ptr kernel args unsupported in CUDA.
9+
define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(3) %in) {
10+
%v = load i32, ptr addrspace(3) %in, align 4
11+
store i32 %v, ptr addrspace(1) %out, align 4
12+
ret void
13+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; RUN: opt < %s -S -nvptx-lower-args | FileCheck %s --check-prefixes COMMON,IR
2+
; RUN: llc < %s -mcpu=sm_20 | FileCheck %s --check-prefixes COMMON,PTX
3+
; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas-verify %}
4+
5+
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
6+
target triple = "nvptx64-nvidia-nvcl"
7+
8+
; COMMON-LABEL: ptr_nongeneric
9+
define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(3) %in) {
10+
; IR-NOT: addrspacecast
11+
; PTX-NOT: cvta.to.global
12+
; PTX: ld.shared.u32
13+
; PTX: st.global.u32
14+
%v = load i32, ptr addrspace(3) %in, align 4
15+
store i32 %v, ptr addrspace(1) %out, align 4
16+
ret void
17+
}

llvm/test/CodeGen/NVPTX/lower-args.ll

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -140,29 +140,6 @@ define ptx_kernel void @ptr_generic(ptr %out, ptr %in) {
140140
ret void
141141
}
142142

143-
define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(3) %in) {
144-
; IR-LABEL: define ptx_kernel void @ptr_nongeneric(
145-
; IR-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(3) [[IN:%.*]]) {
146-
; IR-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) [[IN]], align 4
147-
; IR-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4
148-
; IR-NEXT: ret void
149-
;
150-
; PTX-LABEL: ptr_nongeneric(
151-
; PTX: {
152-
; PTX-NEXT: .reg .b32 %r<2>;
153-
; PTX-NEXT: .reg .b64 %rd<3>;
154-
; PTX-EMPTY:
155-
; PTX-NEXT: // %bb.0:
156-
; PTX-NEXT: ld.param.u64 %rd1, [ptr_nongeneric_param_0];
157-
; PTX-NEXT: ld.param.u64 %rd2, [ptr_nongeneric_param_1];
158-
; PTX-NEXT: ld.shared.u32 %r1, [%rd2];
159-
; PTX-NEXT: st.global.u32 [%rd1], %r1;
160-
; PTX-NEXT: ret;
161-
%v = load i32, ptr addrspace(3) %in, align 4
162-
store i32 %v, ptr addrspace(1) %out, align 4
163-
ret void
164-
}
165-
166143
define ptx_kernel void @ptr_as_int(i64 noundef %i, i32 noundef %v) {
167144
; IRC-LABEL: define ptx_kernel void @ptr_as_int(
168145
; IRC-SAME: i64 noundef [[I:%.*]], i32 noundef [[V:%.*]]) {

0 commit comments

Comments
 (0)