Skip to content

Commit 8c4889e

Browse files
author
git apple-llvm automerger
committed
Merge commit '6d058317e60c' from llvm.org/main into next
2 parents 4886b7e + 6d05831 commit 8c4889e

File tree

3 files changed

+63
-24
lines changed

3 files changed

+63
-24
lines changed

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,30 +1600,27 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
16001600

16011601
if (isKernelFunc) {
16021602
if (PTy) {
1603-
// Special handling for pointer arguments to kernel
1604-
O << "\t.param .u" << PTySizeInBits << " ";
1605-
1606-
if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1607-
NVPTX::CUDA) {
1608-
int addrSpace = PTy->getAddressSpace();
1609-
switch (addrSpace) {
1610-
default:
1611-
O << ".ptr ";
1612-
break;
1613-
case ADDRESS_SPACE_CONST:
1614-
O << ".ptr .const ";
1615-
break;
1616-
case ADDRESS_SPACE_SHARED:
1617-
O << ".ptr .shared ";
1618-
break;
1619-
case ADDRESS_SPACE_GLOBAL:
1620-
O << ".ptr .global ";
1621-
break;
1622-
}
1623-
Align ParamAlign = I->getParamAlign().valueOrOne();
1624-
O << ".align " << ParamAlign.value() << " ";
1603+
O << "\t.param .u" << PTySizeInBits << " .ptr";
1604+
1605+
switch (PTy->getAddressSpace()) {
1606+
default:
1607+
break;
1608+
case ADDRESS_SPACE_GLOBAL:
1609+
O << " .global";
1610+
break;
1611+
case ADDRESS_SPACE_SHARED:
1612+
O << " .shared";
1613+
break;
1614+
case ADDRESS_SPACE_CONST:
1615+
O << " .const";
1616+
break;
1617+
case ADDRESS_SPACE_LOCAL:
1618+
O << " .local";
1619+
break;
16251620
}
1626-
O << TLI->getParamName(F, paramIndex);
1621+
1622+
O << " .align " << I->getParamAlign().valueOrOne().value();
1623+
O << " " << TLI->getParamName(F, paramIndex);
16271624
continue;
16281625
}
16291626

llvm/test/CodeGen/NVPTX/i1-param.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ target triple = "nvptx-nvidia-cuda"
88

99
; CHECK: .entry foo
1010
; CHECK: .param .u8 foo_param_0
11-
; CHECK: .param .u64 foo_param_1
11+
; CHECK: .param .u64 .ptr .align 1 foo_param_1
1212
define void @foo(i1 %p, ptr %out) {
1313
%val = zext i1 %p to i32
1414
store i32 %val, ptr %out
(Newly added NVPTX test file; its path is not shown in this view.)

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s
2+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify %}
3+
4+
%struct.Large = type { [16 x double] }
5+
6+
; CHECK-LABEL: .entry func_align(
7+
; CHECK: .param .u64 .ptr .align 1 func_align_param_0
8+
; CHECK: .param .u64 .ptr .align 2 func_align_param_1
9+
; CHECK: .param .u64 .ptr .global .align 4 func_align_param_2
10+
; CHECK: .param .u64 .ptr .shared .align 8 func_align_param_3
11+
; CHECK: .param .u64 .ptr .const .align 16 func_align_param_4
12+
; CHECK: .param .u64 .ptr .local .align 32 func_align_param_5
13+
define void @func_align(ptr nocapture readonly align 1 %input,
14+
ptr nocapture align 2 %out,
15+
ptr addrspace(1) align 4 %global,
16+
ptr addrspace(3) align 8 %shared,
17+
ptr addrspace(4) align 16 %const,
18+
ptr addrspace(5) align 32 %local) {
19+
entry:
20+
ret void
21+
}
22+
23+
; CHECK-LABEL: .entry func_noalign(
24+
; CHECK: .param .u64 .ptr .align 1 func_noalign_param_0
25+
; CHECK: .param .u64 .ptr .align 1 func_noalign_param_1
26+
; CHECK: .param .u64 .ptr .global .align 1 func_noalign_param_2
27+
; CHECK: .param .u64 .ptr .shared .align 1 func_noalign_param_3
28+
; CHECK: .param .u64 .ptr .const .align 1 func_noalign_param_4
29+
; CHECK: .param .u64 .ptr .local .align 1 func_noalign_param_5
30+
define void @func_noalign(ptr nocapture readonly %input,
31+
ptr nocapture %out,
32+
ptr addrspace(1) %global,
33+
ptr addrspace(3) %shared,
34+
ptr addrspace(4) %const,
35+
ptr addrspace(5) %local) {
36+
entry:
37+
ret void
38+
}
39+
40+
!nvvm.annotations = !{!0, !1}
41+
!0 = !{ptr @func_align, !"kernel", i32 1}
42+
!1 = !{ptr @func_noalign, !"kernel", i32 1}

0 commit comments

Comments
 (0)