Skip to content

Commit f6f64a1

Browse files
committed
Emit .align 1 by default if unspecified
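Explicitly emit `.align 1` on kernel pointer parameters for both OpenCL and CUDA when the parameter's alignment is unspecified; previously the directive was omitted for CUDA in that case and emitted only for OpenCL. In most cases the frontend already specifies the alignment, so this should change little in practice.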
1 parent 0ef9acb commit f6f64a1

File tree

3 files changed

+26
-23
lines changed

3 files changed

+26
-23
lines changed

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,24 +1600,27 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
16001600

16011601
if (isKernelFunc) {
16021602
if (PTy) {
1603-
O << "\t.param .u" << PTySizeInBits << " .ptr ";
1604-
1605-
const unsigned AddrSpace = PTy->getAddressSpace();
1606-
if (AddrSpace != ADDRESS_SPACE_GENERIC) {
1607-
O << ".";
1608-
emitPTXAddressSpace(AddrSpace, O);
1609-
O << " ";
1603+
O << "\t.param .u" << PTySizeInBits << " .ptr";
1604+
1605+
switch (PTy->getAddressSpace()) {
1606+
default:
1607+
break;
1608+
case ADDRESS_SPACE_GLOBAL:
1609+
O << " .global";
1610+
break;
1611+
case ADDRESS_SPACE_SHARED:
1612+
O << " .shared";
1613+
break;
1614+
case ADDRESS_SPACE_CONST:
1615+
O << " .const";
1616+
break;
1617+
case ADDRESS_SPACE_LOCAL:
1618+
O << " .local";
1619+
break;
16101620
}
16111621

1612-
const bool IsCUDA =
1613-
static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
1614-
NVPTX::CUDA;
1615-
1616-
MaybeAlign ParamAlign = I->getParamAlign();
1617-
if (ParamAlign.has_value() || !IsCUDA)
1618-
O << ".align " << ParamAlign.valueOrOne().value() << " ";
1619-
1620-
O << TLI->getParamName(F, paramIndex);
1622+
O << " .align " << I->getParamAlign().valueOrOne().value();
1623+
O << " " << TLI->getParamName(F, paramIndex);
16211624
continue;
16221625
}
16231626

llvm/test/CodeGen/NVPTX/i1-param.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ target triple = "nvptx-nvidia-cuda"
88

99
; CHECK: .entry foo
1010
; CHECK: .param .u8 foo_param_0
11-
; CHECK: .param .u64 .ptr foo_param_1
11+
; CHECK: .param .u64 .ptr .align 1 foo_param_1
1212
define void @foo(i1 %p, ptr %out) {
1313
%val = zext i1 %p to i32
1414
store i32 %val, ptr %out

llvm/test/CodeGen/NVPTX/kernel-param-align.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@ entry:
2121
}
2222

2323
; CHECK-LABEL: .entry func_noalign(
24-
; CHECK: .param .u64 .ptr func_noalign_param_0
25-
; CHECK: .param .u64 .ptr func_noalign_param_1
26-
; CHECK: .param .u64 .ptr .global func_noalign_param_2
27-
; CHECK: .param .u64 .ptr .shared func_noalign_param_3
28-
; CHECK: .param .u64 .ptr .const func_noalign_param_4
29-
; CHECK: .param .u64 .ptr .local func_noalign_param_5
24+
; CHECK: .param .u64 .ptr .align 1 func_noalign_param_0
25+
; CHECK: .param .u64 .ptr .align 1 func_noalign_param_1
26+
; CHECK: .param .u64 .ptr .global .align 1 func_noalign_param_2
27+
; CHECK: .param .u64 .ptr .shared .align 1 func_noalign_param_3
28+
; CHECK: .param .u64 .ptr .const .align 1 func_noalign_param_4
29+
; CHECK: .param .u64 .ptr .local .align 1 func_noalign_param_5
3030
define void @func_noalign(ptr nocapture readonly %input,
3131
ptr nocapture %out,
3232
ptr addrspace(1) %global,

0 commit comments

Comments
 (0)