Skip to content

Commit 850f0be

Browse files
committed
[NVPTX] Fix vaarg store alignment
The existing vaargs.ll test uses inconsistent alignment for the store and the load of a vaarg; in fact, the PTX code generated from this test will not work correctly. This patch adds an extra alignment step to LowerCall, aligning each variadic argument's store offset to the ABI alignment of its type, to avoid this discrepancy.
1 parent 27e78e6 commit 850f0be

File tree

2 files changed

+11
-5
lines changed

2 files changed

+11
-5
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1607,6 +1607,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
16071607
StoreOperands.push_back(
16081608
DAG.getConstant(IsVAArg ? FirstVAArg : ParamCount, dl, MVT::i32));
16091609

1610+
if (!IsByVal && IsVAArg) {
1611+
// Align each part of the variadic argument to their type.
1612+
VAOffset = alignTo(VAOffset, DL.getABITypeAlign(
1613+
EltVT.getTypeForEVT(*DAG.getContext())));
1614+
}
1615+
16101616
StoreOperands.push_back(DAG.getConstant(
16111617
IsByVal ? CurOffset + VAOffset : (IsVAArg ? VAOffset : CurOffset),
16121618
dl, MVT::i32));

llvm/test/CodeGen/NVPTX/vaargs.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,12 @@ define i32 @test_foo(i32 %i, i64 %l, double %d, ptr %p) {
8989
; CHECK-NEXT: ld.param.u32 [[ARG_I32:%r[0-9]+]], [test_foo_param_0];
9090

9191
; Store arguments to an array
92-
; CHECK32: .param .align 8 .b8 param1[24];
93-
; CHECK64: .param .align 8 .b8 param1[28];
92+
; CHECK32: .param .align 8 .b8 param1[28];
93+
; CHECK64: .param .align 8 .b8 param1[32];
9494
; CHECK-NEXT: st.param.b32 [param1], [[ARG_I32]];
95-
; CHECK-NEXT: st.param.b64 [param1+4], [[ARG_I64]];
96-
; CHECK-NEXT: st.param.f64 [param1+12], [[ARG_DOUBLE]];
97-
; CHECK-NEXT: st.param.b[[BITS]] [param1+20], [[ARG_VOID_PTR]];
95+
; CHECK-NEXT: st.param.b64 [param1+8], [[ARG_I64]];
96+
; CHECK-NEXT: st.param.f64 [param1+16], [[ARG_DOUBLE]];
97+
; CHECK-NEXT: st.param.b[[BITS]] [param1+24], [[ARG_VOID_PTR]];
9898
; CHECK-NEXT: .param .b32 retval0;
9999
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[]
100100

0 commit comments

Comments
 (0)