Skip to content

Commit b46607b

Browse files
committed
[CHERI] Fix Bounded Mem/Var Args layout on stack
1 parent 2f07835 commit b46607b

File tree

4 files changed

+79
-95
lines changed

4 files changed

+79
-95
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 38 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@
2424
#include "llvm/ADT/Statistic.h"
2525
#include "llvm/Analysis/MemoryLocation.h"
2626
#include "llvm/Analysis/VectorUtils.h"
27-
#include "llvm/CHERI/cheri-compressed-cap/cheri_compressed_cap.h"
2827
#include "llvm/CodeGen/MachineFrameInfo.h"
2928
#include "llvm/CodeGen/MachineFunction.h"
3029
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -6511,7 +6510,11 @@ SDValue RISCVTargetLowering::lowerVASTARTCap(SDValue Op, SelectionDAG &DAG) cons
65116510
MachinePointerInfo::getStack(MF, 0), Align(PtrSize));
65126511
SDValue Chain = VarPtr.getOperand(0);
65136512
if (UseBoundedMemArgsCallee) {
6514-
uint64_t PermMask = -1UL & ~(CAP_AP_X | CAP_AP_W);
6513+
uint64_t ExecPerm =
6514+
Subtarget.hasStdExtZCheriPureCap() ? (1 << 17) : (1 << 1);
6515+
uint64_t WritePerm =
6516+
Subtarget.hasStdExtZCheriPureCap() ? (1 << 0) : (1 << 3);
6517+
uint64_t PermMask = -1UL & ~(ExecPerm | WritePerm);
65156518
VarPtr = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, PtrVT,
65166519
DAG.getConstant(Intrinsic::cheri_cap_perms_and, DL,
65176520
Subtarget.getXLenVT()),
@@ -15234,8 +15237,10 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1523415237
? Subtarget.typeForCapabilities()
1523515238
: MVT();
1523615239
MVT PtrVT = DL.isFatPointer(DL.getAllocaAddrSpace()) ? CLenVT : XLenVT;
15237-
bool IsPureCapVarArgs = !IsFixed && RISCVABI::isCheriPureCapABI(ABI);
15240+
bool IsPureCap = RISCVABI::isCheriPureCapABI(ABI);
15241+
bool IsPureCapVarArgs = !IsFixed && IsPureCap;
1523815242
bool IsBoundedVarArgs = IsPureCapVarArgs && Subtarget.hasCheriBoundVarArg();
15243+
unsigned SlotSize = PtrVT.getFixedSizeInBits() / 8;
1523915244

1524015245
// Static chain parameter must not be passed in normal argument registers,
1524115246
// so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
@@ -15309,7 +15314,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1530915314
// not apply.
1531015315
// TODO: Pure capability varargs bounds
1531115316
unsigned TwoXLenInBytes = (2 * XLen) / 8;
15312-
if (!IsFixed && !RISCVABI::isCheriPureCapABI(ABI) &&
15317+
if (!IsFixed && !IsPureCap &&
1531315318
ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
1531415319
DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
1531515320
unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
@@ -15325,38 +15330,10 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1532515330
assert(PendingLocs.size() == PendingArgFlags.size() &&
1532615331
"PendingLocs and PendingArgFlags out of sync");
1532715332

15328-
// Bounded VarArgs
15329-
// Each bounded varargs is assigned a 2*XLen slot on the stack
15330-
// If the value is small enough to fit into the slot it is passed
15331-
// directly - otherwise a capability to the value is filled into the
15332-
// slot.
15333-
if (!IsFixed && IsBoundedVarArgs) {
15334-
unsigned SlotSize = CLenVT.getFixedSizeInBits() / 8;
15335-
// Aggregates of size 2*XLen need special handling here
15336-
// as LLVM with treat them as two separate XLen wide arguments
15337-
if(LocVT == XLenVT && OrigTy && OrigTy->isAggregateType()){
15338-
PendingLocs.push_back(
15339-
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
15340-
PendingArgFlags.push_back(ArgFlags);
15341-
if(PendingLocs.size() == 2){
15342-
CCValAssign VA = PendingLocs[0];
15343-
ISD::ArgFlagsTy AF = PendingArgFlags[0];
15344-
PendingLocs.clear();
15345-
PendingArgFlags.clear();
15346-
return CC_RISCVAssign2XLen(XLen, State, IsPureCapVarArgs, VA, AF,
15347-
ValNo, ValVT, LocVT, ArgFlags);
15348-
}
15349-
return false;
15350-
}
15351-
unsigned StackOffset = State.AllocateStack(SlotSize, Align(SlotSize));
15352-
State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
15353-
return false;
15354-
}
15355-
1535615333
// Handle passing f64 on RV32D with a soft float ABI or when floating point
1535715334
// registers are exhausted. Also handle for pure capability varargs which are
1535815335
// always passed on the stack.
15359-
if ((UseGPRForF64 || IsPureCapVarArgs) && XLen == 32 && ValVT == MVT::f64) {
15336+
if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
1536015337
assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
1536115338
"Can't lower f64 if it is split");
1536215339
// Depending on available argument GPRS, f64 may be passed in a pair of
@@ -15366,7 +15343,9 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1536615343
Register Reg = IsPureCapVarArgs ? 0 : State.AllocateReg(ArgGPRs);
1536715344
LocVT = MVT::i32;
1536815345
if (!Reg) {
15369-
unsigned StackOffset = State.AllocateStack(8, Align(8));
15346+
unsigned StackOffset =
15347+
IsBoundedVarArgs ? State.AllocateStack(SlotSize, Align(SlotSize))
15348+
: State.AllocateStack(8, Align(8));
1537015349
State.addLoc(
1537115350
CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1537215351
return false;
@@ -15407,8 +15386,8 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1540715386
ISD::ArgFlagsTy AF = PendingArgFlags[0];
1540815387
PendingLocs.clear();
1540915388
PendingArgFlags.clear();
15410-
return CC_RISCVAssign2XLen(XLen, State, IsPureCapVarArgs, VA, AF,
15411-
ValNo, ValVT, LocVT, ArgFlags);
15389+
return CC_RISCVAssign2XLen(XLen, State, IsPureCapVarArgs, VA, AF, ValNo,
15390+
ValVT, LocVT, ArgFlags);
1541215391
}
1541315392

1541415393
// Will be passed indirectly; make sure we allocate the right type of
@@ -15461,8 +15440,17 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1546115440
Reg = State.AllocateReg(ArgGPRs);
1546215441
}
1546315442

15443+
// Aggregate types (i.e. structs/arrays) which can fit into 2*XLEN:
15444+
// don't allocate a slot for each element; instead, we make sure that the
15445+
// next element is then properly aligned.
15446+
bool AllocateSlot = IsBoundedVarArgs;
15447+
if (OrigTy && OrigTy->isAggregateType())
15448+
AllocateSlot = false;
1546415449
unsigned StackOffset =
15465-
Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
15450+
Reg ? 0
15451+
: (AllocateSlot
15452+
? State.AllocateStack(SlotSize, Align(SlotSize))
15453+
: State.AllocateStack(StoreSizeBytes, StackAlign));
1546615454

1546715455
// If we reach this point and PendingLocs is non-empty, we must be at the
1546815456
// end of a split argument that must be passed indirectly.
@@ -16339,7 +16327,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1633916327

1634016328
// Bookkeeping for cheri varargs/memargs
1634116329
int VAArgStartOffset, VAArgEndOffset, MemArgStartOffset, MemArgEndOffset;
16342-
SDValue FirstAddr, FirstArgAddr;
16330+
SDValue FirstVAAddr, FirstArgAddr;
1634316331

1634416332
// Copy argument values to their designated locations.
1634516333
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
@@ -16466,34 +16454,23 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1646616454
SDValue Address =
1646716455
DAG.getPointerAdd(DL, StackPtr, VA.getLocMemOffset());
1646816456

16469-
if (UseBoundeMemArgsCaller) {
16457+
if (UseBoundeMemArgsCaller && Outs[i].IsFixed) {
1647016458
if (FirstArgAddr == SDValue()) {
1647116459
FirstArgAddr = Address;
1647216460
MemArgStartOffset = VA.getLocMemOffset();
1647316461
}
1647416462
unsigned VTSize = VA.getValVT().getSizeInBits() / 8;
1647516463
MemArgEndOffset = VA.getLocMemOffset() + VTSize;
16476-
if (!Outs[i].IsFixed) {
16477-
// we need to align to 16-byte slot
16478-
Align OffsetAlign = Align(PtrLenBytes);
16479-
Type *OrigTy = CLI.getArgs()[Outs[i].OrigArgIndex].Ty;
16480-
if (OrigTy && OrigTy->isAggregateType())
16481-
OffsetAlign = Align(PtrLenBytes / 2);
16482-
MemArgEndOffset = alignTo(MemArgEndOffset, OffsetAlign);
16483-
}
1648416464
}
1648516465
if (UseBoundedVarArgs && !Outs[i].IsFixed) {
16486-
if (FirstAddr == SDValue()) {
16487-
FirstAddr = Address;
16466+
if (FirstVAAddr == SDValue()) {
16467+
FirstVAAddr = Address;
1648816468
VAArgStartOffset = VA.getLocMemOffset();
1648916469
}
1649016470
Align OffsetAlign = Align(PtrLenBytes);
16491-
Type *OrigTy = CLI.getArgs()[Outs[i].OrigArgIndex].Ty;
16492-
if (OrigTy && OrigTy->isAggregateType())
16493-
OffsetAlign = Align(PtrLenBytes / 2);
16494-
1649516471
unsigned VTSize = VA.getValVT().getSizeInBits() / 8;
1649616472
VAArgEndOffset = alignTo(VA.getLocMemOffset() + VTSize, OffsetAlign);
16473+
MemArgEndOffset = VAArgEndOffset;
1649716474
}
1649816475

1649916476
// Emit the store.
@@ -16503,15 +16480,19 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1650316480
}
1650416481

1650516482
if(IsVarArg && UseBoundedVarArgs && !UseBoundeMemArgsCaller) {
16506-
if (FirstAddr != SDValue()) {
16483+
if (FirstVAAddr != SDValue()) {
1650716484
SDValue VarArgs = DAG.getCSetBounds(
16508-
FirstAddr, DL, VAArgEndOffset - VAArgStartOffset, Align(),
16485+
FirstVAAddr, DL, VAArgEndOffset - VAArgStartOffset, Align(),
1650916486
"CHERI-RISCV variadic call lowering",
1651016487
cheri::SetBoundsPointerSource::Stack, "varargs call bounds setting");
1651116488
// clear write and execute permissions on varargs. Clearing other
1651216489
// permissions shouldn't be necessary since the capability is derived from
1651316490
// CSP and that shouldn't have these in the first place.
16514-
uint64_t PermMask = -1UL & ~(CAP_AP_X | CAP_AP_W);
16491+
uint64_t ExecPerm =
16492+
Subtarget.hasStdExtZCheriPureCap() ? (1 << 17) : (1 << 1);
16493+
uint64_t WritePerm =
16494+
Subtarget.hasStdExtZCheriPureCap() ? (1 << 0) : (1 << 3);
16495+
uint64_t PermMask = -1UL & ~(ExecPerm | WritePerm);
1651516496
VarArgs = DAG.getNode(
1651616497
ISD::INTRINSIC_WO_CHAIN, DL, PtrVT,
1651716498
DAG.getConstant(Intrinsic::cheri_cap_perms_and, DL, XLenVT), VarArgs,
@@ -16535,7 +16516,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1653516516
std::make_pair(RISCVABI::getCheriBoundedArgReg(), MemArgs));
1653616517
} else {
1653716518
bool ShouldClearArgReg = IsVarArg;
16538-
if (!ShouldClearArgReg && UseBoundeMemArgsCallee) {
16519+
if (!ShouldClearArgReg && !UseBoundeMemArgsCallee) {
1653916520
auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
1654016521
ShouldClearArgReg = !G || !G->getGlobal()->hasInternalLinkage();
1654116522
}

llvm/test/CodeGen/RISCV/cheri/bakewell/cheri-bounded-memargs-caller.ll

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ define i32 @baf(i32 %z, i32 %u, i32 %v, i32 %w, i32 %t1, i32 %t2, i32 %t3, ptr a
4949
; CHECK-NEXT: sc cs1, 32(csp) # 16-byte Folded Spill
5050
; CHECK-NEXT: caddi ca0, csp, 16
5151
; CHECK-NEXT: scbndsi ca0, ca0, 16
52-
; CHECK-NEXT: li a1, -11
52+
; CHECK-NEXT: lui a1, 1048544
53+
; CHECK-NEXT: addiw a1, a1, -2
5354
; CHECK-NEXT: acperm ca1, ct1, a1
5455
; CHECK-NEXT: sc ca1, 0(ca0)
5556
; CHECK-NEXT: lc ca0, 16(csp)
@@ -94,7 +95,8 @@ define i32 @bb([4 x float] %f1.coerce, [4 x float] %f2.coerce, [4 x float] %f3.c
9495
; CHECK-NEXT: caddi csp, csp, -32
9596
; CHECK-NEXT: caddi ca2, csp, 16
9697
; CHECK-NEXT: scbndsi ca2, ca2, 16
97-
; CHECK-NEXT: li a4, -11
98+
; CHECK-NEXT: lui a4, 1048544
99+
; CHECK-NEXT: addiw a4, a4, -2
98100
; CHECK-NEXT: acperm ca4, ct1, a4
99101
; CHECK-NEXT: sc ca4, 0(ca2)
100102
; CHECK-NEXT: sc ct1, 0(csp)
@@ -146,7 +148,8 @@ define i32 @biz() local_unnamed_addr addrspace(200) nounwind {
146148
; CHECK-NEXT: sd a0, 16(csp)
147149
; CHECK-NEXT: li a1, 32
148150
; CHECK-NEXT: scbndsr ca1, csp, a1
149-
; CHECK-NEXT: li a2, -11
151+
; CHECK-NEXT: lui a2, 1048544
152+
; CHECK-NEXT: addiw a2, a2, -2
150153
; CHECK-NEXT: acperm ct1, ca1, a2
151154
; CHECK-NEXT: li a1, 1
152155
; CHECK-NEXT: li a2, 2

llvm/test/CodeGen/RISCV/cheri/bakewell/cheri-bounded-memargs.ll

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,9 @@ define dso_local signext i32 @foo(i32 noundef signext %a, i32 noundef signext %b
1313
; CHECK-NEXT: sc cs1, 0(csp) # 16-byte Folded Spill
1414
; CHECK-NEXT: lc cs0, 16(ct1)
1515
; CHECK-NEXT: mv a0, a1
16-
; CHECK-NEXT: cmv ct1, cnull
1716
; CHECK-NEXT: call bar
1817
; CHECK-NEXT: mv s1, a0
1918
; CHECK-NEXT: cmv ca0, cs0
20-
; CHECK-NEXT: cmv ct1, cnull
2119
; CHECK-NEXT: call baz
2220
; CHECK-NEXT: addw a0, a0, s1
2321
; CHECK-NEXT: lc cra, 32(csp) # 16-byte Folded Reload
@@ -56,21 +54,19 @@ define dso_local signext i32 @baf(i32 noundef signext %a, i32 noundef signext %b
5654
; CHECK-NEXT: ld s1, 0(ct1)
5755
; CHECK-NEXT: caddi ca1, csp, 16
5856
; CHECK-NEXT: scbndsi ca1, ca1, 16
59-
; CHECK-NEXT: li a2, -11
57+
; CHECK-NEXT: lui a2, 1048544
58+
; CHECK-NEXT: addiw a2, a2, -2
6059
; CHECK-NEXT: acperm ca0, ca0, a2
6160
; CHECK-NEXT: sc ca0, 0(ca1)
6261
; CHECK-NEXT: lc ca0, 16(csp)
6362
; CHECK-NEXT: caddi ca1, ca0, 16
6463
; CHECK-NEXT: sc ca1, 16(csp)
6564
; CHECK-NEXT: lc ca0, 0(ca0)
66-
; CHECK-NEXT: cmv ct1, cnull
6765
; CHECK-NEXT: call try
6866
; CHECK-NEXT: mv a0, s1
69-
; CHECK-NEXT: cmv ct1, cnull
7067
; CHECK-NEXT: call bar
7168
; CHECK-NEXT: mv s1, a0
7269
; CHECK-NEXT: cmv ca0, cs0
73-
; CHECK-NEXT: cmv ct1, cnull
7470
; CHECK-NEXT: call baz
7571
; CHECK-NEXT: addw a0, a0, s1
7672
; CHECK-NEXT: lc cra, 64(csp) # 16-byte Folded Reload
@@ -102,7 +98,8 @@ define i32 @bb([4 x float] %f1.coerce, [4 x float] %f2.coerce, [4 x float] %f3.c
10298
; CHECK-NEXT: .cfi_def_cfa_offset 32
10399
; CHECK-NEXT: caddi ca2, csp, 16
104100
; CHECK-NEXT: scbndsi ca2, ca2, 16
105-
; CHECK-NEXT: li a4, -11
101+
; CHECK-NEXT: lui a4, 1048544
102+
; CHECK-NEXT: addiw a4, a4, -2
106103
; CHECK-NEXT: acperm ca4, ct1, a4
107104
; CHECK-NEXT: sc ca4, 0(ca2)
108105
; CHECK-NEXT: sc ct1, 0(csp)
@@ -210,7 +207,8 @@ define i32 @f(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i3
210207
; CHECK-NEXT: lw a1, 0(ct1)
211208
; CHECK-NEXT: caddi ca2, csp, 16
212209
; CHECK-NEXT: scbndsi ca2, ca2, 16
213-
; CHECK-NEXT: li a3, -11
210+
; CHECK-NEXT: lui a3, 1048544
211+
; CHECK-NEXT: addiw a3, a3, -2
214212
; CHECK-NEXT: acperm ca0, ca0, a3
215213
; CHECK-NEXT: sc ca0, 0(ca2)
216214
; CHECK-NEXT: lc ca0, 16(csp)
@@ -335,7 +333,8 @@ define i32 @tryhitassert(i32, i32, i32, i32, i32, i32, i32, i32, i32 %memarg, ..
335333
; CHECK-NEXT: lw a1, 0(ct1)
336334
; CHECK-NEXT: caddi ca2, csp, 16
337335
; CHECK-NEXT: scbndsi ca2, ca2, 16
338-
; CHECK-NEXT: li a3, -11
336+
; CHECK-NEXT: lui a3, 1048544
337+
; CHECK-NEXT: addiw a3, a3, -2
339338
; CHECK-NEXT: acperm ca0, ca0, a3
340339
; CHECK-NEXT: sc ca0, 0(ca2)
341340
; CHECK-NEXT: lc ca0, 16(csp)

llvm/test/CodeGen/RISCV/cheri/purecap-bounded-varargs.ll

Lines changed: 27 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,13 @@
66
define i32 @caller_test_scalars(i32 %x, i128 %y, i64 %z, float %f, double %d) local_unnamed_addr addrspace(200) nounwind {
77
; CHECK-LABEL: caller_test_scalars:
88
; CHECK: # %bb.0: # %entry
9-
; CHECK-NEXT: cincoffset csp, csp, -192
10-
; CHECK-NEXT: sc cra, 176(csp) # 16-byte Folded Spill
11-
; CHECK-NEXT: sc cs0, 160(csp) # 16-byte Folded Spill
12-
; CHECK-NEXT: sc cs1, 144(csp) # 16-byte Folded Spill
13-
; CHECK-NEXT: sc cs2, 128(csp) # 16-byte Folded Spill
14-
; CHECK-NEXT: sc cs3, 112(csp) # 16-byte Folded Spill
15-
; CHECK-NEXT: sc cs4, 96(csp) # 16-byte Folded Spill
9+
; CHECK-NEXT: cincoffset csp, csp, -176
10+
; CHECK-NEXT: sc cra, 160(csp) # 16-byte Folded Spill
11+
; CHECK-NEXT: sc cs0, 144(csp) # 16-byte Folded Spill
12+
; CHECK-NEXT: sc cs1, 128(csp) # 16-byte Folded Spill
13+
; CHECK-NEXT: sc cs2, 112(csp) # 16-byte Folded Spill
14+
; CHECK-NEXT: sc cs3, 96(csp) # 16-byte Folded Spill
15+
; CHECK-NEXT: sc cs4, 80(csp) # 16-byte Folded Spill
1616
; CHECK-NEXT: mv s0, a5
1717
; CHECK-NEXT: mv s1, a3
1818
; CHECK-NEXT: mv s2, a2
@@ -22,24 +22,24 @@ define i32 @caller_test_scalars(i32 %x, i128 %y, i64 %z, float %f, double %d) lo
2222
; CHECK-NEXT: srli a0, a0, 32
2323
; CHECK-NEXT: call __extendsfdf2
2424
; CHECK-NEXT: mv a1, a0
25-
; CHECK-NEXT: sd s0, 80(csp)
26-
; CHECK-NEXT: sd s1, 48(csp)
27-
; CHECK-NEXT: sd s2, 32(csp)
25+
; CHECK-NEXT: sd s0, 64(csp)
26+
; CHECK-NEXT: sd s1, 32(csp)
27+
; CHECK-NEXT: sd s2, 24(csp)
2828
; CHECK-NEXT: sd s3, 16(csp)
2929
; CHECK-NEXT: sd s4, 0(csp)
30-
; CHECK-NEXT: csetbounds ca0, csp, 96
30+
; CHECK-NEXT: csetbounds ca0, csp, 80
3131
; CHECK-NEXT: li a2, -11
3232
; CHECK-NEXT: candperm ct1, ca0, a2
3333
; CHECK-NEXT: li a0, 5
34-
; CHECK-NEXT: sd a1, 64(csp)
34+
; CHECK-NEXT: sd a1, 48(csp)
3535
; CHECK-NEXT: call callee
36-
; CHECK-NEXT: lc cra, 176(csp) # 16-byte Folded Reload
37-
; CHECK-NEXT: lc cs0, 160(csp) # 16-byte Folded Reload
38-
; CHECK-NEXT: lc cs1, 144(csp) # 16-byte Folded Reload
39-
; CHECK-NEXT: lc cs2, 128(csp) # 16-byte Folded Reload
40-
; CHECK-NEXT: lc cs3, 112(csp) # 16-byte Folded Reload
41-
; CHECK-NEXT: lc cs4, 96(csp) # 16-byte Folded Reload
42-
; CHECK-NEXT: cincoffset csp, csp, 192
36+
; CHECK-NEXT: lc cra, 160(csp) # 16-byte Folded Reload
37+
; CHECK-NEXT: lc cs0, 144(csp) # 16-byte Folded Reload
38+
; CHECK-NEXT: lc cs1, 128(csp) # 16-byte Folded Reload
39+
; CHECK-NEXT: lc cs2, 112(csp) # 16-byte Folded Reload
40+
; CHECK-NEXT: lc cs3, 96(csp) # 16-byte Folded Reload
41+
; CHECK-NEXT: lc cs4, 80(csp) # 16-byte Folded Reload
42+
; CHECK-NEXT: cincoffset csp, csp, 176
4343
; CHECK-NEXT: ret
4444
entry:
4545
%conv = fpext float %f to double
@@ -75,21 +75,22 @@ entry:
7575
define i32 @caller_test_struct(i32 %x, [2 x float] %y.coerce, i32 %z, { i8 addrspace(200)*, i64} %u.coerce) local_unnamed_addr addrspace(200) nounwind {
7676
; CHECK-LABEL: caller_test_struct:
7777
; CHECK: # %bb.0: # %entry
78-
; CHECK-NEXT: cincoffset csp, csp, -80
79-
; CHECK-NEXT: sc cra, 64(csp) # 16-byte Folded Spill
80-
; CHECK-NEXT: mv a5, a0
78+
; CHECK-NEXT: cincoffset csp, csp, -96
79+
; CHECK-NEXT: sc cra, 80(csp) # 16-byte Folded Spill
80+
; CHECK-NEXT: mv a6, a0
81+
; CHECK-NEXT: sd a5, 64(csp)
8182
; CHECK-NEXT: sc ca4, 48(csp)
8283
; CHECK-NEXT: sd a3, 32(csp)
8384
; CHECK-NEXT: sd a2, 24(csp)
8485
; CHECK-NEXT: sd a1, 16(csp)
85-
; CHECK-NEXT: csetbounds ca0, csp, 64
86+
; CHECK-NEXT: csetbounds ca0, csp, 80
8687
; CHECK-NEXT: li a1, -11
8788
; CHECK-NEXT: candperm ct1, ca0, a1
8889
; CHECK-NEXT: li a0, 3
89-
; CHECK-NEXT: sd a5, 0(csp)
90+
; CHECK-NEXT: sd a6, 0(csp)
9091
; CHECK-NEXT: call callee
91-
; CHECK-NEXT: lc cra, 64(csp) # 16-byte Folded Reload
92-
; CHECK-NEXT: cincoffset csp, csp, 80
92+
; CHECK-NEXT: lc cra, 80(csp) # 16-byte Folded Reload
93+
; CHECK-NEXT: cincoffset csp, csp, 96
9394
; CHECK-NEXT: ret
9495
entry:
9596
%call = tail call i32 (i32, ...) @callee(i32 3, i32 %x, [2 x float] %y.coerce, i32 %z, { i8 addrspace(200)*, i64} %u.coerce) nounwind

0 commit comments

Comments
 (0)