Skip to content

Commit ee9fc15

Browse files
committed
[CHERI] Fix Bounded Mem/Var Args layout on stack
1 parent 8581eb8 commit ee9fc15

File tree

3 files changed

+61
-81
lines changed

3 files changed

+61
-81
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 32 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -15234,8 +15234,10 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1523415234
? Subtarget.typeForCapabilities()
1523515235
: MVT();
1523615236
MVT PtrVT = DL.isFatPointer(DL.getAllocaAddrSpace()) ? CLenVT : XLenVT;
15237-
bool IsPureCapVarArgs = !IsFixed && RISCVABI::isCheriPureCapABI(ABI);
15237+
bool IsPureCap = RISCVABI::isCheriPureCapABI(ABI);
15238+
bool IsPureCapVarArgs = !IsFixed && IsPureCap;
1523815239
bool IsBoundedVarArgs = IsPureCapVarArgs && Subtarget.hasCheriBoundVarArg();
15240+
unsigned SlotSize = PtrVT.getFixedSizeInBits() / 8;
1523915241

1524015242
// Static chain parameter must not be passed in normal argument registers,
1524115243
// so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
@@ -15309,7 +15311,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1530915311
// not apply.
1531015312
// TODO: Pure capability varargs bounds
1531115313
unsigned TwoXLenInBytes = (2 * XLen) / 8;
15312-
if (!IsFixed && !RISCVABI::isCheriPureCapABI(ABI) &&
15314+
if (!IsFixed && !IsPureCap &&
1531315315
ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
1531415316
DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
1531515317
unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
@@ -15325,38 +15327,10 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1532515327
assert(PendingLocs.size() == PendingArgFlags.size() &&
1532615328
"PendingLocs and PendingArgFlags out of sync");
1532715329

15328-
// Bounded VarArgs
15329-
// Each bounded varargs is assigned a 2*XLen slot on the stack
15330-
// If the value is small enough to fit into the slot it is passed
15331-
// directly - otherwise a capability to the value is filled into the
15332-
// slot.
15333-
if (!IsFixed && IsBoundedVarArgs) {
15334-
unsigned SlotSize = CLenVT.getFixedSizeInBits() / 8;
15335-
// Aggregates of size 2*XLen need special handling here
15336-
// as LLVM with treat them as two separate XLen wide arguments
15337-
if(LocVT == XLenVT && OrigTy && OrigTy->isAggregateType()){
15338-
PendingLocs.push_back(
15339-
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
15340-
PendingArgFlags.push_back(ArgFlags);
15341-
if(PendingLocs.size() == 2){
15342-
CCValAssign VA = PendingLocs[0];
15343-
ISD::ArgFlagsTy AF = PendingArgFlags[0];
15344-
PendingLocs.clear();
15345-
PendingArgFlags.clear();
15346-
return CC_RISCVAssign2XLen(XLen, State, IsPureCapVarArgs, VA, AF,
15347-
ValNo, ValVT, LocVT, ArgFlags);
15348-
}
15349-
return false;
15350-
}
15351-
unsigned StackOffset = State.AllocateStack(SlotSize, Align(SlotSize));
15352-
State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
15353-
return false;
15354-
}
15355-
1535615330
// Handle passing f64 on RV32D with a soft float ABI or when floating point
1535715331
// registers are exhausted. Also handle for pure capability varargs which are
1535815332
// always passed on the stack.
15359-
if ((UseGPRForF64 || IsPureCapVarArgs) && XLen == 32 && ValVT == MVT::f64) {
15333+
if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
1536015334
assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
1536115335
"Can't lower f64 if it is split");
1536215336
// Depending on available argument GPRS, f64 may be passed in a pair of
@@ -15366,7 +15340,9 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1536615340
Register Reg = IsPureCapVarArgs ? 0 : State.AllocateReg(ArgGPRs);
1536715341
LocVT = MVT::i32;
1536815342
if (!Reg) {
15369-
unsigned StackOffset = State.AllocateStack(8, Align(8));
15343+
unsigned StackOffset =
15344+
IsBoundedVarArgs ? State.AllocateStack(SlotSize, Align(SlotSize))
15345+
: State.AllocateStack(8, Align(8));
1537015346
State.addLoc(
1537115347
CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1537215348
return false;
@@ -15407,8 +15383,8 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1540715383
ISD::ArgFlagsTy AF = PendingArgFlags[0];
1540815384
PendingLocs.clear();
1540915385
PendingArgFlags.clear();
15410-
return CC_RISCVAssign2XLen(XLen, State, IsPureCapVarArgs, VA, AF,
15411-
ValNo, ValVT, LocVT, ArgFlags);
15386+
return CC_RISCVAssign2XLen(XLen, State, IsPureCapVarArgs, VA, AF, ValNo,
15387+
ValVT, LocVT, ArgFlags);
1541215388
}
1541315389

1541415390
// Will be passed indirectly; make sure we allocate the right type of
@@ -15461,8 +15437,17 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1546115437
Reg = State.AllocateReg(ArgGPRs);
1546215438
}
1546315439

15440+
// Aggregate types i.e. structs/arrays which can fit into 2*XLEN
15441+
// don't get a slot each; instead we make sure that the next element
15442+
// is then properly aligned.
15443+
bool AllocateSlot = IsBoundedVarArgs;
15444+
if (OrigTy && OrigTy->isAggregateType())
15445+
AllocateSlot = false;
1546415446
unsigned StackOffset =
15465-
Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
15447+
Reg ? 0
15448+
: (AllocateSlot
15449+
? State.AllocateStack(SlotSize, Align(SlotSize))
15450+
: State.AllocateStack(StoreSizeBytes, StackAlign));
1546615451

1546715452
// If we reach this point and PendingLocs is non-empty, we must be at the
1546815453
// end of a split argument that must be passed indirectly.
@@ -16339,7 +16324,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1633916324

1634016325
// Bookkeeping for cheri varargs/memargs
1634116326
int VAArgStartOffset, VAArgEndOffset, MemArgStartOffset, MemArgEndOffset;
16342-
SDValue FirstAddr, FirstArgAddr;
16327+
SDValue FirstVAAddr, FirstArgAddr;
1634316328

1634416329
// Copy argument values to their designated locations.
1634516330
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
@@ -16466,34 +16451,23 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1646616451
SDValue Address =
1646716452
DAG.getPointerAdd(DL, StackPtr, VA.getLocMemOffset());
1646816453

16469-
if (UseBoundeMemArgsCaller) {
16454+
if (UseBoundeMemArgsCaller && Outs[i].IsFixed) {
1647016455
if (FirstArgAddr == SDValue()) {
1647116456
FirstArgAddr = Address;
1647216457
MemArgStartOffset = VA.getLocMemOffset();
1647316458
}
1647416459
unsigned VTSize = VA.getValVT().getSizeInBits() / 8;
1647516460
MemArgEndOffset = VA.getLocMemOffset() + VTSize;
16476-
if (!Outs[i].IsFixed) {
16477-
// we need to align to 16-byte slot
16478-
Align OffsetAlign = Align(PtrLenBytes);
16479-
Type *OrigTy = CLI.getArgs()[Outs[i].OrigArgIndex].Ty;
16480-
if (OrigTy && OrigTy->isAggregateType())
16481-
OffsetAlign = Align(PtrLenBytes / 2);
16482-
MemArgEndOffset = alignTo(MemArgEndOffset, OffsetAlign);
16483-
}
1648416461
}
1648516462
if (UseBoundedVarArgs && !Outs[i].IsFixed) {
16486-
if (FirstAddr == SDValue()) {
16487-
FirstAddr = Address;
16463+
if (FirstVAAddr == SDValue()) {
16464+
FirstVAAddr = Address;
1648816465
VAArgStartOffset = VA.getLocMemOffset();
1648916466
}
1649016467
Align OffsetAlign = Align(PtrLenBytes);
16491-
Type *OrigTy = CLI.getArgs()[Outs[i].OrigArgIndex].Ty;
16492-
if (OrigTy && OrigTy->isAggregateType())
16493-
OffsetAlign = Align(PtrLenBytes / 2);
16494-
1649516468
unsigned VTSize = VA.getValVT().getSizeInBits() / 8;
1649616469
VAArgEndOffset = alignTo(VA.getLocMemOffset() + VTSize, OffsetAlign);
16470+
MemArgEndOffset = VAArgEndOffset;
1649716471
}
1649816472

1649916473
// Emit the store.
@@ -16503,15 +16477,19 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1650316477
}
1650416478

1650516479
if(IsVarArg && UseBoundedVarArgs && !UseBoundeMemArgsCaller) {
16506-
if (FirstAddr != SDValue()) {
16480+
if (FirstVAAddr != SDValue()) {
1650716481
SDValue VarArgs = DAG.getCSetBounds(
16508-
FirstAddr, DL, VAArgEndOffset - VAArgStartOffset, Align(),
16482+
FirstVAAddr, DL, VAArgEndOffset - VAArgStartOffset, Align(),
1650916483
"CHERI-RISCV variadic call lowering",
1651016484
cheri::SetBoundsPointerSource::Stack, "varargs call bounds setting");
1651116485
// clear write and execute permissions on varargs. Clearing other
1651216486
// permissions shouldn't be necessary since the capability is derived from
1651316487
// CSP and that shouldn't have these in the first place.
16514-
uint64_t PermMask = -1UL & ~(CAP_AP_X | CAP_AP_W);
16488+
uint64_t ExecPerm =
16489+
Subtarget.hasStdExtZCheriPureCap() ? (1 << 17) : (1 << 1);
16490+
uint64_t WritePerm =
16491+
Subtarget.hasStdExtZCheriPureCap() ? (1 << 1) : (1 << 3);
16492+
uint64_t PermMask = -1UL & ~(ExecPerm | WritePerm);
1651516493
VarArgs = DAG.getNode(
1651616494
ISD::INTRINSIC_WO_CHAIN, DL, PtrVT,
1651716495
DAG.getConstant(Intrinsic::cheri_cap_perms_and, DL, XLenVT), VarArgs,

llvm/test/CodeGen/RISCV/cheri/bakewell/cheri-bounded-memargs-caller.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ define i32 @biz() local_unnamed_addr addrspace(200) nounwind {
146146
; CHECK-NEXT: sd a0, 16(csp)
147147
; CHECK-NEXT: li a1, 32
148148
; CHECK-NEXT: scbndsr ca1, csp, a1
149-
; CHECK-NEXT: li a2, -11
149+
; CHECK-NEXT: lui a2, 1048544
150+
; CHECK-NEXT: addiw a2, a2, -3
150151
; CHECK-NEXT: acperm ct1, ca1, a2
151152
; CHECK-NEXT: li a1, 1
152153
; CHECK-NEXT: li a2, 2

llvm/test/CodeGen/RISCV/cheri/purecap-bounded-varargs.ll

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
define i32 @caller_test_scalars(i32 %x, i128 %y, i64 %z, float %f, double %d) local_unnamed_addr addrspace(200) nounwind {
77
; CHECK-LABEL: caller_test_scalars:
88
; CHECK: # %bb.0: # %entry
9-
; CHECK-NEXT: cincoffset csp, csp, -192
10-
; CHECK-NEXT: sc cra, 176(csp) # 16-byte Folded Spill
11-
; CHECK-NEXT: sc cs0, 160(csp) # 16-byte Folded Spill
12-
; CHECK-NEXT: sc cs1, 144(csp) # 16-byte Folded Spill
13-
; CHECK-NEXT: sc cs2, 128(csp) # 16-byte Folded Spill
14-
; CHECK-NEXT: sc cs3, 112(csp) # 16-byte Folded Spill
15-
; CHECK-NEXT: sc cs4, 96(csp) # 16-byte Folded Spill
9+
; CHECK-NEXT: cincoffset csp, csp, -176
10+
; CHECK-NEXT: sc cra, 160(csp) # 16-byte Folded Spill
11+
; CHECK-NEXT: sc cs0, 144(csp) # 16-byte Folded Spill
12+
; CHECK-NEXT: sc cs1, 128(csp) # 16-byte Folded Spill
13+
; CHECK-NEXT: sc cs2, 112(csp) # 16-byte Folded Spill
14+
; CHECK-NEXT: sc cs3, 96(csp) # 16-byte Folded Spill
15+
; CHECK-NEXT: sc cs4, 80(csp) # 16-byte Folded Spill
1616
; CHECK-NEXT: mv s0, a5
1717
; CHECK-NEXT: mv s1, a3
1818
; CHECK-NEXT: mv s2, a2
@@ -22,24 +22,24 @@ define i32 @caller_test_scalars(i32 %x, i128 %y, i64 %z, float %f, double %d) lo
2222
; CHECK-NEXT: srli a0, a0, 32
2323
; CHECK-NEXT: call __extendsfdf2
2424
; CHECK-NEXT: mv a1, a0
25-
; CHECK-NEXT: sd s0, 80(csp)
26-
; CHECK-NEXT: sd s1, 48(csp)
27-
; CHECK-NEXT: sd s2, 32(csp)
25+
; CHECK-NEXT: sd s0, 64(csp)
26+
; CHECK-NEXT: sd s1, 32(csp)
27+
; CHECK-NEXT: sd s2, 24(csp)
2828
; CHECK-NEXT: sd s3, 16(csp)
2929
; CHECK-NEXT: sd s4, 0(csp)
30-
; CHECK-NEXT: csetbounds ca0, csp, 96
30+
; CHECK-NEXT: csetbounds ca0, csp, 80
3131
; CHECK-NEXT: li a2, -11
3232
; CHECK-NEXT: candperm ct1, ca0, a2
3333
; CHECK-NEXT: li a0, 5
34-
; CHECK-NEXT: sd a1, 64(csp)
34+
; CHECK-NEXT: sd a1, 48(csp)
3535
; CHECK-NEXT: call callee
36-
; CHECK-NEXT: lc cra, 176(csp) # 16-byte Folded Reload
37-
; CHECK-NEXT: lc cs0, 160(csp) # 16-byte Folded Reload
38-
; CHECK-NEXT: lc cs1, 144(csp) # 16-byte Folded Reload
39-
; CHECK-NEXT: lc cs2, 128(csp) # 16-byte Folded Reload
40-
; CHECK-NEXT: lc cs3, 112(csp) # 16-byte Folded Reload
41-
; CHECK-NEXT: lc cs4, 96(csp) # 16-byte Folded Reload
42-
; CHECK-NEXT: cincoffset csp, csp, 192
36+
; CHECK-NEXT: lc cra, 160(csp) # 16-byte Folded Reload
37+
; CHECK-NEXT: lc cs0, 144(csp) # 16-byte Folded Reload
38+
; CHECK-NEXT: lc cs1, 128(csp) # 16-byte Folded Reload
39+
; CHECK-NEXT: lc cs2, 112(csp) # 16-byte Folded Reload
40+
; CHECK-NEXT: lc cs3, 96(csp) # 16-byte Folded Reload
41+
; CHECK-NEXT: lc cs4, 80(csp) # 16-byte Folded Reload
42+
; CHECK-NEXT: cincoffset csp, csp, 176
4343
; CHECK-NEXT: ret
4444
entry:
4545
%conv = fpext float %f to double
@@ -75,21 +75,22 @@ entry:
7575
define i32 @caller_test_struct(i32 %x, [2 x float] %y.coerce, i32 %z, { i8 addrspace(200)*, i64} %u.coerce) local_unnamed_addr addrspace(200) nounwind {
7676
; CHECK-LABEL: caller_test_struct:
7777
; CHECK: # %bb.0: # %entry
78-
; CHECK-NEXT: cincoffset csp, csp, -80
79-
; CHECK-NEXT: sc cra, 64(csp) # 16-byte Folded Spill
80-
; CHECK-NEXT: mv a5, a0
78+
; CHECK-NEXT: cincoffset csp, csp, -96
79+
; CHECK-NEXT: sc cra, 80(csp) # 16-byte Folded Spill
80+
; CHECK-NEXT: mv a6, a0
81+
; CHECK-NEXT: sd a5, 64(csp)
8182
; CHECK-NEXT: sc ca4, 48(csp)
8283
; CHECK-NEXT: sd a3, 32(csp)
8384
; CHECK-NEXT: sd a2, 24(csp)
8485
; CHECK-NEXT: sd a1, 16(csp)
85-
; CHECK-NEXT: csetbounds ca0, csp, 64
86+
; CHECK-NEXT: csetbounds ca0, csp, 80
8687
; CHECK-NEXT: li a1, -11
8788
; CHECK-NEXT: candperm ct1, ca0, a1
8889
; CHECK-NEXT: li a0, 3
89-
; CHECK-NEXT: sd a5, 0(csp)
90+
; CHECK-NEXT: sd a6, 0(csp)
9091
; CHECK-NEXT: call callee
91-
; CHECK-NEXT: lc cra, 64(csp) # 16-byte Folded Reload
92-
; CHECK-NEXT: cincoffset csp, csp, 80
92+
; CHECK-NEXT: lc cra, 80(csp) # 16-byte Folded Reload
93+
; CHECK-NEXT: cincoffset csp, csp, 96
9394
; CHECK-NEXT: ret
9495
entry:
9596
%call = tail call i32 (i32, ...) @callee(i32 3, i32 %x, [2 x float] %y.coerce, i32 %z, { i8 addrspace(200)*, i64} %u.coerce) nounwind

0 commit comments

Comments
 (0)