Skip to content

Commit 2e12ff6

Browse files
committed
[CHERI] Align aggregate types up to stack slot
1 parent b46607b commit 2e12ff6

File tree

3 files changed

+127
-10
lines changed

3 files changed

+127
-10
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/ADT/Statistic.h"
2525
#include "llvm/Analysis/MemoryLocation.h"
2626
#include "llvm/Analysis/VectorUtils.h"
27+
#include "llvm/CodeGen/Analysis.h"
2728
#include "llvm/CodeGen/MachineFrameInfo.h"
2829
#include "llvm/CodeGen/MachineFunction.h"
2930
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -2240,6 +2241,22 @@ unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
22402241
return NumRepeatedDivisors;
22412242
}
22422243

2244+
/// Decide whether an argument of type \p Ty has to be treated as one
/// consecutive group when it is passed.
///
/// \param Ty       IR type of the argument being classified.
/// \param CallConv Calling convention of the call (not consulted here).
/// \param isVarArg Whether the callee is variadic.
/// \param DL       Data layout used to flatten \p Ty into its value types.
/// \returns true only when all of the following hold: the call is variadic,
///          \p Ty is an array type, the subtarget uses a CHERI pure-capability
///          ABI with bounded varargs enabled, and every value type that \p Ty
///          flattens to is identical. Returns false otherwise.
bool RISCVTargetLowering::functionArgumentNeedsConsecutiveRegisters(
    Type *Ty, CallingConv::ID CallConv, bool isVarArg,
    const DataLayout &DL) const {
  // Reject on the cheap conditions first so that the type is only flattened
  // when the answer can actually be true.
  if (!isVarArg || !Ty->isArrayTy())
    return false;

  const bool HasBoundedVarArgs =
      RISCVABI::isCheriPureCapABI(Subtarget.getTargetABI()) &&
      Subtarget.hasCheriBoundVarArg();
  if (!HasBoundedVarArgs)
    return false;

  // All non-aggregate members of the type must have the same type.
  SmallVector<EVT> ValueVTs;
  ComputeValueVTs(*this, DL, Ty, ValueVTs);
  return all_equal(ValueVTs);
}
2259+
22432260
static SDValue getVLOperand(SDValue Op) {
22442261
assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
22452262
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
@@ -15361,6 +15378,37 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1536115378
if (ValVT.isFixedLengthVector())
1536215379
LocVT = TLI.getContainerForFixedLengthVector(LocVT);
1536315380

15381+
// For purecap bounded varargs - aggregate types which can fit into a stack
15382+
// slot are passed to CC_RISCV as separate arguments. We need to align the
15383+
// first argument to a CLEN alignment.
15384+
if (IsBoundedVarArgs && ArgFlags.isInConsecutiveRegs()) {
15385+
PendingLocs.push_back(
15386+
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
15387+
PendingArgFlags.push_back(ArgFlags);
15388+
if (!ArgFlags.isInConsecutiveRegsLast())
15389+
return false;
15390+
}
15391+
15392+
if (IsBoundedVarArgs && ArgFlags.isInConsecutiveRegsLast()) {
15393+
for (size_t I = 0, E = PendingLocs.size(); I < E; I++){
15394+
CCValAssign VA = PendingLocs[I];
15395+
unsigned Size =
15396+
VA.getValVT() == CLenVT ? DL.getPointerSize(200) : XLen / 8;
15397+
Align Alignment(Size);
15398+
// For consecutive types the first item needs to be aligned.
15399+
if (I == 0)
15400+
Alignment = Align(SlotSize);
15401+
15402+
unsigned StackOffset = State.AllocateStack(Size, Alignment);
15403+
State.addLoc(CCValAssign::getMem(VA.getValNo(), VA.getValVT(),
15404+
StackOffset, VA.getLocVT(),
15405+
VA.getLocInfo()));
15406+
}
15407+
PendingLocs.clear();
15408+
PendingArgFlags.clear();
15409+
return false;
15410+
}
15411+
1536415412
// Split arguments might be passed indirectly, so keep track of the pending
1536515413
// values. Split vectors are passed via a mix of registers and indirectly, so
1536615414
// treat them as we would any other argument.
@@ -15440,15 +15488,9 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1544015488
Reg = State.AllocateReg(ArgGPRs);
1544115489
}
1544215490

15443-
// Aggregate types i.e. structs/arrays which can fit into 2*XLEN
15444-
// Don't allocate a slot for each instead we make sure that the next element
15445-
// is then properly aligned.
15446-
bool AllocateSlot = IsBoundedVarArgs;
15447-
if (OrigTy && OrigTy->isAggregateType())
15448-
AllocateSlot = false;
1544915491
unsigned StackOffset =
1545015492
Reg ? 0
15451-
: (AllocateSlot
15493+
: (IsBoundedVarArgs
1545215494
? State.AllocateStack(SlotSize, Align(SlotSize))
1545315495
: State.AllocateStack(StoreSizeBytes, StackAlign));
1545415496

@@ -16454,22 +16496,23 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
1645416496
SDValue Address =
1645516497
DAG.getPointerAdd(DL, StackPtr, VA.getLocMemOffset());
1645616498

16457-
if (UseBoundeMemArgsCaller && Outs[i].IsFixed) {
16499+
if (UseBoundeMemArgsCaller) {
1645816500
if (FirstArgAddr == SDValue()) {
1645916501
FirstArgAddr = Address;
1646016502
MemArgStartOffset = VA.getLocMemOffset();
1646116503
}
1646216504
unsigned VTSize = VA.getValVT().getSizeInBits() / 8;
1646316505
MemArgEndOffset = VA.getLocMemOffset() + VTSize;
1646416506
}
16507+
1646516508
if (UseBoundedVarArgs && !Outs[i].IsFixed) {
1646616509
if (FirstVAAddr == SDValue()) {
1646716510
FirstVAAddr = Address;
1646816511
VAArgStartOffset = VA.getLocMemOffset();
1646916512
}
16470-
Align OffsetAlign = Align(PtrLenBytes);
1647116513
unsigned VTSize = VA.getValVT().getSizeInBits() / 8;
16472-
VAArgEndOffset = alignTo(VA.getLocMemOffset() + VTSize, OffsetAlign);
16514+
VAArgEndOffset =
16515+
alignTo(VA.getLocMemOffset() + VTSize, Align(PtrLenBytes));
1647316516
MemArgEndOffset = VAArgEndOffset;
1647416517
}
1647516518

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,10 @@ class RISCVTargetLowering : public TargetLowering {
977977
/// For available scheduling models FDIV + two independent FMULs are much
978978
/// faster than two FDIVs.
979979
unsigned combineRepeatedFPDivisors() const override;
980+
981+
bool functionArgumentNeedsConsecutiveRegisters(
982+
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
983+
const DataLayout &DL) const override;
980984
};
981985

982986
namespace RISCV {
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc -mtriple riscv64 -mattr=+zcheripurecap,+zcherihybrid,+cap-mode,+cheri-bounded-vararg,+cheri-bounded-memarg-callee,+cheri-bounded-memarg-caller -target-abi l64pc128d %s -o - | FileCheck %s
3+
4+
define dso_local i32 @g() addrspace(200) {
5+
; CHECK-LABEL: g:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: caddi csp, csp, -48
8+
; CHECK-NEXT: .cfi_def_cfa_offset 48
9+
; CHECK-NEXT: sc cra, 32(csp) # 16-byte Folded Spill
10+
; CHECK-NEXT: .cfi_offset ra, -16
11+
; CHECK-NEXT: li a0, 200
12+
; CHECK-NEXT: sd a0, 24(csp)
13+
; CHECK-NEXT: li a0, 100
14+
; CHECK-NEXT: sd a0, 16(csp)
15+
; CHECK-NEXT: li a0, 8
16+
; CHECK-NEXT: li a1, 32
17+
; CHECK-NEXT: scbndsr ct1, csp, a1
18+
; CHECK-NEXT: li a1, 1
19+
; CHECK-NEXT: li a2, 2
20+
; CHECK-NEXT: li a3, 3
21+
; CHECK-NEXT: li a4, 4
22+
; CHECK-NEXT: li a5, 5
23+
; CHECK-NEXT: li a6, 6
24+
; CHECK-NEXT: li a7, 7
25+
; CHECK-NEXT: sd a0, 0(csp)
26+
; CHECK-NEXT: li a0, 0
27+
; CHECK-NEXT: call f
28+
; CHECK-NEXT: lc cra, 32(csp) # 16-byte Folded Reload
29+
; CHECK-NEXT: caddi csp, csp, 48
30+
; CHECK-NEXT: ret
31+
entry:
32+
%call = tail call signext i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...) @f(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, [2 x i64] [i64 100, i64 200])
33+
ret i32 %call
34+
}
35+
36+
declare i32 @f(i32, i32, i32, i32, i32, i32, i32, i32, i32, ...) addrspace(200)
37+
38+
define dso_local i128 @foo() addrspace(200) {
39+
; CHECK-LABEL: foo:
40+
; CHECK: # %bb.0: # %entry
41+
; CHECK-NEXT: caddi csp, csp, -48
42+
; CHECK-NEXT: .cfi_def_cfa_offset 48
43+
; CHECK-NEXT: sc cra, 32(csp) # 16-byte Folded Spill
44+
; CHECK-NEXT: .cfi_offset ra, -16
45+
; CHECK-NEXT: sd zero, 24(csp)
46+
; CHECK-NEXT: li a0, 789
47+
; CHECK-NEXT: sd a0, 16(csp)
48+
; CHECK-NEXT: sd zero, 8(csp)
49+
; CHECK-NEXT: li a1, 456
50+
; CHECK-NEXT: li a0, 32
51+
; CHECK-NEXT: scbndsr ct1, csp, a0
52+
; CHECK-NEXT: caddi ca0, cnull, 1
53+
; CHECK-NEXT: li a5, 123
54+
; CHECK-NEXT: sd a1, 0(csp)
55+
; CHECK-NEXT: li a1, 0
56+
; CHECK-NEXT: li a2, 0
57+
; CHECK-NEXT: li a3, 0
58+
; CHECK-NEXT: li a4, 0
59+
; CHECK-NEXT: li a6, 0
60+
; CHECK-NEXT: call bar
61+
; CHECK-NEXT: lc cra, 32(csp) # 16-byte Folded Reload
62+
; CHECK-NEXT: caddi csp, csp, 48
63+
; CHECK-NEXT: ret
64+
entry:
65+
%call = call i128 (ptr addrspace(200), i128, i128, i128, ...) @bar(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i64 1), i128 0, i128 0, i128 123, i128 456, i128 789)
66+
ret i128 %call
67+
}
68+
69+
declare i128 @bar(ptr addrspace(200), i128, i128, i128, ...) addrspace(200)
70+

0 commit comments

Comments
 (0)