Skip to content

Commit 74782d4

Browse files
committed
[CHERIOT] Use capability registers to store f64 values.
This enables each f64 to be passed by value in a single cap register, rather than in pairs of integer registers. This required adding explicit type annotations to various places in the XCheri tblgen files, as the GPCR class can now hold values of type c64 or f64, breaking type inference.
1 parent 41640af commit 74782d4

File tree

6 files changed

+459
-91
lines changed

6 files changed

+459
-91
lines changed

llvm/lib/Target/RISCV/RISCVCallingConv.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,14 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
501501
}
502502
}
503503

504+
// Cheriot uses GPCR without a bitcast when possible.
505+
if (LocVT == MVT::f64 && Subtarget.hasVendorXCheriot() && !IsPureCapVarArgs) {
506+
if (MCRegister Reg = State.AllocateReg(ArgGPCRs)) {
507+
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
508+
return false;
509+
}
510+
}
511+
504512
// FP smaller than XLen, uses custom GPR.
505513
if (LocVT == MVT::f16 || LocVT == MVT::bf16 ||
506514
(LocVT == MVT::f32 && XLen == 64)) {

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
178178
addRegisterClass(CapType, &RISCV::GPCRRegClass);
179179
}
180180

181+
if (Subtarget.hasVendorXCheriot()) {
182+
// Cheriot holds f64's in capability registers.
183+
addRegisterClass(MVT::f64, &RISCV::GPCRRegClass);
184+
}
185+
181186
static const MVT::SimpleValueType BoolVecVTs[] = {
182187
MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
183188
MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
@@ -727,6 +732,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
727732
setLibcallImpl(RTLIB::MEMSET, RTLIB::memset);
728733
}
729734

735+
if (Subtarget.hasVendorXCheriot()) {
736+
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
737+
738+
static const unsigned CheriotF64ExpandOps[] = {
739+
ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
740+
ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
741+
ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND,
742+
ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS,
743+
ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD,
744+
ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
745+
ISD::STRICT_FMA};
746+
setOperationAction(CheriotF64ExpandOps, MVT::f64, Expand);
747+
}
748+
730749
// TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
731750
// Unfortunately this can't be determined just from the ISA naming string.
732751
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
@@ -6743,11 +6762,44 @@ static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
67436762
return SDValue();
67446763
}
67456764

6746-
SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6747-
SelectionDAG &DAG) const {
6765+
SDValue
6766+
RISCVTargetLowering::lowerConstantFP(SDValue Op, SelectionDAG &DAG,
6767+
const RISCVSubtarget &Subtarget) const {
67486768
MVT VT = Op.getSimpleValueType();
67496769
const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
67506770

6771+
if (Subtarget.hasVendorXCheriot()) {
6772+
// Cheriot needs to custom lower f64 immediates using csethigh
6773+
if (VT != MVT::f64)
6774+
return Op;
6775+
6776+
SDLoc DL(Op);
6777+
uint64_t Val = Imm.bitcastToAPInt().getLimitedValue();
6778+
6779+
// Materialize 0.0 as cnull
6780+
if (Val == 0)
6781+
return DAG.getRegister(getNullCapabilityRegister(), MVT::f64);
6782+
6783+
// Otherwise, materialize the low part into a 32-bit register.
6784+
auto Lo = DAG.getConstant(Val & 0xFFFFFFFF, DL, MVT::i32);
6785+
auto LoAsCap = DAG.getTargetInsertSubreg(RISCV::sub_cap_addr, DL, MVT::c64,
6786+
DAG.getUNDEF(MVT::f64), Lo);
6787+
6788+
// The high half of a capability register is zeroed by integer ops,
6789+
// so if we wanted a zero high half then we are done.
6790+
if (Val >> 32 == 0)
6791+
return DAG.getBitcast(MVT::f64, LoAsCap);
6792+
6793+
// Otherwise, materialize the high half and use csethigh to combine the two
6794+
// halves.
6795+
auto Hi = DAG.getConstant(Val >> 32, DL, MVT::i32);
6796+
auto Cap = DAG.getNode(
6797+
ISD::INTRINSIC_WO_CHAIN, DL, MVT::c64,
6798+
DAG.getTargetConstant(Intrinsic::cheri_cap_high_set, DL, MVT::i32),
6799+
LoAsCap, Hi);
6800+
return DAG.getBitcast(MVT::f64, Cap);
6801+
}
6802+
67516803
// Can this constant be selected by a Zfa FLI instruction?
67526804
bool Negate = false;
67536805
int Index = getLegalZfaFPImm(Imm, VT);
@@ -7417,7 +7469,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
74177469
case ISD::Constant:
74187470
return lowerConstant(Op, DAG, Subtarget);
74197471
case ISD::ConstantFP:
7420-
return lowerConstantFP(Op, DAG);
7472+
return lowerConstantFP(Op, DAG, Subtarget);
74217473
case ISD::SELECT:
74227474
return lowerSELECT(Op, DAG);
74237475
case ISD::BRCOND:

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,8 @@ class RISCVTargetLowering : public TargetLowering {
503503
SelectionDAG &DAG) const;
504504
SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
505505

506-
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
506+
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG,
507+
const RISCVSubtarget &Subtarget) const;
507508
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
508509
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
509510
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td

Lines changed: 69 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1451,7 +1451,7 @@ def : PatGpcrUimm12<int_cheri_bounded_stack_cap, CSetBoundsImm>;
14511451
def : PatGpcrGpr<int_cheri_bounded_stack_cap_dynamic, CSetBounds>;
14521452
def : PatGpcrUimm12<int_cheri_bounded_stack_cap_dynamic, CSetBoundsImm>;
14531453

1454-
def : Pat<(CapFrameAddrRegImm GPCR:$rs1, simm12:$imm12),
1454+
def : Pat<(CapFrameAddrRegImm(cPTR GPCR:$rs1), simm12:$imm12),
14551455
(CIncOffsetImm GPCR:$rs1, simm12:$imm12)>;
14561456

14571457
/// Pointer-Arithmetic Instructions
@@ -1463,14 +1463,15 @@ def : Pat<(XLenVT (int_cheri_cap_diff GPCR:$cs1, GPCR:$cs2)),
14631463
(XLenVT (EXTRACT_SUBREG GPCR:$cs2, sub_cap_addr)))>;
14641464

14651465
let Predicates = [IsPureCapABI] in {
1466-
def : Pat<(inttoptr (XLenVT GPR:$rs2)), (CIncOffset C0, GPR:$rs2)>;
1467-
def : Pat<(inttoptr simm12:$imm12), (CIncOffsetImm C0, simm12:$imm12)>;
1468-
def : Pat<(XLenVT (ptrtoint GPCR:$rs1)), (PseudoCGetAddr GPCR:$rs1)>;
1466+
def : Pat<(inttoptr(XLenVT GPR:$rs2)), (cPTR(CIncOffset(cPTR C0), GPR:$rs2))>;
1467+
def : Pat<(inttoptr simm12:$imm12), (cPTR(CIncOffsetImm(cPTR C0),
1468+
simm12:$imm12))>;
1469+
def : Pat<(XLenVT(ptrtoint(cPTR GPCR:$rs1))), (PseudoCGetAddr GPCR:$rs1)>;
14691470
}
14701471

14711472
/// Null Capability Patterns
14721473

1473-
def : Pat<(inttoptr (XLenVT 0)), (CLenVT (COPY C0))>;
1474+
def : Pat<(inttoptr(XLenVT 0)), (CLenVT(COPY(cPTR C0)))>;
14741475
def : Pat<(ptrtoint (CLenVT (inttoptr (XLenVT 0)))),
14751476
(XLenVT (COPY (XLenVT X0)))>;
14761477

@@ -1481,26 +1482,31 @@ def : Pat<(ptrtoint (CLenVT (inttoptr (XLenVT 0)))),
14811482
// * Break untagged < tagged semantics
14821483
// * Can't implement exact equality
14831484
class CheriSetCCPatGpcrGpcr<PatFrag CondOp, dag GprGprDag>
1484-
: Pat<(XLenVT (CondOp GPCR:$cs1, GPCR:$cs2)),
1485-
(OutPatFrag<(ops node:$rs1, node:$rs2), GprGprDag>
1486-
(XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)),
1487-
(XLenVT (EXTRACT_SUBREG GPCR:$cs2, sub_cap_addr)))>;
1485+
: Pat<(XLenVT(CondOp(cPTR GPCR:$cs1), (cPTR GPCR:$cs2))),
1486+
(OutPatFrag<(ops node:$rs1, node:$rs2), GprGprDag>(XLenVT
1487+
(EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)),
1488+
(XLenVT(EXTRACT_SUBREG GPCR:$cs2, sub_cap_addr)))>;
14881489

14891490
multiclass CheriSetCCPatGpcrImm<PatFrag CondOp, Operand ImmTy, dag GprImmDag> {
1490-
def : Pat<(XLenVT (CondOp GPCR:$cs1, (inttoptr ImmTy:$imm12))),
1491-
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>
1492-
(XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)), ImmTy:$imm12)>;
1493-
def : Pat<(XLenVT (CondOp GPCR:$cs1, (cptradd (inttoptr (XLenVT 0)), ImmTy:$imm12))),
1494-
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>
1495-
(XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)), ImmTy:$imm12)>;
1496-
def : Pat<(XLenVT (CondOp GPCR:$cs1,
1497-
(int_cheri_cap_offset_set (inttoptr (XLenVT 0)), ImmTy:$imm12))),
1498-
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>
1499-
(XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)), ImmTy:$imm12)>;
1500-
def : Pat<(XLenVT (CondOp GPCR:$cs1,
1501-
(int_cheri_cap_address_set (inttoptr (XLenVT 0)), ImmTy:$imm12))),
1502-
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>
1503-
(XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)), ImmTy:$imm12)>;
1491+
def : Pat<(XLenVT(CondOp(cPTR GPCR:$cs1), (inttoptr ImmTy:$imm12))),
1492+
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>(XLenVT
1493+
(EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)),
1494+
ImmTy:$imm12)>;
1495+
def : Pat<(XLenVT(CondOp(cPTR GPCR:$cs1), (cptradd(inttoptr(XLenVT 0)),
1496+
ImmTy:$imm12))),
1497+
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>(XLenVT
1498+
(EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)),
1499+
ImmTy:$imm12)>;
1500+
def : Pat<(XLenVT(CondOp(cPTR GPCR:$cs1),
1501+
(int_cheri_cap_offset_set(inttoptr(XLenVT 0)), ImmTy:$imm12))),
1502+
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>(XLenVT
1503+
(EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)),
1504+
ImmTy:$imm12)>;
1505+
def : Pat<(XLenVT(CondOp(cPTR GPCR:$cs1),
1506+
(int_cheri_cap_address_set(inttoptr(XLenVT 0)), ImmTy:$imm12))),
1507+
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>(XLenVT
1508+
(EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)),
1509+
ImmTy:$imm12)>;
15041510
}
15051511

15061512
multiclass CheriSetCCPatGpcrSimm12<PatFrag CondOp, dag GprImmDag>
@@ -1510,9 +1516,9 @@ multiclass CheriSetCCPatGpcrSimm12Plus1<PatFrag CondOp, dag GprImmDag>
15101516
: CheriSetCCPatGpcrImm<CondOp, simm12_plus1, GprImmDag>;
15111517

15121518
class CheriSetCCPatGpcrNull<PatFrag CondOp, dag GprDag>
1513-
: Pat<(XLenVT (CondOp GPCR:$cs1, (inttoptr (XLenVT 0)))),
1514-
(OutPatFrag<(ops node:$rs1), GprDag>
1515-
(XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)))>;
1519+
: Pat<(XLenVT(CondOp(cPTR GPCR:$cs1), (inttoptr(XLenVT 0)))),
1520+
(OutPatFrag<(ops node:$rs1), GprDag>(XLenVT(EXTRACT_SUBREG GPCR:$cs1,
1521+
sub_cap_addr)))>;
15161522

15171523
class Swap<PatFrag BinFrag>
15181524
: PatFrag<(ops node:$a, node:$b), (BinFrag $b, $a)>;
@@ -1555,11 +1561,10 @@ defm Select_GPCR : SelectCC_GPR_rrirr<GPCR, CLenVT>;
15551561
// No dedicated instructions; see above
15561562

15571563
class CheriBccPat<PatFrag CondOp, RVInstB Inst>
1558-
: Pat<(brcond (XLenVT (CondOp GPCR:$rs1, GPCR:$rs2)), bb:$imm12),
1559-
(Inst
1560-
(XLenVT (EXTRACT_SUBREG GPCR:$rs1, sub_cap_addr)),
1561-
(XLenVT (EXTRACT_SUBREG GPCR:$rs2, sub_cap_addr)),
1562-
bare_simm13_lsb0_bb:$imm12)>;
1564+
: Pat<(brcond(XLenVT(CondOp(cPTR GPCR:$rs1), (cPTR GPCR:$rs2))), bb:$imm12),
1565+
(Inst(XLenVT(EXTRACT_SUBREG GPCR:$rs1, sub_cap_addr)),
1566+
(XLenVT(EXTRACT_SUBREG GPCR:$rs2, sub_cap_addr)),
1567+
bare_simm13_lsb0_bb:$imm12)>;
15631568

15641569
def : CheriBccPat<seteq, BEQ>;
15651570
def : CheriBccPat<setne, BNE>;
@@ -1569,11 +1574,10 @@ def : CheriBccPat<setult, BLTU>;
15691574
def : CheriBccPat<setuge, BGEU>;
15701575

15711576
class CheriBccSwapPat<PatFrag CondOp, RVInst InstBcc>
1572-
: Pat<(brcond (XLenVT (CondOp GPCR:$rs1, GPCR:$rs2)), bb:$imm12),
1573-
(InstBcc
1574-
(XLenVT (EXTRACT_SUBREG GPCR:$rs2, sub_cap_addr)),
1575-
(XLenVT (EXTRACT_SUBREG GPCR:$rs1, sub_cap_addr)),
1576-
bare_simm13_lsb0_bb:$imm12)>;
1577+
: Pat<(brcond(XLenVT(CondOp(cPTR GPCR:$rs1), (cPTR GPCR:$rs2))), bb:$imm12),
1578+
(InstBcc(XLenVT(EXTRACT_SUBREG GPCR:$rs2, sub_cap_addr)),
1579+
(XLenVT(EXTRACT_SUBREG GPCR:$rs1, sub_cap_addr)),
1580+
bare_simm13_lsb0_bb:$imm12)>;
15771581

15781582
def : CheriBccSwapPat<setgt, BLT>;
15791583
def : CheriBccSwapPat<setle, BGE>;
@@ -1587,15 +1591,18 @@ def : PatGpcrGpcr<riscv_cap_equal_exact, CSEQX, XLenVT>;
15871591

15881592
/// Special Capability Register Access Instructions
15891593

1590-
def : Pat<(int_cheri_ddc_get), (CSpecialRW SCR_DDC.Encoding, C0)>;
1591-
let Predicates = [HasCheri, IsPureCapABI] in
1592-
def : Pat<(int_cheri_stack_cap_get), (CLenVT (COPY C2))>;
1594+
def : Pat<(int_cheri_ddc_get), (CSpecialRW SCR_DDC.Encoding, (cPTR C0))>;
1595+
let Predicates = [HasCheri,
1596+
IsPureCapABI] in def : Pat<(int_cheri_stack_cap_get),
1597+
(CLenVT(COPY(cPTR C2)))>;
15931598

15941599
let Predicates = [HasCheri, IsCapMode] in
15951600
def : Pat<(int_cheri_pcc_get), (AUIPCC 0)>;
15961601

1597-
let Predicates = [HasCheri, NotCapMode] in
1598-
def : Pat<(int_cheri_pcc_get), (CSpecialRW SCR_PCC.Encoding, C0)>;
1602+
let Predicates = [HasCheri,
1603+
NotCapMode] in def : Pat<(int_cheri_pcc_get),
1604+
(CSpecialRW SCR_PCC.Encoding,
1605+
(cPTR C0))>;
15991606

16001607
/// Fast Register-Clearing Instructions
16011608

@@ -1824,13 +1831,14 @@ defm : PseudoCmpXchgPat<"atomic_cmp_swap_cap", PseudoCmpXchgCap, CLenVT, GPCR>;
18241831
/// Capability Mode Instructions
18251832

18261833
multiclass CheriLdPat<PatFrag LoadOp, RVInst Inst, ValueType ReturnVt = XLenVT> {
1827-
def : Pat<(ReturnVt (LoadOp (CapRegImm GPCR:$rs1, simm12:$imm12))),
1834+
def : Pat<(ReturnVt(LoadOp(CapRegImm(cPTR GPCR:$rs1), simm12:$imm12))),
18281835
(Inst GPCR:$rs1, simm12:$imm12)>;
18291836
}
18301837

18311838
multiclass CheriStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy, ValueType StoreVt = XLenVT> {
1832-
def : Pat<(StoreOp (StoreVt StTy:$rs2), (CapRegImm GPCR:$rs1, simm12:$imm12)),
1833-
(Inst (StoreVt StTy:$rs2), GPCR:$rs1, simm12:$imm12)>;
1839+
def : Pat<(StoreOp(StoreVt StTy:$rs2), (CapRegImm(cPTR GPCR:$rs1),
1840+
simm12:$imm12)),
1841+
(Inst(StoreVt StTy:$rs2), GPCR:$rs1, simm12:$imm12)>;
18341842
}
18351843

18361844
multiclass CheriAtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy, ValueType StoreVt>
@@ -2287,6 +2295,11 @@ defm : CheriLdPat<load, CLC_128, CLenVT>;
22872295
defm : CheriStPat<store, CSC_128, GPCR, CLenVT>;
22882296
} // Predicates = [HasCheri, IsRV64, IsCapMode]
22892297

2298+
let Predicates = [HasCheri, HasCheriot, IsRV32, IsCapMode] in {
2299+
defm : CheriLdPat<load, CLC_64, f64>;
2300+
defm : CheriStPat<store, CSC_64, GPCR, f64>;
2301+
} // Predicates = [HasCheri, HasCheriot, IsRV32, IsCapMode]
2302+
22902303
//===----------------------------------------------------------------------===//
22912304
// Compress Instruction tablegen backend.
22922305
//===----------------------------------------------------------------------===//
@@ -2429,7 +2442,17 @@ let Predicates = [HasCheri, IsRV32, IsCapMode, IsRVE] in {
24292442
let mayLoad = true, mayStore = false, hasSideEffects = false in
24302443
def PseudoCLLW : Pseudo<(outs GPCR:$dst), (ins bare_symbol:$src), [],
24312444
"cllc", "$dst, $src">;
2432-
def : Pat<(load (cPTR (load (iPTR globaladdr:$src)))),
2445+
def : Pat<(c64(load(cPTR(load(iPTR globaladdr:$src))))),
2446+
(PseudoCLLW bare_symbol:$src)>;
2447+
def : Pat<(f64(load(cPTR(load(iPTR globaladdr:$src))))),
24332448
(PseudoCLLW bare_symbol:$src)>;
24342449
} // Predicates = [HasCheri, IsRV32, IsCapMode, IsRVE]
2435-
2450+
2451+
// Cheriot stores f64 in cap registers, so bitcasting between f64 and c64
2452+
// is a no-op.
2453+
multiclass NopCapRegCast<ValueType Ty1, ValueType Ty2> {
2454+
def : Pat<(Ty1(bitconvert(Ty2 GPCR:$Val))), (Ty1 GPCR:$Val)>;
2455+
def : Pat<(Ty2(bitconvert(Ty1 GPCR:$Val))), (Ty2 GPCR:$Val)>;
2456+
}
2457+
2458+
let Predicates = [HasCheri, HasCheriot] in { defm : NopCapRegCast<c64, f64>; }

0 commit comments

Comments
 (0)