Skip to content

Commit b4e4666

Browse files
committed
[VP][RISCV][WIP] Add a vp.load.ff intrinsic for fault only first load.
There seems to have been some interest in supporting early-exit loops recently. https://discourse.llvm.org/t/rfc-supporting-more-early-exit-loops/84690 This patch was extracted from our downstream, where we've been using it in our vectorizer. The LangRef entry still needs to be written up. Type legalization is also missing.
1 parent 911e94c commit b4e4666

File tree

13 files changed

+1919
-0
lines changed

13 files changed

+1919
-0
lines changed

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1572,6 +1572,8 @@ class SelectionDAG {
15721572
SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl,
15731573
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
15741574
ISD::MemIndexType IndexType);
1575+
SDValue getLoadFFVP(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
1576+
SDValue Mask, SDValue EVL, MachineMemOperand *MMO);
15751577

15761578
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT,
15771579
MachineMemOperand *MMO);

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3057,6 +3057,23 @@ class MaskedHistogramSDNode : public MaskedGatherScatterSDNode {
30573057
}
30583058
};
30593059

3060+
/// Memory node for ISD::VP_LOAD_FF.
///
/// The accessors below fix the operand layout: operand 1 is the base
/// pointer, operand 2 the mask and operand 3 the explicit vector length.
/// (SelectionDAG::getLoadFFVP places the chain at operand 0.)
class VPLoadFFSDNode : public MemSDNode {
3061+
public:
3062+
friend class SelectionDAG;
3063+
3064+
// Forwards everything to MemSDNode with the opcode fixed to VP_LOAD_FF.
VPLoadFFSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
3065+
MachineMemOperand *MMO)
3066+
: MemSDNode(ISD::VP_LOAD_FF, Order, dl, VTs, MemVT, MMO) {}
3067+
3068+
// Operand accessors.
const SDValue &getBasePtr() const { return getOperand(1); }
3069+
const SDValue &getMask() const { return getOperand(2); }
3070+
const SDValue &getVectorLength() const { return getOperand(3); }
3071+
3072+
// Supports isa<>/cast<>/dyn_cast<> by checking the node opcode.
static bool classof(const SDNode *N) {
3073+
return N->getOpcode() == ISD::VP_LOAD_FF;
3074+
}
3075+
};
3076+
30603077
class FPStateAccessSDNode : public MemSDNode {
30613078
public:
30623079
friend class SelectionDAG;

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1911,6 +1911,12 @@ def int_vp_load : DefaultAttrsIntrinsic<[ llvm_anyvector_ty],
19111911
llvm_i32_ty],
19121912
[ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
19131913

1914+
// Fault-only-first variant of the VP load (see the commit title).
// Results:  the loaded vector and an i32.
// Operands: pointer, i1 mask with the same width as the result vector, and an
//           i32 explicit vector length.
// NOTE(review): the i32 result is presumably the number of elements actually
// loaded; confirm once the LangRef entry is written.
def int_vp_load_ff : DefaultAttrsIntrinsic<[ llvm_anyvector_ty, llvm_i32_ty ],
1915+
[ llvm_anyptr_ty,
1916+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1917+
llvm_i32_ty],
1918+
[ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
1919+
19141920
def int_vp_gather: DefaultAttrsIntrinsic<[ llvm_anyvector_ty],
19151921
[ LLVMVectorOfAnyPointersToElt<0>,
19161922
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,

llvm/include/llvm/IR/VPIntrinsics.def

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,12 @@ VP_PROPERTY_FUNCTIONAL_OPC(Load)
587587
VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load)
588588
END_REGISTER_VP(vp_load, VP_LOAD)
589589

590+
// llvm.vp.load.ff(ptr,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(vp_load_ff, 1, 2)
591+
// val,vl,chain = VP_LOAD_FF chain,base,mask,evl
592+
BEGIN_REGISTER_VP_SDNODE(VP_LOAD_FF, -1, vp_load_ff, 2, 3)
593+
HELPER_MAP_VPID_TO_VPSD(vp_load_ff, VP_LOAD_FF)
594+
VP_PROPERTY_NO_FUNCTIONAL
595+
END_REGISTER_VP(vp_load_ff, VP_LOAD_FF)
590596
// llvm.experimental.vp.strided.load(ptr,stride,mask,vlen)
591597
BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_strided_load, 2, 3)
592598
// chain = EXPERIMENTAL_VP_STRIDED_LOAD chain,base,offset,stride,mask,evl

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10139,6 +10139,34 @@ SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT,
1013910139
return V;
1014010140
}
1014110141

10142+
/// Create (or reuse through the CSE map) a VP_LOAD_FF node.
///
/// Operands are {Chain, Ptr, Mask, EVL}. The node has three results: the
/// loaded value of type \p VT, a value with the same type as \p EVL, and the
/// output chain (MVT::Other). \p VT is also used as the node's memory VT.
/// Returns result 0 of the node.
SDValue SelectionDAG::getLoadFFVP(EVT VT, const SDLoc &dl, SDValue Chain,
10143+
SDValue Ptr, SDValue Mask, SDValue EVL,
10144+
MachineMemOperand *MMO) {
10145+
SDVTList VTs = getVTList(VT, EVL.getValueType(), MVT::Other);
10146+
SDValue Ops[] = {Chain, Ptr, Mask, EVL};
10147+
// Build the CSE key: opcode/types/operands, plus the memory VT, the node's
// subclass data and the MMO's address space and flags.
FoldingSetNodeID ID;
10148+
AddNodeIDNode(ID, ISD::VP_LOAD_FF, VTs, Ops);
10149+
ID.AddInteger(VT.getRawBits());
10150+
ID.AddInteger(getSyntheticNodeSubclassData<VPLoadFFSDNode>(dl.getIROrder(),
10151+
VTs, VT, MMO));
10152+
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
10153+
ID.AddInteger(MMO->getFlags());
10154+
void *IP = nullptr;
10155+
// An equivalent node already exists: refine its alignment with this MMO and
// return it instead of creating a duplicate.
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
10156+
cast<VPLoadFFSDNode>(E)->refineAlignment(MMO);
10157+
return SDValue(E, 0);
10158+
}
10159+
// Otherwise allocate a fresh node and attach its operands.
auto *N = newSDNode<VPLoadFFSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
10160+
VT, MMO);
10161+
createOperands(N, Ops);
10162+
10163+
// Register the new node in the CSE map at the insertion point found above
// and add it to the DAG.
CSEMap.InsertNode(N, IP);
10164+
InsertNode(N);
10165+
SDValue V(N, 0);
10166+
NewSDValueDbgMsg(V, "Creating new node: ", this);
10167+
return V;
10168+
}
10169+
1014210170
SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
1014310171
EVT MemVT, MachineMemOperand *MMO) {
1014410172
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8462,6 +8462,35 @@ void SelectionDAGBuilder::visitVPLoad(
84628462
setValue(&VPIntrin, LD);
84638463
}
84648464

8465+
/// Build the VP_LOAD_FF node for a vp.load.ff intrinsic call.
///
/// \p VT is the type of the loaded vector and \p EVLVT the type of the
/// intrinsic's second result. \p OpValues must hold exactly three values,
/// which are forwarded as pointer, mask and explicit vector length.
void SelectionDAGBuilder::visitVPLoadFF(
8466+
const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT,
8467+
const SmallVectorImpl<SDValue> &OpValues) {
8468+
assert(OpValues.size() == 3);
8469+
SDLoc DL = getCurSDLoc();
8470+
Value *PtrOperand = VPIntrin.getArgOperand(0);
8471+
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
8472+
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
8473+
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
8474+
SDValue LD;
8475+
bool AddToChain = true;
8476+
// Do not serialize variable-length loads of constant memory with
8477+
// anything.
8478+
// Fall back to the alignment DAG.getEVTAlign reports for VT when the
// intrinsic does not provide a pointer alignment.
if (!Alignment)
8479+
Alignment = DAG.getEVTAlign(VT);
8480+
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
8481+
AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
8482+
// Loads known to read constant memory hang off the entry node instead of the
// current root.
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
8483+
// The memory operand is created with an unknown size.
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
8484+
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
8485+
MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
8486+
LD = DAG.getLoadFFVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
8487+
MMO);
8488+
// Result 1 of the node has the type of the EVL operand; truncate it to the
// type of the intrinsic's second result.
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, EVLVT, LD.getValue(1));
8489+
// Result 2 is the output chain; record it as a pending load when the load was
// chained to the root.
if (AddToChain)
8490+
PendingLoads.push_back(LD.getValue(2));
8491+
// The intrinsic's value is the pair {loaded vector, truncated second result}.
setValue(&VPIntrin, DAG.getMergeValues({LD.getValue(0), Trunc}, DL));
8492+
}
8493+
84658494
void SelectionDAGBuilder::visitVPGather(
84668495
const VPIntrinsic &VPIntrin, EVT VT,
84678496
const SmallVectorImpl<SDValue> &OpValues) {
@@ -8695,6 +8724,9 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
86958724
case ISD::VP_LOAD:
86968725
visitVPLoad(VPIntrin, ValueVTs[0], OpValues);
86978726
break;
8727+
case ISD::VP_LOAD_FF:
8728+
visitVPLoadFF(VPIntrin, ValueVTs[0], ValueVTs[1], OpValues);
8729+
break;
86988730
case ISD::VP_GATHER:
86998731
visitVPGather(VPIntrin, ValueVTs[0], OpValues);
87008732
break;

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,8 @@ class SelectionDAGBuilder {
632632
void visitVectorExtractLastActive(const CallInst &I, unsigned Intrinsic);
633633
void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
634634
const SmallVectorImpl<SDValue> &OpValues);
635+
void visitVPLoadFF(const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT,
636+
const SmallVectorImpl<SDValue> &OpValues);
635637
void visitVPStore(const VPIntrinsic &VPIntrin,
636638
const SmallVectorImpl<SDValue> &OpValues);
637639
void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,

llvm/lib/IR/IntrinsicInst.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,7 @@ VPIntrinsic::getMemoryPointerParamPos(Intrinsic::ID VPID) {
448448
case Intrinsic::experimental_vp_strided_store:
449449
return 1;
450450
case Intrinsic::vp_load:
451+
case Intrinsic::vp_load_ff:
451452
case Intrinsic::vp_gather:
452453
case Intrinsic::experimental_vp_strided_load:
453454
return 0;
@@ -671,6 +672,10 @@ Function *VPIntrinsic::getOrInsertDeclarationForParams(
671672
VPFunc = Intrinsic::getOrInsertDeclaration(
672673
M, VPID, {ReturnType, Params[0]->getType()});
673674
break;
675+
case Intrinsic::vp_load_ff:
676+
VPFunc = Intrinsic::getOrInsertDeclaration(
677+
M, VPID, {ReturnType->getStructElementType(0), Params[0]->getType()});
678+
break;
674679
case Intrinsic::experimental_vp_strided_load:
675680
VPFunc = Intrinsic::getOrInsertDeclaration(
676681
M, VPID, {ReturnType, Params[0]->getType(), Params[1]->getType()});

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -880,6 +880,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
880880
{ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
881881
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
882882
VT, Custom);
883+
setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
883884

884885
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
885886
ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
@@ -1031,6 +1032,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
10311032
{ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
10321033
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
10331034
VT, Custom);
1035+
setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
10341036

10351037
setOperationAction(ISD::SELECT, VT, Custom);
10361038
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -1101,6 +1103,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
11011103
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
11021104
ISD::VP_SCATTER},
11031105
VT, Custom);
1106+
setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
11041107

11051108
setOperationAction(ISD::FNEG, VT, Expand);
11061109
setOperationAction(ISD::FABS, VT, Expand);
@@ -1269,6 +1272,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
12691272
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
12701273
ISD::VP_SCATTER},
12711274
VT, Custom);
1275+
setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
12721276

12731277
setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
12741278
ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
@@ -1357,6 +1361,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
13571361
ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
13581362
ISD::EXPERIMENTAL_VP_STRIDED_STORE},
13591363
VT, Custom);
1364+
setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
13601365

13611366
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
13621367
setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
@@ -7616,6 +7621,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
76167621
case ISD::MLOAD:
76177622
case ISD::VP_LOAD:
76187623
return lowerMaskedLoad(Op, DAG);
7624+
case ISD::VP_LOAD_FF:
7625+
return lowerLoadFF(Op, DAG);
76197626
case ISD::MSTORE:
76207627
case ISD::VP_STORE:
76217628
return lowerMaskedStore(Op, DAG);
@@ -11965,6 +11972,57 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
1196511972
return DAG.getMergeValues({Result, Chain}, DL);
1196611973
}
1196711974

11975+
/// Custom-lower ISD::VP_LOAD_FF to an INTRINSIC_W_CHAIN memory node that uses
/// riscv_vleff (all-ones mask) or riscv_vleff_mask (any other mask).
///
/// \p Op must be result 0 of a VPLoadFFSDNode. Returns the merged values
/// {loaded vector, output VL, chain}.
SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
11976+
assert(Op.getResNo() == 0);
11977+
SDLoc DL(Op);
11978+
MVT VT = Op.getSimpleValueType();
11979+
11980+
const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
11981+
EVT MemVT = VPLoadFF->getMemoryVT();
11982+
MachineMemOperand *MMO = VPLoadFF->getMemOperand();
11983+
SDValue Chain = VPLoadFF->getChain();
11984+
SDValue BasePtr = VPLoadFF->getBasePtr();
11985+
11986+
SDValue Mask = VPLoadFF->getMask();
11987+
SDValue VL = VPLoadFF->getVectorLength();
11988+
11989+
// A constant all-ones splat mask selects the unmasked intrinsic form.
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11990+
11991+
MVT XLenVT = Subtarget.getXLenVT();
11992+
11993+
// Fixed-length vectors are operated on in their scalable container type; the
// mask only needs converting when it is actually used.
MVT ContainerVT = VT;
11994+
if (VT.isFixedLengthVector()) {
11995+
ContainerVT = getContainerForFixedLengthVector(VT);
11996+
if (!IsUnmasked) {
11997+
MVT MaskVT = getMaskTypeFor(ContainerVT);
11998+
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11999+
}
12000+
}
12001+
12002+
// Operand list: chain, intrinsic ID, undef passthru, base pointer, [mask],
// VL, [tail-agnostic policy]. The bracketed operands are added only for the
// masked form.
unsigned IntID =
12003+
IsUnmasked ? Intrinsic::riscv_vleff : Intrinsic::riscv_vleff_mask;
12004+
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12005+
Ops.push_back(DAG.getUNDEF(ContainerVT));
12006+
Ops.push_back(BasePtr);
12007+
if (!IsUnmasked)
12008+
Ops.push_back(Mask);
12009+
Ops.push_back(VL);
12010+
if (!IsUnmasked)
12011+
Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
12012+
12013+
// Results mirror the VP_LOAD_FF node: vector (in container type), a value of
// the original node's second result type, and the chain.
SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
12014+
12015+
SDValue Result =
12016+
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12017+
SDValue OutVL = Result.getValue(1);
12018+
Chain = Result.getValue(2);
12019+
12020+
// Convert the loaded value back to the original fixed-length type.
if (VT.isFixedLengthVector())
12021+
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12022+
12023+
return DAG.getMergeValues({Result, OutVL, Chain}, DL);
12024+
}
12025+
1196812026
SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
1196912027
SelectionDAG &DAG) const {
1197012028
SDLoc DL(Op);

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,7 @@ class RISCVTargetLowering : public TargetLowering {
991991
SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
992992
SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
993993
SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
994+
SDValue lowerLoadFF(SDValue Op, SelectionDAG &DAG) const;
994995
SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
995996
SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const;
996997
SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,

0 commit comments

Comments
 (0)