Skip to content

Commit 8478de3

Browse files
[LLVM][CodeGen] Remove failure cases when widening EXTRACT/INSERT_SUBVECTOR. (#162308)
This PR implements catch-all handling for widening the scalable subvector operand (INSERT_SUBVECTOR) or result (EXTRACT_SUBVECTOR). It does this via the stack using masked memory operations. With general handling available we can add optimizations for specific cases.
1 parent 989ac4c commit 8478de3

File tree

5 files changed

+510
-126
lines changed

5 files changed

+510
-126
lines changed

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,6 +1191,12 @@ class SelectionDAG {
11911191

11921192
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS);
11931193

1194+
/// Return a vector with the first 'Len' lanes set to true and remaining lanes
1195+
/// set to false. The mask's ValueType is the same as when comparing vectors
1196+
/// of type VT.
1197+
LLVM_ABI SDValue getMaskFromElementCount(const SDLoc &DL, EVT VT,
1198+
ElementCount Len);
1199+
11941200
/// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
11951201
SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
11961202
return getNode(ISD::GLOBAL_OFFSET_TABLE, SDLoc(), VT);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 71 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6216,8 +6216,33 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
62166216
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
62176217
}
62186218

6219-
report_fatal_error("Don't know how to widen the result of "
6220-
"EXTRACT_SUBVECTOR for scalable vectors");
6219+
// Fallback to extracting through memory.
6220+
6221+
Align Alignment = DAG.getReducedAlign(InVT, /*UseABI=*/false);
6222+
SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Alignment);
6223+
MachineFunction &MF = DAG.getMachineFunction();
6224+
int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6225+
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
6226+
6227+
MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
6228+
PtrInfo, MachineMemOperand::MOStore,
6229+
LocationSize::beforeOrAfterPointer(), Alignment);
6230+
MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
6231+
PtrInfo, MachineMemOperand::MOLoad,
6232+
LocationSize::beforeOrAfterPointer(), Alignment);
6233+
6234+
// Write out the input vector.
6235+
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, StoreMMO);
6236+
6237+
// Build a mask to match the length of the non-widened result.
6238+
SDValue Mask =
6239+
DAG.getMaskFromElementCount(dl, WidenVT, VT.getVectorElementCount());
6240+
6241+
// Read back the sub-vector setting the remaining lanes to poison.
6242+
StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, InVT, VT, Idx);
6243+
return DAG.getMaskedLoad(
6244+
WidenVT, dl, Ch, StackPtr, DAG.getUNDEF(StackPtr.getValueType()), Mask,
6245+
DAG.getPOISON(WidenVT), VT, LoadMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
62216246
}
62226247

62236248
// We could try widening the input to the right length but for now, extract
@@ -6321,11 +6346,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
63216346
if (VT.isVector()) {
63226347
// If all else fails replace the load with a wide masked load.
63236348
SDLoc DL(N);
6324-
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
6325-
6326-
SDValue Len = DAG.getElementCount(DL, IdxVT, VT.getVectorElementCount());
6327-
SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT,
6328-
DAG.getConstant(0, DL, IdxVT), Len);
6349+
SDValue Mask =
6350+
DAG.getMaskFromElementCount(DL, WideVT, VT.getVectorElementCount());
63296351

63306352
SDValue NewLoad = DAG.getMaskedLoad(
63316353
WideVT, DL, LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask,
@@ -7462,9 +7484,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
74627484
SDValue InVec = N->getOperand(0);
74637485

74647486
EVT OrigVT = SubVec.getValueType();
7465-
if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
7466-
SubVec = GetWidenedVector(SubVec);
7467-
7487+
SubVec = GetWidenedVector(SubVec);
74687488
EVT SubVT = SubVec.getValueType();
74697489

74707490
// Whether or not all the elements of the widened SubVec will be inserted into
@@ -7486,17 +7506,52 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
74867506
}
74877507
}
74887508

7509+
if (!IndicesValid)
7510+
report_fatal_error(
7511+
"Don't know how to widen the operands for INSERT_SUBVECTOR");
7512+
74897513
SDLoc DL(N);
74907514

74917515
// We need to make sure that the indices are still valid, otherwise we might
74927516
// widen what was previously well-defined to something undefined.
7493-
if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
7517+
if (InVec.isUndef() && N->getConstantOperandVal(2) == 0)
74947518
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec,
74957519
N->getOperand(2));
74967520

7497-
if (!IndicesValid || OrigVT.isScalableVector())
7498-
report_fatal_error(
7499-
"Don't know how to widen the operands for INSERT_SUBVECTOR");
7521+
if (OrigVT.isScalableVector()) {
7522+
// Fallback to inserting through memory.
7523+
7524+
Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
7525+
SDValue StackPtr = DAG.CreateStackTemporary(VT.getStoreSize(), Alignment);
7526+
MachineFunction &MF = DAG.getMachineFunction();
7527+
int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7528+
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
7529+
7530+
MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
7531+
PtrInfo, MachineMemOperand::MOStore,
7532+
LocationSize::beforeOrAfterPointer(), Alignment);
7533+
MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
7534+
PtrInfo, MachineMemOperand::MOLoad,
7535+
LocationSize::beforeOrAfterPointer(), Alignment);
7536+
7537+
// Write out the vector being inserted into.
7538+
SDValue Ch =
7539+
DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr, StoreMMO);
7540+
7541+
// Build a mask to match the length of the sub-vector.
7542+
SDValue Mask =
7543+
DAG.getMaskFromElementCount(DL, SubVT, OrigVT.getVectorElementCount());
7544+
7545+
// Overwrite the sub-vector at the required offset.
7546+
SDValue SubVecPtr =
7547+
TLI.getVectorSubVecPointer(DAG, StackPtr, VT, OrigVT, N->getOperand(2));
7548+
Ch = DAG.getMaskedStore(Ch, DL, SubVec, SubVecPtr,
7549+
DAG.getUNDEF(SubVecPtr.getValueType()), Mask, VT,
7550+
StoreMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
7551+
7552+
// Read back the result.
7553+
return DAG.getLoad(VT, DL, Ch, StackPtr, LoadMMO);
7554+
}
75007555

75017556
// If the operands can't be widened legally, just replace the INSERT_SUBVECTOR
75027557
// with a series of INSERT_VECTOR_ELT
@@ -7575,12 +7630,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
75757630
if (StVT.isVector()) {
75767631
// If all else fails replace the store with a wide masked store.
75777632
SDLoc DL(N);
7578-
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
7579-
75807633
SDValue WideStVal = GetWidenedVector(StVal);
7581-
SDValue Len = DAG.getElementCount(DL, IdxVT, StVT.getVectorElementCount());
7582-
SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT,
7583-
DAG.getConstant(0, DL, IdxVT), Len);
7634+
SDValue Mask =
7635+
DAG.getMaskFromElementCount(DL, WideVT, StVT.getVectorElementCount());
75847636

75857637
return DAG.getMaskedStore(ST->getChain(), DL, WideStVal, ST->getBasePtr(),
75867638
ST->getOffset(), Mask, ST->getMemoryVT(),

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2137,6 +2137,14 @@ SDValue SelectionDAG::getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS) {
21372137
return getFixedOrScalableQuantity(*this, DL, VT, TS);
21382138
}
21392139

2140+
// Build a lane mask for vectors of type DataVT covering the first EC lanes.
// The mask is a GET_ACTIVE_LANE_MASK node running from index 0 up to the
// element count EC. (The header declaration names these parameters VT and
// Len.)
SDValue SelectionDAG::getMaskFromElementCount(const SDLoc &DL, EVT DataVT,
2141+
ElementCount EC) {
2142+
// Both GET_ACTIVE_LANE_MASK operands use the target's vector index type.
EVT IdxVT = TLI->getVectorIdxTy(getDataLayout());
2143+
// The mask has the type the target reports as the setcc result for DataVT.
EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), DataVT);
2144+
return getNode(ISD::GET_ACTIVE_LANE_MASK, DL, MaskVT,
2145+
getConstant(0, DL, IdxVT), getElementCount(DL, IdxVT, EC));
2146+
}
2147+
21402148
SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
21412149
APInt One(ResVT.getScalarSizeInBits(), 1);
21422150
return getStepVector(DL, ResVT, One);

0 commit comments

Comments
 (0)