Skip to content

Commit 4b4fd33

Browse files
[LLVM][CodeGen] Remove failure cases when widening EXTRACT/INSERT_SUBVECTOR.
This PR implements catch-all handling for widening the scalable subvector operand (INSERT_SUBVECTOR) or result (EXTRACT_SUBVECTOR). It does this via the stack using masked memory operations. With general handling available we can add optimizations for specific cases.
1 parent 35530f4 commit 4b4fd33

File tree

5 files changed

+518
-126
lines changed

5 files changed

+518
-126
lines changed

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,6 +1186,12 @@ class SelectionDAG {
11861186
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
11871187
bool ConstantFold = true);
11881188

1189+
/// Return a vector with the first 'Len' lanes set to true and remaining lanes
1190+
/// set to false. The mask's ValueType is the same as when comparing vectors
1191+
/// of type VT.
1192+
LLVM_ABI SDValue getMaskFromElementCount(const SDLoc &DL, EVT VT,
1193+
ElementCount Len);
1194+
11891195
/// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
11901196
SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
11911197
return getNode(ISD::GLOBAL_OFFSET_TABLE, SDLoc(), VT);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 71 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6201,8 +6201,33 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
62016201
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
62026202
}
62036203

6204-
report_fatal_error("Don't know how to widen the result of "
6205-
"EXTRACT_SUBVECTOR for scalable vectors");
6204+
// Fallback to extracting through memory.
6205+
6206+
Align Alignment = DAG.getReducedAlign(InVT, /*UseABI=*/false);
6207+
SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Alignment);
6208+
auto &MF = DAG.getMachineFunction();
6209+
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6210+
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
6211+
6212+
MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
6213+
PtrInfo, MachineMemOperand::MOStore,
6214+
LocationSize::beforeOrAfterPointer(), Alignment);
6215+
MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
6216+
PtrInfo, MachineMemOperand::MOLoad,
6217+
LocationSize::beforeOrAfterPointer(), Alignment);
6218+
6219+
// Write out the input vector.
6220+
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, StoreMMO);
6221+
6222+
// Build a mask to match the length of the non-widened result.
6223+
SDValue Mask =
6224+
DAG.getMaskFromElementCount(dl, WidenVT, VT.getVectorElementCount());
6225+
6226+
// Read back the sub-vector setting the remaining lanes to poison.
6227+
StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, InVT, VT, Idx);
6228+
return DAG.getMaskedLoad(
6229+
WidenVT, dl, Ch, StackPtr, DAG.getUNDEF(StackPtr.getValueType()), Mask,
6230+
DAG.getPOISON(WidenVT), VT, LoadMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
62066231
}
62076232

62086233
// We could try widening the input to the right length but for now, extract
@@ -6306,11 +6331,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
63066331
if (VT.isVector()) {
63076332
// If all else fails replace the load with a wide masked load.
63086333
SDLoc DL(N);
6309-
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
6310-
6311-
SDValue Len = DAG.getElementCount(DL, IdxVT, VT.getVectorElementCount());
6312-
SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT,
6313-
DAG.getConstant(0, DL, IdxVT), Len);
6334+
SDValue Mask =
6335+
DAG.getMaskFromElementCount(DL, WideVT, VT.getVectorElementCount());
63146336

63156337
SDValue NewLoad = DAG.getMaskedLoad(
63166338
WideVT, DL, LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask,
@@ -7447,9 +7469,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
74477469
SDValue InVec = N->getOperand(0);
74487470

74497471
EVT OrigVT = SubVec.getValueType();
7450-
if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
7451-
SubVec = GetWidenedVector(SubVec);
7452-
7472+
SubVec = GetWidenedVector(SubVec);
74537473
EVT SubVT = SubVec.getValueType();
74547474

74557475
// Whether or not all the elements of the widened SubVec will be inserted into
@@ -7471,17 +7491,52 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
74717491
}
74727492
}
74737493

7494+
if (!IndicesValid)
7495+
report_fatal_error(
7496+
"Don't know how to widen the operands for INSERT_SUBVECTOR");
7497+
74747498
SDLoc DL(N);
74757499

74767500
// We need to make sure that the indices are still valid, otherwise we might
74777501
// widen what was previously well-defined to something undefined.
7478-
if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
7502+
if (InVec.isUndef() && N->getConstantOperandVal(2) == 0)
74797503
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec,
74807504
N->getOperand(2));
74817505

7482-
if (!IndicesValid || OrigVT.isScalableVector())
7483-
report_fatal_error(
7484-
"Don't know how to widen the operands for INSERT_SUBVECTOR");
7506+
if (OrigVT.isScalableVector()) {
7507+
// Fallback to inserting through memory.
7508+
7509+
Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
7510+
SDValue StackPtr = DAG.CreateStackTemporary(VT.getStoreSize(), Alignment);
7511+
auto &MF = DAG.getMachineFunction();
7512+
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7513+
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
7514+
7515+
MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
7516+
PtrInfo, MachineMemOperand::MOStore,
7517+
LocationSize::beforeOrAfterPointer(), Alignment);
7518+
MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
7519+
PtrInfo, MachineMemOperand::MOLoad,
7520+
LocationSize::beforeOrAfterPointer(), Alignment);
7521+
7522+
// Write out the vector being inserted into.
7523+
SDValue Ch =
7524+
DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr, StoreMMO);
7525+
7526+
// Build a mask to match the length of the sub-vector.
7527+
SDValue Mask =
7528+
DAG.getMaskFromElementCount(DL, SubVT, OrigVT.getVectorElementCount());
7529+
7530+
// Overwrite the sub-vector at the required offset.
7531+
StackPtr =
7532+
TLI.getVectorSubVecPointer(DAG, StackPtr, VT, OrigVT, N->getOperand(2));
7533+
Ch = DAG.getMaskedStore(Ch, DL, SubVec, StackPtr,
7534+
DAG.getUNDEF(StackPtr.getValueType()), Mask, VT,
7535+
StoreMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
7536+
7537+
// Read back the result.
7538+
return DAG.getLoad(VT, DL, Ch, StackPtr, LoadMMO);
7539+
}
74857540

74867541
// If the operands can't be widened legally, just replace the INSERT_SUBVECTOR
74877542
// with a series of INSERT_VECTOR_ELT
@@ -7560,12 +7615,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
75607615
if (StVT.isVector()) {
75617616
// If all else fails replace the store with a wide masked store.
75627617
SDLoc DL(N);
7563-
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
7564-
75657618
SDValue WideStVal = GetWidenedVector(StVal);
7566-
SDValue Len = DAG.getElementCount(DL, IdxVT, StVT.getVectorElementCount());
7567-
SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT,
7568-
DAG.getConstant(0, DL, IdxVT), Len);
7619+
SDValue Mask =
7620+
DAG.getMaskFromElementCount(DL, WideVT, StVT.getVectorElementCount());
75697621

75707622
return DAG.getMaskedStore(ST->getChain(), DL, WideStVal, ST->getBasePtr(),
75717623
ST->getOffset(), Mask, ST->getMemoryVT(),

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2112,6 +2112,14 @@ SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
21122112
return getConstant(EC.getKnownMinValue(), DL, VT);
21132113
}
21142114

2115+
SDValue SelectionDAG::getMaskFromElementCount(const SDLoc &DL, EVT DataVT,
2116+
ElementCount EC) {
2117+
EVT IdxVT = TLI->getVectorIdxTy(getDataLayout());
2118+
EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), DataVT);
2119+
return getNode(ISD::GET_ACTIVE_LANE_MASK, DL, MaskVT,
2120+
getConstant(0, DL, IdxVT), getElementCount(DL, IdxVT, EC));
2121+
}
2122+
21152123
SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
21162124
APInt One(ResVT.getScalarSizeInBits(), 1);
21172125
return getStepVector(DL, ResVT, One);

0 commit comments

Comments
 (0)