Skip to content

Commit 6c8f100

Browse files
[LLVM][CodeGen] Remove failure cases when widening EXTRACT/INSERT_SUBVECTOR.
This PR implements catch-all handling for widening the scalable subvector operand (INSERT_SUBVECTOR) or result (EXTRACT_SUBVECTOR). It does this via the stack, using masked memory operations. With general handling available, we can add optimizations for specific cases.
1 parent c7a9be8 commit 6c8f100

File tree

5 files changed

+518
-126
lines changed

5 files changed

+518
-126
lines changed

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,6 +1190,12 @@ class SelectionDAG {
11901190
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
11911191
bool ConstantFold = true);
11921192

1193+
/// Return a vector with the first 'Len' lanes set to true and remaining lanes
1194+
/// set to false. The mask's ValueType is the same as when comparing vectors
1195+
/// of type VT.
1196+
LLVM_ABI SDValue getMaskFromElementCount(const SDLoc &DL, EVT VT,
1197+
ElementCount Len);
1198+
11931199
/// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
11941200
SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
11951201
return getNode(ISD::GLOBAL_OFFSET_TABLE, SDLoc(), VT);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 71 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6218,8 +6218,33 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
62186218
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
62196219
}
62206220

6221-
report_fatal_error("Don't know how to widen the result of "
6222-
"EXTRACT_SUBVECTOR for scalable vectors");
6221+
// Fallback to extracting through memory.
6222+
6223+
Align Alignment = DAG.getReducedAlign(InVT, /*UseABI=*/false);
6224+
SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Alignment);
6225+
auto &MF = DAG.getMachineFunction();
6226+
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6227+
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
6228+
6229+
MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
6230+
PtrInfo, MachineMemOperand::MOStore,
6231+
LocationSize::beforeOrAfterPointer(), Alignment);
6232+
MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
6233+
PtrInfo, MachineMemOperand::MOLoad,
6234+
LocationSize::beforeOrAfterPointer(), Alignment);
6235+
6236+
// Write out the input vector.
6237+
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, StoreMMO);
6238+
6239+
// Build a mask to match the length of the non-widened result.
6240+
SDValue Mask =
6241+
DAG.getMaskFromElementCount(dl, WidenVT, VT.getVectorElementCount());
6242+
6243+
// Read back the sub-vector setting the remaining lanes to poison.
6244+
StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, InVT, VT, Idx);
6245+
return DAG.getMaskedLoad(
6246+
WidenVT, dl, Ch, StackPtr, DAG.getUNDEF(StackPtr.getValueType()), Mask,
6247+
DAG.getPOISON(WidenVT), VT, LoadMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
62236248
}
62246249

62256250
// We could try widening the input to the right length but for now, extract
@@ -6323,11 +6348,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
63236348
if (VT.isVector()) {
63246349
// If all else fails replace the load with a wide masked load.
63256350
SDLoc DL(N);
6326-
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
6327-
6328-
SDValue Len = DAG.getElementCount(DL, IdxVT, VT.getVectorElementCount());
6329-
SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT,
6330-
DAG.getConstant(0, DL, IdxVT), Len);
6351+
SDValue Mask =
6352+
DAG.getMaskFromElementCount(DL, WideVT, VT.getVectorElementCount());
63316353

63326354
SDValue NewLoad = DAG.getMaskedLoad(
63336355
WideVT, DL, LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask,
@@ -7464,9 +7486,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
74647486
SDValue InVec = N->getOperand(0);
74657487

74667488
EVT OrigVT = SubVec.getValueType();
7467-
if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
7468-
SubVec = GetWidenedVector(SubVec);
7469-
7489+
SubVec = GetWidenedVector(SubVec);
74707490
EVT SubVT = SubVec.getValueType();
74717491

74727492
// Whether or not all the elements of the widened SubVec will be inserted into
@@ -7488,17 +7508,52 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
74887508
}
74897509
}
74907510

7511+
if (!IndicesValid)
7512+
report_fatal_error(
7513+
"Don't know how to widen the operands for INSERT_SUBVECTOR");
7514+
74917515
SDLoc DL(N);
74927516

74937517
// We need to make sure that the indices are still valid, otherwise we might
74947518
// widen what was previously well-defined to something undefined.
7495-
if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
7519+
if (InVec.isUndef() && N->getConstantOperandVal(2) == 0)
74967520
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec,
74977521
N->getOperand(2));
74987522

7499-
if (!IndicesValid || OrigVT.isScalableVector())
7500-
report_fatal_error(
7501-
"Don't know how to widen the operands for INSERT_SUBVECTOR");
7523+
if (OrigVT.isScalableVector()) {
7524+
// Fallback to inserting through memory.
7525+
7526+
Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
7527+
SDValue StackPtr = DAG.CreateStackTemporary(VT.getStoreSize(), Alignment);
7528+
auto &MF = DAG.getMachineFunction();
7529+
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7530+
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
7531+
7532+
MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
7533+
PtrInfo, MachineMemOperand::MOStore,
7534+
LocationSize::beforeOrAfterPointer(), Alignment);
7535+
MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
7536+
PtrInfo, MachineMemOperand::MOLoad,
7537+
LocationSize::beforeOrAfterPointer(), Alignment);
7538+
7539+
// Write out the vector being inserted into.
7540+
SDValue Ch =
7541+
DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr, StoreMMO);
7542+
7543+
// Build a mask to match the length of the sub-vector.
7544+
SDValue Mask =
7545+
DAG.getMaskFromElementCount(DL, SubVT, OrigVT.getVectorElementCount());
7546+
7547+
// Overwrite the sub-vector at the required offset.
7548+
StackPtr =
7549+
TLI.getVectorSubVecPointer(DAG, StackPtr, VT, OrigVT, N->getOperand(2));
7550+
Ch = DAG.getMaskedStore(Ch, DL, SubVec, StackPtr,
7551+
DAG.getUNDEF(StackPtr.getValueType()), Mask, VT,
7552+
StoreMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
7553+
7554+
// Read back the result.
7555+
return DAG.getLoad(VT, DL, Ch, StackPtr, LoadMMO);
7556+
}
75027557

75037558
// If the operands can't be widened legally, just replace the INSERT_SUBVECTOR
75047559
// with a series of INSERT_VECTOR_ELT
@@ -7577,12 +7632,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
75777632
if (StVT.isVector()) {
75787633
// If all else fails replace the store with a wide masked store.
75797634
SDLoc DL(N);
7580-
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
7581-
75827635
SDValue WideStVal = GetWidenedVector(StVal);
7583-
SDValue Len = DAG.getElementCount(DL, IdxVT, StVT.getVectorElementCount());
7584-
SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT,
7585-
DAG.getConstant(0, DL, IdxVT), Len);
7636+
SDValue Mask =
7637+
DAG.getMaskFromElementCount(DL, WideVT, StVT.getVectorElementCount());
75867638

75877639
return DAG.getMaskedStore(ST->getChain(), DL, WideStVal, ST->getBasePtr(),
75887640
ST->getOffset(), Mask, ST->getMemoryVT(),

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2111,6 +2111,14 @@ SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
21112111
return getConstant(EC.getKnownMinValue(), DL, VT);
21122112
}
21132113

2114+
// Builds a GET_ACTIVE_LANE_MASK node whose operands are the constant 0 and
// the element count EC, i.e. a mask covering the first EC lanes.
//   DL     - location attached to every node created here.
//   DataVT - data vector type; the mask type is the SETCC result type the
//            target reports for it.
//   EC     - element count forming the upper bound of the active lanes.
SDValue SelectionDAG::getMaskFromElementCount(const SDLoc &DL, EVT DataVT,
2115+
ElementCount EC) {
2116+
// Both lane-mask operands are created in the target's vector index type.
EVT IdxVT = TLI->getVectorIdxTy(getDataLayout());
2117+
// The mask type matches what a vector compare on DataVT would produce.
EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), DataVT);
2118+
// Operands are (0, EC); EC is materialised through getElementCount.
return getNode(ISD::GET_ACTIVE_LANE_MASK, DL, MaskVT,
2119+
getConstant(0, DL, IdxVT), getElementCount(DL, IdxVT, EC));
2120+
}
2121+
21142122
SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
21152123
APInt One(ResVT.getScalarSizeInBits(), 1);
21162124
return getStepVector(DL, ResVT, One);

0 commit comments

Comments
 (0)