@@ -25100,26 +25100,26 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
2510025100
2510125101// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
2510225102// if the subvector can be sourced for free.
25103- static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
25103+ static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
2510425104 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
25105- V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
25105+ V.getOperand(1).getValueType() == SubVT &&
25106+ V.getConstantOperandAPInt(2) == Index) {
2510625107 return V.getOperand(1);
2510725108 }
25108- auto *IndexC = dyn_cast<ConstantSDNode>(Index);
25109- if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
25109+ if (V.getOpcode() == ISD::CONCAT_VECTORS &&
2511025110 V.getOperand(0).getValueType() == SubVT &&
25111- (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
25112- uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
25111+ (Index % SubVT.getVectorMinNumElements()) == 0) {
25112+ uint64_t SubIdx = Index / SubVT.getVectorMinNumElements();
2511325113 return V.getOperand(SubIdx);
2511425114 }
2511525115 return SDValue();
2511625116}
2511725117
25118- static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
25118+ static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp,
25119+ unsigned Index, const SDLoc &DL,
2511925120 SelectionDAG &DAG,
2512025121 bool LegalOperations) {
2512125122 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25122- SDValue BinOp = Extract->getOperand(0);
2512325123 unsigned BinOpcode = BinOp.getOpcode();
2512425124 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
2512525125 return SDValue();
@@ -25128,9 +25128,6 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
2512825128 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
2512925129 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
2513025130 return SDValue();
25131-
25132- SDValue Index = Extract->getOperand(1);
25133- EVT SubVT = Extract->getValueType(0);
2513425131 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
2513525132 return SDValue();
2513625133
@@ -25146,29 +25143,26 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
2514625143 // We are inserting both operands of the wide binop only to extract back
2514725144 // to the narrow vector size. Eliminate all of the insert/extract:
2514825145 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
25149- return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
25150- BinOp->getFlags());
25146+ return DAG.getNode(BinOpcode, DL, SubVT, Sub0, Sub1, BinOp->getFlags());
2515125147}
2515225148
2515325149/// If we are extracting a subvector produced by a wide binary operator try
2515425150/// to use a narrow binary operator and/or avoid concatenation and extraction.
25155- static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
25151+ static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index,
25152+ const SDLoc &DL, SelectionDAG &DAG,
2515625153 bool LegalOperations) {
2515725154 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
2515825155 // some of these bailouts with other transforms.
2515925156
25160- if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
25157+ if (SDValue V = narrowInsertExtractVectorBinOp(VT, Src, Index, DL, DAG,
25158+ LegalOperations))
2516125159 return V;
2516225160
25163- // The extract index must be a constant, so we can map it to a concat operand.
25164- auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
25165- if (!ExtractIndexC)
25166- return SDValue();
2516725161
2516825162 // We are looking for an optionally bitcasted wide vector binary operator
2516925163 // feeding an extract subvector.
2517025164 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25171- SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0) );
25165+ SDValue BinOp = peekThroughBitcasts(Src );
2517225166 unsigned BOpcode = BinOp.getOpcode();
2517325167 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
2517425168 return SDValue();
@@ -25190,9 +25184,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
2519025184 if (!WideBVT.isFixedLengthVector())
2519125185 return SDValue();
2519225186
25193- EVT VT = Extract->getValueType(0);
25194- unsigned ExtractIndex = ExtractIndexC->getZExtValue();
25195- assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
25187+ assert((Index % VT.getVectorNumElements()) == 0 &&
2519625188 "Extract index is not a multiple of the vector length.");
2519725189
2519825190 // Bail out if this is not a proper multiple width extraction.
@@ -25219,12 +25211,11 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
2521925211 // for concat ops. The narrow binop alone makes this transform profitable.
2522025212 // We can't just reuse the original extract index operand because we may have
2522125213 // bitcasted.
25222- unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
25214+ unsigned ConcatOpNum = Index / VT.getVectorNumElements();
2522325215 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
2522425216 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
25225- BinOp.hasOneUse() && Extract->getOperand(0) ->hasOneUse()) {
25217+ BinOp.hasOneUse() && Src ->hasOneUse()) {
2522625218 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
25227- SDLoc DL(Extract);
2522825219 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
2522925220 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
2523025221 BinOp.getOperand(0), NewExtIndex);
@@ -25264,7 +25255,6 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
2526425255 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
2526525256 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
2526625257 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
25267- SDLoc DL(Extract);
2526825258 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
2526925259 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
2527025260 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
@@ -25284,24 +25274,24 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
2528425274/// If we are extracting a subvector from a wide vector load, convert to a
2528525275/// narrow load to eliminate the extraction:
2528625276/// (extract_subvector (load wide vector)) --> (load narrow vector)
25287- static SDValue narrowExtractedVectorLoad(SDNode *Extract, const SDLoc &DL ,
25288- SelectionDAG &DAG) {
25277+ static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index ,
25278+ const SDLoc &DL, SelectionDAG &DAG) {
2528925279 // TODO: Add support for big-endian. The offset calculation must be adjusted.
2529025280 if (DAG.getDataLayout().isBigEndian())
2529125281 return SDValue();
2529225282
25293- auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0) );
25283+ auto *Ld = dyn_cast<LoadSDNode>(Src );
2529425284 if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple())
2529525285 return SDValue();
2529625286
25297- // Allow targets to opt-out.
25298- EVT VT = Extract->getValueType(0);
25299-
2530025287 // We can only create byte sized loads.
2530125288 if (!VT.isByteSized())
2530225289 return SDValue();
2530325290
25304- unsigned Index = Extract->getConstantOperandVal(1);
25291+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25292+ if (!TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, VT))
25293+ return SDValue();
25294+
2530525295 unsigned NumElts = VT.getVectorMinNumElements();
2530625296 // A fixed length vector being extracted from a scalable vector
2530725297 // may not be any *smaller* than the scalable one.
@@ -25319,7 +25309,6 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, const SDLoc &DL,
2531925309 if (Offset.isFixed())
2532025310 ByteOffset = Offset.getFixedValue();
2532125311
25322- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2532325312 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT, ByteOffset))
2532425313 return SDValue();
2532525314
@@ -25350,23 +25339,18 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, const SDLoc &DL,
2535025339/// iff it is legal and profitable to do so. Notably, the trimmed mask
2535125340/// (containing only the elements that are extracted)
2535225341/// must reference at most two subvectors.
25353- static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
25342+ static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src,
25343+ unsigned Index,
25344+ const SDLoc &DL,
2535425345 SelectionDAG &DAG,
25355- const TargetLowering &TLI,
2535625346 bool LegalOperations) {
25357- assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
25358- "Must only be called on EXTRACT_SUBVECTOR's");
25359-
25360- SDValue N0 = N->getOperand(0);
25361-
2536225347 // Only deal with non-scalable vectors.
25363- EVT NarrowVT = N->getValueType(0);
25364- EVT WideVT = N0.getValueType();
25348+ EVT WideVT = Src.getValueType();
2536525349 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
2536625350 return SDValue();
2536725351
2536825352 // The operand must be a shufflevector.
25369- auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0 );
25353+ auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(Src );
2537025354 if (!WideShuffleVector)
2537125355 return SDValue();
2537225356
@@ -25375,13 +25359,13 @@ static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
2537525359 return SDValue();
2537625360
2537725361 // And the narrow shufflevector that we'll form must be legal.
25362+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2537825363 if (LegalOperations &&
2537925364 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
2538025365 return SDValue();
2538125366
25382- uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
2538325367 int NumEltsExtracted = NarrowVT.getVectorNumElements();
25384- assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
25368+ assert((Index % NumEltsExtracted) == 0 &&
2538525369 "Extract index is not a multiple of the output vector length.");
2538625370
2538725371 int WideNumElts = WideVT.getVectorNumElements();
@@ -25392,8 +25376,7 @@ static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
2539225376 DemandedSubvectors;
2539325377
2539425378 // Try to decode the wide mask into narrow mask from at most two subvectors.
25395- for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
25396- NumEltsExtracted)) {
25379+ for (int M : WideShuffleVector->getMask().slice(Index, NumEltsExtracted)) {
2539725380 assert((M >= -1) && (M < (2 * WideNumElts)) &&
2539825381 "Out-of-bounds shuffle mask?");
2539925382
@@ -25476,8 +25459,6 @@ static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
2547625459 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
2547725460 return SDValue();
2547825461
25479- SDLoc DL(N);
25480-
2548125462 SmallVector<SDValue, 2> NewOps;
2548225463 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
2548325464 &DemandedSubvector : DemandedSubvectors) {
@@ -25507,9 +25488,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
2550725488 if (V.isUndef())
2550825489 return DAG.getUNDEF(NVT);
2550925490
25510- if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
25511- if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DL, DAG))
25512- return NarrowLoad;
25491+ if (SDValue NarrowLoad = narrowExtractedVectorLoad(NVT, V, ExtIdx, DL, DAG))
25492+ return NarrowLoad;
2551325493
2551425494 // Combine an extract of an extract into a single extract_subvector.
2551525495 // ext (ext X, C), 0 --> ext X, C
@@ -25631,9 +25611,13 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
2563125611 }
2563225612 }
2563325613
25634- if (SDValue V =
25635- foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
25636- return V;
25614+ if (SDValue Shuffle = foldExtractSubvectorFromShuffleVector(
25615+ NVT, V, ExtIdx, DL, DAG, LegalOperations))
25616+ return Shuffle;
25617+
25618+ if (SDValue NarrowBOp =
25619+ narrowExtractedVectorBinOp(NVT, V, ExtIdx, DL, DAG, LegalOperations))
25620+ return NarrowBOp;
2563725621
2563825622 V = peekThroughBitcasts(V);
2563925623
@@ -25694,9 +25678,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
2569425678 }
2569525679 }
2569625680
25697- if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
25698- return NarrowBOp;
25699-
2570025681 // If only EXTRACT_SUBVECTOR nodes use the source vector we can
2570125682 // simplify it based on the (valid) extractions.
2570225683 if (!V.getValueType().isScalableVector() &&
0 commit comments