@@ -5170,26 +5170,45 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
51705170 return false ;
51715171 }
51725172 case Instruction::Mul: {
5173+ auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
5174+ auto *Ty = cast<VectorType>(V->getType ());
5175+ // Never sink for scalable vectors: for SVE the lane-indexing is within 128 bits, so we can't fold splats into an indexed form.
5176+ if (Ty->isScalableTy ())
5177+ return false ;
5178+
5179+ // Indexed variants of Mul exist for i16 and i32 element types only, so accept exactly the 16- and 32-bit scalar sizes.
5180+ return Ty->getScalarSizeInBits () == 16 || Ty->getScalarSizeInBits () == 32 ;
5181+ };
5182+
51735183 int NumZExts = 0 , NumSExts = 0 ;
51745184 for (auto &Op : I->operands ()) {
51755185 // Make sure we are not already sinking this operand
51765186 if (any_of (Ops, [&](Use *U) { return U->get () == Op; }))
51775187 continue ;
51785188
5179- if (match (&Op, m_SExt (m_Value ()))) {
5180- NumSExts++;
5181- continue ;
5182- } else if (match (&Op, m_ZExt (m_Value ()))) {
5183- NumZExts++;
5189+ if (match (&Op, m_ZExtOrSExt (m_Value ()))) {
5190+ auto *Ext = cast<Instruction>(Op);
5191+ auto *ExtOp = Ext->getOperand (0 );
5192+ if (isSplatShuffle (ExtOp) && ShouldSinkSplatForIndexedVariant (ExtOp))
5193+ Ops.push_back (&Ext->getOperandUse (0 ));
5194+ Ops.push_back (&Op);
5195+
5196+ if (isa<SExtInst>(Ext))
5197+ NumSExts++;
5198+ else
5199+ NumZExts++;
5200+
51845201 continue ;
51855202 }
51865203
51875204 ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
5205+ if (!Shuffle)
5206+ continue ;
51885207
51895208 // If the Shuffle is a splat and the operand is a zext/sext, sinking the
51905209 // operand and the s/zext can help create indexed s/umull. This is
51915210 // especially useful to prevent i64 mul being scalarized.
5192- if (Shuffle && isSplatShuffle (Shuffle) &&
5211+ if (isSplatShuffle (Shuffle) &&
51935212 match (Shuffle->getOperand (0 ), m_ZExtOrSExt (m_Value ()))) {
51945213 Ops.push_back (&Shuffle->getOperandUse (0 ));
51955214 Ops.push_back (&Op);
@@ -5200,9 +5219,6 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
52005219 continue ;
52015220 }
52025221
5203- if (!Shuffle)
5204- continue ;
5205-
52065222 Value *ShuffleOperand = Shuffle->getOperand (0 );
52075223 InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
52085224 if (!Insert)
@@ -5234,12 +5250,26 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
52345250 NumZExts++;
52355251 }
52365252
5253+ Ops.push_back (&Insert->getOperandUse (1 ));
52375254 Ops.push_back (&Shuffle->getOperandUse (0 ));
52385255 Ops.push_back (&Op);
52395256 }
52405257
5241- // Is it profitable to sink if we found two of the same type of extends.
5242- return !Ops.empty () && (NumSExts == 2 || NumZExts == 2 );
5258+ // It is profitable to sink if we found two of the same type of extends.
5259+ if (!Ops.empty () && (NumSExts == 2 || NumZExts == 2 ))
5260+ return true ;
5261+
5262+ // Otherwise, see if we should sink splats for indexed variants.
5263+ if (!ShouldSinkSplatForIndexedVariant (I))
5264+ return false ;
5265+
5266+ Ops.clear ();
5267+ if (isSplatShuffle (I->getOperand (0 )))
5268+ Ops.push_back (&I->getOperandUse (0 ));
5269+ if (isSplatShuffle (I->getOperand (1 )))
5270+ Ops.push_back (&I->getOperandUse (1 ));
5271+
5272+ return !Ops.empty ();
52435273 }
52445274 case Instruction::FMul: {
52455275 // For SVE the lane-indexing is within 128-bits, so we can't fold splats.
0 commit comments