@@ -15347,13 +15347,14 @@ BoUpSLP::isGatherShuffledEntry(
1534715347
1534815348InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1534915349 Type *ScalarTy) const {
15350- auto *VecTy = getWidenedType(ScalarTy, VL.size());
15350+ const unsigned VF = VL.size();
15351+ auto *VecTy = getWidenedType(ScalarTy, VF);
1535115352 bool DuplicateNonConst = false;
1535215353 // Find the cost of inserting/extracting values from the vector.
1535315354 // Check if the same elements are inserted several times and count them as
1535415355 // shuffle candidates.
15355- APInt ShuffledElements = APInt::getZero(VL.size() );
15356- APInt DemandedElements = APInt::getZero(VL.size() );
15356+ APInt ShuffledElements = APInt::getZero(VF );
15357+ APInt DemandedElements = APInt::getZero(VF );
1535715358 DenseMap<Value *, unsigned> UniqueElements;
1535815359 constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1535915360 InstructionCost Cost;
@@ -15363,11 +15364,10 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1536315364 Cost += TTI->getCastInstrCost(Instruction::Trunc, ScalarTy, V->getType(),
1536415365 TTI::CastContextHint::None, CostKind);
1536515366 };
15366- SmallVector<int> ShuffleMask(VL.size() , PoisonMaskElem);
15367- SmallVector<int> ConstantShuffleMask(VL.size() , PoisonMaskElem);
15367+ SmallVector<int> ShuffleMask(VF , PoisonMaskElem);
15368+ SmallVector<int> ConstantShuffleMask(VF , PoisonMaskElem);
1536815369 std::iota(ConstantShuffleMask.begin(), ConstantShuffleMask.end(), 0);
15369- for (unsigned I = 0, E = VL.size(); I < E; ++I) {
15370- Value *V = VL[I];
15370+ for (auto [I, V] : enumerate(VL)) {
1537115371 // No need to shuffle duplicates for constants.
1537215372 if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
1537315373 ShuffledElements.setBit(I);
@@ -15376,7 +15376,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1537615376 }
1537715377
1537815378 if (isConstant(V)) {
15379- ConstantShuffleMask[I] = I + E ;
15379+ ConstantShuffleMask[I] = I + VF ;
1538015380 ShuffleMask[I] = I;
1538115381 continue;
1538215382 }
@@ -15398,12 +15398,15 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1539815398 if (!ForPoisonSrc && IsAnyNonUndefConst) {
1539915399 Cost += ::getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteTwoSrc, VecTy,
1540015400 ConstantShuffleMask);
15401- for (auto [Idx, I] : enumerate(ShuffleMask)) {
15402- if (I == PoisonMaskElem)
15403- I = Idx;
15404- else
15405- I += VL.size();
15406- }
15401+ // Update the shuffle mask for shuffling with incoming source (all elements
15402+ // are used!) or with constant subvector.
15403+ for_each(enumerate(ShuffleMask), [&](auto P) {
15404+ if ((!ForPoisonSrc && P.value() == PoisonMaskElem) ||
15405+ ConstantShuffleMask[P.index()] != PoisonMaskElem)
15406+ P.value() = P.index();
15407+ else if (P.value() != PoisonMaskElem)
15408+ P.value() += VF;
15409+ });
1540715410 }
1540815411
1540915412 // 2. Insert unique non-constants.
@@ -15415,7 +15418,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1541515418 // 3. Shuffle duplicates.
1541615419 if (DuplicateNonConst)
1541715420 Cost += ::getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteSingleSrc,
15418- VecTy, ShuffleMask);
15421+ VecTy, ShuffleMask, CostKind );
1541915422 return Cost;
1542015423}
1542115424
0 commit comments