@@ -13443,14 +13443,15 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1344313443 for_each(SubMask, [&](int &Idx) {
1344413444 if (Idx == PoisonMaskElem)
1344513445 return;
13446- Idx = (Idx % VF) - (MinElement % VF ) +
13446+ Idx = (Idx % VF) - (( MinElement / NewVF) * NewVF ) +
1344713447 (Idx >= static_cast<int>(VF) ? NewVF : 0);
1344813448 });
13449- VF = NewVF;
13449+ } else {
13450+ NewVF = VF;
1345013451 }
1345113452
1345213453 constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
13453- auto *VecTy = getWidenedType(VL.front()->getType(), VF );
13454+ auto *VecTy = getWidenedType(VL.front()->getType(), NewVF );
1345413455 auto *MaskVecTy = getWidenedType(VL.front()->getType(), SubMask.size());
1345513456 auto GetShuffleCost = [&,
1345613457 &TTI = *TTI](ArrayRef<int> Mask,
@@ -13475,7 +13476,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1347513476 APInt DemandedElts = APInt::getAllOnes(SubMask.size());
1347613477 bool IsIdentity = true;
1347713478 for (auto [I, Idx] : enumerate(FirstMask)) {
13478- if (Idx >= static_cast<int>(VF )) {
13479+ if (Idx >= static_cast<int>(NewVF )) {
1347913480 Idx = PoisonMaskElem;
1348013481 } else {
1348113482 DemandedElts.clearBit(I);
@@ -13498,12 +13499,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1349813499 APInt DemandedElts = APInt::getAllOnes(SubMask.size());
1349913500 bool IsIdentity = true;
1350013501 for (auto [I, Idx] : enumerate(SecondMask)) {
13501- if (Idx < static_cast<int>(VF ) && Idx >= 0) {
13502+ if (Idx < static_cast<int>(NewVF ) && Idx >= 0) {
1350213503 Idx = PoisonMaskElem;
1350313504 } else {
1350413505 DemandedElts.clearBit(I);
1350513506 if (Idx != PoisonMaskElem) {
13506- Idx -= VF ;
13507+ Idx -= NewVF ;
1350713508 IsIdentity &= static_cast<int>(I) == Idx;
1350813509 }
1350913510 }
@@ -13523,12 +13524,24 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1352313524 /*Extract=*/false, CostKind);
1352413525 const TreeEntry *BestEntry = nullptr;
1352513526 if (FirstShuffleCost < ShuffleCost) {
13526- copy(FirstMask, std::next(Mask.begin(), Part * VL.size()));
13527+ std::for_each(std::next(Mask.begin(), Part * VL.size()),
13528+ std::next(Mask.begin(), (Part + 1) * VL.size()),
13529+ [&](int &Idx) {
13530+ if (Idx >= static_cast<int>(VF))
13531+ Idx = PoisonMaskElem;
13532+ });
1352713533 BestEntry = Entries.front();
1352813534 ShuffleCost = FirstShuffleCost;
1352913535 }
1353013536 if (SecondShuffleCost < ShuffleCost) {
13531- copy(SecondMask, std::next(Mask.begin(), Part * VL.size()));
13537+ std::for_each(std::next(Mask.begin(), Part * VL.size()),
13538+ std::next(Mask.begin(), (Part + 1) * VL.size()),
13539+ [&](int &Idx) {
13540+ if (Idx < static_cast<int>(VF))
13541+ Idx = PoisonMaskElem;
13542+ else
13543+ Idx -= VF;
13544+ });
1353213545 BestEntry = Entries[1];
1353313546 ShuffleCost = SecondShuffleCost;
1353413547 }
0 commit comments