@@ -13445,14 +13445,15 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1344513445 for_each(SubMask, [&](int &Idx) {
1344613446 if (Idx == PoisonMaskElem)
1344713447 return;
13448- Idx = (Idx % VF) - (MinElement % VF) +
13448+ Idx = (( Idx % VF) - ((( MinElement % VF) / NewVF) * NewVF)) % NewVF +
1344913449 (Idx >= static_cast<int>(VF) ? NewVF : 0);
1345013450 });
13451- VF = NewVF;
13451+ } else {
13452+ NewVF = VF;
1345213453 }
1345313454
1345413455 constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
13455- auto *VecTy = getWidenedType(VL.front()->getType(), VF );
13456+ auto *VecTy = getWidenedType(VL.front()->getType(), NewVF );
1345613457 auto *MaskVecTy = getWidenedType(VL.front()->getType(), SubMask.size());
1345713458 auto GetShuffleCost = [&,
1345813459 &TTI = *TTI](ArrayRef<int> Mask,
@@ -13477,7 +13478,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1347713478 APInt DemandedElts = APInt::getAllOnes(SubMask.size());
1347813479 bool IsIdentity = true;
1347913480 for (auto [I, Idx] : enumerate(FirstMask)) {
13480- if (Idx >= static_cast<int>(VF )) {
13481+ if (Idx >= static_cast<int>(NewVF )) {
1348113482 Idx = PoisonMaskElem;
1348213483 } else {
1348313484 DemandedElts.clearBit(I);
@@ -13500,12 +13501,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1350013501 APInt DemandedElts = APInt::getAllOnes(SubMask.size());
1350113502 bool IsIdentity = true;
1350213503 for (auto [I, Idx] : enumerate(SecondMask)) {
13503- if (Idx < static_cast<int>(VF ) && Idx >= 0) {
13504+ if (Idx < static_cast<int>(NewVF ) && Idx >= 0) {
1350413505 Idx = PoisonMaskElem;
1350513506 } else {
1350613507 DemandedElts.clearBit(I);
1350713508 if (Idx != PoisonMaskElem) {
13508- Idx -= VF ;
13509+ Idx -= NewVF ;
1350913510 IsIdentity &= static_cast<int>(I) == Idx;
1351013511 }
1351113512 }
@@ -13525,12 +13526,24 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1352513526 /*Extract=*/false, CostKind);
1352613527 const TreeEntry *BestEntry = nullptr;
1352713528 if (FirstShuffleCost < ShuffleCost) {
13528- copy(FirstMask, std::next(Mask.begin(), Part * VL.size()));
13529+ std::for_each(std::next(Mask.begin(), Part * VL.size()),
13530+ std::next(Mask.begin(), (Part + 1) * VL.size()),
13531+ [&](int &Idx) {
13532+ if (Idx >= static_cast<int>(VF))
13533+ Idx = PoisonMaskElem;
13534+ });
1352913535 BestEntry = Entries.front();
1353013536 ShuffleCost = FirstShuffleCost;
1353113537 }
1353213538 if (SecondShuffleCost < ShuffleCost) {
13533- copy(SecondMask, std::next(Mask.begin(), Part * VL.size()));
13539+ std::for_each(std::next(Mask.begin(), Part * VL.size()),
13540+ std::next(Mask.begin(), (Part + 1) * VL.size()),
13541+ [&](int &Idx) {
13542+ if (Idx < static_cast<int>(VF))
13543+ Idx = PoisonMaskElem;
13544+ else
13545+ Idx -= VF;
13546+ });
1353413547 BestEntry = Entries[1];
1353513548 ShuffleCost = SecondShuffleCost;
1353613549 }
0 commit comments