@@ -9771,6 +9771,28 @@ void BoUpSLP::transformNodes() {
97719771 // Strided store is more profitable than reverse + consecutive store -
97729772 // transform the node to strided store.
97739773 E.State = TreeEntry::StridedVectorize;
9774+ } else if (!E.ReorderIndices.empty()) {
9775+ // Check for interleaved stores: IsInterleaveMask returns the first factor (tried in the order produced by seq, starting at 2) for which Mask is an interleave mask that the target reports as a legal interleaved access for VecTy, or 0 if there is none.
9776+ auto IsInterleaveMask = [&, &TTI = *TTI](ArrayRef<int> Mask) { // TTI is captured as a reference to the pointee of the outer TTI.
9777+ auto *BaseSI = cast<StoreInst>(E.Scalars.front()); // The first scalar supplies the alignment and address space queried below.
9778+ assert(Mask.size() > 1 && "Expected mask greater than 1 element.");
9779+ if (Mask.size() < 4) // Masks with fewer than 4 elements are never reported as interleaved.
9780+ return 0u;
9781+ for (unsigned Factor : seq<unsigned>(2, Mask.size() / 2 + 1)) { // NOTE(review): presumably llvm::seq with an exclusive end bound, i.e. Factor ranges over 2..Mask.size()/2 — confirm.
9782+ if (ShuffleVectorInst::isInterleaveMask(
9783+ Mask, Factor, VecTy->getElementCount().getFixedValue()) && // The fixed element count of VecTy is passed as the third argument.
9784+ TTI.isLegalInterleavedAccessType(
9785+ VecTy, Factor, BaseSI->getAlign(),
9786+ BaseSI->getPointerAddressSpace()))
9787+ return Factor; // Stop at the first factor that passes both the mask check and the target legality check.
9788+ }
9789+
9790+ return 0u; // No candidate factor qualified.
9791+ };
9792+ SmallVector<int> Mask(E.ReorderIndices.begin(), E.ReorderIndices.end());
9793+ unsigned InterleaveFactor = IsInterleaveMask(Mask);
9794+ if (InterleaveFactor != 0)
9795+ E.setInterleave(InterleaveFactor);
97749796 }
97759797 break;
97769798 }
@@ -11441,10 +11463,19 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1144111463 } else {
1144211464 assert(E->State == TreeEntry::Vectorize &&
1144311465 "Expected either strided or consecutive stores.");
11444- TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
11445- VecStCost = TTI->getMemoryOpCost(
11446- Instruction::Store, VecTy, BaseSI->getAlign(),
11447- BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
11466+ if (unsigned Factor = E->getInterleaveFactor()) {
11467+ assert(E->ReuseShuffleIndices.empty() && !E->ReorderIndices.empty() &&
11468+ "No reused shuffles expected");
11469+ CommonCost = 0;
11470+ VecStCost = TTI->getInterleavedMemoryOpCost(
11471+ Instruction::Store, VecTy, Factor, std::nullopt,
11472+ BaseSI->getAlign(), BaseSI->getPointerAddressSpace(), CostKind);
11473+ } else {
11474+ TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
11475+ VecStCost = TTI->getMemoryOpCost(
11476+ Instruction::Store, VecTy, BaseSI->getAlign(),
11477+ BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
11478+ }
1144811479 }
1144911480 return VecStCost + CommonCost;
1145011481 };
0 commit comments