@@ -9771,6 +9771,28 @@ void BoUpSLP::transformNodes() {
9771
9771
// Strided store is more profitable than reverse + consecutive store -
9772
9772
// transform the node to strided store.
9773
9773
E.State = TreeEntry::StridedVectorize;
9774
+ } else if (!E.ReorderIndices.empty()) {
9775
+ // Check for interleaved stores.
9776
// Returns the interleave factor to use for a store node whose reorder mask is
// \p Mask, or 0 when no factor qualifies.
// Masks with fewer than 4 elements are rejected up front. Otherwise candidate
// factors starting at 2 and bounded by Mask.size() / 2 are tried in turn
// (assuming seq's end bound is exclusive - TODO confirm), and the first factor
// is returned for which both checks pass:
//  * ShuffleVectorInst::isInterleaveMask accepts Mask for that factor, given
//    VecTy's fixed element count;
//  * TTI.isLegalInterleavedAccessType accepts VecTy for that factor, using the
//    alignment and address space of the entry's first scalar store.
+ auto IsInterleaveMask = [&, &TTI = *TTI](ArrayRef<int> Mask) {
9777
// The first scalar of the entry supplies the alignment and address space
// used for the legality query below.
+ auto *BaseSI = cast<StoreInst>(E.Scalars.front());
9778
+ assert(Mask.size() > 1 && "Expected mask greater than 1 element.");
9779
// Fewer than 4 elements: report "no interleave factor" without searching.
+ if (Mask.size() < 4)
9780
+ return 0u;
9781
+ for (unsigned Factor : seq<unsigned>(2, Mask.size() / 2 + 1)) {
9782
// Both the mask shape and the target legality check must succeed.
+ if (ShuffleVectorInst::isInterleaveMask(
9783
+ Mask, Factor, VecTy->getElementCount().getFixedValue()) &&
9784
+ TTI.isLegalInterleavedAccessType(
9785
+ VecTy, Factor, BaseSI->getAlign(),
9786
+ BaseSI->getPointerAddressSpace()))
9787
+ return Factor;
9788
+ }
9789
+
9790
// No candidate factor passed both checks.
+ return 0u;
9791
+ };
9792
+ SmallVector<int> Mask(E.ReorderIndices.begin(), E.ReorderIndices.end());
9793
+ unsigned InterleaveFactor = IsInterleaveMask(Mask);
9794
+ if (InterleaveFactor != 0)
9795
+ E.setInterleave(InterleaveFactor);
9774
9796
}
9775
9797
break;
9776
9798
}
@@ -11441,10 +11463,19 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
11441
11463
} else {
11442
11464
assert(E->State == TreeEntry::Vectorize &&
11443
11465
"Expected either strided or consecutive stores.");
11444
- TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
11445
- VecStCost = TTI->getMemoryOpCost(
11446
- Instruction::Store, VecTy, BaseSI->getAlign(),
11447
- BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
11466
+ if (unsigned Factor = E->getInterleaveFactor()) {
11467
+ assert(E->ReuseShuffleIndices.empty() && !E->ReorderIndices.empty() &&
11468
+ "No reused shuffles expected");
11469
+ CommonCost = 0;
11470
+ VecStCost = TTI->getInterleavedMemoryOpCost(
11471
+ Instruction::Store, VecTy, Factor, std::nullopt,
11472
+ BaseSI->getAlign(), BaseSI->getPointerAddressSpace(), CostKind);
11473
+ } else {
11474
+ TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
11475
+ VecStCost = TTI->getMemoryOpCost(
11476
+ Instruction::Store, VecTy, BaseSI->getAlign(),
11477
+ BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
11478
+ }
11448
11479
}
11449
11480
return VecStCost + CommonCost;
11450
11481
};
0 commit comments