@@ -11316,44 +11316,91 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1131611316 VL, *this, TryCopyableElementsVectorization,
1131711317 /*WithProfitabilityCheck=*/true, TryCopyableElementsVectorization);
1131811318
11319+ bool AreScatterAllGEPSameBlock = false;
11320+ if (!S) {
11321+ SmallVector<unsigned> SortedIndices;
11322+ BasicBlock *BB = nullptr;
11323+ bool IsScatterVectorizeUserTE =
11324+ UserTreeIdx.UserTE &&
11325+ UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
11326+ AreScatterAllGEPSameBlock =
11327+ (IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() &&
11328+ VL.size() > 2 &&
11329+ all_of(VL,
11330+ [&BB](Value *V) {
11331+ auto *I = dyn_cast<GetElementPtrInst>(V);
11332+ if (!I)
11333+ return doesNotNeedToBeScheduled(V);
11334+ if (!BB)
11335+ BB = I->getParent();
11336+ return BB == I->getParent() && I->getNumOperands() == 2;
11337+ }) &&
11338+ BB &&
11339+ sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL,
11340+ *SE, SortedIndices));
11341+ if (!AreScatterAllGEPSameBlock) {
11342+ LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
11343+ "C,S,B,O, small shuffle. \n";
11344+ dbgs() << "[";
11345+ interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
11346+ dbgs() << "]\n");
11347+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11348+ /*TryToFindDuplicates=*/true,
11349+ /*TrySplitVectorize=*/true);
11350+ }
11351+ // Reset S to make it GetElementPtr kind of node.
11352+ const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
11353+ assert(It != VL.end() && "Expected at least one GEP.");
11354+ S = getSameOpcode(*It, *TLI);
11355+ }
11356+ assert(S && "Must be valid.");
11357+
11358+ // Don't handle vectors.
11359+ if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
11360+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
11361+ // Do not try to pack to avoid extra instructions here.
11362+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11363+ /*TryToFindDuplicates=*/false);
11364+ }
11365+
11366+ // Check that all of the users of the scalars that we want to vectorize are
11367+ // schedulable.
11368+ BasicBlock *BB = S.getMainOp()->getParent();
11369+
11370+ if (BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) ||
11371+ !DT->isReachableFromEntry(BB)) {
11372+ // Don't go into unreachable blocks. They may contain instructions with
11373+ // dependency cycles which confuse the final scheduling.
11374+ // Do not vectorize EH and non-returning blocks, not profitable in most
11375+ // cases.
11376+ LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
11377+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11378+ }
11379+
1131911380 // Don't go into catchswitch blocks, which can happen with PHIs.
1132011381 // Such blocks can only have PHIs and the catchswitch. There is no
1132111382 // place to insert a shuffle if we need to, so just avoid that issue.
11322- if (S && isa<CatchSwitchInst>(S.getMainOp()->getParent() ->getTerminator())) {
11383+ if (isa<CatchSwitchInst>(BB ->getTerminator())) {
1132311384 LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
1132411385 // Do not try to pack to avoid extra instructions here.
1132511386 return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
1132611387 /*TryToFindDuplicates=*/false);
1132711388 }
1132811389
11329- // Check if this is a duplicate of another entry.
11330- if (S) {
11331- LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n");
11332- for (TreeEntry *E : getTreeEntries(S.getMainOp())) {
11333- if (E->isSame(VL)) {
11334- LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
11335- << ".\n");
11336- return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11337- }
11338- SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
11339- if (all_of(VL, [&](Value *V) {
11340- return isa<PoisonValue>(V) || Values.contains(V) ||
11341- (S.getOpcode() == Instruction::PHI && isa<PHINode>(V) &&
11342- LI->getLoopFor(S.getMainOp()->getParent()) &&
11343- isVectorized(V));
11344- })) {
11345- LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
11346- return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11347- }
11348- }
11390+ // Don't handle scalable vectors
11391+ if (S.getOpcode() == Instruction::ExtractElement &&
11392+ isa<ScalableVectorType>(
11393+ cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
11394+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
11395+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
1134911396 }
1135011397
1135111398 // Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of
1135211399 // a load), in which case peek through to include it in the tree, without
1135311400 // ballooning over-budget.
1135411401 if (Depth >= RecursionMaxDepth &&
11355- !(S && !S .isAltShuffle() && VL.size() >= 4 &&
11356- (match(S.getMainOp(), m_Load(m_Value())) ||
11402+ (S .isAltShuffle() || VL.size() < 4 ||
11403+ ! (match(S.getMainOp(), m_Load(m_Value())) ||
1135711404 all_of(VL, [&S](const Value *I) {
1135811405 return match(I,
1135911406 m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) &&
@@ -11363,20 +11410,24 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1136311410 return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
1136411411 }
1136511412
11366- // Don't handle scalable vectors
11367- if (S && S.getOpcode() == Instruction::ExtractElement &&
11368- isa<ScalableVectorType>(
11369- cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
11370- LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
11371- return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11372- }
11373-
11374- // Don't handle vectors.
11375- if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
11376- LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
11377- // Do not try to pack to avoid extra instructions here.
11378- return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11379- /*TryToFindDuplicates=*/false);
11413+ // Check if this is a duplicate of another entry.
11414+ LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n");
11415+ for (TreeEntry *E : getTreeEntries(S.getMainOp())) {
11416+ if (E->isSame(VL)) {
11417+ LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
11418+ << ".\n");
11419+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11420+ }
11421+ SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
11422+ if (all_of(VL, [&](Value *V) {
11423+ return isa<PoisonValue>(V) || Values.contains(V) ||
11424+ (S.getOpcode() == Instruction::PHI && isa<PHINode>(V) &&
11425+ LI->getLoopFor(S.getMainOp()->getParent()) &&
11426+ isVectorized(V));
11427+ })) {
11428+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
11429+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11430+ }
1138011431 }
1138111432
1138211433 // If all of the operands are identical or constant we have a simple solution.
@@ -11435,44 +11486,13 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1143511486 }
1143611487 return true;
1143711488 };
11438- SmallVector<unsigned> SortedIndices;
11439- BasicBlock *BB = nullptr;
11440- bool IsScatterVectorizeUserTE =
11441- UserTreeIdx.UserTE &&
11442- UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
11443- bool AreAllSameBlock = S.valid();
11444- bool AreScatterAllGEPSameBlock =
11445- (IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() &&
11446- VL.size() > 2 &&
11447- all_of(VL,
11448- [&BB](Value *V) {
11449- auto *I = dyn_cast<GetElementPtrInst>(V);
11450- if (!I)
11451- return doesNotNeedToBeScheduled(V);
11452- if (!BB)
11453- BB = I->getParent();
11454- return BB == I->getParent() && I->getNumOperands() == 2;
11455- }) &&
11456- BB &&
11457- sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE,
11458- SortedIndices));
11489+ bool AreAllSameBlock = !AreScatterAllGEPSameBlock;
1145911490 bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock;
11460- if (!AreAllSameInsts || (!S && allConstant(VL)) || isSplat(VL) ||
11461- (S &&
11462- isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
11491+ if (!AreAllSameInsts || isSplat(VL) ||
11492+ (isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
1146311493 S.getMainOp()) &&
1146411494 !all_of(VL, isVectorLikeInstWithConstOps)) ||
1146511495 NotProfitableForVectorization(VL)) {
11466- if (!S) {
11467- LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
11468- "C,S,B,O, small shuffle. \n";
11469- dbgs() << "[";
11470- interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
11471- dbgs() << "]\n");
11472- return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11473- /*TryToFindDuplicates=*/true,
11474- /*TrySplitVectorize=*/true);
11475- }
1147611496 LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n";
1147711497 dbgs() << "[";
1147811498 interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
@@ -11481,7 +11501,7 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1148111501 }
1148211502
1148311503 // Don't vectorize ephemeral values.
11484- if (S && !EphValues.empty()) {
11504+ if (!EphValues.empty()) {
1148511505 for (Value *V : VL) {
1148611506 if (EphValues.count(V)) {
1148711507 LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
@@ -11499,7 +11519,7 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1149911519 // Check that none of the instructions in the bundle are already in the tree
1150011520 // and the node may be not profitable for the vectorization as the small
1150111521 // alternate node.
11502- if (S && S .isAltShuffle()) {
11522+ if (S.isAltShuffle()) {
1150311523 auto GetNumVectorizedExtracted = [&]() {
1150411524 APInt Extracted = APInt::getZero(VL.size());
1150511525 APInt Vectorized = APInt::getAllOnes(VL.size());
@@ -11551,33 +11571,6 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1155111571 }
1155211572 }
1155311573
11554- // Special processing for sorted pointers for ScatterVectorize node with
11555- // constant indeces only.
11556- if (!AreAllSameBlock && AreScatterAllGEPSameBlock) {
11557- assert(VL.front()->getType()->isPointerTy() &&
11558- count_if(VL, IsaPred<GetElementPtrInst>) >= 2 &&
11559- "Expected pointers only.");
11560- // Reset S to make it GetElementPtr kind of node.
11561- const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
11562- assert(It != VL.end() && "Expected at least one GEP.");
11563- S = getSameOpcode(*It, *TLI);
11564- }
11565-
11566- // Check that all of the users of the scalars that we want to vectorize are
11567- // schedulable.
11568- Instruction *VL0 = S.getMainOp();
11569- BB = VL0->getParent();
11570-
11571- if (S &&
11572- (BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) ||
11573- !DT->isReachableFromEntry(BB))) {
11574- // Don't go into unreachable blocks. They may contain instructions with
11575- // dependency cycles which confuse the final scheduling.
11576- // Do not vectorize EH and non-returning blocks, not profitable in most
11577- // cases.
11578- LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
11579- return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11580- }
1158111574 return ScalarsVectorizationLegality(S, /*IsLegal=*/true);
1158211575}
1158311576
0 commit comments