@@ -11315,44 +11315,90 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1131511315 VL, *this, TryCopyableElementsVectorization,
1131611316 /*WithProfitabilityCheck=*/true, TryCopyableElementsVectorization);
1131711317
11318+ bool AreScatterAllGEPSameBlock = false;
11319+ if (!S) {
11320+ SmallVector<unsigned> SortedIndices;
11321+ BasicBlock *BB = nullptr;
11322+ bool IsScatterVectorizeUserTE =
11323+ UserTreeIdx.UserTE &&
11324+ UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
11325+ AreScatterAllGEPSameBlock =
11326+ (IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() &&
11327+ VL.size() > 2 &&
11328+ all_of(VL,
11329+ [&BB](Value *V) {
11330+ auto *I = dyn_cast<GetElementPtrInst>(V);
11331+ if (!I)
11332+ return doesNotNeedToBeScheduled(V);
11333+ if (!BB)
11334+ BB = I->getParent();
11335+ return BB == I->getParent() && I->getNumOperands() == 2;
11336+ }) &&
11337+ BB &&
11338+ sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL,
11339+ *SE, SortedIndices));
11340+ if (!AreScatterAllGEPSameBlock) {
11341+ LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
11342+ "C,S,B,O, small shuffle. \n";
11343+ dbgs() << "[";
11344+ interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
11345+ dbgs() << "]\n");
11346+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11347+ /*TryToFindDuplicates=*/true,
11348+ /*TrySplitVectorize=*/true);
11349+ }
11350+ // Reset S to make it GetElementPtr kind of node.
11351+ const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
11352+ assert(It != VL.end() && "Expected at least one GEP.");
11353+ S = getSameOpcode(*It, *TLI);
11354+ }
11355+
11356+ // Don't handle vectors.
11357+ if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
11358+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
11359+ // Do not try to pack to avoid extra instructions here.
11360+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11361+ /*TryToFindDuplicates=*/false);
11362+ }
11363+
11364+ // Check that all of the users of the scalars that we want to vectorize are
11365+ // schedulable.
11366+ BasicBlock *BB = S.getMainOp()->getParent();
11367+
11368+ if (BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) ||
11369+ !DT->isReachableFromEntry(BB)) {
11370+ // Don't go into unreachable blocks. They may contain instructions with
11371+ // dependency cycles which confuse the final scheduling.
11372+ // Do not vectorize EH and non-returning blocks, not profitable in most
11373+ // cases.
11374+ LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
11375+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11376+ }
11377+
1131811378 // Don't go into catchswitch blocks, which can happen with PHIs.
1131911379 // Such blocks can only have PHIs and the catchswitch. There is no
1132011380 // place to insert a shuffle if we need to, so just avoid that issue.
11321- if (S && isa<CatchSwitchInst>(S.getMainOp()->getParent() ->getTerminator())) {
11381+ if (isa<CatchSwitchInst>(BB ->getTerminator())) {
1132211382 LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
1132311383 // Do not try to pack to avoid extra instructions here.
1132411384 return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
1132511385 /*TryToFindDuplicates=*/false);
1132611386 }
1132711387
11328- // Check if this is a duplicate of another entry.
11329- if (S) {
11330- LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n");
11331- for (TreeEntry *E : getTreeEntries(S.getMainOp())) {
11332- if (E->isSame(VL)) {
11333- LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
11334- << ".\n");
11335- return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11336- }
11337- SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
11338- if (all_of(VL, [&](Value *V) {
11339- return isa<PoisonValue>(V) || Values.contains(V) ||
11340- (S.getOpcode() == Instruction::PHI && isa<PHINode>(V) &&
11341- LI->getLoopFor(S.getMainOp()->getParent()) &&
11342- isVectorized(V));
11343- })) {
11344- LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
11345- return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11346- }
11347- }
11388+ // Don't handle scalable vectors
11389+ if (S.getOpcode() == Instruction::ExtractElement &&
11390+ isa<ScalableVectorType>(
11391+ cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
11392+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
11393+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
1134811394 }
1134911395
1135011396 // Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of
1135111397 // a load), in which case peek through to include it in the tree, without
1135211398 // ballooning over-budget.
1135311399 if (Depth >= RecursionMaxDepth &&
11354- !(S && !S .isAltShuffle() && VL.size() >= 4 &&
11355- (match(S.getMainOp(), m_Load(m_Value())) ||
11400+ (S .isAltShuffle() || VL.size() < 4 ||
11401+ ! (match(S.getMainOp(), m_Load(m_Value())) ||
1135611402 all_of(VL, [&S](const Value *I) {
1135711403 return match(I,
1135811404 m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) &&
@@ -11362,20 +11408,24 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1136211408 return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
1136311409 }
1136411410
11365- // Don't handle scalable vectors
11366- if (S && S.getOpcode() == Instruction::ExtractElement &&
11367- isa<ScalableVectorType>(
11368- cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
11369- LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
11370- return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11371- }
11372-
11373- // Don't handle vectors.
11374- if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
11375- LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
11376- // Do not try to pack to avoid extra instructions here.
11377- return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11378- /*TryToFindDuplicates=*/false);
11411+ // Check if this is a duplicate of another entry.
11412+ LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n");
11413+ for (TreeEntry *E : getTreeEntries(S.getMainOp())) {
11414+ if (E->isSame(VL)) {
11415+ LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
11416+ << ".\n");
11417+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11418+ }
11419+ SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
11420+ if (all_of(VL, [&](Value *V) {
11421+ return isa<PoisonValue>(V) || Values.contains(V) ||
11422+ (S.getOpcode() == Instruction::PHI && isa<PHINode>(V) &&
11423+ LI->getLoopFor(S.getMainOp()->getParent()) &&
11424+ isVectorized(V));
11425+ })) {
11426+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
11427+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11428+ }
1137911429 }
1138011430
1138111431 // If all of the operands are identical or constant we have a simple solution.
@@ -11434,44 +11484,13 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1143411484 }
1143511485 return true;
1143611486 };
11437- SmallVector<unsigned> SortedIndices;
11438- BasicBlock *BB = nullptr;
11439- bool IsScatterVectorizeUserTE =
11440- UserTreeIdx.UserTE &&
11441- UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
11442- bool AreAllSameBlock = S.valid();
11443- bool AreScatterAllGEPSameBlock =
11444- (IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() &&
11445- VL.size() > 2 &&
11446- all_of(VL,
11447- [&BB](Value *V) {
11448- auto *I = dyn_cast<GetElementPtrInst>(V);
11449- if (!I)
11450- return doesNotNeedToBeScheduled(V);
11451- if (!BB)
11452- BB = I->getParent();
11453- return BB == I->getParent() && I->getNumOperands() == 2;
11454- }) &&
11455- BB &&
11456- sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE,
11457- SortedIndices));
11487+ bool AreAllSameBlock = !AreScatterAllGEPSameBlock;
1145811488 bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock;
11459- if (!AreAllSameInsts || (!S && allConstant(VL)) || isSplat(VL) ||
11460- (S &&
11461- isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
11489+ if (!AreAllSameInsts || isSplat(VL) ||
11490+ (isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
1146211491 S.getMainOp()) &&
1146311492 !all_of(VL, isVectorLikeInstWithConstOps)) ||
1146411493 NotProfitableForVectorization(VL)) {
11465- if (!S) {
11466- LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
11467- "C,S,B,O, small shuffle. \n";
11468- dbgs() << "[";
11469- interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
11470- dbgs() << "]\n");
11471- return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11472- /*TryToFindDuplicates=*/true,
11473- /*TrySplitVectorize=*/true);
11474- }
1147511494 LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n";
1147611495 dbgs() << "[";
1147711496 interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
@@ -11480,7 +11499,7 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1148011499 }
1148111500
1148211501 // Don't vectorize ephemeral values.
11483- if (S && !EphValues.empty()) {
11502+ if (!EphValues.empty()) {
1148411503 for (Value *V : VL) {
1148511504 if (EphValues.count(V)) {
1148611505 LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
@@ -11498,7 +11517,7 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1149811517 // Check that none of the instructions in the bundle are already in the tree
1149911518 // and the node may be not profitable for the vectorization as the small
1150011519 // alternate node.
11501- if (S && S .isAltShuffle()) {
11520+ if (S.isAltShuffle()) {
1150211521 auto GetNumVectorizedExtracted = [&]() {
1150311522 APInt Extracted = APInt::getZero(VL.size());
1150411523 APInt Vectorized = APInt::getAllOnes(VL.size());
@@ -11550,33 +11569,6 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1155011569 }
1155111570 }
1155211571
11553- // Special processing for sorted pointers for ScatterVectorize node with
11554- // constant indeces only.
11555- if (!AreAllSameBlock && AreScatterAllGEPSameBlock) {
11556- assert(VL.front()->getType()->isPointerTy() &&
11557- count_if(VL, IsaPred<GetElementPtrInst>) >= 2 &&
11558- "Expected pointers only.");
11559- // Reset S to make it GetElementPtr kind of node.
11560- const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
11561- assert(It != VL.end() && "Expected at least one GEP.");
11562- S = getSameOpcode(*It, *TLI);
11563- }
11564-
11565- // Check that all of the users of the scalars that we want to vectorize are
11566- // schedulable.
11567- Instruction *VL0 = S.getMainOp();
11568- BB = VL0->getParent();
11569-
11570- if (S &&
11571- (BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) ||
11572- !DT->isReachableFromEntry(BB))) {
11573- // Don't go into unreachable blocks. They may contain instructions with
11574- // dependency cycles which confuse the final scheduling.
11575- // Do not vectorize EH and non-returning blocks, not profitable in most
11576- // cases.
11577- LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
11578- return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11579- }
1158011572 return ScalarsVectorizationLegality(S, /*IsLegal=*/true);
1158111573}
1158211574
0 commit comments