Skip to content

Commit d9cd54a

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.7
1 parent e7748e9 commit d9cd54a

File tree

1 file changed

+92
-100
lines changed

1 file changed

+92
-100
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 92 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -11315,44 +11315,90 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1131511315
VL, *this, TryCopyableElementsVectorization,
1131611316
/*WithProfitabilityCheck=*/true, TryCopyableElementsVectorization);
1131711317

11318+
bool AreScatterAllGEPSameBlock = false;
11319+
if (!S) {
11320+
SmallVector<unsigned> SortedIndices;
11321+
BasicBlock *BB = nullptr;
11322+
bool IsScatterVectorizeUserTE =
11323+
UserTreeIdx.UserTE &&
11324+
UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
11325+
AreScatterAllGEPSameBlock =
11326+
(IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() &&
11327+
VL.size() > 2 &&
11328+
all_of(VL,
11329+
[&BB](Value *V) {
11330+
auto *I = dyn_cast<GetElementPtrInst>(V);
11331+
if (!I)
11332+
return doesNotNeedToBeScheduled(V);
11333+
if (!BB)
11334+
BB = I->getParent();
11335+
return BB == I->getParent() && I->getNumOperands() == 2;
11336+
}) &&
11337+
BB &&
11338+
sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL,
11339+
*SE, SortedIndices));
11340+
if (!AreScatterAllGEPSameBlock) {
11341+
LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
11342+
"C,S,B,O, small shuffle. \n";
11343+
dbgs() << "[";
11344+
interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
11345+
dbgs() << "]\n");
11346+
return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11347+
/*TryToFindDuplicates=*/true,
11348+
/*TrySplitVectorize=*/true);
11349+
}
11350+
// Reset S to make it GetElementPtr kind of node.
11351+
const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
11352+
assert(It != VL.end() && "Expected at least one GEP.");
11353+
S = getSameOpcode(*It, *TLI);
11354+
}
11355+
11356+
// Don't handle vectors.
11357+
if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
11358+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
11359+
// Do not try to pack to avoid extra instructions here.
11360+
return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11361+
/*TryToFindDuplicates=*/false);
11362+
}
11363+
11364+
// Check that all of the users of the scalars that we want to vectorize are
11365+
// schedulable.
11366+
BasicBlock *BB = S.getMainOp()->getParent();
11367+
11368+
if (BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) ||
11369+
!DT->isReachableFromEntry(BB)) {
11370+
// Don't go into unreachable blocks. They may contain instructions with
11371+
// dependency cycles which confuse the final scheduling.
11372+
// Do not vectorize EH and non-returning blocks, not profitable in most
11373+
// cases.
11374+
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
11375+
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11376+
}
11377+
1131811378
// Don't go into catchswitch blocks, which can happen with PHIs.
1131911379
// Such blocks can only have PHIs and the catchswitch. There is no
1132011380
// place to insert a shuffle if we need to, so just avoid that issue.
11321-
if (S && isa<CatchSwitchInst>(S.getMainOp()->getParent()->getTerminator())) {
11381+
if (isa<CatchSwitchInst>(BB->getTerminator())) {
1132211382
LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
1132311383
// Do not try to pack to avoid extra instructions here.
1132411384
return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
1132511385
/*TryToFindDuplicates=*/false);
1132611386
}
1132711387

11328-
// Check if this is a duplicate of another entry.
11329-
if (S) {
11330-
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n");
11331-
for (TreeEntry *E : getTreeEntries(S.getMainOp())) {
11332-
if (E->isSame(VL)) {
11333-
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
11334-
<< ".\n");
11335-
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11336-
}
11337-
SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
11338-
if (all_of(VL, [&](Value *V) {
11339-
return isa<PoisonValue>(V) || Values.contains(V) ||
11340-
(S.getOpcode() == Instruction::PHI && isa<PHINode>(V) &&
11341-
LI->getLoopFor(S.getMainOp()->getParent()) &&
11342-
isVectorized(V));
11343-
})) {
11344-
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
11345-
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11346-
}
11347-
}
11388+
// Don't handle scalable vectors
11389+
if (S.getOpcode() == Instruction::ExtractElement &&
11390+
isa<ScalableVectorType>(
11391+
cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
11392+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
11393+
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
1134811394
}
1134911395

1135011396
// Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of
1135111397
// a load), in which case peek through to include it in the tree, without
1135211398
// ballooning over-budget.
1135311399
if (Depth >= RecursionMaxDepth &&
11354-
!(S && !S.isAltShuffle() && VL.size() >= 4 &&
11355-
(match(S.getMainOp(), m_Load(m_Value())) ||
11400+
(S.isAltShuffle() || VL.size() < 4 ||
11401+
!(match(S.getMainOp(), m_Load(m_Value())) ||
1135611402
all_of(VL, [&S](const Value *I) {
1135711403
return match(I,
1135811404
m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) &&
@@ -11362,20 +11408,24 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1136211408
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
1136311409
}
1136411410

11365-
// Don't handle scalable vectors
11366-
if (S && S.getOpcode() == Instruction::ExtractElement &&
11367-
isa<ScalableVectorType>(
11368-
cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
11369-
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
11370-
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11371-
}
11372-
11373-
// Don't handle vectors.
11374-
if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
11375-
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
11376-
// Do not try to pack to avoid extra instructions here.
11377-
return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11378-
/*TryToFindDuplicates=*/false);
11411+
// Check if this is a duplicate of another entry.
11412+
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n");
11413+
for (TreeEntry *E : getTreeEntries(S.getMainOp())) {
11414+
if (E->isSame(VL)) {
11415+
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
11416+
<< ".\n");
11417+
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11418+
}
11419+
SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
11420+
if (all_of(VL, [&](Value *V) {
11421+
return isa<PoisonValue>(V) || Values.contains(V) ||
11422+
(S.getOpcode() == Instruction::PHI && isa<PHINode>(V) &&
11423+
LI->getLoopFor(S.getMainOp()->getParent()) &&
11424+
isVectorized(V));
11425+
})) {
11426+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
11427+
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11428+
}
1137911429
}
1138011430

1138111431
// If all of the operands are identical or constant we have a simple solution.
@@ -11434,44 +11484,13 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1143411484
}
1143511485
return true;
1143611486
};
11437-
SmallVector<unsigned> SortedIndices;
11438-
BasicBlock *BB = nullptr;
11439-
bool IsScatterVectorizeUserTE =
11440-
UserTreeIdx.UserTE &&
11441-
UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
11442-
bool AreAllSameBlock = S.valid();
11443-
bool AreScatterAllGEPSameBlock =
11444-
(IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() &&
11445-
VL.size() > 2 &&
11446-
all_of(VL,
11447-
[&BB](Value *V) {
11448-
auto *I = dyn_cast<GetElementPtrInst>(V);
11449-
if (!I)
11450-
return doesNotNeedToBeScheduled(V);
11451-
if (!BB)
11452-
BB = I->getParent();
11453-
return BB == I->getParent() && I->getNumOperands() == 2;
11454-
}) &&
11455-
BB &&
11456-
sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE,
11457-
SortedIndices));
11487+
bool AreAllSameBlock = !AreScatterAllGEPSameBlock;
1145811488
bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock;
11459-
if (!AreAllSameInsts || (!S && allConstant(VL)) || isSplat(VL) ||
11460-
(S &&
11461-
isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
11489+
if (!AreAllSameInsts || isSplat(VL) ||
11490+
(isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
1146211491
S.getMainOp()) &&
1146311492
!all_of(VL, isVectorLikeInstWithConstOps)) ||
1146411493
NotProfitableForVectorization(VL)) {
11465-
if (!S) {
11466-
LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
11467-
"C,S,B,O, small shuffle. \n";
11468-
dbgs() << "[";
11469-
interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
11470-
dbgs() << "]\n");
11471-
return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11472-
/*TryToFindDuplicates=*/true,
11473-
/*TrySplitVectorize=*/true);
11474-
}
1147511494
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n";
1147611495
dbgs() << "[";
1147711496
interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
@@ -11480,7 +11499,7 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1148011499
}
1148111500

1148211501
// Don't vectorize ephemeral values.
11483-
if (S && !EphValues.empty()) {
11502+
if (!EphValues.empty()) {
1148411503
for (Value *V : VL) {
1148511504
if (EphValues.count(V)) {
1148611505
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
@@ -11498,7 +11517,7 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1149811517
// Check that none of the instructions in the bundle are already in the tree
1149911518
// and the node may be not profitable for the vectorization as the small
1150011519
// alternate node.
11501-
if (S && S.isAltShuffle()) {
11520+
if (S.isAltShuffle()) {
1150211521
auto GetNumVectorizedExtracted = [&]() {
1150311522
APInt Extracted = APInt::getZero(VL.size());
1150411523
APInt Vectorized = APInt::getAllOnes(VL.size());
@@ -11550,33 +11569,6 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1155011569
}
1155111570
}
1155211571

11553-
// Special processing for sorted pointers for ScatterVectorize node with
11554-
// constant indices only.
11555-
if (!AreAllSameBlock && AreScatterAllGEPSameBlock) {
11556-
assert(VL.front()->getType()->isPointerTy() &&
11557-
count_if(VL, IsaPred<GetElementPtrInst>) >= 2 &&
11558-
"Expected pointers only.");
11559-
// Reset S to make it GetElementPtr kind of node.
11560-
const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
11561-
assert(It != VL.end() && "Expected at least one GEP.");
11562-
S = getSameOpcode(*It, *TLI);
11563-
}
11564-
11565-
// Check that all of the users of the scalars that we want to vectorize are
11566-
// schedulable.
11567-
Instruction *VL0 = S.getMainOp();
11568-
BB = VL0->getParent();
11569-
11570-
if (S &&
11571-
(BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) ||
11572-
!DT->isReachableFromEntry(BB))) {
11573-
// Don't go into unreachable blocks. They may contain instructions with
11574-
// dependency cycles which confuse the final scheduling.
11575-
// Do not vectorize EH and non-returning blocks, not profitable in most
11576-
// cases.
11577-
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
11578-
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11579-
}
1158011572
return ScalarsVectorizationLegality(S, /*IsLegal=*/true);
1158111573
}
1158211574

0 commit comments

Comments
 (0)