Skip to content

Commit 48e75e4

Browse files
alexey-bataev authored and kcloudy0717 committed
[SLP][NFC]Simplify analysis of the scalars, NFC.
Just an attempt to simplify some checks, remove extra calls, and reorder checks to make the code simpler and faster.

Reviewers: RKSimon, hiraditya
Reviewed By: hiraditya
Pull Request: llvm#170382
1 parent ee7c26e commit 48e75e4

File tree

1 file changed

+93
-100
lines changed

1 file changed

+93
-100
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 93 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -11316,44 +11316,91 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1131611316
VL, *this, TryCopyableElementsVectorization,
1131711317
/*WithProfitabilityCheck=*/true, TryCopyableElementsVectorization);
1131811318

11319+
bool AreScatterAllGEPSameBlock = false;
11320+
if (!S) {
11321+
SmallVector<unsigned> SortedIndices;
11322+
BasicBlock *BB = nullptr;
11323+
bool IsScatterVectorizeUserTE =
11324+
UserTreeIdx.UserTE &&
11325+
UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
11326+
AreScatterAllGEPSameBlock =
11327+
(IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() &&
11328+
VL.size() > 2 &&
11329+
all_of(VL,
11330+
[&BB](Value *V) {
11331+
auto *I = dyn_cast<GetElementPtrInst>(V);
11332+
if (!I)
11333+
return doesNotNeedToBeScheduled(V);
11334+
if (!BB)
11335+
BB = I->getParent();
11336+
return BB == I->getParent() && I->getNumOperands() == 2;
11337+
}) &&
11338+
BB &&
11339+
sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL,
11340+
*SE, SortedIndices));
11341+
if (!AreScatterAllGEPSameBlock) {
11342+
LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
11343+
"C,S,B,O, small shuffle. \n";
11344+
dbgs() << "[";
11345+
interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
11346+
dbgs() << "]\n");
11347+
return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11348+
/*TryToFindDuplicates=*/true,
11349+
/*TrySplitVectorize=*/true);
11350+
}
11351+
// Reset S to make it GetElementPtr kind of node.
11352+
const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
11353+
assert(It != VL.end() && "Expected at least one GEP.");
11354+
S = getSameOpcode(*It, *TLI);
11355+
}
11356+
assert(S && "Must be valid.");
11357+
11358+
// Don't handle vectors.
11359+
if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
11360+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
11361+
// Do not try to pack to avoid extra instructions here.
11362+
return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11363+
/*TryToFindDuplicates=*/false);
11364+
}
11365+
11366+
// Check that all of the users of the scalars that we want to vectorize are
11367+
// schedulable.
11368+
BasicBlock *BB = S.getMainOp()->getParent();
11369+
11370+
if (BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) ||
11371+
!DT->isReachableFromEntry(BB)) {
11372+
// Don't go into unreachable blocks. They may contain instructions with
11373+
// dependency cycles which confuse the final scheduling.
11374+
// Do not vectorize EH and non-returning blocks, not profitable in most
11375+
// cases.
11376+
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
11377+
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11378+
}
11379+
1131911380
// Don't go into catchswitch blocks, which can happen with PHIs.
1132011381
// Such blocks can only have PHIs and the catchswitch. There is no
1132111382
// place to insert a shuffle if we need to, so just avoid that issue.
11322-
if (S && isa<CatchSwitchInst>(S.getMainOp()->getParent()->getTerminator())) {
11383+
if (isa<CatchSwitchInst>(BB->getTerminator())) {
1132311384
LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
1132411385
// Do not try to pack to avoid extra instructions here.
1132511386
return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
1132611387
/*TryToFindDuplicates=*/false);
1132711388
}
1132811389

11329-
// Check if this is a duplicate of another entry.
11330-
if (S) {
11331-
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n");
11332-
for (TreeEntry *E : getTreeEntries(S.getMainOp())) {
11333-
if (E->isSame(VL)) {
11334-
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
11335-
<< ".\n");
11336-
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11337-
}
11338-
SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
11339-
if (all_of(VL, [&](Value *V) {
11340-
return isa<PoisonValue>(V) || Values.contains(V) ||
11341-
(S.getOpcode() == Instruction::PHI && isa<PHINode>(V) &&
11342-
LI->getLoopFor(S.getMainOp()->getParent()) &&
11343-
isVectorized(V));
11344-
})) {
11345-
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
11346-
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11347-
}
11348-
}
11390+
// Don't handle scalable vectors
11391+
if (S.getOpcode() == Instruction::ExtractElement &&
11392+
isa<ScalableVectorType>(
11393+
cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
11394+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
11395+
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
1134911396
}
1135011397

1135111398
// Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of
1135211399
// a load), in which case peek through to include it in the tree, without
1135311400
// ballooning over-budget.
1135411401
if (Depth >= RecursionMaxDepth &&
11355-
!(S && !S.isAltShuffle() && VL.size() >= 4 &&
11356-
(match(S.getMainOp(), m_Load(m_Value())) ||
11402+
(S.isAltShuffle() || VL.size() < 4 ||
11403+
!(match(S.getMainOp(), m_Load(m_Value())) ||
1135711404
all_of(VL, [&S](const Value *I) {
1135811405
return match(I,
1135911406
m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) &&
@@ -11363,20 +11410,24 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1136311410
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
1136411411
}
1136511412

11366-
// Don't handle scalable vectors
11367-
if (S && S.getOpcode() == Instruction::ExtractElement &&
11368-
isa<ScalableVectorType>(
11369-
cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
11370-
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
11371-
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11372-
}
11373-
11374-
// Don't handle vectors.
11375-
if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
11376-
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
11377-
// Do not try to pack to avoid extra instructions here.
11378-
return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11379-
/*TryToFindDuplicates=*/false);
11413+
// Check if this is a duplicate of another entry.
11414+
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n");
11415+
for (TreeEntry *E : getTreeEntries(S.getMainOp())) {
11416+
if (E->isSame(VL)) {
11417+
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
11418+
<< ".\n");
11419+
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11420+
}
11421+
SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
11422+
if (all_of(VL, [&](Value *V) {
11423+
return isa<PoisonValue>(V) || Values.contains(V) ||
11424+
(S.getOpcode() == Instruction::PHI && isa<PHINode>(V) &&
11425+
LI->getLoopFor(S.getMainOp()->getParent()) &&
11426+
isVectorized(V));
11427+
})) {
11428+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
11429+
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11430+
}
1138011431
}
1138111432

1138211433
// If all of the operands are identical or constant we have a simple solution.
@@ -11435,44 +11486,13 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1143511486
}
1143611487
return true;
1143711488
};
11438-
SmallVector<unsigned> SortedIndices;
11439-
BasicBlock *BB = nullptr;
11440-
bool IsScatterVectorizeUserTE =
11441-
UserTreeIdx.UserTE &&
11442-
UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
11443-
bool AreAllSameBlock = S.valid();
11444-
bool AreScatterAllGEPSameBlock =
11445-
(IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() &&
11446-
VL.size() > 2 &&
11447-
all_of(VL,
11448-
[&BB](Value *V) {
11449-
auto *I = dyn_cast<GetElementPtrInst>(V);
11450-
if (!I)
11451-
return doesNotNeedToBeScheduled(V);
11452-
if (!BB)
11453-
BB = I->getParent();
11454-
return BB == I->getParent() && I->getNumOperands() == 2;
11455-
}) &&
11456-
BB &&
11457-
sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE,
11458-
SortedIndices));
11489+
bool AreAllSameBlock = !AreScatterAllGEPSameBlock;
1145911490
bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock;
11460-
if (!AreAllSameInsts || (!S && allConstant(VL)) || isSplat(VL) ||
11461-
(S &&
11462-
isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
11491+
if (!AreAllSameInsts || isSplat(VL) ||
11492+
(isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
1146311493
S.getMainOp()) &&
1146411494
!all_of(VL, isVectorLikeInstWithConstOps)) ||
1146511495
NotProfitableForVectorization(VL)) {
11466-
if (!S) {
11467-
LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
11468-
"C,S,B,O, small shuffle. \n";
11469-
dbgs() << "[";
11470-
interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
11471-
dbgs() << "]\n");
11472-
return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
11473-
/*TryToFindDuplicates=*/true,
11474-
/*TrySplitVectorize=*/true);
11475-
}
1147611496
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n";
1147711497
dbgs() << "[";
1147811498
interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
@@ -11481,7 +11501,7 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1148111501
}
1148211502

1148311503
// Don't vectorize ephemeral values.
11484-
if (S && !EphValues.empty()) {
11504+
if (!EphValues.empty()) {
1148511505
for (Value *V : VL) {
1148611506
if (EphValues.count(V)) {
1148711507
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
@@ -11499,7 +11519,7 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1149911519
// Check that none of the instructions in the bundle are already in the tree
1150011520
// and the node may be not profitable for the vectorization as the small
1150111521
// alternate node.
11502-
if (S && S.isAltShuffle()) {
11522+
if (S.isAltShuffle()) {
1150311523
auto GetNumVectorizedExtracted = [&]() {
1150411524
APInt Extracted = APInt::getZero(VL.size());
1150511525
APInt Vectorized = APInt::getAllOnes(VL.size());
@@ -11551,33 +11571,6 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
1155111571
}
1155211572
}
1155311573

11554-
// Special processing for sorted pointers for ScatterVectorize node with
11555-
// constant indeces only.
11556-
if (!AreAllSameBlock && AreScatterAllGEPSameBlock) {
11557-
assert(VL.front()->getType()->isPointerTy() &&
11558-
count_if(VL, IsaPred<GetElementPtrInst>) >= 2 &&
11559-
"Expected pointers only.");
11560-
// Reset S to make it GetElementPtr kind of node.
11561-
const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
11562-
assert(It != VL.end() && "Expected at least one GEP.");
11563-
S = getSameOpcode(*It, *TLI);
11564-
}
11565-
11566-
// Check that all of the users of the scalars that we want to vectorize are
11567-
// schedulable.
11568-
Instruction *VL0 = S.getMainOp();
11569-
BB = VL0->getParent();
11570-
11571-
if (S &&
11572-
(BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) ||
11573-
!DT->isReachableFromEntry(BB))) {
11574-
// Don't go into unreachable blocks. They may contain instructions with
11575-
// dependency cycles which confuse the final scheduling.
11576-
// Do not vectorize EH and non-returning blocks, not profitable in most
11577-
// cases.
11578-
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
11579-
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
11580-
}
1158111574
return ScalarsVectorizationLegality(S, /*IsLegal=*/true);
1158211575
}
1158311576

0 commit comments

Comments
 (0)