Skip to content

Commit 323ed23

Browse files
committed
[SLP] Improve/fix CSE analysis of the blocks/instructions.
Added analysis for invariant extractelement instructions and improved detection of the CSE blocks for generated extractelement instructions. Differential Revision: https://reviews.llvm.org/D135279
1 parent 7850df3 commit 323ed23

File tree

2 files changed

+27
-24
lines changed

2 files changed

+27
-24
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2714,8 +2714,9 @@ class BoUpSLP {
27142714
/// Values used only by @llvm.assume calls.
27152715
SmallPtrSet<const Value *, 32> EphValues;
27162716

2717-
/// Holds all of the instructions that we gathered.
2718-
SetVector<Instruction *> GatherShuffleSeq;
2717+
/// Holds all of the instructions that we gathered, as well as the shuffle and
2718+
/// extractelement instructions.
2719+
SetVector<Instruction *> GatherShuffleExtractSeq;
27192720

27202721
/// A list of blocks that we are going to CSE.
27212722
SetVector<BasicBlock *> CSEBlocks;
@@ -7786,7 +7787,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
77867787
auto *InsElt = dyn_cast<InsertElementInst>(Vec);
77877788
if (!InsElt)
77887789
return Vec;
7789-
GatherShuffleSeq.insert(InsElt);
7790+
GatherShuffleExtractSeq.insert(InsElt);
77907791
CSEBlocks.insert(InsElt->getParent());
77917792
// Add to our 'need-to-extract' list.
77927793
if (TreeEntry *Entry = getTreeEntry(V)) {
@@ -7940,7 +7941,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
79407941
V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
79417942
}
79427943
if (auto *I = dyn_cast<Instruction>(V)) {
7943-
GatherShuffleSeq.insert(I);
7944+
GatherShuffleExtractSeq.insert(I);
79447945
CSEBlocks.insert(I->getParent());
79457946
}
79467947
}
@@ -8005,7 +8006,7 @@ Value *BoUpSLP::createBuildVector(ArrayRef<Value *> VL) {
80058006
VL = UniqueValues;
80068007
}
80078008

8008-
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq,
8009+
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq,
80098010
CSEBlocks);
80108011
Value *Vec = gather(VL);
80118012
if (!ReuseShuffleIndicies.empty()) {
@@ -8025,7 +8026,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
80258026

80268027
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
80278028
unsigned VF = E->getVectorFactor();
8028-
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq,
8029+
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq,
80298030
CSEBlocks);
80308031
if (E->State == TreeEntry::NeedToGather) {
80318032
if (E->getMainOp())
@@ -8041,7 +8042,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
80418042
Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue,
80428043
Entries.back()->VectorizedValue, Mask);
80438044
if (auto *I = dyn_cast<Instruction>(Vec)) {
8044-
GatherShuffleSeq.insert(I);
8045+
GatherShuffleExtractSeq.insert(I);
80458046
CSEBlocks.insert(I->getParent());
80468047
}
80478048
} else {
@@ -8173,7 +8174,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
81738174
if (!IsIdentity || NumElts != NumScalars) {
81748175
V = Builder.CreateShuffleVector(V, Mask);
81758176
if (auto *I = dyn_cast<Instruction>(V)) {
8176-
GatherShuffleSeq.insert(I);
8177+
GatherShuffleExtractSeq.insert(I);
81778178
CSEBlocks.insert(I->getParent());
81788179
}
81798180
}
@@ -8191,7 +8192,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
81918192
V = Builder.CreateShuffleVector(
81928193
V, InsertMask, cast<Instruction>(E->Scalars.back())->getName());
81938194
if (auto *I = dyn_cast<Instruction>(V)) {
8194-
GatherShuffleSeq.insert(I);
8195+
GatherShuffleExtractSeq.insert(I);
81958196
CSEBlocks.insert(I->getParent());
81968197
}
81978198
// Create freeze for undef values.
@@ -8209,7 +8210,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
82098210
FirstInsert->getOperand(0), V, InsertMask,
82108211
cast<Instruction>(E->Scalars.back())->getName());
82118212
if (auto *I = dyn_cast<Instruction>(V)) {
8212-
GatherShuffleSeq.insert(I);
8213+
GatherShuffleExtractSeq.insert(I);
82138214
CSEBlocks.insert(I->getParent());
82148215
}
82158216
}
@@ -8587,7 +8588,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
85878588
// instruction, if any.
85888589
for (Value *V : {V0, V1}) {
85898590
if (auto *I = dyn_cast<Instruction>(V)) {
8590-
GatherShuffleSeq.insert(I);
8591+
GatherShuffleExtractSeq.insert(I);
85918592
CSEBlocks.insert(I->getParent());
85928593
}
85938594
}
@@ -8611,7 +8612,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
86118612
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
86128613
if (auto *I = dyn_cast<Instruction>(V)) {
86138614
V = propagateMetadata(I, E->Scalars);
8614-
GatherShuffleSeq.insert(I);
8615+
GatherShuffleExtractSeq.insert(I);
86158616
CSEBlocks.insert(I->getParent());
86168617
}
86178618
V = ShuffleBuilder.finalize(V);
@@ -8711,6 +8712,12 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
87118712
} else {
87128713
Ex = Builder.CreateExtractElement(Vec, Lane);
87138714
}
8715+
// The 'then' branch of the previous 'if' may produce a constant rather than
8716+
// an instruction, since operand 0 might be a constant.
8717+
if (auto *ExI = dyn_cast<Instruction>(Ex)) {
8718+
GatherShuffleExtractSeq.insert(ExI);
8719+
CSEBlocks.insert(ExI->getParent());
8720+
}
87148721
// If necessary, sign-extend or zero-extend ScalarRoot
87158722
// to the larger type.
87168723
if (!MinBWs.count(ScalarRoot))
@@ -8740,7 +8747,6 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
87408747
Builder.SetInsertPoint(&F->getEntryBlock().front());
87418748
}
87428749
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
8743-
CSEBlocks.insert(cast<Instruction>(Scalar)->getParent());
87448750
auto &NewInstLocs = ExternallyUsedValues[NewInst];
87458751
auto It = ExternallyUsedValues.find(Scalar);
87468752
assert(It != ExternallyUsedValues.end() &&
@@ -8832,20 +8838,17 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
88328838
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
88338839
}
88348840
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
8835-
CSEBlocks.insert(PH->getIncomingBlock(i));
88368841
PH->setOperand(i, NewInst);
88378842
}
88388843
}
88398844
} else {
88408845
Builder.SetInsertPoint(cast<Instruction>(User));
88418846
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
8842-
CSEBlocks.insert(cast<Instruction>(User)->getParent());
88438847
User->replaceUsesOfWith(Scalar, NewInst);
88448848
}
88458849
} else {
88468850
Builder.SetInsertPoint(&F->getEntryBlock().front());
88478851
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
8848-
CSEBlocks.insert(&F->getEntryBlock());
88498852
User->replaceUsesOfWith(Scalar, NewInst);
88508853
}
88518854

@@ -8959,7 +8962,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
89598962
Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2,
89608963
CombinedMask1);
89618964
if (auto *I = dyn_cast<Instruction>(Vec)) {
8962-
GatherShuffleSeq.insert(I);
8965+
GatherShuffleExtractSeq.insert(I);
89638966
CSEBlocks.insert(I->getParent());
89648967
}
89658968
return Vec;
@@ -8974,7 +8977,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
89748977
!IsIdentityMask(CombinedMask, cast<FixedVectorType>(Op->getType()))) {
89758978
Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask);
89768979
if (auto *I = dyn_cast<Instruction>(Vec)) {
8977-
GatherShuffleSeq.insert(I);
8980+
GatherShuffleExtractSeq.insert(I);
89788981
CSEBlocks.insert(I->getParent());
89798982
}
89808983
return Vec;
@@ -9114,10 +9117,10 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
91149117
}
91159118

91169119
void BoUpSLP::optimizeGatherSequence() {
9117-
LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleSeq.size()
9120+
LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleExtractSeq.size()
91189121
<< " gather sequences instructions.\n");
91199122
// LICM InsertElementInst sequences.
9120-
for (Instruction *I : GatherShuffleSeq) {
9123+
for (Instruction *I : GatherShuffleExtractSeq) {
91219124
if (isDeleted(I))
91229125
continue;
91239126

@@ -9219,7 +9222,7 @@ void BoUpSLP::optimizeGatherSequence() {
92199222
if (isDeleted(&In))
92209223
continue;
92219224
if (!isa<InsertElementInst, ExtractElementInst, ShuffleVectorInst>(&In) &&
9222-
!GatherShuffleSeq.contains(&In))
9225+
!GatherShuffleExtractSeq.contains(&In))
92239226
continue;
92249227

92259228
// Check if we can replace this instruction with any of the
@@ -9238,7 +9241,7 @@ void BoUpSLP::optimizeGatherSequence() {
92389241
break;
92399242
}
92409243
if (isa<ShuffleVectorInst>(In) && isa<ShuffleVectorInst>(V) &&
9241-
GatherShuffleSeq.contains(V) &&
9244+
GatherShuffleExtractSeq.contains(V) &&
92429245
IsIdenticalOrLessDefined(V, &In, NewMask) &&
92439246
DT->dominates(In.getParent(), V->getParent())) {
92449247
In.moveAfter(V);
@@ -9259,7 +9262,7 @@ void BoUpSLP::optimizeGatherSequence() {
92599262
}
92609263
}
92619264
CSEBlocks.clear();
9262-
GatherShuffleSeq.clear();
9265+
GatherShuffleExtractSeq.clear();
92639266
}
92649267

92659268
BoUpSLP::ScheduleData *

llvm/test/Transforms/SLPVectorizer/X86/cse_extractelement.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ define void @test(i32* %ptr, i32* noalias %s) {
1111
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
1212
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[S:%.*]] to <4 x i32>*
1313
; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
14+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
1415
; CHECK-NEXT: br label [[LOOP1:%.*]]
1516
; CHECK: loop1:
16-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
1717
; CHECK-NEXT: store i32 [[TMP3]], i32* [[S]], align 4
1818
; CHECK-NEXT: br i1 true, label [[LOOP1]], label [[CONT:%.*]]
1919
; CHECK: cont:

0 commit comments

Comments
 (0)