@@ -2714,8 +2714,9 @@ class BoUpSLP {
2714
2714
/// Values used only by @llvm.assume calls.
2715
2715
SmallPtrSet<const Value *, 32> EphValues;
2716
2716
2717
- /// Holds all of the instructions that we gathered.
2718
- SetVector<Instruction *> GatherShuffleSeq;
2717
+ /// Holds all of the instructions that we gathered, as well as the emitted
2718
+ /// shuffle and extractelement instructions.
2719
+ SetVector<Instruction *> GatherShuffleExtractSeq;
2719
2720
2720
2721
/// A list of blocks that we are going to CSE.
2721
2722
SetVector<BasicBlock *> CSEBlocks;
@@ -7786,7 +7787,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
7786
7787
auto *InsElt = dyn_cast<InsertElementInst>(Vec);
7787
7788
if (!InsElt)
7788
7789
return Vec;
7789
- GatherShuffleSeq .insert(InsElt);
7790
+ GatherShuffleExtractSeq .insert(InsElt);
7790
7791
CSEBlocks.insert(InsElt->getParent());
7791
7792
// Add to our 'need-to-extract' list.
7792
7793
if (TreeEntry *Entry = getTreeEntry(V)) {
@@ -7940,7 +7941,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
7940
7941
V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
7941
7942
}
7942
7943
if (auto *I = dyn_cast<Instruction>(V)) {
7943
- GatherShuffleSeq .insert(I);
7944
+ GatherShuffleExtractSeq .insert(I);
7944
7945
CSEBlocks.insert(I->getParent());
7945
7946
}
7946
7947
}
@@ -8005,7 +8006,7 @@ Value *BoUpSLP::createBuildVector(ArrayRef<Value *> VL) {
8005
8006
VL = UniqueValues;
8006
8007
}
8007
8008
8008
- ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq ,
8009
+ ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq ,
8009
8010
CSEBlocks);
8010
8011
Value *Vec = gather(VL);
8011
8012
if (!ReuseShuffleIndicies.empty()) {
@@ -8025,7 +8026,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
8025
8026
8026
8027
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
8027
8028
unsigned VF = E->getVectorFactor();
8028
- ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq ,
8029
+ ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq ,
8029
8030
CSEBlocks);
8030
8031
if (E->State == TreeEntry::NeedToGather) {
8031
8032
if (E->getMainOp())
@@ -8041,7 +8042,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
8041
8042
Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue,
8042
8043
Entries.back()->VectorizedValue, Mask);
8043
8044
if (auto *I = dyn_cast<Instruction>(Vec)) {
8044
- GatherShuffleSeq .insert(I);
8045
+ GatherShuffleExtractSeq .insert(I);
8045
8046
CSEBlocks.insert(I->getParent());
8046
8047
}
8047
8048
} else {
@@ -8173,7 +8174,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
8173
8174
if (!IsIdentity || NumElts != NumScalars) {
8174
8175
V = Builder.CreateShuffleVector(V, Mask);
8175
8176
if (auto *I = dyn_cast<Instruction>(V)) {
8176
- GatherShuffleSeq .insert(I);
8177
+ GatherShuffleExtractSeq .insert(I);
8177
8178
CSEBlocks.insert(I->getParent());
8178
8179
}
8179
8180
}
@@ -8191,7 +8192,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
8191
8192
V = Builder.CreateShuffleVector(
8192
8193
V, InsertMask, cast<Instruction>(E->Scalars.back())->getName());
8193
8194
if (auto *I = dyn_cast<Instruction>(V)) {
8194
- GatherShuffleSeq .insert(I);
8195
+ GatherShuffleExtractSeq .insert(I);
8195
8196
CSEBlocks.insert(I->getParent());
8196
8197
}
8197
8198
// Create freeze for undef values.
@@ -8209,7 +8210,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
8209
8210
FirstInsert->getOperand(0), V, InsertMask,
8210
8211
cast<Instruction>(E->Scalars.back())->getName());
8211
8212
if (auto *I = dyn_cast<Instruction>(V)) {
8212
- GatherShuffleSeq .insert(I);
8213
+ GatherShuffleExtractSeq .insert(I);
8213
8214
CSEBlocks.insert(I->getParent());
8214
8215
}
8215
8216
}
@@ -8587,7 +8588,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
8587
8588
// instruction, if any.
8588
8589
for (Value *V : {V0, V1}) {
8589
8590
if (auto *I = dyn_cast<Instruction>(V)) {
8590
- GatherShuffleSeq .insert(I);
8591
+ GatherShuffleExtractSeq .insert(I);
8591
8592
CSEBlocks.insert(I->getParent());
8592
8593
}
8593
8594
}
@@ -8611,7 +8612,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
8611
8612
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
8612
8613
if (auto *I = dyn_cast<Instruction>(V)) {
8613
8614
V = propagateMetadata(I, E->Scalars);
8614
- GatherShuffleSeq .insert(I);
8615
+ GatherShuffleExtractSeq .insert(I);
8615
8616
CSEBlocks.insert(I->getParent());
8616
8617
}
8617
8618
V = ShuffleBuilder.finalize(V);
@@ -8711,6 +8712,12 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
8711
8712
} else {
8712
8713
Ex = Builder.CreateExtractElement(Vec, Lane);
8713
8714
}
8715
+ // The then-branch of the previous if may produce a constant, since
8716
+ // operand 0 might be a constant, so only track real instructions.
8717
+ if (auto *ExI = dyn_cast<Instruction>(Ex)) {
8718
+ GatherShuffleExtractSeq.insert(ExI);
8719
+ CSEBlocks.insert(ExI->getParent());
8720
+ }
8714
8721
// If necessary, sign-extend or zero-extend ScalarRoot
8715
8722
// to the larger type.
8716
8723
if (!MinBWs.count(ScalarRoot))
@@ -8740,7 +8747,6 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
8740
8747
Builder.SetInsertPoint(&F->getEntryBlock().front());
8741
8748
}
8742
8749
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
8743
- CSEBlocks.insert(cast<Instruction>(Scalar)->getParent());
8744
8750
auto &NewInstLocs = ExternallyUsedValues[NewInst];
8745
8751
auto It = ExternallyUsedValues.find(Scalar);
8746
8752
assert(It != ExternallyUsedValues.end() &&
@@ -8832,20 +8838,17 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
8832
8838
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
8833
8839
}
8834
8840
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
8835
- CSEBlocks.insert(PH->getIncomingBlock(i));
8836
8841
PH->setOperand(i, NewInst);
8837
8842
}
8838
8843
}
8839
8844
} else {
8840
8845
Builder.SetInsertPoint(cast<Instruction>(User));
8841
8846
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
8842
- CSEBlocks.insert(cast<Instruction>(User)->getParent());
8843
8847
User->replaceUsesOfWith(Scalar, NewInst);
8844
8848
}
8845
8849
} else {
8846
8850
Builder.SetInsertPoint(&F->getEntryBlock().front());
8847
8851
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
8848
- CSEBlocks.insert(&F->getEntryBlock());
8849
8852
User->replaceUsesOfWith(Scalar, NewInst);
8850
8853
}
8851
8854
@@ -8959,7 +8962,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
8959
8962
Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2,
8960
8963
CombinedMask1);
8961
8964
if (auto *I = dyn_cast<Instruction>(Vec)) {
8962
- GatherShuffleSeq .insert(I);
8965
+ GatherShuffleExtractSeq .insert(I);
8963
8966
CSEBlocks.insert(I->getParent());
8964
8967
}
8965
8968
return Vec;
@@ -8974,7 +8977,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
8974
8977
!IsIdentityMask(CombinedMask, cast<FixedVectorType>(Op->getType()))) {
8975
8978
Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask);
8976
8979
if (auto *I = dyn_cast<Instruction>(Vec)) {
8977
- GatherShuffleSeq .insert(I);
8980
+ GatherShuffleExtractSeq .insert(I);
8978
8981
CSEBlocks.insert(I->getParent());
8979
8982
}
8980
8983
return Vec;
@@ -9114,10 +9117,10 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
9114
9117
}
9115
9118
9116
9119
void BoUpSLP::optimizeGatherSequence() {
9117
- LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleSeq .size()
9120
+ LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleExtractSeq .size()
9118
9121
<< " gather sequences instructions.\n");
9119
9122
// LICM InsertElementInst sequences.
9120
- for (Instruction *I : GatherShuffleSeq ) {
9123
+ for (Instruction *I : GatherShuffleExtractSeq ) {
9121
9124
if (isDeleted(I))
9122
9125
continue;
9123
9126
@@ -9219,7 +9222,7 @@ void BoUpSLP::optimizeGatherSequence() {
9219
9222
if (isDeleted(&In))
9220
9223
continue;
9221
9224
if (!isa<InsertElementInst, ExtractElementInst, ShuffleVectorInst>(&In) &&
9222
- !GatherShuffleSeq .contains(&In))
9225
+ !GatherShuffleExtractSeq .contains(&In))
9223
9226
continue;
9224
9227
9225
9228
// Check if we can replace this instruction with any of the
@@ -9238,7 +9241,7 @@ void BoUpSLP::optimizeGatherSequence() {
9238
9241
break;
9239
9242
}
9240
9243
if (isa<ShuffleVectorInst>(In) && isa<ShuffleVectorInst>(V) &&
9241
- GatherShuffleSeq .contains(V) &&
9244
+ GatherShuffleExtractSeq .contains(V) &&
9242
9245
IsIdenticalOrLessDefined(V, &In, NewMask) &&
9243
9246
DT->dominates(In.getParent(), V->getParent())) {
9244
9247
In.moveAfter(V);
@@ -9259,7 +9262,7 @@ void BoUpSLP::optimizeGatherSequence() {
9259
9262
}
9260
9263
}
9261
9264
CSEBlocks.clear();
9262
- GatherShuffleSeq .clear();
9265
+ GatherShuffleExtractSeq .clear();
9263
9266
}
9264
9267
9265
9268
BoUpSLP::ScheduleData *
0 commit comments