@@ -1134,6 +1134,7 @@ class BoUpSLP {
11341134 MustGather.clear();
11351135 EntryToLastInstruction.clear();
11361136 ExternalUses.clear();
1137+ ExternalUsesAsGEPs.clear();
11371138 for (auto &Iter : BlocksSchedules) {
11381139 BlockScheduling *BS = Iter.second.get();
11391140 BS->clear();
@@ -3154,6 +3155,10 @@ class BoUpSLP {
31543155 /// after vectorization.
31553156 UserList ExternalUses;
31563157
3158+ /// A list of GEPs which can be replaced by scalar GEPs instead of
3159+ /// extractelement instructions.
3160+ SmallPtrSet<Value *, 4> ExternalUsesAsGEPs;
3161+
31573162 /// Values used only by @llvm.assume calls.
31583163 SmallPtrSet<const Value *, 32> EphValues;
31593164
@@ -5541,6 +5546,7 @@ void BoUpSLP::buildExternalUses(
55415546 << FoundLane << " from " << *Scalar << ".\n");
55425547 ScalarToExtUses.try_emplace(Scalar, ExternalUses.size());
55435548 ExternalUses.emplace_back(Scalar, nullptr, FoundLane);
5549+ continue;
55445550 }
55455551 for (User *U : Scalar->users()) {
55465552 LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
@@ -9925,6 +9931,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
99259931 SmallVector<APInt> DemandedElts;
99269932 SmallDenseSet<Value *, 4> UsedInserts;
99279933 DenseSet<std::pair<const TreeEntry *, Type *>> VectorCasts;
9934+ std::optional<DenseMap<Value *, unsigned>> ValueToExtUses;
99289935 for (ExternalUser &EU : ExternalUses) {
99299936 // We only add extract cost once for the same scalar.
99309937 if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
@@ -10033,12 +10040,40 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1003310040 }
1003410041 }
1003510042 }
10043+ // Leave the GEPs as is, they are free in most cases and better to keep them
10044+ // as GEPs.
10045+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
10046+ if (auto *GEP = dyn_cast<GetElementPtrInst>(EU.Scalar)) {
10047+ if (!ValueToExtUses) {
10048+ ValueToExtUses.emplace();
10049+ for_each(enumerate(ExternalUses), [&](const auto &P) {
10050+ ValueToExtUses->try_emplace(P.value().Scalar, P.index());
10051+ });
10052+ }
10053+ // Can use original GEP, if no operands vectorized or they are marked as
10054+ // externally used already.
10055+ bool CanBeUsedAsGEP = all_of(GEP->operands(), [&](Value *V) {
10056+ if (!getTreeEntry(V))
10057+ return true;
10058+ auto It = ValueToExtUses->find(V);
10059+ if (It != ValueToExtUses->end()) {
10060+ // Replace all uses to avoid compiler crash.
10061+ ExternalUses[It->second].User = nullptr;
10062+ return true;
10063+ }
10064+ return false;
10065+ });
10066+ if (CanBeUsedAsGEP) {
10067+ ExtractCost += TTI->getInstructionCost(GEP, CostKind);
10068+ ExternalUsesAsGEPs.insert(EU.Scalar);
10069+ continue;
10070+ }
10071+ }
1003610072
1003710073 // If we plan to rewrite the tree in a smaller type, we will need to sign
1003810074 // extend the extracted value back to the original type. Here, we account
1003910075 // for the extract and the added cost of the sign extend if needed.
1004010076 auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
10041- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1004210077 auto It = MinBWs.find(getTreeEntry(EU.Scalar));
1004310078 if (It != MinBWs.end()) {
1004410079 auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
@@ -13161,6 +13196,8 @@ Value *BoUpSLP::vectorizeTree(
1316113196 if (Scalar->getType() != Vec->getType()) {
1316213197 Value *Ex = nullptr;
1316313198 Value *ExV = nullptr;
13199+ auto *GEP = dyn_cast<GetElementPtrInst>(Scalar);
13200+ bool ReplaceGEP = GEP && ExternalUsesAsGEPs.contains(GEP);
1316413201 auto It = ScalarToEEs.find(Scalar);
1316513202 if (It != ScalarToEEs.end()) {
1316613203 // No need to emit many extracts, just move the only one in the
@@ -13186,6 +13223,15 @@ Value *BoUpSLP::vectorizeTree(
1318613223 if (const TreeEntry *ETE = getTreeEntry(V))
1318713224 V = ETE->VectorizedValue;
1318813225 Ex = Builder.CreateExtractElement(V, ES->getIndexOperand());
13226+ } else if (ReplaceGEP) {
13227+ // Leave the GEPs as is, they are free in most cases and better to
13228+ // keep them as GEPs.
13229+ auto *CloneGEP = GEP->clone();
13230+ CloneGEP->insertBefore(*Builder.GetInsertBlock(),
13231+ Builder.GetInsertPoint());
13232+ if (GEP->hasName())
13233+ CloneGEP->takeName(GEP);
13234+ Ex = CloneGEP;
1318913235 } else {
1319013236 Ex = Builder.CreateExtractElement(Vec, Lane);
1319113237 }
@@ -13224,6 +13270,8 @@ Value *BoUpSLP::vectorizeTree(
1322413270 assert((ExternallyUsedValues.count(Scalar) ||
1322513271 any_of(Scalar->users(),
1322613272 [&](llvm::User *U) {
13273+ if (ExternalUsesAsGEPs.contains(U))
13274+ return true;
1322713275 TreeEntry *UseEntry = getTreeEntry(U);
1322813276 return UseEntry &&
1322913277 (UseEntry->State == TreeEntry::Vectorize ||
0 commit comments