@@ -4558,8 +4558,7 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
45584558 DemandedElts.clearAllBits();
45594559 for (unsigned VF = MaxVF; VF >= MinVF; VF /= 2) {
45604560 SmallVector<LoadsState> States;
4561- for (unsigned Cnt = 0, End = VL.size(); Cnt + VF <= End;
4562- Cnt += VF) {
4561+ for (unsigned Cnt = 0, End = VL.size(); Cnt + VF <= End; Cnt += VF) {
45634562 ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
45644563 SmallVector<unsigned> Order;
45654564 SmallVector<Value *> PointerOps;
@@ -6225,7 +6224,7 @@ static bool gatherPossiblyVectorizableLoads(
62256224 Repeated.insert(Cnt);
62266225 }
62276226 if (NumUniques > 0 &&
6228- (Loads.size() == NumUniques || /*GatheredLoads[Idx].size() == 1 ||*/
6227+ (Loads.size() == NumUniques ||
62296228 (Loads.size() - NumUniques >= 2 &&
62306229 Loads.size() - NumUniques >= Loads.size() / 2 &&
62316230 (isPowerOf2_64(Data.size() + NumUniques) ||
@@ -10946,8 +10945,6 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1094610945 SmallDenseSet<Value *, 4> UsedInserts;
1094710946 DenseSet<std::pair<const TreeEntry *, Type *>> VectorCasts;
1094810947 std::optional<DenseMap<Value *, unsigned>> ValueToExtUses;
10949- DenseMap<const TreeEntry *, DenseMap<const TreeEntry *, SmallBitVector>>
10950- VectToGatherIndices;
1095110948 for (ExternalUser &EU : ExternalUses) {
1095210949 // We only add extract cost once for the same scalar.
1095310950 if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
@@ -11086,29 +11083,6 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1108611083 }
1108711084 }
1108811085
11089- // Try to handle the case, where the extracts are consecutive and produce
11090- // subvector extract/insert sequences.
11091- if (auto *SI = dyn_cast<Instruction>(EU.Scalar);
11092- SI && all_of(SI->users(), [this](User *U) {
11093- return ScalarToTreeEntry.contains(U);
11094- })) {
11095- const TreeEntry *TE = getTreeEntry(SI);
11096- assert(TE && "Expected tree entry for scalar.");
11097- auto &Map = VectToGatherIndices.getOrInsertDefault(TE);
11098- auto It = ValueToGatherNodes.find(SI);
11099- if (It != ValueToGatherNodes.end()) {
11100- const SmallPtrSetImpl<const TreeEntry *> &Gathers = It->getSecond();
11101- for (const TreeEntry *BV : Gathers) {
11102- SmallBitVector &Uses =
11103- Map.try_emplace(BV, SmallBitVector(TE->getVectorFactor()))
11104- .first->getSecond();
11105- Uses.set(BV->findLaneForValue(SI));
11106- }
11107- __builtin_trap();
11108- continue;
11109- }
11110- }
11111-
1111211086 // If we plan to rewrite the tree in a smaller type, we will need to sign
1111311087 // extend the extracted value back to the original type. Here, we account
1111411088 // for the extract and the added cost of the sign extend if needed.
0 commit comments