@@ -4757,13 +4757,12 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
                  });
        });
     const unsigned AbsoluteDiff = std::abs(*Diff);
-    if (IsPossibleStrided &&
-        (IsAnyPointerUsedOutGraph ||
-         ((Sz > MinProfitableStridedLoads ||
-           (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
-            has_single_bit(AbsoluteDiff))) &&
-          AbsoluteDiff > Sz) ||
-         *Diff == -(static_cast<int>(Sz) - 1))) {
+    if (IsPossibleStrided && (IsAnyPointerUsedOutGraph ||
+                              ((Sz > MinProfitableStridedLoads ||
+                                (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
+                                 has_single_bit(AbsoluteDiff))) &&
+                               AbsoluteDiff > Sz) ||
+                              *Diff == -(static_cast<int>(Sz) - 1))) {
       int Stride = *Diff / static_cast<int>(Sz - 1);
       if (*Diff == Stride * static_cast<int>(Sz - 1)) {
         Align Alignment =
@@ -4778,8 +4777,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
             if (Ptr == PtrN)
               Dist = *Diff;
             else if (Ptr != Ptr0)
-              Dist =
-                  *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
+              Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
             // If the strides are not the same or repeated, we can't
             // vectorize.
             if (((Dist / Stride) * Stride) != Dist ||
@@ -4822,14 +4820,14 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
       if (VectorizedCnt == VL.size() / VF) {
         // Compare masked gather cost and loads + insersubvector costs.
         TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-        auto [ScalarGEPCost, VectorGEPCost] = getGEPCosts(
-            TTI, PointerOps, PointerOps.front(), Instruction::GetElementPtr,
-            CostKind, ScalarTy, VecTy);
+        auto [ScalarGEPCost, VectorGEPCost] =
+            getGEPCosts(TTI, PointerOps, PointerOps.front(),
+                        Instruction::GetElementPtr, CostKind, ScalarTy, VecTy);
         InstructionCost MaskedGatherCost =
-            TTI.getGatherScatterOpCost(
-                Instruction::Load, VecTy,
-                cast<LoadInst>(VL0)->getPointerOperand(),
-                /*VariableMask=*/false, CommonAlignment, CostKind) +
+            TTI.getGatherScatterOpCost(Instruction::Load, VecTy,
+                                       cast<LoadInst>(VL0)->getPointerOperand(),
+                                       /*VariableMask=*/false, CommonAlignment,
+                                       CostKind) +
             VectorGEPCost - ScalarGEPCost;
         InstructionCost VecLdCost = 0;
         auto *SubVecTy = getWidenedType(ScalarTy, VF);
@@ -4853,23 +4851,23 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
                 getGEPCosts(TTI, ArrayRef(PointerOps).slice(I * VF, VF),
                             LI0->getPointerOperand(), Instruction::Load,
                             CostKind, ScalarTy, SubVecTy);
-            VecLdCost +=
-                TTI.getStridedMemoryOpCost(
-                    Instruction::Load, SubVecTy, LI0->getPointerOperand(),
-                    /*VariableMask=*/false, CommonAlignment, CostKind) +
-                VectorGEPCost - ScalarGEPCost;
+            VecLdCost += TTI.getStridedMemoryOpCost(Instruction::Load, SubVecTy,
+                                                    LI0->getPointerOperand(),
+                                                    /*VariableMask=*/false,
+                                                    CommonAlignment, CostKind) +
+                         VectorGEPCost - ScalarGEPCost;
             break;
           }
           case LoadsState::ScatterVectorize: {
             auto [ScalarGEPCost, VectorGEPCost] = getGEPCosts(
                 TTI, ArrayRef(PointerOps).slice(I * VF, VF),
-                LI0->getPointerOperand(), Instruction::GetElementPtr,
-                CostKind, ScalarTy, SubVecTy);
-            VecLdCost +=
-                TTI.getGatherScatterOpCost(
-                    Instruction::Load, SubVecTy, LI0->getPointerOperand(),
-                    /*VariableMask=*/false, CommonAlignment, CostKind) +
-                VectorGEPCost - ScalarGEPCost;
+                LI0->getPointerOperand(), Instruction::GetElementPtr, CostKind,
+                ScalarTy, SubVecTy);
+            VecLdCost += TTI.getGatherScatterOpCost(Instruction::Load, SubVecTy,
+                                                    LI0->getPointerOperand(),
+                                                    /*VariableMask=*/false,
+                                                    CommonAlignment, CostKind) +
+                         VectorGEPCost - ScalarGEPCost;
             break;
           }
           case LoadsState::Gather:
@@ -4880,8 +4878,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
           for (int Idx : seq<int>(0, VL.size()))
             ShuffleMask[Idx] = Idx / VF == I ? VL.size() + Idx % VF : Idx;
           VecLdCost +=
-              ::getShuffleCost(TTI, TTI::SK_InsertSubvector, VecTy,
-                               ShuffleMask, CostKind, I * VF, SubVecTy);
+              ::getShuffleCost(TTI, TTI::SK_InsertSubvector, VecTy, ShuffleMask,
+                               CostKind, I * VF, SubVecTy);
         }
         // If masked gather cost is higher - better to vectorize, so
         // consider it as a gather node. It will be better estimated
@@ -4897,10 +4895,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
   // increases the cost.
   Loop *L = LI->getLoopFor(cast<LoadInst>(VL0)->getParent());
   bool ProfitableGatherPointers =
-      L && Sz > 2 &&
-      static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {
-        return L->isLoopInvariant(V);
-      })) <= Sz / 2;
+      L && Sz > 2 && static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {
+        return L->isLoopInvariant(V);
+      })) <= Sz / 2;
   if (ProfitableGatherPointers || all_of(PointerOps, [IsSorted](Value *P) {
         auto *GEP = dyn_cast<GetElementPtrInst>(P);
         return (IsSorted && !GEP && doesNotNeedToBeScheduled(P)) ||