From 1e8ef2e14f700ce33d7a12e955ef66a6f6fb3c80 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 26 Jun 2025 17:55:18 +0100
Subject: [PATCH 1/3] [LV] Add support for cmp reductions with decreasing IVs
 using SMin.

Similar to FindLastIV, add FindFirstIV to support select(icmp(), x, y)
reductions where one of x or y is a decreasing induction. This is done
via a new recurrence kind FindFirstIVSMin, which selects the first
value from the reduction vector using smin instead of the last value
(FindLastIV). It uses signed max as the sentinel value. The
ComputeFindLastIVResult VPInstruction is renamed to ComputeFindIVResult,
as it now computes the final result for both kinds.
---
 llvm/include/llvm/Analysis/IVDescriptors.h    |   42 +-
 llvm/lib/Analysis/IVDescriptors.cpp           |   68 +-
 llvm/lib/Transforms/Utils/LoopUtils.cpp       |   13 +-
 .../Transforms/Vectorize/LoopVectorize.cpp    |   62 +-
 .../Transforms/Vectorize/SLPVectorizer.cpp    |    3 +
 llvm/lib/Transforms/Vectorize/VPlan.h         |    2 +-
 .../Transforms/Vectorize/VPlanAnalysis.cpp    |    2 +-
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |   32 +-
 llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp |    2 +-
 .../LoopVectorize/iv-select-cmp-decreasing.ll | 1060 +++++++++++++++--
 .../vplan-printing-reductions.ll              |    2 +-
 11 files changed, 1140 insertions(+), 148 deletions(-)

diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 463249461483f..310d05b6f04af 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -54,6 +54,9 @@ enum class RecurKind {
   FMulAdd,  ///< Sum of float products with llvm.fmuladd(a * b + sum).
   AnyOf,    ///< AnyOf reduction with select(cmp(),x,y) where one of (x,y) is
             ///< loop invariant, and both x and y are integer type.
+  FindFirstIVSMin, ///< FindFirst reduction with select(icmp(),x,y) where one of
+                   ///< (x,y) is a decreasing loop induction, and both x and y
+                   ///< are integer type, producing a SMin reduction.
   FindLastIVSMax, ///< FindLast reduction with select(cmp(),x,y) where one of
                   ///< (x,y) is increasing loop induction, and both x and y
                   ///< are integer type, producing a SMax reduction.
@@ -165,13 +168,13 @@ class RecurrenceDescriptor {
   /// Returns a struct describing whether the instruction is either a
   ///   Select(ICmp(A, B), X, Y), or
   ///   Select(FCmp(A, B), X, Y)
-  /// where one of (X, Y) is an increasing loop induction variable, and the
-  /// other is a PHI value.
+  /// where one of (X, Y) is an increasing (FindLast) or decreasing (FindFirst)
+  /// loop induction variable, and the other is a PHI value.
   // TODO: Support non-monotonic variable. FindLast does not need be restricted
   // to increasing loop induction variables.
-  LLVM_ABI static InstDesc isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
-                                               Instruction *I,
-                                               ScalarEvolution &SE);
+  LLVM_ABI static InstDesc isFindIVPattern(RecurKind Kind, Loop *TheLoop,
+                                           PHINode *OrigPhi, Instruction *I,
+                                           ScalarEvolution &SE);
 
   /// Returns a struct describing if the instruction is a
   /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
@@ -259,6 +262,12 @@ class RecurrenceDescriptor {
     return Kind == RecurKind::AnyOf;
   }
 
+  /// Returns true if the recurrence kind is of the form
+  ///   select(cmp(),x,y) where one of (x,y) is decreasing loop induction.
+  static bool isFindFirstIVRecurrenceKind(RecurKind Kind) {
+    return Kind == RecurKind::FindFirstIVSMin;
+  }
+
   /// Returns true if the recurrence kind is of the form
   ///   select(cmp(),x,y) where one of (x,y) is increasing loop induction.
   static bool isFindLastIVRecurrenceKind(RecurKind Kind) {
@@ -269,22 +278,35 @@ class RecurrenceDescriptor {
   /// Returns true if recurrece kind is a signed redux kind.
   static bool isSignedRecurrenceKind(RecurKind Kind) {
     return Kind == RecurKind::SMax || Kind == RecurKind::SMin ||
+           Kind == RecurKind::FindFirstIVSMin ||
            Kind == RecurKind::FindLastIVSMax;
   }
 
+  /// Returns true if the recurrence kind is of the form
+  ///   select(cmp(),x,y) where one of (x,y) is an increasing or decreasing loop
+  ///   induction.
+  static bool isFindIVRecurrenceKind(RecurKind Kind) {
+    return isFindFirstIVRecurrenceKind(Kind) ||
+           isFindLastIVRecurrenceKind(Kind);
+  }
+
   /// Returns the type of the recurrence. This type can be narrower than the
   /// actual type of the Phi if the recurrence has been type-promoted.
   Type *getRecurrenceType() const { return RecurrenceType; }
 
-  /// Returns the sentinel value for FindLastIV recurrences to replace the start
-  /// value.
+  /// Returns the sentinel value for FindFirstIV and FindLastIV recurrences to
+  /// replace the start value.
   Value *getSentinelValue() const {
-    assert(isFindLastIVRecurrenceKind(Kind) && "Unexpected recurrence kind");
     Type *Ty = StartValue->getType();
     unsigned BW = Ty->getIntegerBitWidth();
+    if (isFindLastIVRecurrenceKind(Kind)) {
+      return ConstantInt::get(Ty, isSignedRecurrenceKind(Kind)
+                                      ? APInt::getSignedMinValue(BW)
+                                      : APInt::getMinValue(BW));
+    }
     return ConstantInt::get(Ty, isSignedRecurrenceKind(Kind)
-                                    ? APInt::getSignedMinValue(BW)
-                                    : APInt::getMinValue(BW));
+                                    ? APInt::getSignedMaxValue(BW)
+                                    : APInt::getMaxValue(BW));
   }
 
   /// Returns a reference to the instructions used for type-promoting the
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index c8e97e5ec0e58..b49258e3b54ef 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -50,6 +50,7 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
   case RecurKind::UMax:
   case RecurKind::UMin:
   case RecurKind::AnyOf:
+  case RecurKind::FindFirstIVSMin:
   case RecurKind::FindLastIVSMax:
   case RecurKind::FindLastIVUMax:
     return true;
@@ -684,8 +685,9 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
 // value of the data type or a non-constant value by using mask and multiple
 // reduction operations.
 RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
-                                          Instruction *I, ScalarEvolution &SE) {
+RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
+                                      PHINode *OrigPhi, Instruction *I,
+                                      ScalarEvolution &SE) {
   // TODO: Support the vectorization of FindLastIV when the reduction phi is
   // used by more than one select instruction. This vectorization is only
   // performed when the SCEV of each increasing induction variable used by the
@@ -713,25 +715,49 @@ RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
       return std::nullopt;
 
     const SCEV *Step = AR->getStepRecurrence(SE);
-    if (!SE.isKnownPositive(Step))
+
+    if (isFindFirstIVRecurrenceKind(Kind)) {
+      if (!SE.isKnownNegative(Step))
+        return std::nullopt;
+    } else if (!SE.isKnownPositive(Step))
       return std::nullopt;
 
     // Keep the minimum value of the recurrence type as the sentinel value.
     // The maximum acceptable range for the increasing induction variable,
     // called the valid range, will be defined as
+
+    const ConstantRange IVRange = SE.getSignedRange(AR);
+    // Keep the minimum (FindLast) or maximum (FindFirst) value of the
+    // recurrence type as the sentinel value. The maximum acceptable range for
+    // the induction variable, called the valid range, will be defined as
     //   [<sentinel value> + 1, <sentinel value>)
-    // where <sentinel value> is [Signed|Unsigned]Min(<recurrence type>)
+    // where <sentinel value> is [Signed|Unsigned]Min(<recurrence type>) for
+    // FindLastIV or [Signed|Unsigned]Max(<recurrence type>) for FindFirstIV.
     // TODO: This range restriction can be lifted by adding an additional
     // virtual OR reduction.
     auto CheckRange = [&](bool IsSigned) {
       const ConstantRange IVRange =
           IsSigned ? SE.getSignedRange(AR) : SE.getUnsignedRange(AR);
       unsigned NumBits = Ty->getIntegerBitWidth();
-      const APInt Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits)
-                                      : APInt::getMinValue(NumBits);
-      const ConstantRange ValidRange =
-          ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
-      LLVM_DEBUG(dbgs() << "LV: FindLastIV valid range is " << ValidRange
+      ConstantRange ValidRange = ConstantRange::getEmpty(NumBits);
+      if (isFindLastIVRecurrenceKind(Kind)) {
+        APInt Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits)
+                                  : APInt::getMinValue(NumBits);
+        ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
+      } else {
+        assert(isFindFirstIVRecurrenceKind(Kind) &&
+               "Kind must either be a FindLastIV or FindFirstIV");
+        assert(IsSigned &&
+               "only FindFirstIV with SMax is supported at the moment");
+        ValidRange =
+            ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits),
+                                       APInt::getSignedMaxValue(NumBits) - 1);
+      }
+
+      LLVM_DEBUG(dbgs() << "LV: "
+                        << (isFindLastIVRecurrenceKind(Kind) ? "FindLastIV"
+                                                             : "FindFirstIV")
+                        << " valid range is " << ValidRange
                         << ", and the range of " << *AR << " is " << IVRange
                         << "\n");
 
@@ -739,10 +765,18 @@ RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
       // its range is fully contained within the valid range.
       return ValidRange.contains(IVRange);
     };
+    if (isFindLastIVRecurrenceKind(Kind)) {
+      if (CheckRange(true))
+        return RecurKind::FindLastIVSMax;
+      if (CheckRange(false))
+        return RecurKind::FindLastIVUMax;
+      return std::nullopt;
+    }
+    assert(isFindFirstIVRecurrenceKind(Kind) &&
+           "Kind must either be a FindLastIV or FindFirstIV");
+
     if (CheckRange(true))
-      return RecurKind::FindLastIVSMax;
-    if (CheckRange(false))
-      return RecurKind::FindLastIVUMax;
+      return RecurKind::FindFirstIVSMin;
     return std::nullopt;
   };
 
@@ -888,8 +922,8 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
     if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||
         Kind == RecurKind::Add || Kind == RecurKind::Mul)
       return isConditionalRdxPattern(I);
-    if (isFindLastIVRecurrenceKind(Kind) && SE)
-      return isFindLastIVPattern(L, OrigPhi, I, *SE);
+    if (isFindIVRecurrenceKind(Kind) && SE)
+      return isFindIVPattern(Kind, L, OrigPhi, I, *SE);
     [[fallthrough]];
   case Instruction::FCmp:
   case Instruction::ICmp:
@@ -1003,6 +1037,11 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
     LLVM_DEBUG(dbgs() << "Found a FindLastIV reduction PHI." << *Phi << "\n");
     return true;
   }
+  if (AddReductionVar(Phi, RecurKind::FindFirstIVSMin, TheLoop, FMF, RedDes, DB,
+                      AC, DT, SE)) {
+    LLVM_DEBUG(dbgs() << "Found a FindFirstIV reduction PHI." << *Phi << "\n");
+    return true;
+  }
   if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
                       SE)) {
     LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
@@ -1150,6 +1189,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
   case RecurKind::Mul:
     return Instruction::Mul;
   case RecurKind::AnyOf:
+  case RecurKind::FindFirstIVSMin:
   case RecurKind::FindLastIVSMax:
   case RecurKind::FindLastIVUMax:
   case RecurKind::Or:
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index c50bb4a497c6a..ac27ccf409d6b 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1227,9 +1227,12 @@ Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
                                        RecurKind RdxKind, Value *Start,
                                        Value *Sentinel) {
   bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RdxKind);
-  Value *MaxRdx = Src->getType()->isVectorTy()
-                      ? Builder.CreateIntMaxReduce(Src, IsSigned)
-                      : Src;
+  Value *MaxRdx =
+      Src->getType()->isVectorTy()
+          ? (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RdxKind)
+                 ? Builder.CreateIntMaxReduce(Src, IsSigned)
+                 : Builder.CreateIntMinReduce(Src, IsSigned))
+          : Src;
   // Correct the final reduction result back to the start value if the maximum
   // reduction is sentinel value.
   Value *Cmp =
@@ -1324,8 +1327,8 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
 Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
                                    RecurKind Kind, Value *Mask, Value *EVL) {
   assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
-         !RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) &&
-         "AnyOf or FindLastIV reductions are not supported.");
+         !RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
+         "AnyOf, FindFirstIV and FindLastIV reductions are not supported.");
   Intrinsic::ID Id = getReductionIntrinsicID(Kind);
   auto VPID = VPIntrinsic::getForIntrinsic(Id);
   assert(VPReductionIntrinsic::isVPReduction(VPID) &&
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index aa16083829625..95479373b4393 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4922,7 +4922,7 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
           const RecurrenceDescriptor &RdxDesc = Reduction.second;
           RecurKind RK = RdxDesc.getRecurrenceKind();
           return RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
-                 RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK);
+                 RecurrenceDescriptor::isFindIVRecurrenceKind(RK);
         });
     if (HasSelectCmpReductions) {
       LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n");
@@ -7240,8 +7240,8 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
 
 static Value *getStartValueFromReductionResult(VPInstruction *RdxResult) {
   using namespace VPlanPatternMatch;
-  assert(RdxResult->getOpcode() == VPInstruction::ComputeFindLastIVResult &&
-         "RdxResult must be ComputeFindLastIVResult");
+  assert(RdxResult->getOpcode() == VPInstruction::ComputeFindIVResult &&
+         "RdxResult must be ComputeFindIVResult");
   VPValue *StartVPV = RdxResult->getOperand(1);
   match(StartVPV, m_Freeze(m_VPValue(StartVPV)));
   return StartVPV->getLiveInIRValue();
@@ -7259,7 +7259,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
   if (!EpiRedResult ||
       (EpiRedResult->getOpcode() != VPInstruction::ComputeAnyOfResult &&
        EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
-       EpiRedResult->getOpcode() != VPInstruction::ComputeFindLastIVResult))
+       EpiRedResult->getOpcode() != VPInstruction::ComputeFindIVResult))
     return;
 
   auto *EpiRedHeaderPhi =
@@ -7285,7 +7285,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
            "AnyOf expected to start by comparing main resume value to original "
            "start value");
     MainResumeValue = Cmp->getOperand(0);
-  } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
+  } else if (RecurrenceDescriptor::isFindIVRecurrenceKind(
                  RdxDesc.getRecurrenceKind())) {
     Value *StartV = getStartValueFromReductionResult(EpiRedResult);
     Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue();
@@ -9041,8 +9041,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     RecurKind Kind = RdxDesc.getRecurrenceKind();
     assert(
         !RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
-        !RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) &&
-        "AnyOf and FindLast reductions are not allowed for in-loop reductions");
+        !RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
+        "AnyOf and FindIV reductions are not allowed for in-loop reductions");
 
     // Collect the chain of "link" recipes for the reduction starting at PhiR.
     SetVector<VPSingleDefRecipe *> Worklist;
@@ -9200,7 +9200,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
                 cast<VPInstruction>(&U)->getOpcode() ==
                     VPInstruction::ComputeReductionResult ||
                 cast<VPInstruction>(&U)->getOpcode() ==
-                    VPInstruction::ComputeFindLastIVResult);
+                    VPInstruction::ComputeFindIVResult);
       });
       if (CM.usePredicatedReductionSelect())
         PhiR->setOperand(1, NewExitingVPV);
@@ -9244,12 +9244,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     VPInstruction *FinalReductionResult;
     VPBuilder::InsertPointGuard Guard(Builder);
     Builder.setInsertPoint(MiddleVPBB, IP);
-    if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
+    if (RecurrenceDescriptor::isFindIVRecurrenceKind(
             RdxDesc.getRecurrenceKind())) {
       VPValue *Start = PhiR->getStartValue();
       VPValue *Sentinel = Plan->getOrAddLiveIn(RdxDesc.getSentinelValue());
       FinalReductionResult =
-          Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult,
+          Builder.createNaryOp(VPInstruction::ComputeFindIVResult,
                                {PhiR, Start, Sentinel, NewExitingVPV}, ExitDL);
     } else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
                    RdxDesc.getRecurrenceKind())) {
@@ -9312,16 +9312,16 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       continue;
     }
 
-    if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
+    if (RecurrenceDescriptor::isFindIVRecurrenceKind(
             RdxDesc.getRecurrenceKind())) {
-      // Adjust the start value for FindLastIV recurrences to use the sentinel
-      // value after generating the ResumePhi recipe, which uses the original
-      // start value.
+      // Adjust the start value for FindFirstIV/FindLastIV recurrences to use
+      // the sentinel value after generating the ResumePhi recipe, which uses
+      // the original start value.
       PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue()));
     }
     RecurKind RK = RdxDesc.getRecurrenceKind();
     if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) &&
-         !RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
+         !RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
          !RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) {
       VPBuilder PHBuilder(Plan->getVectorPreheader());
       VPValue *Iden = Plan->getOrAddLiveIn(
@@ -9704,18 +9704,18 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
   VPlanTransforms::runPass(VPlanTransforms::removeDeadRecipes, MainPlan);
 
   using namespace VPlanPatternMatch;
-  // When vectorizing the epilogue, FindLastIV reductions can introduce multiple
-  // uses of undef/poison. If the reduction start value may be undef or poison
-  // it needs to be frozen and the frozen start has to be used when computing
-  // the reduction result. We also need to use the frozen value in the resume
-  // phi generated by the main vector loop, as this is also used to compute the
-  // reduction result after the epilogue vector loop.
+  // When vectorizing the epilogue, FindFirstIV & FindLastIV reductions can
+  // introduce multiple uses of undef/poison. If the reduction start value may
+  // be undef or poison it needs to be frozen and the frozen start has to be
+  // used when computing the reduction result. We also need to use the frozen
+  // value in the resume phi generated by the main vector loop, as this is also
+  // used to compute the reduction result after the epilogue vector loop.
   auto AddFreezeForFindLastIVReductions = [](VPlan &Plan,
                                              bool UpdateResumePhis) {
     VPBuilder Builder(Plan.getEntry());
     for (VPRecipeBase &R : *Plan.getMiddleBlock()) {
       auto *VPI = dyn_cast<VPInstruction>(&R);
-      if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindLastIVResult)
+      if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindIVResult)
         continue;
       VPValue *OrigStart = VPI->getOperand(1);
       if (isGuaranteedNotToBeUndefOrPoison(OrigStart->getLiveInIRValue()))
@@ -9810,7 +9810,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
             return VPI &&
                    (VPI->getOpcode() == VPInstruction::ComputeAnyOfResult ||
                     VPI->getOpcode() == VPInstruction::ComputeReductionResult ||
-                    VPI->getOpcode() == VPInstruction::ComputeFindLastIVResult);
+                    VPI->getOpcode() == VPInstruction::ComputeFindIVResult);
           }));
       ResumeV = cast<PHINode>(ReductionPhi->getUnderlyingInstr())
                     ->getIncomingValueForBlock(L->getLoopPreheader());
@@ -9828,20 +9828,20 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
         BasicBlock *PBB = cast<Instruction>(ResumeV)->getParent();
         IRBuilder<> Builder(PBB, PBB->getFirstNonPHIIt());
         ResumeV = Builder.CreateICmpNE(ResumeV, StartV);
-      } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
+      } else if (RecurrenceDescriptor::isFindIVRecurrenceKind(RK)) {
         Value *StartV = getStartValueFromReductionResult(RdxResult);
         assert(RdxDesc.getRecurrenceStartValue() == StartV &&
-               "start value from ComputeFindLastIVResult must match");
+               "start value from ComputeFinIVResult must match");
 
         ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
             EPI.MainLoopIterationCountCheck);
 
-        // VPReductionPHIRecipe for FindLastIV reductions requires an adjustment
-        // to the resume value. The resume value is adjusted to the sentinel
-        // value when the final value from the main vector loop equals the start
-        // value. This ensures correctness when the start value might not be
-        // less than the minimum value of a monotonically increasing induction
-        // variable.
+        // VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions requires
+        // an adjustment to the resume value. The resume value is adjusted to
+        // the sentinel value when the final value from the main vector loop
+        // equals the start value. This ensures correctness when the start value
+        // might not be less than the minimum value of an increasing induction
+        // (FindLastIV) or greater than the maximum of a decreasing one.
         BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
         IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
         Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 27a7538ecd939..0941bf61953f1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -23180,6 +23180,7 @@ class HorizontalReduction {
         case RecurKind::FMul:
         case RecurKind::FMulAdd:
         case RecurKind::AnyOf:
+        case RecurKind::FindFirstIVSMin:
         case RecurKind::FindLastIVSMax:
         case RecurKind::FindLastIVUMax:
         case RecurKind::FMaximumNum:
@@ -23315,6 +23316,7 @@ class HorizontalReduction {
     case RecurKind::FMul:
     case RecurKind::FMulAdd:
     case RecurKind::AnyOf:
+    case RecurKind::FindFirstIVSMin:
     case RecurKind::FindLastIVSMax:
     case RecurKind::FindLastIVUMax:
     case RecurKind::FMaximumNum:
@@ -23415,6 +23417,7 @@ class HorizontalReduction {
     case RecurKind::FMul:
     case RecurKind::FMulAdd:
     case RecurKind::AnyOf:
+    case RecurKind::FindFirstIVSMin:
     case RecurKind::FindLastIVSMax:
     case RecurKind::FindLastIVUMax:
     case RecurKind::FMaximumNum:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 32e788be76cb7..61b5ccd85bc6e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -946,7 +946,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
     /// Compute the final result of a AnyOf reduction with select(cmp(),x,y),
     /// where one of (x,y) is loop invariant, and both x and y are integer type.
     ComputeAnyOfResult,
-    ComputeFindLastIVResult,
+    ComputeFindIVResult,
     ComputeReductionResult,
     // Extracts the last lane from its operand if it is a vector, or the last
     // part if scalar. In the latter case, the recipe will be removed during
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index da4a52203db3f..8b48dda5bcf71 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -91,7 +91,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
            "different types inferred for different operands");
     return IntegerType::get(Ctx, 1);
   case VPInstruction::ComputeAnyOfResult:
-  case VPInstruction::ComputeFindLastIVResult:
+  case VPInstruction::ComputeFindIVResult:
   case VPInstruction::ComputeReductionResult: {
     auto *PhiR = cast<VPReductionPHIRecipe>(R->getOperand(0));
     auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 7b302407519e7..73d82319e13ec 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -460,7 +460,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
   case VPInstruction::ComputeAnyOfResult:
   case VPInstruction::ReductionStartVector:
     return 3;
-  case VPInstruction::ComputeFindLastIVResult:
+  case VPInstruction::ComputeFindIVResult:
     return 4;
   case Instruction::Call:
   case Instruction::GetElementPtr:
@@ -725,14 +725,14 @@ Value *VPInstruction::generate(VPTransformState &State) {
     return createAnyOfReduction(Builder, ReducedPartRdx,
                                 State.get(getOperand(1), VPLane(0)), OrigPhi);
   }
-  case VPInstruction::ComputeFindLastIVResult: {
+  case VPInstruction::ComputeFindIVResult: {
     // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
     // and will be removed by breaking up the recipe further.
     auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
     // Get its reduction variable descriptor.
     const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
     RecurKind RK = RdxDesc.getRecurrenceKind();
-    assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
+    assert(RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
            "Unexpected reduction kind");
     assert(!PhiR->isInLoop() &&
            "In-loop FindLastIV reduction is not supported yet");
@@ -741,9 +741,17 @@ Value *VPInstruction::generate(VPTransformState &State) {
     // sentinel value, followed by one operand for each part of the reduction.
     unsigned UF = getNumOperands() - 3;
     Value *ReducedPartRdx = State.get(getOperand(3));
-    RecurKind MinMaxKind = RecurrenceDescriptor::isSignedRecurrenceKind(RK)
-                               ? RecurKind::SMax
-                               : RecurKind::UMax;
+    RecurKind MinMaxKind;
+    bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RK);
+    if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
+      MinMaxKind = IsSigned ? RecurKind::SMax : RecurKind::UMax;
+    } else {
+      assert(RecurrenceDescriptor::isFindFirstIVRecurrenceKind(RK) &&
+             "Kind must either be a FindLastIV or FindFirstIV");
+      assert(IsSigned &&
+             "only FindFirstIV with SMax is supported at the moment");
+      MinMaxKind = RecurKind::SMin;
+    }
     for (unsigned Part = 1; Part < UF; ++Part)
       ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
                                       State.get(getOperand(3 + Part)));
@@ -761,8 +769,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
     const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
 
     RecurKind RK = RdxDesc.getRecurrenceKind();
-    assert(!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
-           "should be handled by ComputeFindLastIVResult");
+    assert(!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
+           "should be handled by ComputeFindIVResult");
 
     Type *ResultTy = State.TypeAnalysis.inferScalarType(this);
     // The recipe's operands are the reduction phi, followed by one operand for
@@ -977,7 +985,7 @@ bool VPInstruction::isVectorToScalar() const {
          getOpcode() == Instruction::ExtractElement ||
          getOpcode() == VPInstruction::FirstActiveLane ||
          getOpcode() == VPInstruction::ComputeAnyOfResult ||
-         getOpcode() == VPInstruction::ComputeFindLastIVResult ||
+         getOpcode() == VPInstruction::ComputeFindIVResult ||
          getOpcode() == VPInstruction::ComputeReductionResult ||
          getOpcode() == VPInstruction::AnyOf;
 }
@@ -1079,7 +1087,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
   case VPInstruction::PtrAdd:
     return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
   case VPInstruction::ComputeAnyOfResult:
-  case VPInstruction::ComputeFindLastIVResult:
+  case VPInstruction::ComputeFindIVResult:
     return Op == getOperand(1);
   };
   llvm_unreachable("switch should return");
@@ -1168,8 +1176,8 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
   case VPInstruction::ComputeAnyOfResult:
     O << "compute-anyof-result";
     break;
-  case VPInstruction::ComputeFindLastIVResult:
-    O << "compute-find-last-iv-result";
+  case VPInstruction::ComputeFindIVResult:
+    O << "compute-find-iv-result";
     break;
   case VPInstruction::ComputeReductionResult:
     O << "compute-reduction-result";
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 6c2b8210ffb7b..2dd43c092ff7a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -355,7 +355,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
                       m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
         match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
                       m_VPValue(), m_VPValue(Op1))) ||
-        match(&R, m_VPInstruction<VPInstruction::ComputeFindLastIVResult>(
+        match(&R, m_VPInstruction<VPInstruction::ComputeFindIVResult>(
                       m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
       addUniformForAllParts(cast<VPInstruction>(&R));
       for (unsigned Part = 1; Part != UF; ++Part)
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
index cf10d32e063ab..ee0a64397e2ac 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
@@ -1,26 +1,185 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck --check-prefix=IC1VF4 %s
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck --check-prefix=IC4VF4 %s
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck --check-prefix=IC4VF1 %s
 
 define i64 @select_decreasing_induction_icmp_const_start(ptr %a) {
-; CHECK-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    br label %[[LOOP:.*]]
-; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 19999, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; CHECK-NEXT:    [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
-; CHECK-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
-; CHECK-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; CHECK-NEXT:    ret i64 [[SPEC_SELECT_LCSSA]]
+; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
+; IC1VF4-SAME: ptr [[A:%.*]]) {
+; IC1VF4-NEXT:  [[ENTRY:.*]]:
+; IC1VF4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC1VF4:       [[VECTOR_PH]]:
+; IC1VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC1VF4:       [[VECTOR_BODY]]:
+; IC1VF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 19999, i64 19998, i64 19997, i64 19996>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
+; IC1VF4-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
+; IC1VF4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0
+; IC1VF4-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3
+; IC1VF4-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; IC1VF4-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC1VF4-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
+; IC1VF4-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
+; IC1VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; IC1VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
+; IC1VF4-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
+; IC1VF4-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IC1VF4:       [[MIDDLE_BLOCK]]:
+; IC1VF4-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP4]])
+; IC1VF4-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], 9223372036854775807
+; IC1VF4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 331
+; IC1VF4-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC1VF4:       [[SCALAR_PH]]:
+; IC1VF4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 19999, %[[ENTRY]] ]
+; IC1VF4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
+; IC1VF4-NEXT:    br label %[[LOOP:.*]]
+; IC1VF4:       [[LOOP]]:
+; IC1VF4-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT:    [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC1VF4-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC1VF4-NEXT:    [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC1VF4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC1VF4-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC1VF4-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC1VF4-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; IC1VF4:       [[EXIT]]:
+; IC1VF4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC1VF4-NEXT:    ret i64 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
+; IC4VF4-SAME: ptr [[A:%.*]]) {
+; IC4VF4-NEXT:  [[ENTRY:.*]]:
+; IC4VF4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF4:       [[VECTOR_PH]]:
+; IC4VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC4VF4:       [[VECTOR_BODY]]:
+; IC4VF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 19999, i64 19998, i64 19997, i64 19996>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
+; IC4VF4-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -4)
+; IC4VF4-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4)
+; IC4VF4-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
+; IC4VF4-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
+; IC4VF4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0
+; IC4VF4-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3
+; IC4VF4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -4
+; IC4VF4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -3
+; IC4VF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -8
+; IC4VF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3
+; IC4VF4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -12
+; IC4VF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3
+; IC4VF4-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; IC4VF4-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC4VF4-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; IC4VF4-NEXT:    [[REVERSE5:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC4VF4-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
+; IC4VF4-NEXT:    [[REVERSE7:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD6]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC4VF4-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
+; IC4VF4-NEXT:    [[REVERSE9:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD8]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC4VF4-NEXT:    [[TMP9:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
+; IC4VF4-NEXT:    [[TMP10:%.*]] = icmp sgt <4 x i64> [[REVERSE5]], splat (i64 3)
+; IC4VF4-NEXT:    [[TMP11:%.*]] = icmp sgt <4 x i64> [[REVERSE7]], splat (i64 3)
+; IC4VF4-NEXT:    [[TMP12:%.*]] = icmp sgt <4 x i64> [[REVERSE9]], splat (i64 3)
+; IC4VF4-NEXT:    [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
+; IC4VF4-NEXT:    [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
+; IC4VF4-NEXT:    [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
+; IC4VF4-NEXT:    [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
+; IC4VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; IC4VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4)
+; IC4VF4-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
+; IC4VF4-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IC4VF4:       [[MIDDLE_BLOCK]]:
+; IC4VF4-NEXT:    [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]])
+; IC4VF4-NEXT:    [[RDX_MINMAX10:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP15]])
+; IC4VF4-NEXT:    [[RDX_MINMAX11:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX10]], <4 x i64> [[TMP16]])
+; IC4VF4-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[RDX_MINMAX11]])
+; IC4VF4-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP18]], 9223372036854775807
+; IC4VF4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP18]], i64 331
+; IC4VF4-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC4VF4:       [[SCALAR_PH]]:
+; IC4VF4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 19999, %[[ENTRY]] ]
+; IC4VF4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
+; IC4VF4-NEXT:    br label %[[LOOP:.*]]
+; IC4VF4:       [[LOOP]]:
+; IC4VF4-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT:    [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC4VF4-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF4-NEXT:    [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC4VF4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC4VF4-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF4-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC4VF4-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; IC4VF4:       [[EXIT]]:
+; IC4VF4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF4-NEXT:    ret i64 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
+; IC4VF1-SAME: ptr [[A:%.*]]) {
+; IC4VF1-NEXT:  [[ENTRY:.*]]:
+; IC4VF1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF1:       [[VECTOR_PH]]:
+; IC4VF1-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC4VF1:       [[VECTOR_BODY]]:
+; IC4VF1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[VEC_PHI:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[VEC_PHI1:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[VEC_PHI2:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[VEC_PHI3:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
+; IC4VF1-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
+; IC4VF1-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -2
+; IC4VF1-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -3
+; IC4VF1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
+; IC4VF1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
+; IC4VF1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; IC4VF1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
+; IC4VF1-NEXT:    [[TMP7:%.*]] = load i64, ptr [[TMP3]], align 8
+; IC4VF1-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8
+; IC4VF1-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8
+; IC4VF1-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
+; IC4VF1-NEXT:    [[TMP11:%.*]] = icmp sgt i64 [[TMP7]], 3
+; IC4VF1-NEXT:    [[TMP12:%.*]] = icmp sgt i64 [[TMP8]], 3
+; IC4VF1-NEXT:    [[TMP13:%.*]] = icmp sgt i64 [[TMP9]], 3
+; IC4VF1-NEXT:    [[TMP14:%.*]] = icmp sgt i64 [[TMP10]], 3
+; IC4VF1-NEXT:    [[TMP15]] = select i1 [[TMP11]], i64 [[OFFSET_IDX]], i64 [[VEC_PHI]]
+; IC4VF1-NEXT:    [[TMP16]] = select i1 [[TMP12]], i64 [[TMP0]], i64 [[VEC_PHI1]]
+; IC4VF1-NEXT:    [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI2]]
+; IC4VF1-NEXT:    [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI3]]
+; IC4VF1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; IC4VF1-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
+; IC4VF1-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IC4VF1:       [[MIDDLE_BLOCK]]:
+; IC4VF1-NEXT:    [[RDX_MINMAX:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP15]], i64 [[TMP16]])
+; IC4VF1-NEXT:    [[RDX_MINMAX4:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX]], i64 [[TMP17]])
+; IC4VF1-NEXT:    [[RDX_MINMAX5:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP18]])
+; IC4VF1-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], 9223372036854775807
+; IC4VF1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331
+; IC4VF1-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC4VF1:       [[SCALAR_PH]]:
+; IC4VF1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 19999, %[[ENTRY]] ]
+; IC4VF1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
+; IC4VF1-NEXT:    br label %[[LOOP:.*]]
+; IC4VF1:       [[LOOP]]:
+; IC4VF1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT:    [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC4VF1-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF1-NEXT:    [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC4VF1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC4VF1-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF1-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC4VF1-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; IC4VF1:       [[EXIT]]:
+; IC4VF1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF1-NEXT:    ret i64 [[SPEC_SELECT_LCSSA]]
 ;
 entry:
   br label %loop
@@ -43,23 +202,354 @@ exit:                                             ; preds = %loop
 @table = constant [13 x i16] [i16 10, i16 35, i16 69, i16 147, i16 280, i16 472, i16 682, i16 1013, i16 1559, i16 2544, i16 4553, i16 6494, i16 10000], align 1
 
 define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
-; CHECK-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
-; CHECK-SAME: i16 noundef [[VAL:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    br label %[[LOOP:.*]]
-; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i16 [ 12, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
-; CHECK-NEXT:    [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1
-; CHECK-NEXT:    [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]]
-; CHECK-NEXT:    [[IV_NEXT]] = add nsw i16 [[IV]], -1
-; CHECK-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; CHECK-NEXT:    ret i16 [[SPEC_SELECT_LCSSA]]
+; IC1VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
+; IC1VF4-SAME: i16 noundef [[VAL:%.*]]) {
+; IC1VF4-NEXT:  [[ENTRY:.*]]:
+; IC1VF4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC1VF4:       [[VECTOR_PH]]:
+; IC1VF4-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
+; IC1VF4-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; IC1VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC1VF4:       [[VECTOR_BODY]]:
+; IC1VF4-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; IC1VF4-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
+; IC1VF4-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
+; IC1VF4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 0
+; IC1VF4-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 -3
+; IC1VF4-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 1
+; IC1VF4-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC1VF4-NEXT:    [[TMP3:%.*]] = icmp ugt <4 x i16> [[REVERSE]], [[BROADCAST_SPLAT]]
+; IC1VF4-NEXT:    [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
+; IC1VF4-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]]
+; IC1VF4-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; IC1VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
+; IC1VF4-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
+; IC1VF4-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IC1VF4:       [[MIDDLE_BLOCK]]:
+; IC1VF4-NEXT:    [[TMP7:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[TMP5]])
+; IC1VF4-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767
+; IC1VF4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0
+; IC1VF4-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC1VF4:       [[SCALAR_PH]]:
+; IC1VF4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ]
+; IC1VF4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i16 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; IC1VF4-NEXT:    br label %[[LOOP:.*]]
+; IC1VF4:       [[LOOP]]:
+; IC1VF4-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT:    [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT:    [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
+; IC1VF4-NEXT:    [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1
+; IC1VF4-NEXT:    [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]]
+; IC1VF4-NEXT:    [[IV_NEXT]] = add nsw i16 [[IV]], -1
+; IC1VF4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
+; IC1VF4-NEXT:    [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
+; IC1VF4-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; IC1VF4:       [[EXIT]]:
+; IC1VF4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC1VF4-NEXT:    ret i16 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
+; IC4VF4-SAME: i16 noundef [[VAL:%.*]]) {
+; IC4VF4-NEXT:  [[ENTRY:.*]]:
+; IC4VF4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF4:       [[VECTOR_PH]]:
+; IC4VF4-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
+; IC4VF4-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; IC4VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC4VF4:       [[VECTOR_BODY]]:
+; IC4VF4-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ]
+; IC4VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
+; IC4VF4-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4)
+; IC4VF4-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4)
+; IC4VF4-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; IC4VF4-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
+; IC4VF4-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
+; IC4VF4-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
+; IC4VF4-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
+; IC4VF4-NEXT:    [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 4, i32 5, i32 6, i32 7>
+; IC4VF4-NEXT:    [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 8, i32 9, i32 10, i32 11>
+; IC4VF4-NEXT:    [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 12, i32 13, i32 14, i32 15>
+; IC4VF4-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11)
+; IC4VF4-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11)
+; IC4VF4-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11)
+; IC4VF4-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11)
+; IC4VF4-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; IC4VF4-NEXT:    br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; IC4VF4:       [[PRED_LOAD_IF]]:
+; IC4VF4-NEXT:    [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0
+; IC4VF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]]
+; IC4VF4-NEXT:    [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1
+; IC4VF4-NEXT:    [[TMP8:%.*]] = insertelement <4 x i16> poison, i16 [[TMP7]], i32 0
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE]]:
+; IC4VF4-NEXT:    [[TMP9:%.*]] = phi <4 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
+; IC4VF4-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; IC4VF4-NEXT:    br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
+; IC4VF4:       [[PRED_LOAD_IF15]]:
+; IC4VF4-NEXT:    [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1
+; IC4VF4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]]
+; IC4VF4-NEXT:    [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 1
+; IC4VF4-NEXT:    [[TMP14:%.*]] = insertelement <4 x i16> [[TMP9]], i16 [[TMP13]], i32 1
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE16]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE16]]:
+; IC4VF4-NEXT:    [[TMP15:%.*]] = phi <4 x i16> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ]
+; IC4VF4-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; IC4VF4-NEXT:    br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
+; IC4VF4:       [[PRED_LOAD_IF17]]:
+; IC4VF4-NEXT:    [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2
+; IC4VF4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]]
+; IC4VF4-NEXT:    [[TMP19:%.*]] = load i16, ptr [[TMP18]], align 1
+; IC4VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP19]], i32 2
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE18]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE18]]:
+; IC4VF4-NEXT:    [[TMP21:%.*]] = phi <4 x i16> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ]
+; IC4VF4-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; IC4VF4-NEXT:    br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
+; IC4VF4:       [[PRED_LOAD_IF19]]:
+; IC4VF4-NEXT:    [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3
+; IC4VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]]
+; IC4VF4-NEXT:    [[TMP25:%.*]] = load i16, ptr [[TMP24]], align 1
+; IC4VF4-NEXT:    [[TMP26:%.*]] = insertelement <4 x i16> [[TMP21]], i16 [[TMP25]], i32 3
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE20]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE20]]:
+; IC4VF4-NEXT:    [[TMP27:%.*]] = phi <4 x i16> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ]
+; IC4VF4-NEXT:    [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; IC4VF4-NEXT:    br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
+; IC4VF4:       [[PRED_LOAD_IF21]]:
+; IC4VF4-NEXT:    [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4
+; IC4VF4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]]
+; IC4VF4-NEXT:    [[TMP31:%.*]] = load i16, ptr [[TMP30]], align 1
+; IC4VF4-NEXT:    [[TMP32:%.*]] = insertelement <4 x i16> poison, i16 [[TMP31]], i32 0
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE22]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE22]]:
+; IC4VF4-NEXT:    [[TMP33:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ]
+; IC4VF4-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; IC4VF4-NEXT:    br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
+; IC4VF4:       [[PRED_LOAD_IF23]]:
+; IC4VF4-NEXT:    [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5
+; IC4VF4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]]
+; IC4VF4-NEXT:    [[TMP37:%.*]] = load i16, ptr [[TMP36]], align 1
+; IC4VF4-NEXT:    [[TMP38:%.*]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP37]], i32 1
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE24]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE24]]:
+; IC4VF4-NEXT:    [[TMP39:%.*]] = phi <4 x i16> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ]
+; IC4VF4-NEXT:    [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; IC4VF4-NEXT:    br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
+; IC4VF4:       [[PRED_LOAD_IF25]]:
+; IC4VF4-NEXT:    [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6
+; IC4VF4-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]]
+; IC4VF4-NEXT:    [[TMP43:%.*]] = load i16, ptr [[TMP42]], align 1
+; IC4VF4-NEXT:    [[TMP44:%.*]] = insertelement <4 x i16> [[TMP39]], i16 [[TMP43]], i32 2
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE26]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE26]]:
+; IC4VF4-NEXT:    [[TMP45:%.*]] = phi <4 x i16> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ]
+; IC4VF4-NEXT:    [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; IC4VF4-NEXT:    br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
+; IC4VF4:       [[PRED_LOAD_IF27]]:
+; IC4VF4-NEXT:    [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7
+; IC4VF4-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]]
+; IC4VF4-NEXT:    [[TMP49:%.*]] = load i16, ptr [[TMP48]], align 1
+; IC4VF4-NEXT:    [[TMP50:%.*]] = insertelement <4 x i16> [[TMP45]], i16 [[TMP49]], i32 3
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE28]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE28]]:
+; IC4VF4-NEXT:    [[TMP51:%.*]] = phi <4 x i16> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ]
+; IC4VF4-NEXT:    [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
+; IC4VF4-NEXT:    br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
+; IC4VF4:       [[PRED_LOAD_IF29]]:
+; IC4VF4-NEXT:    [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8
+; IC4VF4-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]]
+; IC4VF4-NEXT:    [[TMP55:%.*]] = load i16, ptr [[TMP54]], align 1
+; IC4VF4-NEXT:    [[TMP56:%.*]] = insertelement <4 x i16> poison, i16 [[TMP55]], i32 0
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE30]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE30]]:
+; IC4VF4-NEXT:    [[TMP57:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ]
+; IC4VF4-NEXT:    [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
+; IC4VF4-NEXT:    br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
+; IC4VF4:       [[PRED_LOAD_IF31]]:
+; IC4VF4-NEXT:    [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9
+; IC4VF4-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]]
+; IC4VF4-NEXT:    [[TMP61:%.*]] = load i16, ptr [[TMP60]], align 1
+; IC4VF4-NEXT:    [[TMP62:%.*]] = insertelement <4 x i16> [[TMP57]], i16 [[TMP61]], i32 1
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE32]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE32]]:
+; IC4VF4-NEXT:    [[TMP63:%.*]] = phi <4 x i16> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ]
+; IC4VF4-NEXT:    [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
+; IC4VF4-NEXT:    br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
+; IC4VF4:       [[PRED_LOAD_IF33]]:
+; IC4VF4-NEXT:    [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10
+; IC4VF4-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]]
+; IC4VF4-NEXT:    [[TMP67:%.*]] = load i16, ptr [[TMP66]], align 1
+; IC4VF4-NEXT:    [[TMP68:%.*]] = insertelement <4 x i16> [[TMP63]], i16 [[TMP67]], i32 2
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE34]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE34]]:
+; IC4VF4-NEXT:    [[TMP69:%.*]] = phi <4 x i16> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ]
+; IC4VF4-NEXT:    [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
+; IC4VF4-NEXT:    br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
+; IC4VF4:       [[PRED_LOAD_IF35]]:
+; IC4VF4-NEXT:    [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11
+; IC4VF4-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]]
+; IC4VF4-NEXT:    [[TMP73:%.*]] = load i16, ptr [[TMP72]], align 1
+; IC4VF4-NEXT:    [[TMP74:%.*]] = insertelement <4 x i16> [[TMP69]], i16 [[TMP73]], i32 3
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE36]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE36]]:
+; IC4VF4-NEXT:    [[TMP75:%.*]] = phi <4 x i16> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ]
+; IC4VF4-NEXT:    [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
+; IC4VF4-NEXT:    br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
+; IC4VF4:       [[PRED_LOAD_IF37]]:
+; IC4VF4-NEXT:    [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12
+; IC4VF4-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]]
+; IC4VF4-NEXT:    [[TMP79:%.*]] = load i16, ptr [[TMP78]], align 1
+; IC4VF4-NEXT:    [[TMP80:%.*]] = insertelement <4 x i16> poison, i16 [[TMP79]], i32 0
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE38]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE38]]:
+; IC4VF4-NEXT:    [[TMP81:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ]
+; IC4VF4-NEXT:    [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
+; IC4VF4-NEXT:    br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
+; IC4VF4:       [[PRED_LOAD_IF39]]:
+; IC4VF4-NEXT:    [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13
+; IC4VF4-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]]
+; IC4VF4-NEXT:    [[TMP85:%.*]] = load i16, ptr [[TMP84]], align 1
+; IC4VF4-NEXT:    [[TMP86:%.*]] = insertelement <4 x i16> [[TMP81]], i16 [[TMP85]], i32 1
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE40]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE40]]:
+; IC4VF4-NEXT:    [[TMP87:%.*]] = phi <4 x i16> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ]
+; IC4VF4-NEXT:    [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
+; IC4VF4-NEXT:    br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
+; IC4VF4:       [[PRED_LOAD_IF41]]:
+; IC4VF4-NEXT:    [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14
+; IC4VF4-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]]
+; IC4VF4-NEXT:    [[TMP91:%.*]] = load i16, ptr [[TMP90]], align 1
+; IC4VF4-NEXT:    [[TMP92:%.*]] = insertelement <4 x i16> [[TMP87]], i16 [[TMP91]], i32 2
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE42]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE42]]:
+; IC4VF4-NEXT:    [[TMP93:%.*]] = phi <4 x i16> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ]
+; IC4VF4-NEXT:    [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
+; IC4VF4-NEXT:    br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]]
+; IC4VF4:       [[PRED_LOAD_IF43]]:
+; IC4VF4-NEXT:    [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15
+; IC4VF4-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]]
+; IC4VF4-NEXT:    [[TMP97:%.*]] = load i16, ptr [[TMP96]], align 1
+; IC4VF4-NEXT:    [[TMP98:%.*]] = insertelement <4 x i16> [[TMP93]], i16 [[TMP97]], i32 3
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE44]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE44]]:
+; IC4VF4-NEXT:    [[TMP99:%.*]] = phi <4 x i16> [ [[TMP93]], %[[PRED_LOAD_CONTINUE42]] ], [ [[TMP98]], %[[PRED_LOAD_IF43]] ]
+; IC4VF4-NEXT:    [[TMP100:%.*]] = icmp ugt <4 x i16> [[TMP27]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT:    [[TMP101:%.*]] = icmp ugt <4 x i16> [[TMP51]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT:    [[TMP102:%.*]] = icmp ugt <4 x i16> [[TMP75]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT:    [[TMP103:%.*]] = icmp ugt <4 x i16> [[TMP99]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT:    [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
+; IC4VF4-NEXT:    [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1)
+; IC4VF4-NEXT:    [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1)
+; IC4VF4-NEXT:    [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1)
+; IC4VF4-NEXT:    [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]]
+; IC4VF4-NEXT:    [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]]
+; IC4VF4-NEXT:    [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]]
+; IC4VF4-NEXT:    [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]]
+; IC4VF4-NEXT:    [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]]
+; IC4VF4-NEXT:    [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
+; IC4VF4-NEXT:    [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
+; IC4VF4-NEXT:    [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
+; IC4VF4-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
+; IC4VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
+; IC4VF4-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IC4VF4:       [[MIDDLE_BLOCK]]:
+; IC4VF4-NEXT:    [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]])
+; IC4VF4-NEXT:    [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]])
+; IC4VF4-NEXT:    [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]])
+; IC4VF4-NEXT:    [[TMP116:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[RDX_MINMAX46]])
+; IC4VF4-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP116]], 32767
+; IC4VF4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP116]], i16 0
+; IC4VF4-NEXT:    br label %[[EXIT:.*]]
+; IC4VF4:       [[SCALAR_PH]]:
+; IC4VF4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 12, %[[ENTRY]] ]
+; IC4VF4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ]
+; IC4VF4-NEXT:    br label %[[LOOP:.*]]
+; IC4VF4:       [[LOOP]]:
+; IC4VF4-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT:    [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT:    [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
+; IC4VF4-NEXT:    [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1
+; IC4VF4-NEXT:    [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]]
+; IC4VF4-NEXT:    [[IV_NEXT]] = add nsw i16 [[IV]], -1
+; IC4VF4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
+; IC4VF4-NEXT:    [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
+; IC4VF4-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; IC4VF4:       [[EXIT]]:
+; IC4VF4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF4-NEXT:    ret i16 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
+; IC4VF1-SAME: i16 noundef [[VAL:%.*]]) {
+; IC4VF1-NEXT:  [[ENTRY:.*]]:
+; IC4VF1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF1:       [[VECTOR_PH]]:
+; IC4VF1-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC4VF1:       [[VECTOR_BODY]]:
+; IC4VF1-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[VEC_PHI:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[VEC_PHI1:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[VEC_PHI2:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[VEC_PHI3:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; IC4VF1-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
+; IC4VF1-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], -1
+; IC4VF1-NEXT:    [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], -2
+; IC4VF1-NEXT:    [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], -3
+; IC4VF1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
+; IC4VF1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP0]]
+; IC4VF1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP1]]
+; IC4VF1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP2]]
+; IC4VF1-NEXT:    [[TMP7:%.*]] = load i16, ptr [[TMP3]], align 1
+; IC4VF1-NEXT:    [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 1
+; IC4VF1-NEXT:    [[TMP9:%.*]] = load i16, ptr [[TMP5]], align 1
+; IC4VF1-NEXT:    [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
+; IC4VF1-NEXT:    [[TMP11:%.*]] = icmp ugt i16 [[TMP7]], [[VAL]]
+; IC4VF1-NEXT:    [[TMP12:%.*]] = icmp ugt i16 [[TMP8]], [[VAL]]
+; IC4VF1-NEXT:    [[TMP13:%.*]] = icmp ugt i16 [[TMP9]], [[VAL]]
+; IC4VF1-NEXT:    [[TMP14:%.*]] = icmp ugt i16 [[TMP10]], [[VAL]]
+; IC4VF1-NEXT:    [[TMP15:%.*]] = add nsw i16 [[OFFSET_IDX]], -1
+; IC4VF1-NEXT:    [[TMP16:%.*]] = add nsw i16 [[TMP0]], -1
+; IC4VF1-NEXT:    [[TMP17:%.*]] = add nsw i16 [[TMP1]], -1
+; IC4VF1-NEXT:    [[TMP18:%.*]] = add nsw i16 [[TMP2]], -1
+; IC4VF1-NEXT:    [[TMP19]] = select i1 [[TMP11]], i16 [[TMP15]], i16 [[VEC_PHI]]
+; IC4VF1-NEXT:    [[TMP20]] = select i1 [[TMP12]], i16 [[TMP16]], i16 [[VEC_PHI1]]
+; IC4VF1-NEXT:    [[TMP21]] = select i1 [[TMP13]], i16 [[TMP17]], i16 [[VEC_PHI2]]
+; IC4VF1-NEXT:    [[TMP22]] = select i1 [[TMP14]], i16 [[TMP18]], i16 [[VEC_PHI3]]
+; IC4VF1-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; IC4VF1-NEXT:    [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
+; IC4VF1-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IC4VF1:       [[MIDDLE_BLOCK]]:
+; IC4VF1-NEXT:    [[RDX_MINMAX:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP19]], i16 [[TMP20]])
+; IC4VF1-NEXT:    [[RDX_MINMAX4:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX]], i16 [[TMP21]])
+; IC4VF1-NEXT:    [[RDX_MINMAX5:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX4]], i16 [[TMP22]])
+; IC4VF1-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[RDX_MINMAX5]], 32767
+; IC4VF1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0
+; IC4VF1-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC4VF1:       [[SCALAR_PH]]:
+; IC4VF1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ]
+; IC4VF1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i16 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; IC4VF1-NEXT:    br label %[[LOOP:.*]]
+; IC4VF1:       [[LOOP]]:
+; IC4VF1-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT:    [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT:    [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
+; IC4VF1-NEXT:    [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1
+; IC4VF1-NEXT:    [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]]
+; IC4VF1-NEXT:    [[IV_NEXT]] = add nsw i16 [[IV]], -1
+; IC4VF1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
+; IC4VF1-NEXT:    [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
+; IC4VF1-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; IC4VF1:       [[EXIT]]:
+; IC4VF1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF1-NEXT:    ret i16 [[SPEC_SELECT_LCSSA]]
 ;
 entry:
   br label %loop
@@ -83,23 +573,354 @@ exit:                                             ; preds = %loop
 @tablef = constant [13 x half] [half 10.0, half 35.0, half 69.0, half 147.0, half 280.0, half 472.0, half 682.0, half 1013.0, half 1559.0, half 2544.0, half 4556.0, half 6496.0, half 10000.0], align 1
 
 define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) {
-; CHECK-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
-; CHECK-SAME: half noundef [[VAL:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    br label %[[LOOP:.*]]
-; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i16 [ 12, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
-; CHECK-NEXT:    [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1
-; CHECK-NEXT:    [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]]
-; CHECK-NEXT:    [[IV_NEXT]] = add nsw i16 [[IV]], -1
-; CHECK-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; CHECK-NEXT:    ret i16 [[SPEC_SELECT_LCSSA]]
+; IC1VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
+; IC1VF4-SAME: half noundef [[VAL:%.*]]) {
+; IC1VF4-NEXT:  [[ENTRY:.*]]:
+; IC1VF4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC1VF4:       [[VECTOR_PH]]:
+; IC1VF4-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[VAL]], i64 0
+; IC1VF4-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer
+; IC1VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC1VF4:       [[VECTOR_BODY]]:
+; IC1VF4-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; IC1VF4-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
+; IC1VF4-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
+; IC1VF4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 0
+; IC1VF4-NEXT:    [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i32 -3
+; IC1VF4-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x half>, ptr [[TMP2]], align 1
+; IC1VF4-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x half> [[WIDE_LOAD]], <4 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC1VF4-NEXT:    [[TMP3:%.*]] = fcmp ugt <4 x half> [[REVERSE]], [[BROADCAST_SPLAT]]
+; IC1VF4-NEXT:    [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
+; IC1VF4-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]]
+; IC1VF4-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; IC1VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
+; IC1VF4-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
+; IC1VF4-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IC1VF4:       [[MIDDLE_BLOCK]]:
+; IC1VF4-NEXT:    [[TMP7:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[TMP5]])
+; IC1VF4-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767
+; IC1VF4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0
+; IC1VF4-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC1VF4:       [[SCALAR_PH]]:
+; IC1VF4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ]
+; IC1VF4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i16 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; IC1VF4-NEXT:    br label %[[LOOP:.*]]
+; IC1VF4:       [[LOOP]]:
+; IC1VF4-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT:    [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT:    [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
+; IC1VF4-NEXT:    [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1
+; IC1VF4-NEXT:    [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]]
+; IC1VF4-NEXT:    [[IV_NEXT]] = add nsw i16 [[IV]], -1
+; IC1VF4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
+; IC1VF4-NEXT:    [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
+; IC1VF4-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; IC1VF4:       [[EXIT]]:
+; IC1VF4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC1VF4-NEXT:    ret i16 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
+; IC4VF4-SAME: half noundef [[VAL:%.*]]) {
+; IC4VF4-NEXT:  [[ENTRY:.*]]:
+; IC4VF4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF4:       [[VECTOR_PH]]:
+; IC4VF4-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[VAL]], i64 0
+; IC4VF4-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer
+; IC4VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC4VF4:       [[VECTOR_BODY]]:
+; IC4VF4-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ]
+; IC4VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
+; IC4VF4-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4)
+; IC4VF4-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4)
+; IC4VF4-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; IC4VF4-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
+; IC4VF4-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
+; IC4VF4-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
+; IC4VF4-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
+; IC4VF4-NEXT:    [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 4, i32 5, i32 6, i32 7>
+; IC4VF4-NEXT:    [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 8, i32 9, i32 10, i32 11>
+; IC4VF4-NEXT:    [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 12, i32 13, i32 14, i32 15>
+; IC4VF4-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11)
+; IC4VF4-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11)
+; IC4VF4-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11)
+; IC4VF4-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11)
+; IC4VF4-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; IC4VF4-NEXT:    br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; IC4VF4:       [[PRED_LOAD_IF]]:
+; IC4VF4-NEXT:    [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0
+; IC4VF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]]
+; IC4VF4-NEXT:    [[TMP7:%.*]] = load half, ptr [[TMP6]], align 1
+; IC4VF4-NEXT:    [[TMP8:%.*]] = insertelement <4 x half> poison, half [[TMP7]], i32 0
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE]]:
+; IC4VF4-NEXT:    [[TMP9:%.*]] = phi <4 x half> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
+; IC4VF4-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; IC4VF4-NEXT:    br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
+; IC4VF4:       [[PRED_LOAD_IF15]]:
+; IC4VF4-NEXT:    [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1
+; IC4VF4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]]
+; IC4VF4-NEXT:    [[TMP13:%.*]] = load half, ptr [[TMP12]], align 1
+; IC4VF4-NEXT:    [[TMP14:%.*]] = insertelement <4 x half> [[TMP9]], half [[TMP13]], i32 1
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE16]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE16]]:
+; IC4VF4-NEXT:    [[TMP15:%.*]] = phi <4 x half> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ]
+; IC4VF4-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; IC4VF4-NEXT:    br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
+; IC4VF4:       [[PRED_LOAD_IF17]]:
+; IC4VF4-NEXT:    [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2
+; IC4VF4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]]
+; IC4VF4-NEXT:    [[TMP19:%.*]] = load half, ptr [[TMP18]], align 1
+; IC4VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x half> [[TMP15]], half [[TMP19]], i32 2
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE18]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE18]]:
+; IC4VF4-NEXT:    [[TMP21:%.*]] = phi <4 x half> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ]
+; IC4VF4-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; IC4VF4-NEXT:    br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
+; IC4VF4:       [[PRED_LOAD_IF19]]:
+; IC4VF4-NEXT:    [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3
+; IC4VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]]
+; IC4VF4-NEXT:    [[TMP25:%.*]] = load half, ptr [[TMP24]], align 1
+; IC4VF4-NEXT:    [[TMP26:%.*]] = insertelement <4 x half> [[TMP21]], half [[TMP25]], i32 3
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE20]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE20]]:
+; IC4VF4-NEXT:    [[TMP27:%.*]] = phi <4 x half> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ]
+; IC4VF4-NEXT:    [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; IC4VF4-NEXT:    br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
+; IC4VF4:       [[PRED_LOAD_IF21]]:
+; IC4VF4-NEXT:    [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4
+; IC4VF4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]]
+; IC4VF4-NEXT:    [[TMP31:%.*]] = load half, ptr [[TMP30]], align 1
+; IC4VF4-NEXT:    [[TMP32:%.*]] = insertelement <4 x half> poison, half [[TMP31]], i32 0
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE22]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE22]]:
+; IC4VF4-NEXT:    [[TMP33:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ]
+; IC4VF4-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; IC4VF4-NEXT:    br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
+; IC4VF4:       [[PRED_LOAD_IF23]]:
+; IC4VF4-NEXT:    [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5
+; IC4VF4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]]
+; IC4VF4-NEXT:    [[TMP37:%.*]] = load half, ptr [[TMP36]], align 1
+; IC4VF4-NEXT:    [[TMP38:%.*]] = insertelement <4 x half> [[TMP33]], half [[TMP37]], i32 1
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE24]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE24]]:
+; IC4VF4-NEXT:    [[TMP39:%.*]] = phi <4 x half> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ]
+; IC4VF4-NEXT:    [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; IC4VF4-NEXT:    br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
+; IC4VF4:       [[PRED_LOAD_IF25]]:
+; IC4VF4-NEXT:    [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6
+; IC4VF4-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]]
+; IC4VF4-NEXT:    [[TMP43:%.*]] = load half, ptr [[TMP42]], align 1
+; IC4VF4-NEXT:    [[TMP44:%.*]] = insertelement <4 x half> [[TMP39]], half [[TMP43]], i32 2
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE26]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE26]]:
+; IC4VF4-NEXT:    [[TMP45:%.*]] = phi <4 x half> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ]
+; IC4VF4-NEXT:    [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; IC4VF4-NEXT:    br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
+; IC4VF4:       [[PRED_LOAD_IF27]]:
+; IC4VF4-NEXT:    [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7
+; IC4VF4-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]]
+; IC4VF4-NEXT:    [[TMP49:%.*]] = load half, ptr [[TMP48]], align 1
+; IC4VF4-NEXT:    [[TMP50:%.*]] = insertelement <4 x half> [[TMP45]], half [[TMP49]], i32 3
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE28]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE28]]:
+; IC4VF4-NEXT:    [[TMP51:%.*]] = phi <4 x half> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ]
+; IC4VF4-NEXT:    [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
+; IC4VF4-NEXT:    br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
+; IC4VF4:       [[PRED_LOAD_IF29]]:
+; IC4VF4-NEXT:    [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8
+; IC4VF4-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]]
+; IC4VF4-NEXT:    [[TMP55:%.*]] = load half, ptr [[TMP54]], align 1
+; IC4VF4-NEXT:    [[TMP56:%.*]] = insertelement <4 x half> poison, half [[TMP55]], i32 0
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE30]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE30]]:
+; IC4VF4-NEXT:    [[TMP57:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ]
+; IC4VF4-NEXT:    [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
+; IC4VF4-NEXT:    br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
+; IC4VF4:       [[PRED_LOAD_IF31]]:
+; IC4VF4-NEXT:    [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9
+; IC4VF4-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]]
+; IC4VF4-NEXT:    [[TMP61:%.*]] = load half, ptr [[TMP60]], align 1
+; IC4VF4-NEXT:    [[TMP62:%.*]] = insertelement <4 x half> [[TMP57]], half [[TMP61]], i32 1
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE32]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE32]]:
+; IC4VF4-NEXT:    [[TMP63:%.*]] = phi <4 x half> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ]
+; IC4VF4-NEXT:    [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
+; IC4VF4-NEXT:    br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
+; IC4VF4:       [[PRED_LOAD_IF33]]:
+; IC4VF4-NEXT:    [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10
+; IC4VF4-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]]
+; IC4VF4-NEXT:    [[TMP67:%.*]] = load half, ptr [[TMP66]], align 1
+; IC4VF4-NEXT:    [[TMP68:%.*]] = insertelement <4 x half> [[TMP63]], half [[TMP67]], i32 2
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE34]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE34]]:
+; IC4VF4-NEXT:    [[TMP69:%.*]] = phi <4 x half> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ]
+; IC4VF4-NEXT:    [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
+; IC4VF4-NEXT:    br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
+; IC4VF4:       [[PRED_LOAD_IF35]]:
+; IC4VF4-NEXT:    [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11
+; IC4VF4-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]]
+; IC4VF4-NEXT:    [[TMP73:%.*]] = load half, ptr [[TMP72]], align 1
+; IC4VF4-NEXT:    [[TMP74:%.*]] = insertelement <4 x half> [[TMP69]], half [[TMP73]], i32 3
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE36]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE36]]:
+; IC4VF4-NEXT:    [[TMP75:%.*]] = phi <4 x half> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ]
+; IC4VF4-NEXT:    [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
+; IC4VF4-NEXT:    br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
+; IC4VF4:       [[PRED_LOAD_IF37]]:
+; IC4VF4-NEXT:    [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12
+; IC4VF4-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]]
+; IC4VF4-NEXT:    [[TMP79:%.*]] = load half, ptr [[TMP78]], align 1
+; IC4VF4-NEXT:    [[TMP80:%.*]] = insertelement <4 x half> poison, half [[TMP79]], i32 0
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE38]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE38]]:
+; IC4VF4-NEXT:    [[TMP81:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ]
+; IC4VF4-NEXT:    [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
+; IC4VF4-NEXT:    br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
+; IC4VF4:       [[PRED_LOAD_IF39]]:
+; IC4VF4-NEXT:    [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13
+; IC4VF4-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]]
+; IC4VF4-NEXT:    [[TMP85:%.*]] = load half, ptr [[TMP84]], align 1
+; IC4VF4-NEXT:    [[TMP86:%.*]] = insertelement <4 x half> [[TMP81]], half [[TMP85]], i32 1
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE40]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE40]]:
+; IC4VF4-NEXT:    [[TMP87:%.*]] = phi <4 x half> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ]
+; IC4VF4-NEXT:    [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
+; IC4VF4-NEXT:    br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
+; IC4VF4:       [[PRED_LOAD_IF41]]:
+; IC4VF4-NEXT:    [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14
+; IC4VF4-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]]
+; IC4VF4-NEXT:    [[TMP91:%.*]] = load half, ptr [[TMP90]], align 1
+; IC4VF4-NEXT:    [[TMP92:%.*]] = insertelement <4 x half> [[TMP87]], half [[TMP91]], i32 2
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE42]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE42]]:
+; IC4VF4-NEXT:    [[TMP93:%.*]] = phi <4 x half> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ]
+; IC4VF4-NEXT:    [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
+; IC4VF4-NEXT:    br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]]
+; IC4VF4:       [[PRED_LOAD_IF43]]:
+; IC4VF4-NEXT:    [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15
+; IC4VF4-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]]
+; IC4VF4-NEXT:    [[TMP97:%.*]] = load half, ptr [[TMP96]], align 1
+; IC4VF4-NEXT:    [[TMP98:%.*]] = insertelement <4 x half> [[TMP93]], half [[TMP97]], i32 3
+; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE44]]
+; IC4VF4:       [[PRED_LOAD_CONTINUE44]]:
+; IC4VF4-NEXT:    [[TMP99:%.*]] = phi <4 x half> [ [[TMP93]], %[[PRED_LOAD_CONTINUE42]] ], [ [[TMP98]], %[[PRED_LOAD_IF43]] ]
+; IC4VF4-NEXT:    [[TMP100:%.*]] = fcmp ugt <4 x half> [[TMP27]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT:    [[TMP101:%.*]] = fcmp ugt <4 x half> [[TMP51]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT:    [[TMP102:%.*]] = fcmp ugt <4 x half> [[TMP75]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT:    [[TMP103:%.*]] = fcmp ugt <4 x half> [[TMP99]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT:    [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
+; IC4VF4-NEXT:    [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1)
+; IC4VF4-NEXT:    [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1)
+; IC4VF4-NEXT:    [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1)
+; IC4VF4-NEXT:    [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]]
+; IC4VF4-NEXT:    [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]]
+; IC4VF4-NEXT:    [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]]
+; IC4VF4-NEXT:    [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]]
+; IC4VF4-NEXT:    [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]]
+; IC4VF4-NEXT:    [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
+; IC4VF4-NEXT:    [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
+; IC4VF4-NEXT:    [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
+; IC4VF4-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
+; IC4VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
+; IC4VF4-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IC4VF4:       [[MIDDLE_BLOCK]]:
+; IC4VF4-NEXT:    [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]])
+; IC4VF4-NEXT:    [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]])
+; IC4VF4-NEXT:    [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]])
+; IC4VF4-NEXT:    [[TMP116:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[RDX_MINMAX46]])
+; IC4VF4-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP116]], 32767
+; IC4VF4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP116]], i16 0
+; IC4VF4-NEXT:    br label %[[EXIT:.*]]
+; IC4VF4:       [[SCALAR_PH]]:
+; IC4VF4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 12, %[[ENTRY]] ]
+; IC4VF4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ]
+; IC4VF4-NEXT:    br label %[[LOOP:.*]]
+; IC4VF4:       [[LOOP]]:
+; IC4VF4-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT:    [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT:    [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
+; IC4VF4-NEXT:    [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1
+; IC4VF4-NEXT:    [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]]
+; IC4VF4-NEXT:    [[IV_NEXT]] = add nsw i16 [[IV]], -1
+; IC4VF4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
+; IC4VF4-NEXT:    [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
+; IC4VF4-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; IC4VF4:       [[EXIT]]:
+; IC4VF4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF4-NEXT:    ret i16 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
+; IC4VF1-SAME: half noundef [[VAL:%.*]]) {
+; IC4VF1-NEXT:  [[ENTRY:.*]]:
+; IC4VF1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF1:       [[VECTOR_PH]]:
+; IC4VF1-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC4VF1:       [[VECTOR_BODY]]:
+; IC4VF1-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[VEC_PHI:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[VEC_PHI1:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[VEC_PHI2:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[VEC_PHI3:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; IC4VF1-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
+; IC4VF1-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], -1
+; IC4VF1-NEXT:    [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], -2
+; IC4VF1-NEXT:    [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], -3
+; IC4VF1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
+; IC4VF1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP0]]
+; IC4VF1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP1]]
+; IC4VF1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP2]]
+; IC4VF1-NEXT:    [[TMP7:%.*]] = load half, ptr [[TMP3]], align 1
+; IC4VF1-NEXT:    [[TMP8:%.*]] = load half, ptr [[TMP4]], align 1
+; IC4VF1-NEXT:    [[TMP9:%.*]] = load half, ptr [[TMP5]], align 1
+; IC4VF1-NEXT:    [[TMP10:%.*]] = load half, ptr [[TMP6]], align 1
+; IC4VF1-NEXT:    [[TMP11:%.*]] = fcmp ugt half [[TMP7]], [[VAL]]
+; IC4VF1-NEXT:    [[TMP12:%.*]] = fcmp ugt half [[TMP8]], [[VAL]]
+; IC4VF1-NEXT:    [[TMP13:%.*]] = fcmp ugt half [[TMP9]], [[VAL]]
+; IC4VF1-NEXT:    [[TMP14:%.*]] = fcmp ugt half [[TMP10]], [[VAL]]
+; IC4VF1-NEXT:    [[TMP15:%.*]] = add nsw i16 [[OFFSET_IDX]], -1
+; IC4VF1-NEXT:    [[TMP16:%.*]] = add nsw i16 [[TMP0]], -1
+; IC4VF1-NEXT:    [[TMP17:%.*]] = add nsw i16 [[TMP1]], -1
+; IC4VF1-NEXT:    [[TMP18:%.*]] = add nsw i16 [[TMP2]], -1
+; IC4VF1-NEXT:    [[TMP19]] = select i1 [[TMP11]], i16 [[TMP15]], i16 [[VEC_PHI]]
+; IC4VF1-NEXT:    [[TMP20]] = select i1 [[TMP12]], i16 [[TMP16]], i16 [[VEC_PHI1]]
+; IC4VF1-NEXT:    [[TMP21]] = select i1 [[TMP13]], i16 [[TMP17]], i16 [[VEC_PHI2]]
+; IC4VF1-NEXT:    [[TMP22]] = select i1 [[TMP14]], i16 [[TMP18]], i16 [[VEC_PHI3]]
+; IC4VF1-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; IC4VF1-NEXT:    [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
+; IC4VF1-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IC4VF1:       [[MIDDLE_BLOCK]]:
+; IC4VF1-NEXT:    [[RDX_MINMAX:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP19]], i16 [[TMP20]])
+; IC4VF1-NEXT:    [[RDX_MINMAX4:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX]], i16 [[TMP21]])
+; IC4VF1-NEXT:    [[RDX_MINMAX5:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX4]], i16 [[TMP22]])
+; IC4VF1-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[RDX_MINMAX5]], 32767
+; IC4VF1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0
+; IC4VF1-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC4VF1:       [[SCALAR_PH]]:
+; IC4VF1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ]
+; IC4VF1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i16 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; IC4VF1-NEXT:    br label %[[LOOP:.*]]
+; IC4VF1:       [[LOOP]]:
+; IC4VF1-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT:    [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT:    [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
+; IC4VF1-NEXT:    [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1
+; IC4VF1-NEXT:    [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]]
+; IC4VF1-NEXT:    [[IV_NEXT]] = add nsw i16 [[IV]], -1
+; IC4VF1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
+; IC4VF1-NEXT:    [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
+; IC4VF1-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; IC4VF1:       [[EXIT]]:
+; IC4VF1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF1-NEXT:    ret i16 [[SPEC_SELECT_LCSSA]]
 ;
 entry:
   br label %loop
@@ -141,6 +962,66 @@ define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(ptr
 ; CHECK-NEXT:    [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
 ; CHECK-NEXT:    ret i64 [[COND_LCSSA]]
 ;
+; IC1VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
+; IC1VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
+; IC1VF4-NEXT:  [[ENTRY:.*]]:
+; IC1VF4-NEXT:    br label %[[LOOP:.*]]
+; IC1VF4:       [[LOOP]]:
+; IC1VF4-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
+; IC1VF4-NEXT:    [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; IC1VF4-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC1VF4-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
+; IC1VF4-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC1VF4-NEXT:    [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
+; IC1VF4-NEXT:    [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
+; IC1VF4-NEXT:    [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
+; IC1VF4-NEXT:    [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
+; IC1VF4-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
+; IC1VF4-NEXT:    br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; IC1VF4:       [[EXIT]]:
+; IC1VF4-NEXT:    [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
+; IC1VF4-NEXT:    ret i64 [[COND_LCSSA]]
+;
+; IC4VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
+; IC4VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
+; IC4VF4-NEXT:  [[ENTRY:.*]]:
+; IC4VF4-NEXT:    br label %[[LOOP:.*]]
+; IC4VF4:       [[LOOP]]:
+; IC4VF4-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
+; IC4VF4-NEXT:    [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; IC4VF4-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF4-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
+; IC4VF4-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF4-NEXT:    [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
+; IC4VF4-NEXT:    [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
+; IC4VF4-NEXT:    [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
+; IC4VF4-NEXT:    [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
+; IC4VF4-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
+; IC4VF4-NEXT:    br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; IC4VF4:       [[EXIT]]:
+; IC4VF4-NEXT:    [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
+; IC4VF4-NEXT:    ret i64 [[COND_LCSSA]]
+;
+; IC4VF1-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
+; IC4VF1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
+; IC4VF1-NEXT:  [[ENTRY:.*]]:
+; IC4VF1-NEXT:    br label %[[LOOP:.*]]
+; IC4VF1:       [[LOOP]]:
+; IC4VF1-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
+; IC4VF1-NEXT:    [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; IC4VF1-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF1-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
+; IC4VF1-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF1-NEXT:    [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
+; IC4VF1-NEXT:    [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
+; IC4VF1-NEXT:    [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
+; IC4VF1-NEXT:    [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
+; IC4VF1-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
+; IC4VF1-NEXT:    br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; IC4VF1:       [[EXIT]]:
+; IC4VF1-NEXT:    [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
+; IC4VF1-NEXT:    ret i64 [[COND_LCSSA]]
+;
 entry:
   br label %loop
 
@@ -164,23 +1045,59 @@ exit:                                             ; preds = %loop
 ; The sentinel value for decreasing-IV vectorization is LONG_MAX, and since
 ; the IV hits this value, it is impossible to vectorize this case.
 define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr %a) {
-; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    br label %[[LOOP:.*]]
-; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; CHECK-NEXT:    [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
-; CHECK-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
-; CHECK-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; CHECK-NEXT:    ret i64 [[SPEC_SELECT_LCSSA]]
+; IC1VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
+; IC1VF4-SAME: ptr [[A:%.*]]) {
+; IC1VF4-NEXT:  [[ENTRY:.*]]:
+; IC1VF4-NEXT:    br label %[[LOOP:.*]]
+; IC1VF4:       [[LOOP]]:
+; IC1VF4-NEXT:    [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT:    [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC1VF4-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC1VF4-NEXT:    [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC1VF4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC1VF4-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC1VF4-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC1VF4-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; IC1VF4:       [[EXIT]]:
+; IC1VF4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
+; IC1VF4-NEXT:    ret i64 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
+; IC4VF4-SAME: ptr [[A:%.*]]) {
+; IC4VF4-NEXT:  [[ENTRY:.*]]:
+; IC4VF4-NEXT:    br label %[[LOOP:.*]]
+; IC4VF4:       [[LOOP]]:
+; IC4VF4-NEXT:    [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT:    [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC4VF4-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF4-NEXT:    [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC4VF4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC4VF4-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF4-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC4VF4-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; IC4VF4:       [[EXIT]]:
+; IC4VF4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
+; IC4VF4-NEXT:    ret i64 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF1-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
+; IC4VF1-SAME: ptr [[A:%.*]]) {
+; IC4VF1-NEXT:  [[ENTRY:.*]]:
+; IC4VF1-NEXT:    br label %[[LOOP:.*]]
+; IC4VF1:       [[LOOP]]:
+; IC4VF1-NEXT:    [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT:    [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC4VF1-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF1-NEXT:    [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC4VF1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC4VF1-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF1-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC4VF1-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; IC4VF1:       [[EXIT]]:
+; IC4VF1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
+; IC4VF1-NEXT:    ret i64 [[SPEC_SELECT_LCSSA]]
 ;
 entry:
   br label %loop
@@ -199,4 +1116,3 @@ loop:                                             ; preds = %entry, %loop
 exit:                                             ; preds = %loop
   ret i64 %spec.select
 }
-
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 9428737814146..2e8109c18948e 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -238,7 +238,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) {
 ; CHECK-NEXT: Successor(s): middle.block
 ; CHECK-EMPTY:
 ; CHECK-NEXT: middle.block:
-; CHECK-NEXT:   EMIT vp<[[RDX_RES:%.+]]> = compute-find-last-iv-result ir<%rdx>, ir<%start>, ir<-9223372036854775808>, ir<%cond>
+; CHECK-NEXT:   EMIT vp<[[RDX_RES:%.+]]> = compute-find-iv-result ir<%rdx>, ir<%start>, ir<-9223372036854775808>, ir<%cond>
 ; CHECK-NEXT:   EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<{{.+}}>
 ; CHECK-NEXT:   EMIT branch-on-cond vp<%cmp.n>
 ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph

From 3d8d79aad7efb3c505cf19048efaeec3a276ed97 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Fri, 27 Jun 2025 13:02:43 +0100
Subject: [PATCH 2/3] !fixup address comments, thanks

---
 llvm/include/llvm/Analysis/IVDescriptors.h    |   2 +-
 llvm/lib/Analysis/IVDescriptors.cpp           |   6 +-
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |   5 +-
 .../LoopVectorize/iv-select-cmp-decreasing.ll | 136 +++---------------
 4 files changed, 25 insertions(+), 124 deletions(-)

diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 310d05b6f04af..3b92cbff28de4 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -294,7 +294,7 @@ class RecurrenceDescriptor {
   /// actual type of the Phi if the recurrence has been type-promoted.
   Type *getRecurrenceType() const { return RecurrenceType; }
 
-  /// Returns the sentinel value for FindFirstIV &FindLastIV recurrences to
+  /// Returns the sentinel value for FindFirstIV & FindLastIV recurrences to
   /// replace the start value.
   Value *getSentinelValue() const {
     Type *Ty = StartValue->getType();
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b49258e3b54ef..523f3694559e6 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -726,7 +726,6 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
     // The maximum acceptable range for the increasing induction variable,
     // called the valid range, will be defined as
 
-    const ConstantRange IVRange = SE.getSignedRange(AR);
     // Keep the minimum (FindLast) or maximum (FindFirst) value of the
     // recurrence type as the sentinel value. The maximum acceptable range for
     // the induction variable, called the valid range, will be defined as
@@ -746,9 +745,8 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
         ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
       } else {
         assert(isFindFirstIVRecurrenceKind(Kind) &&
-               "Kind must either be a FindLastIV or FindFirstIV");
-        assert(IsSigned &&
-               "only FindFirstIV with SMax is supported at the moment");
+               "Kind must either be FindLastIV or FindFirstIV");
+        assert(IsSigned && "Only FindFirstIV with SMax is supported currently");
         ValidRange =
             ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits),
                                        APInt::getSignedMaxValue(NumBits) - 1);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 73d82319e13ec..472b5700bd358 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -747,9 +747,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
       MinMaxKind = IsSigned ? RecurKind::SMax : RecurKind::UMax;
     } else {
       assert(RecurrenceDescriptor::isFindFirstIVRecurrenceKind(RK) &&
-             "Kind must either be a FindLastIV or FindFirstIV");
-      assert(IsSigned &&
-             "only FindFirstIV with SMax is supported at the moment");
+             "Kind must either be FindLastIV or FindFirstIV");
+      assert(IsSigned && "Only FindFirstIV with SMax is currently supported");
       MinMaxKind = RecurKind::SMin;
     }
     for (unsigned Part = 1; Part < UF; ++Part)
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
index ee0a64397e2ac..d224da795997d 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck --check-prefix=IC1VF4 %s
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck --check-prefix=IC4VF4 %s
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck --check-prefix=IC4VF1 %s
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck --check-prefixes=CHECK,IC1VF4 %s
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck --check-prefixes=CHECK,IC4VF4 %s
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck --check-prefixes=CHECK,IC4VF1 %s
 
 define i64 @select_decreasing_induction_icmp_const_start(ptr %a) {
 ; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
@@ -962,66 +962,6 @@ define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(ptr
 ; CHECK-NEXT:    [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
 ; CHECK-NEXT:    ret i64 [[COND_LCSSA]]
 ;
-; IC1VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
-; IC1VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
-; IC1VF4-NEXT:  [[ENTRY:.*]]:
-; IC1VF4-NEXT:    br label %[[LOOP:.*]]
-; IC1VF4:       [[LOOP]]:
-; IC1VF4-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
-; IC1VF4-NEXT:    [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
-; IC1VF4-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; IC1VF4-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
-; IC1VF4-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; IC1VF4-NEXT:    [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
-; IC1VF4-NEXT:    [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
-; IC1VF4-NEXT:    [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
-; IC1VF4-NEXT:    [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
-; IC1VF4-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
-; IC1VF4-NEXT:    br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
-; IC1VF4:       [[EXIT]]:
-; IC1VF4-NEXT:    [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
-; IC1VF4-NEXT:    ret i64 [[COND_LCSSA]]
-;
-; IC4VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
-; IC4VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
-; IC4VF4-NEXT:  [[ENTRY:.*]]:
-; IC4VF4-NEXT:    br label %[[LOOP:.*]]
-; IC4VF4:       [[LOOP]]:
-; IC4VF4-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
-; IC4VF4-NEXT:    [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
-; IC4VF4-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; IC4VF4-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
-; IC4VF4-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; IC4VF4-NEXT:    [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
-; IC4VF4-NEXT:    [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
-; IC4VF4-NEXT:    [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
-; IC4VF4-NEXT:    [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
-; IC4VF4-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
-; IC4VF4-NEXT:    br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
-; IC4VF4:       [[EXIT]]:
-; IC4VF4-NEXT:    [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
-; IC4VF4-NEXT:    ret i64 [[COND_LCSSA]]
-;
-; IC4VF1-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
-; IC4VF1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
-; IC4VF1-NEXT:  [[ENTRY:.*]]:
-; IC4VF1-NEXT:    br label %[[LOOP:.*]]
-; IC4VF1:       [[LOOP]]:
-; IC4VF1-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
-; IC4VF1-NEXT:    [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
-; IC4VF1-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; IC4VF1-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
-; IC4VF1-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; IC4VF1-NEXT:    [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
-; IC4VF1-NEXT:    [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
-; IC4VF1-NEXT:    [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
-; IC4VF1-NEXT:    [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
-; IC4VF1-NEXT:    [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
-; IC4VF1-NEXT:    br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
-; IC4VF1:       [[EXIT]]:
-; IC4VF1-NEXT:    [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
-; IC4VF1-NEXT:    ret i64 [[COND_LCSSA]]
-;
 entry:
   br label %loop
 
@@ -1045,59 +985,23 @@ exit:                                             ; preds = %loop
 ; The sentinel value for decreasing-IV vectorization is LONG_MAX, and since
 ; the IV hits this value, it is impossible to vectorize this case.
 define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr %a) {
-; IC1VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
-; IC1VF4-SAME: ptr [[A:%.*]]) {
-; IC1VF4-NEXT:  [[ENTRY:.*]]:
-; IC1VF4-NEXT:    br label %[[LOOP:.*]]
-; IC1VF4:       [[LOOP]]:
-; IC1VF4-NEXT:    [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; IC1VF4-NEXT:    [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; IC1VF4-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; IC1VF4-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; IC1VF4-NEXT:    [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
-; IC1VF4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
-; IC1VF4-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; IC1VF4-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
-; IC1VF4-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; IC1VF4:       [[EXIT]]:
-; IC1VF4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; IC1VF4-NEXT:    ret i64 [[SPEC_SELECT_LCSSA]]
-;
-; IC4VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
-; IC4VF4-SAME: ptr [[A:%.*]]) {
-; IC4VF4-NEXT:  [[ENTRY:.*]]:
-; IC4VF4-NEXT:    br label %[[LOOP:.*]]
-; IC4VF4:       [[LOOP]]:
-; IC4VF4-NEXT:    [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; IC4VF4-NEXT:    [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; IC4VF4-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; IC4VF4-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; IC4VF4-NEXT:    [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
-; IC4VF4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
-; IC4VF4-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; IC4VF4-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
-; IC4VF4-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; IC4VF4:       [[EXIT]]:
-; IC4VF4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; IC4VF4-NEXT:    ret i64 [[SPEC_SELECT_LCSSA]]
-;
-; IC4VF1-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
-; IC4VF1-SAME: ptr [[A:%.*]]) {
-; IC4VF1-NEXT:  [[ENTRY:.*]]:
-; IC4VF1-NEXT:    br label %[[LOOP:.*]]
-; IC4VF1:       [[LOOP]]:
-; IC4VF1-NEXT:    [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; IC4VF1-NEXT:    [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; IC4VF1-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; IC4VF1-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; IC4VF1-NEXT:    [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
-; IC4VF1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
-; IC4VF1-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; IC4VF1-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
-; IC4VF1-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; IC4VF1:       [[EXIT]]:
-; IC4VF1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; IC4VF1-NEXT:    ret i64 [[SPEC_SELECT_LCSSA]]
+; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; CHECK-NEXT:    [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; CHECK-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
+; CHECK-NEXT:    ret i64 [[SPEC_SELECT_LCSSA]]
 ;
 entry:
   br label %loop

From a87ebd6e292976eb21c9389d15e059a82ba55734 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Sat, 28 Jun 2025 22:38:15 +0100
Subject: [PATCH 3/3] !fixup address latest comments, thanks!

---
 llvm/lib/Analysis/IVDescriptors.cpp     |  9 ++-------
 llvm/lib/Transforms/Utils/LoopUtils.cpp | 13 ++++++-------
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 523f3694559e6..b275b1064cef2 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -715,11 +715,8 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
       return std::nullopt;
 
     const SCEV *Step = AR->getStepRecurrence(SE);
-
-    if (isFindFirstIVRecurrenceKind(Kind)) {
-      if (!SE.isKnownNegative(Step))
-        return std::nullopt;
-    } else if (!SE.isKnownPositive(Step))
+    if ((isFindFirstIVRecurrenceKind(Kind) && !SE.isKnownNegative(Step)) ||
+        (isFindLastIVRecurrenceKind(Kind) && !SE.isKnownPositive(Step)))
       return std::nullopt;
 
     // Keep the minimum value of the recurrence type as the sentinel value.
@@ -744,8 +741,6 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
                                   : APInt::getMinValue(NumBits);
         ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
       } else {
-        assert(isFindFirstIVRecurrenceKind(Kind) &&
-               "Kind must either be FindLastIV or FindFirstIV");
         assert(IsSigned && "Only FindFirstIV with SMax is supported currently");
         ValidRange =
             ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits),
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index ac27ccf409d6b..e44fa6af29ffb 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1227,12 +1227,11 @@ Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
                                        RecurKind RdxKind, Value *Start,
                                        Value *Sentinel) {
   bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RdxKind);
-  Value *MaxRdx =
-      Src->getType()->isVectorTy()
-          ? (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RdxKind)
-                 ? Builder.CreateIntMaxReduce(Src, IsSigned)
-                 : Builder.CreateIntMinReduce(Src, IsSigned))
-          : Src;
+  bool IsMaxRdx = RecurrenceDescriptor::isFindLastIVRecurrenceKind(RdxKind);
+  Value *MaxRdx = Src->getType()->isVectorTy()
+                      ? (IsMaxRdx ? Builder.CreateIntMaxReduce(Src, IsSigned)
+                                  : Builder.CreateIntMinReduce(Src, IsSigned))
+                      : Src;
   // Correct the final reduction result back to the start value if the maximum
   // reduction is sentinel value.
   Value *Cmp =
@@ -1328,7 +1327,7 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
                                    RecurKind Kind, Value *Mask, Value *EVL) {
   assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
          !RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
-         "AnyOf, FindFirstIV and FindLastIV reductions are not supported.");
+         "AnyOf and FindIV reductions are not supported.");
   Intrinsic::ID Id = getReductionIntrinsicID(Kind);
   auto VPID = VPIntrinsic::getForIntrinsic(Id);
   assert(VPReductionIntrinsic::isVPReduction(VPID) &&