llvm · SergeyShch01 · Jun 19, 2025 · Jun 19, 2025 · Jun 23, 2025 · Jun 19, 2025
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -97,6 +97,7 @@ namespace {
     bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
     bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
     void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
+    bool forceEqualityForICmp(ICmpInst *ICmp, Instruction *IVOperand);
     void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
                              bool IsSigned);
     void replaceRemWithNumerator(BinaryOperator *Rem);
@@ -244,6 +245,128 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
   return true;
 }
 
+/// Try to change predicate of ICmp to EQ/NE to facilitate better work of OSR.
+/// This can be done only if all possible IV values but one lead to the same
+/// produced comparison result, while the 'chosen one' value gives the opposite
+/// result.
+bool SimplifyIndvar::forceEqualityForICmp(ICmpInst *ICmp,
+                                          Instruction *IVOperand) {
+  if (ICmp->isEquality()) {
+    // nothing to do
+    return false;
+  }
+
+  unsigned BoundOperandIdx = IVOperand == ICmp->getOperand(0) ? 1 : 0;
+  const SCEV *BoundSCEV = SE->getSCEV(ICmp->getOperand(BoundOperandIdx));
+  const SCEVConstant *BoundC = dyn_cast<SCEVConstant>(BoundSCEV);
+  CmpInst::Predicate OrigPredicate = ICmp->getPredicate();
+  CmpInst::Predicate NewPredicate = CmpInst::BAD_ICMP_PREDICATE;
+  Type *Ty = IVOperand->getType();
+  APInt NewBoundA;
+
+  if (BoundC) {
+    // Try to find the 'chosen one' value basing on predicate type and bound
+    const APInt &BoundA = BoundC->getAPInt();
+    ConstantRange ExactCR =
+        ConstantRange::makeExactICmpRegion(OrigPredicate, BoundA);
+    if (!ExactCR.getEquivalentICmp(NewPredicate, NewBoundA)) {
+      NewPredicate = CmpInst::BAD_ICMP_PREDICATE;
+    }
+  }
+
+  if (!ICmpInst::isEquality(NewPredicate)) {
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVOperand));
+    if (!AR) {
+      return false;
+    }
+    const SCEVConstant *IVStart = dyn_cast<SCEVConstant>(AR->getStart());
+    const SCEVConstant *IVStep =
+        dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+    if (!IVStart || !IVStep || !IVStep->getValue()->getValue()) {
+      return false;
+    }
+
+    if (BoundC) {
+      // Check to see the 'chosen one' value is the IV start value
+      bool HasNoWrap = ICmpInst::isSigned(OrigPredicate)
+                           ? AR->hasNoSignedWrap()
+                           : AR->hasNoUnsignedWrap();
+      if (HasNoWrap) {
+        const DataLayout &DL = ICmp->getParent()->getDataLayout();
+        Constant *SecondIterIV =
+            ConstantInt::get(Ty, IVStart->getAPInt() + IVStep->getAPInt());
+        Constant *FirstIterResult = ConstantFoldCompareInstOperands(
+            OrigPredicate, IVStart->getValue(), BoundC->getValue(), DL);
+        Constant *SecondIterResult = ConstantFoldCompareInstOperands(
+            OrigPredicate, SecondIterIV, BoundC->getValue(), DL);
+        if (FirstIterResult != SecondIterResult) {
+          NewBoundA = IVStart->getAPInt();
+          NewPredicate = FirstIterResult->isAllOnesValue() ? CmpInst::ICMP_EQ
+                                                           : CmpInst::ICMP_NE;
+        }
+      }
+    }
+
+    if (!ICmpInst::isEquality(NewPredicate)) {
+      // Check to see the 'chosen one' value is the very last IV value.
+      // To put it differently, check to see if ICmp directly or indirectly
+      // defines maximum loop trip count (or simply has aligned behavior by
+      // accident). This is different from loop exit condition rewriting as here
+      // not only ICmp instructions directly writing to exiting branch are
+      // considered.
+
+      // check to see if max trip count and IV parameters are constant
+      const SCEVConstant *MaxBackCount =
+          dyn_cast<SCEVConstant>(SE->getConstantMaxBackedgeTakenCount(L));
+      if (!MaxBackCount) {
+        return false;
+      }
+
+      // compute the number of consecutive iterations in which produced
+      // predicate value will be the same
+      bool ExitIfTrue = false;
+      auto EL = SE->computeExitLimitFromCond(L, ICmp, ExitIfTrue, false);
+      const SCEVConstant *SameIterCount =
+          dyn_cast<SCEVConstant>(EL.ExactNotTaken);
+      if (!SameIterCount || SameIterCount->getValue()->isZero()) {
+        ExitIfTrue = !ExitIfTrue;
+        EL = SE->computeExitLimitFromCond(L, ICmp, ExitIfTrue, false);
+        SameIterCount = dyn_cast<SCEVConstant>(EL.ExactNotTaken);
+      }
+
+      if (SameIterCount != MaxBackCount) {
+        // ICmp isn't aligned with maximum trip count
+        return false;
+      }
+
+      unsigned IVBitWidth = IVStep->getAPInt().getBitWidth();
+      unsigned CountBitWidth = SameIterCount->getAPInt().getBitWidth();
+      APInt SameIterCountA = SameIterCount->getAPInt();
+      if (IVBitWidth < CountBitWidth) {
+        SameIterCountA = SameIterCountA.trunc(IVBitWidth);
+      } else if (IVBitWidth > CountBitWidth) {
+        SameIterCountA = SameIterCountA.zext(IVBitWidth);
+      }
+      NewBoundA = IVStart->getAPInt() + (IVStep->getAPInt() * SameIterCountA);
+      NewPredicate = ExitIfTrue ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+    }
+  }
+
+  if (!TTI->isLegalICmpImmediate(NewBoundA.getSExtValue())) {
+    return false;
+  }
+
+  LLVM_DEBUG(dbgs() << "INDVARS: Force EQ/NE predicate for max trip count: "
+                    << *ICmp << '\n');
+
+  assert(Ty->getPrimitiveSizeInBits() == NewBoundA.getBitWidth() &&
+         "bit widths should be aligned");
+  ICmp->setOperand(BoundOperandIdx, ConstantInt::get(Ty, NewBoundA));
+  ICmp->setPredicate(NewPredicate);
+
+  return true;
+}
+
 /// SimplifyIVUsers helper for eliminating useless
 /// comparisons against an induction variable.
 void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
@@ -267,33 +390,43 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
   // If the condition is always true or always false in the given context,
   // replace it with a constant value.
   SmallVector<Instruction *, 4> Users;
+  bool IsDead = false;
   for (auto *U : ICmp->users())
     Users.push_back(cast<Instruction>(U));
   const Instruction *CtxI = findCommonDominator(Users, *DT);
   if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) {
     SE->forgetValue(ICmp);
     ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev));
     DeadInsts.emplace_back(ICmp);
+    IsDead = true;
     LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
   } else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
-    // fallthrough to end of function
-  } else if (ICmpInst::isSigned(OriginalPred) &&
-             SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
+    IsDead = true;
+  } else {
     // If we were unable to make anything above, all we can is to canonicalize
     // the comparison hoping that it will open the doors for other
-    // optimizations. If we find out that we compare two non-negative values,
-    // we turn the instruction's predicate to its unsigned version. Note that
-    // we cannot rely on Pred here unless we check if we have swapped it.
-    assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
-    LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
-                      << '\n');
-    ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
-    ICmp->setSameSign();
-  } else
-    return;
+    // optimizations.
+    if (ICmpInst::isSigned(OriginalPred) && SE->isKnownNonNegative(S) &&
+        SE->isKnownNonNegative(X)) {
+      // If we find out that we compare two non-negative values,
+      // we turn the instruction's predicate to its unsigned version. Note that
+      // we cannot rely on Pred here unless we check if we have swapped it.
+      assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
+      LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
+                        << '\n');
+      ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
+      ICmp->setSameSign();
+      Changed = true;
+    }
+    if (forceEqualityForICmp(ICmp, IVOperand)) {
+      Changed = true;
+    }
+  }
 
-  ++NumElimCmp;
-  Changed = true;
+  if (IsDead) {
+    NumElimCmp++;
+    Changed = true;
+  }
 }
 
 bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {

diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll
@@ -13,7 +13,7 @@ define i32 @guards_applied_to_add_rec(ptr %dst) {
 ; CHECK-NEXT:    [[OUTER_IV_0:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[OUTER_IV_0_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
 ; CHECK-NEXT:    [[OUTER_IV_1:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[OUTER_IV_0]], %[[OUTER_LATCH]] ]
 ; CHECK-NEXT:    [[SHR28:%.*]] = lshr i32 [[OUTER_IV_1]], 1
-; CHECK-NEXT:    [[PRE:%.*]] = icmp samesign ult i32 [[OUTER_IV_1]], 2
+; CHECK-NEXT:    [[PRE:%.*]] = icmp samesign eq i32 [[OUTER_IV_1]], 1
 ; CHECK-NEXT:    br i1 [[PRE]], label %[[OUTER_LATCH]], label %[[INNER_PREHEADER:.*]]
 ; CHECK:       [[INNER_PREHEADER]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[SHR28]] to i64

diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll
@@ -97,7 +97,7 @@ define void @test2(ptr %a, ptr %b, i8 %limit, i1 %arg) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[LIMIT:%.*]] to i32
-; CHECK-NEXT:    br i1 %arg, label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
+; CHECK-NEXT:    br i1 [[ARG:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
 ; CHECK:       for.cond1.preheader.us.preheader:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[CONV]], i32 1)
 ; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER_US:%.*]]
@@ -110,7 +110,7 @@ define void @test2(ptr %a, ptr %b, i8 %limit, i1 %arg) {
 ; CHECK-NEXT:    br label [[FOR_INC13_US]]
 ; CHECK:       for.inc13.us:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT4]] = add nuw nsw i64 [[INDVARS_IV3]], 1
-; CHECK-NEXT:    [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT4]], 4
+; CHECK-NEXT:    [[EXITCOND6:%.*]] = icmp samesign ne i64 [[INDVARS_IV_NEXT4]], 4
 ; CHECK-NEXT:    br i1 [[EXITCOND6]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END_LOOPEXIT1:%.*]]
 ; CHECK:       for.body4.us:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY4_LR_PH_US]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US:%.*]] ]
@@ -237,8 +237,7 @@ define i32 @test4(i32 %a) {
 ; CHECK-NEXT:    [[CONV3:%.*]] = trunc i32 [[OR]] to i8
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @fn1(i8 signext [[CONV3]])
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i32 [[INDVARS_IV]], -1
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc nuw i32 [[INDVARS_IV_NEXT]] to i8
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[TMP0]], -14
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[INDVARS_IV_NEXT]], 242
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret i32 0
@@ -517,8 +516,8 @@ define i32 @test10(i32 %v) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = mul nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], [[SEXT]]
 ; CHECK-NEXT:    call void @consume.i1(i1 [[TMP1]])
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp samesign ne i64 [[INDVARS_IV_NEXT]], 11
 ; CHECK-NEXT:    call void @consume.i64(i64 [[TMP0]])
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 11
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LEAVE:%.*]]
 ; CHECK:       leave:
 ; CHECK-NEXT:    ret i32 22

diff --git a/llvm/test/Transforms/IndVarSimplify/X86/pr24356.ll b/llvm/test/Transforms/IndVarSimplify/X86/pr24356.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S -passes=indvars -indvars-predicate-loops=0  < %s | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@@ -7,15 +8,43 @@ target triple = "x86_64-apple-macosx10.10.0"
 
 ; Function Attrs: nounwind ssp uwtable
 define void @fn1() {
-; CHECK-LABEL: @fn1(
+; CHECK-LABEL: define void @fn1() {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    br label %[[BB4_PREHEADER:.*]]
+; CHECK:       [[BB4_PREHEADER]]:
+; CHECK-NEXT:    [[B_03:%.*]] = phi i8 [ 0, %[[BB]] ], [ [[TMP17:%.*]], %[[BB16:.*]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i8 [[B_03]], 0
+; CHECK-NEXT:    br i1 [[TMP9]], label %[[BB4_PREHEADER_BB18_LOOPEXIT_SPLIT_CRIT_EDGE:.*]], label %[[BB4_PREHEADER_BB4_PREHEADER_SPLIT_CRIT_EDGE:.*]]
+; CHECK:       [[BB4_PREHEADER_BB4_PREHEADER_SPLIT_CRIT_EDGE]]:
+; CHECK-NEXT:    br label %[[BB4_PREHEADER_SPLIT:.*]]
+; CHECK:       [[BB4_PREHEADER_BB18_LOOPEXIT_SPLIT_CRIT_EDGE]]:
+; CHECK-NEXT:    store i32 0, ptr @a, align 4
+; CHECK-NEXT:    br label %[[BB18_LOOPEXIT_SPLIT:.*]]
+; CHECK:       [[BB4_PREHEADER_SPLIT]]:
+; CHECK-NEXT:    br label %[[BB7:.*]]
+; CHECK:       [[BB4:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[BB7]], label %[[BB16]]
+; CHECK:       [[BB7]]:
+; CHECK-NEXT:    br i1 false, label %[[BB18_LOOPEXIT:.*]], label %[[BB4]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    [[TMP17]] = add nuw nsw i8 [[B_03]], -1
+; CHECK-NEXT:    br i1 false, label %[[BB18_LOOPEXIT1:.*]], label %[[BB4_PREHEADER]]
+; CHECK:       [[BB18_LOOPEXIT]]:
+; CHECK-NEXT:    br label %[[BB18_LOOPEXIT_SPLIT]]
+; CHECK:       [[BB18_LOOPEXIT_SPLIT]]:
+; CHECK-NEXT:    br label %[[BB18:.*]]
+; CHECK:       [[BB18_LOOPEXIT1]]:
+; CHECK-NEXT:    [[TMP14_LCSSA5_LCSSA:%.*]] = phi i32 [ 1, %[[BB16]] ]
+; CHECK-NEXT:    store i32 [[TMP14_LCSSA5_LCSSA]], ptr @a, align 4
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
+; CHECK-NEXT:    ret void
+;
 bb:
   br label %bb4.preheader
 
 bb4.preheader:                                    ; preds = %bb, %bb16
-; CHECK-LABEL:  bb4.preheader:
   %b.03 = phi i8 [ 0, %bb ], [ %tmp17, %bb16 ]
-; CHECK: %tmp9 = icmp ugt i8 %b.03, 1
-; CHECK-NOT: %tmp9 = icmp ugt i8 0, 1
 
   %tmp9 = icmp ugt i8 %b.03, 1
   br i1 %tmp9, label %bb4.preheader.bb18.loopexit.split_crit_edge, label %bb4.preheader.bb4.preheader.split_crit_edge

diff --git a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext
@@ -1,72 +1,44 @@
 # CSIR level Instrumentation Flag
 :csir
-cond.llvm.11253644763537639171
-# Func Hash:
-1152921517491748863
-# Num Counters:
-1
-# Counter Values:
-200000
-
-foo
-# Func Hash:
-1720106746050921044
-# Num Counters:
-2
-# Counter Values:
-100000
-1
-
 bar
 # Func Hash:
 1299757151682747028
 # Num Counters:
 2
 # Counter Values:
-0
-0
-
-bar
-# Func Hash:
-29667547796
-# Num Counters:
-2
-# Counter Values:
 100000
 100000
 
 main
 # Func Hash:
-1152921517491748863
+1895182923573755903
 # Num Counters:
 1
 # Counter Values:
 1
 
-main
+cspgo_bar.c;clobber
 # Func Hash:
 1895182923573755903
 # Num Counters:
 1
 # Counter Values:
-1
+200000
 
-cspgo.c:foo
+cspgo_bar.c;cond
 # Func Hash:
-1720106746050921044
+1895182923573755903
 # Num Counters:
-4
-# Counter Values:
-100000
-100000
-0
 1
+# Counter Values:
+200000
 
-cspgo_bar.c:cond
+cspgo.c;foo
 # Func Hash:
-12884901887
+2216626667076672412
 # Num Counters:
-1
+2
 # Counter Values:
-200000
+100000
+1
 
diff --git a/llvm/test/Transforms/PGOProfile/thinlto_cspgo_use.ll b/llvm/test/Transforms/PGOProfile/thinlto_cspgo_use.ll
@@ -15,8 +15,8 @@
 
 ; CSUSE: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}}
 ; CSUSE: {{![0-9]+}} = !{i32 1, !"CSProfileSummary", {{![0-9]+}}}
-; CSUSE-DAG: {{![0-9]+}} = !{!"branch_weights", i32 100000, i32 0}
-; CSUSE-DAG: {{![0-9]+}} = !{!"branch_weights", i32 0, i32 100000}
+; CSUSE-DAG: {{![0-9]+}} = !{!"branch_weights", i32 100000, i32 100000}
+; CSUSE-DAG: {{![0-9]+}} = !{!"branch_weights", i32 1, i32 100000}
 
 source_filename = "cspgo.c"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"