release/19.x: [SCEV] Do not allow refinement in the rewriting of BEValue (llvm#117152)

dtcxzyw · dtcxzyw · commit 6f08a0f1eb21 · 2024-12-01T23:19:17.000+08:00
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -2132,6 +2132,12 @@ class ScalarEvolution {
   bool isGuaranteedToTransferExecutionTo(const Instruction *A,
                                          const Instruction *B);
 
+  /// Returns true if \p Op is guaranteed not to cause immediate UB.
+  bool isGuaranteedNotToCauseUB(const SCEV *Op);
+
+  /// Returns true if \p Op is guaranteed to not be poison.
+  static bool isGuaranteedNotToBePoison(const SCEV *Op);
+
   /// Return true if the SCEV corresponding to \p I is never poison.  Proving
   /// this is more complex than proving that just \p I is never poison, since
   /// SCEV commons expressions across control flow, and you can have cases
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4217,7 +4217,7 @@ bool ScalarEvolution::canReuseInstruction(
 
     // Either the value can't be poison, or the S would also be poison if it
     // is.
-    if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V))
+    if (PoisonVals.contains(V) || ::isGuaranteedNotToBePoison(V))
       continue;
 
     auto *I = dyn_cast<Instruction>(V);
@@ -4320,6 +4320,8 @@ ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind,
   }
 
   for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
+    if (!isGuaranteedNotToCauseUB(Ops[i]))
+      continue;
     // We can replace %x umin_seq %y with %x umin %y if either:
     //  * %y being poison implies %x is also poison.
     //  * %x cannot be the saturating value (e.g. zero for umin).
@@ -5936,18 +5938,22 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
     // We can generalize this saying that i is the shifted value of BEValue
     // by one iteration:
     //   PHI(f(0), f({1,+,1})) --> f({0,+,1})
-    const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
-    const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false);
-    if (Shifted != getCouldNotCompute() &&
-        Start != getCouldNotCompute()) {
-      const SCEV *StartVal = getSCEV(StartValueV);
-      if (Start == StartVal) {
-        // Okay, for the entire analysis of this edge we assumed the PHI
-        // to be symbolic.  We now need to go back and purge all of the
-        // entries for the scalars that use the symbolic expression.
-        forgetMemoizedResults(SymbolicName);
-        insertValueToMap(PN, Shifted);
-        return Shifted;
+
+    // Do not allow refinement in rewriting of BEValue.
+    if (isGuaranteedNotToCauseUB(BEValue)) {
+      const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
+      const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false);
+      if (Shifted != getCouldNotCompute() && Start != getCouldNotCompute() &&
+          ::impliesPoison(BEValue, Start)) {
+        const SCEV *StartVal = getSCEV(StartValueV);
+        if (Start == StartVal) {
+          // Okay, for the entire analysis of this edge we assumed the PHI
+          // to be symbolic.  We now need to go back and purge all of the
+          // entries for the scalars that use the symbolic expression.
+          forgetMemoizedResults(SymbolicName);
+          insertValueToMap(PN, Shifted);
+          return Shifted;
+        }
       }
     }
   }
@@ -7319,6 +7325,21 @@ bool ScalarEvolution::isGuaranteedToTransferExecutionTo(const Instruction *A,
   return false;
 }
 
+bool ScalarEvolution::isGuaranteedNotToBePoison(const SCEV *Op) {
+  SCEVPoisonCollector PC(/* LookThroughMaybePoisonBlocking */ true);
+  visitAll(Op, PC); // Run the collector over Op.
+  return PC.MaybePoison.empty(); // Empty MaybePoison => Op is not poison.
+}
+
+bool ScalarEvolution::isGuaranteedNotToCauseUB(const SCEV *Op) {
+  return !SCEVExprContains(Op, [this](const SCEV *S) {
+    auto *UDiv = dyn_cast<SCEVUDivExpr>(S);
+    // The UDiv may be UB if the divisor is poison or zero. Unless the divisor
+    // is known to be non-zero and not poison, assume the UDiv may be UB.
+    return UDiv && (!isKnownNonZero(UDiv->getOperand(1)) ||
+                    !isGuaranteedNotToBePoison(UDiv->getOperand(1)));
+  });
+}
 
 bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
   // Only proceed if we can prove that I does not yield poison.
diff --git a/llvm/test/Analysis/ScalarEvolution/pr117133.ll b/llvm/test/Analysis/ScalarEvolution/pr117133.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -disable-output "-passes=print<scalar-evolution>" < %s 2>&1 | FileCheck %s
+
+define i32 @widget() {
+; CHECK-LABEL: 'widget'
+; CHECK-NEXT:  Classifying expressions for: @widget
+; CHECK-NEXT:    %phi = phi i32 [ 0, %b ], [ %udiv6, %b5 ]
+; CHECK-NEXT:    --> %phi U: [0,1) S: [0,1) Exits: <<Unknown>> LoopDispositions: { %b1: Variant }
+; CHECK-NEXT:    %phi2 = phi i32 [ 1, %b ], [ %add, %b5 ]
+; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%b1> U: [1,2) S: [1,2) Exits: <<Unknown>> LoopDispositions: { %b1: Computable }
+; CHECK-NEXT:    %udiv = udiv i32 10, %phi2
+; CHECK-NEXT:    --> (10 /u {1,+,1}<nuw><nsw><%b1>) U: [10,11) S: [10,11) Exits: <<Unknown>> LoopDispositions: { %b1: Computable }
+; CHECK-NEXT:    %urem = urem i32 %udiv, 10
+; CHECK-NEXT:    --> ((-10 * ((10 /u {1,+,1}<nuw><nsw><%b1>) /u 10))<nuw><nsw> + (10 /u {1,+,1}<nuw><nsw><%b1>)) U: [0,1) S: [0,1) Exits: <<Unknown>> LoopDispositions: { %b1: Computable }
+; CHECK-NEXT:    %udiv6 = udiv i32 %phi2, 0
+; CHECK-NEXT:    --> ({1,+,1}<nuw><nsw><%b1> /u 0) U: empty-set S: empty-set Exits: <<Unknown>> LoopDispositions: { %b1: Computable }
+; CHECK-NEXT:    %add = add i32 %phi2, 1
+; CHECK-NEXT:    --> {2,+,1}<nuw><nsw><%b1> U: [2,3) S: [2,3) Exits: <<Unknown>> LoopDispositions: { %b1: Computable }
+; CHECK-NEXT:  Determining loop execution counts for: @widget
+; CHECK-NEXT:  Loop %b1: <multiple exits> Unpredictable backedge-taken count.
+; CHECK-NEXT:    exit count for b1: ***COULDNOTCOMPUTE***
+; CHECK-NEXT:    exit count for b3: i32 0
+; CHECK-NEXT:  Loop %b1: constant max backedge-taken count is i32 0
+; CHECK-NEXT:  Loop %b1: symbolic max backedge-taken count is i32 0
+; CHECK-NEXT:    symbolic max exit count for b1: ***COULDNOTCOMPUTE***
+; CHECK-NEXT:    symbolic max exit count for b3: i32 0
+;
+b:
+  br label %b1
+
+b1:                                              ; preds = %b5, %b
+  %phi = phi i32 [ 0, %b ], [ %udiv6, %b5 ]
+  %phi2 = phi i32 [ 1, %b ], [ %add, %b5 ]
+  %icmp = icmp eq i32 %phi, 0
+  br i1 %icmp, label %b3, label %b8
+
+b3:                                              ; preds = %b1
+  %udiv = udiv i32 10, %phi2
+  %urem = urem i32 %udiv, 10
+  %icmp4 = icmp eq i32 %urem, 0
+  br i1 %icmp4, label %b7, label %b5
+
+b5:                                              ; preds = %b3
+  %udiv6 = udiv i32 %phi2, 0
+  %add = add i32 %phi2, 1
+  br label %b1
+
+b7:                                              ; preds = %b3
+  ret i32 5
+
+b8:                                              ; preds = %b1
+  ret i32 7
+}
+
+; Don't fold %indvar2 into (zext {0,+,1}) * %a
+define i64 @test_poisonous(i64 %a, i32 %n) {
+; CHECK-LABEL: 'test_poisonous'
+; CHECK-NEXT:  Classifying expressions for: @test_poisonous
+; CHECK-NEXT:    %indvar1 = phi i32 [ 0, %entry ], [ %indvar1.next, %loop.body ]
+; CHECK-NEXT:    --> {0,+,1}<%loop.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop.body: Computable }
+; CHECK-NEXT:    %indvar2 = phi i64 [ 0, %entry ], [ %mul, %loop.body ]
+; CHECK-NEXT:    --> %indvar2 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop.body: Variant }
+; CHECK-NEXT:    %indvar1.next = add i32 %indvar1, 1
+; CHECK-NEXT:    --> {1,+,1}<%loop.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop.body: Computable }
+; CHECK-NEXT:    %ext = zext i32 %indvar1.next to i64
+; CHECK-NEXT:    --> (zext i32 {1,+,1}<%loop.body> to i64) U: [0,4294967296) S: [0,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop.body: Computable }
+; CHECK-NEXT:    %mul = mul i64 %ext, %a
+; CHECK-NEXT:    --> ((zext i32 {1,+,1}<%loop.body> to i64) * %a) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop.body: Computable }
+; CHECK-NEXT:  Determining loop execution counts for: @test_poisonous
+; CHECK-NEXT:  Loop %loop.body: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop.body: Unpredictable symbolic max backedge-taken count.
+; CHECK-NEXT:  Loop %loop.body: Predicated backedge-taken count is (-1 + (1 smax (1 + (sext i32 %n to i64))<nsw>))<nsw>
+; CHECK-NEXT:   Predicates:
+; CHECK-NEXT:      {1,+,1}<%loop.body> Added Flags: <nssw>
+; CHECK-NEXT:  Loop %loop.body: Predicated symbolic max backedge-taken count is (-1 + (1 smax (1 + (sext i32 %n to i64))<nsw>))<nsw>
+; CHECK-NEXT:   Predicates:
+; CHECK-NEXT:      {1,+,1}<%loop.body> Added Flags: <nssw>
+;
+entry:
+  br label %loop.body
+
+loop.body:
+  %indvar1 = phi i32 [ 0, %entry ], [ %indvar1.next, %loop.body ]
+  %indvar2 = phi i64 [ 0, %entry ], [ %mul, %loop.body ]
+  %indvar1.next = add i32 %indvar1, 1
+  %ext = zext i32 %indvar1.next to i64
+  %mul = mul i64 %ext, %a
+  %exitcond = icmp sgt i32 %indvar1.next, %n
+  br i1 %exitcond, label %loop.exit, label %loop.body
+
+loop.exit:
+  ret i64 %mul
+}
diff --git a/llvm/test/Transforms/IndVarSimplify/pr117133.ll b/llvm/test/Transforms/IndVarSimplify/pr117133.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=indvars < %s | FileCheck %s
+
+define i32 @widget() {
+; CHECK-LABEL: define i32 @widget() {
+; CHECK-NEXT:  [[B:.*:]]
+; CHECK-NEXT:    br label %[[B1:.*]]
+; CHECK:       [[B1]]:
+; CHECK-NEXT:    br i1 true, label %[[B3:.*]], label %[[B8:.*]]
+; CHECK:       [[B3]]:
+; CHECK-NEXT:    br i1 true, label %[[B7:.*]], label %[[B5:.*]]
+; CHECK:       [[B5]]:
+; CHECK-NEXT:    br label %[[B1]]
+; CHECK:       [[B7]]:
+; CHECK-NEXT:    ret i32 5
+; CHECK:       [[B8]]:
+; CHECK-NEXT:    ret i32 7
+;
+b:
+  br label %b1
+
+b1:
+  %phi = phi i32 [ 0, %b ], [ %udiv6, %b5 ]
+  %phi2 = phi i32 [ 1, %b ], [ %add, %b5 ]
+  %icmp = icmp eq i32 %phi, 0
+  br i1 %icmp, label %b3, label %b8
+
+b3:
+  %udiv = udiv i32 10, %phi2
+  %urem = urem i32 %udiv, 10
+  %icmp4 = icmp eq i32 %urem, 0
+  br i1 %icmp4, label %b7, label %b5
+
+b5:
+  %udiv6 = udiv i32 %phi2, 0
+  %add = add i32 %phi2, 1
+  br label %b1
+
+b7:
+  ret i32 5
+
+b8:
+  ret i32 7
+}
diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
@@ -465,9 +465,10 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch(ptr %dst, i64 %N
 ; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch(
 ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
 ; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 42, [[N]]
-; CHECK-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SMAX]], i64 [[TMP0]])
+; CHECK-NEXT:    [[TMP8:%.*]] = freeze i64 [[TMP0]]
+; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
+; CHECK-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP8]], i64 [[SMAX]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[UMIN]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -653,12 +654,13 @@ define i64 @multi_exit_4_exit_count_with_urem_by_value_in_latch(ptr %dst, i64 %N
 ; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_urem_by_value_in_latch(
 ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
 ; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 42, [[N]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[N]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 42, [[TMP1]]
 ; CHECK-NEXT:    [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP2]], i64 0)
-; CHECK-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SMAX]], i64 [[SMAX1]])
+; CHECK-NEXT:    [[TMP10:%.*]] = freeze i64 [[SMAX1]]
+; CHECK-NEXT:    [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
+; CHECK-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP10]], i64 [[SMAX2]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = add nuw i64 [[UMIN]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP3]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]