fix gep-based inference

kasuga-fj · kasuga-fj · commit b5ca793172e6 · 2025-09-01T12:05:56.000Z
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3414,9 +3414,24 @@ SCEVSignedMonotonicityChecker::SCEVSignedMonotonicityChecker(
     ScalarEvolution *SE, const Loop *OutermostLoop, const Value *Ptr)
     : SE(SE), OutermostLoop(OutermostLoop) {
   if (Ptr) {
-    // TODO: This seems incorrect. Maybe we should check the reachability from
-    // the GEP to the target instruction. E.g., in the following case, maybe
-    // no-wrap is not guaranteed:
+    // Perform reasoning similar to LoopAccessAnalysis. If an AddRec would wrap
+    // and the GEP would have nusw, the wrapped memory location would be as
+    // follows (in the mathematical sense, assuming the step recurrence is
+    // positive):
+    //
+    //   (previously accessed location) + (step recurrence) - 2^N
+    //
+    // where N is the size of the pointer index type. Since the value of step
+    // recurrence is less than 2^(N-1), the distance between the previously
+    // accessed location and the wrapped location will be greater than 2^(N-1),
+    // which is larger than half the pointer index type space. The size of
+    // allocated object must not exceed the largest signed integer that fits
+    // into the index type, so the GEP value would be poison and any memory
+    // access using it would be immediate UB when executed.
+    //
+    // TODO: We don't check if the result of the GEP is always used. Maybe we
+    // should check the reachability from the GEP to the target instruction.
+    // E.g., in the following case, a wrap would not trigger immediate UB:
     //
     //  entry:
     //    ...
@@ -3441,8 +3456,6 @@ MonotonicityType SCEVSignedMonotonicityChecker::checkMonotonicity(
     const Value *Ptr) {
   SCEVSignedMonotonicityChecker Checker(SE, OutermostLoop, Ptr);
   MonotonicityType MT = Checker.visit(Expr);
-  if (MT == MonotonicityType::Unknown && Checker.NoWrapFromGEP)
-    MT = MonotonicityType::NoSignedWrap;
 
 #ifndef NDEBUG
   switch (MT) {
@@ -3532,8 +3545,6 @@ SCEVSignedMonotonicityChecker::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
   const SCEV *Start = Expr->getStart();
   const SCEV *Step = Expr->getStepRecurrence(*SE);
 
-  bool IsNSW = Expr->hasNoSignedWrap();
-
   MonotonicityType StartRes = visit(Start);
   if (StartRes == MonotonicityType::Unknown)
     return unknownMonotonicity(Expr);
@@ -3543,7 +3554,7 @@ SCEVSignedMonotonicityChecker::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
     return unknownMonotonicity(Expr);
 
   // TODO: Enhance the inference here.
-  if (!IsNSW) {
+  if (!Expr->hasNoSignedWrap() && !NoWrapFromGEP) {
     if (!SE->isKnownNegative(Step))
       // If the coefficient can be positive value, ensure that the AddRec is
       // monotonically increasing.
@@ -3787,9 +3798,10 @@ bool DependenceInfo::tryDelinearizeParametricSize(
           LI->getLoopFor(Src->getParent())->getOutermostLoop();
 
       // TODO: In general, reasoning about monotonicity of a subscript from the
-      // base pointer would not be allowed. Probably we need to check the loops
-      // associated with this subscript are disjoint from those associated with
-      // the other subscripts. The validation would be something like:
+      // base pointer would lead to incorrect results. Probably we need to
+      // check that the loops associated with this subscript are disjoint from
+      // those associated with the other subscripts. The validation would be
+      // something like:
       //
       //   LoopsI = collectCommonLoops(SrcSubscripts[I])
       //   LoopsOthers = collectCommonLoops(SrcSCEV - SrcSubscripts[I])
diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
@@ -465,13 +465,13 @@ for.end12:                                        ; preds = %for.end12.loopexit,
 define void @gcd7(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp {
 ; CHECK-LABEL: 'gcd7'
 ; CHECK-NEXT:  Src: store i32 %7, ptr %arrayidx6, align 4 --> Dst: store i32 %7, ptr %arrayidx6, align 4
-; CHECK-NEXT:    da analyze - output [* *]!
+; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %7, ptr %arrayidx6, align 4 --> Dst: %11 = load i32, ptr %arrayidx12, align 4
-; CHECK-NEXT:    da analyze - flow [* *|<]!
+; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %7, ptr %arrayidx6, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: %11 = load i32, ptr %arrayidx12, align 4
-; CHECK-NEXT:    da analyze - input [* *]!
+; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %11, ptr %B.addr.12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
@@ -640,13 +640,13 @@ for.end15:                                        ; preds = %for.end15.loopexit,
 define void @gcd9(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp {
 ; CHECK-LABEL: 'gcd9'
 ; CHECK-NEXT:  Src: store i32 %7, ptr %arrayidx6, align 4 --> Dst: store i32 %7, ptr %arrayidx6, align 4
-; CHECK-NEXT:    da analyze - output [* *]!
+; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %7, ptr %arrayidx6, align 4 --> Dst: %11 = load i32, ptr %arrayidx12, align 4
-; CHECK-NEXT:    da analyze - flow [* *|<]!
+; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %7, ptr %arrayidx6, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: %11 = load i32, ptr %arrayidx12, align 4
-; CHECK-NEXT:    da analyze - input [* *]!
+; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %11, ptr %B.addr.12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/monotonic.ll b/llvm/test/Analysis/DependenceAnalysis/monotonic.ll
@@ -332,3 +332,48 @@ loop:
 exit:
   ret void
 }
+
+; The value of the step recurrence is not invariant with respect to the
+; outermost loop (the i-loop).
+;
+; offset_i = 0;
+; for (int i = 0; i < 100; i++) {
+;   for (int j = 0; j < 100; j++)
+;     a[offset_i + j] = 0;
+;   offset_i += (i % 2 == 0) ? 0 : 3;
+; }
+;
+define void @step_is_variant(ptr %a) {
+; CHECK-LABEL: 'step_is_variant'
+; CHECK: Failed to prove monotonicity for: %offset.i
+; CHECK: Failed to prove monotonicity for: {%offset.i,+,1}<nuw><nsw><%loop.j>
+; CHECK: Monotonicity: Unknown expr: {%offset.i,+,1}<nuw><nsw><%loop.j>
+;
+entry:
+  br label %loop.i.header
+
+loop.i.header:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ]
+  %offset.i = phi i64 [ 0, %entry ], [ %offset.i.next, %loop.i.latch ]
+  %step.i.0 = phi i64 [ 0, %entry ], [ %step.i.1, %loop.i.latch ]
+  %step.i.1 = phi i64 [ 3, %entry ], [ %step.i.0, %loop.i.latch ]
+  br label %loop.j
+
+loop.j:
+  %j = phi i64 [ 0, %loop.i.header ], [ %j.inc, %loop.j ]
+  %offset = add nsw i64 %offset.i, %j
+  %idx = getelementptr inbounds i8, ptr %a, i64 %offset
+  store i8 0, ptr %idx
+  %j.inc = add nsw i64 %j, 1
+  %exitcond.j = icmp eq i64 %j.inc, 100
+  br i1 %exitcond.j, label %loop.i.latch, label %loop.j
+
+loop.i.latch:
+  %i.inc = add nsw i64 %i, 1
+  %offset.i.next = add nsw i64 %offset.i, %step.i.0
+  %exitcond.i = icmp eq i64 %i.inc, 100
+  br i1 %exitcond.i, label %exit, label %loop.i.header
+
+exit:
+  ret void
+}