llvm · sebpop · Aug 24, 2025 · Aug 24, 2025 · efriedma-quic · Aug 25, 2025
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2196,6 +2196,21 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
       return Dependence::Unknown;
     }
 
+    // For WAW (Write-After-Write) dependencies, negative distances in one
+    // direction can still represent unsafe dependencies. Since we only check
+    // dependencies in program order (AIdx < BIdx), a negative distance means
+    // the later write accesses memory locations before the earlier write.
+    // However, in a vectorized loop, both writes could execute simultaneously,
+    // potentially causing incorrect behavior. Therefore, WAW with negative
+    // distances should be treated as unsafe.
+    bool IsWriteAfterWrite = (AIsWrite && BIsWrite);
+    if (IsWriteAfterWrite) {
+      LLVM_DEBUG(
+          dbgs() << "LAA: WAW dependence with negative distance is unsafe\n");
+      return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
+                                            : Dependence::Unknown;
+    }
+
     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
     // Check if the first access writes to a location that is read in a later
     // iteration, where the distance between them is not a multiple of a vector

@@ -2951,25 +2951,52 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
       if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
         // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
         SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());
+
+        // Track flags: start with the flags from the first AddRec.
+        bool AllHaveNSW = AddRec->hasNoSignedWrap();
+        bool AllHaveNUW = AddRec->hasNoUnsignedWrap();
+
         for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
              ++OtherIdx) {
           const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
           if (OtherAddRec->getLoop() == AddRecLoop) {
+            // Update flags based on this AddRec
+            if (!OtherAddRec->hasNoSignedWrap())
+              AllHaveNSW = false;
+            if (!OtherAddRec->hasNoUnsignedWrap())
+              AllHaveNUW = false;
             for (unsigned i = 0, e = OtherAddRec->getNumOperands();
                  i != e; ++i) {
               if (i >= AddRecOps.size()) {
                 append_range(AddRecOps, OtherAddRec->operands().drop_front(i));
                 break;
               }
+              // Preserve no-wrap flags when combining AddRec operands.
+              SCEV::NoWrapFlags CombineFlags = SCEV::FlagAnyWrap;
+              if (auto *AR1 = dyn_cast<SCEVAddRecExpr>(AddRecOps[i]))
+                if (auto *AR2 =
+                        dyn_cast<SCEVAddRecExpr>(OtherAddRec->getOperand(i))) {
+                  if (AR1->hasNoSignedWrap() && AR2->hasNoSignedWrap())
+                    CombineFlags = setFlags(CombineFlags, SCEV::FlagNSW);
+                  if (AR1->hasNoUnsignedWrap() && AR2->hasNoUnsignedWrap())
+                    CombineFlags = setFlags(CombineFlags, SCEV::FlagNUW);
+                }
               SmallVector<const SCEV *, 2> TwoOps = {
                   AddRecOps[i], OtherAddRec->getOperand(i)};
-              AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
+              AddRecOps[i] = getAddExpr(TwoOps, CombineFlags, Depth + 1);
             }
             Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
           }
         }
         // Step size has changed, so we cannot guarantee no self-wraparound.
-        Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
+        // However, preserve NSW/NUW flags if all combined AddRecs had them.
+        SCEV::NoWrapFlags FinalFlags = SCEV::FlagAnyWrap;
+        if (AllHaveNSW)
+          FinalFlags = setFlags(FinalFlags, SCEV::FlagNSW);
+        if (AllHaveNUW)
+          FinalFlags = setFlags(FinalFlags, SCEV::FlagNUW);
+
+        Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, FinalFlags);
         return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
       }
     }

diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
@@ -339,7 +339,7 @@ define void @a_i_j2k_i(ptr %a) {
 ; CHECK-LABEL: 'a_i_j2k_i'
 ; CHECK-NEXT:  Inst: store i32 1, ptr %idx, align 4
 ; CHECK-NEXT:  In Loop with Header: for.k
-; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,1028}<%for.i.header>,+,256}<nw><%for.j.header>,+,128}<nw><%for.k>
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,1028}<nuw><nsw><%for.i.header>,+,256}<nw><%for.j.header>,+,128}<nw><%for.k>
 ; CHECK-NEXT:  failed to delinearize
 ;
 entry:
@@ -391,7 +391,7 @@ define void @a_i_i_jk(ptr %a) {
 ; CHECK-LABEL: 'a_i_i_jk'
 ; CHECK-NEXT:  Inst: store i32 1, ptr %idx, align 4
 ; CHECK-NEXT:  In Loop with Header: for.k
-; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,1152}<%for.i.header>,+,4}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,1152}<nuw><nsw><%for.i.header>,+,4}<nw><%for.j.header>,+,4}<nw><%for.k>
 ; CHECK-NEXT:  Base offset: %a
 ; CHECK-NEXT:  ArrayDecl[UnknownSize][288] with elements of 4 bytes.
 ; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{{\{\{}}0,+,1}<nuw><nsw><%for.j.header>,+,1}<nuw><nsw><%for.k>]

diff --git a/llvm/test/Analysis/DependenceAnalysis/scev-nsw-flags-enable-analysis.ll b/llvm/test/Analysis/DependenceAnalysis/scev-nsw-flags-enable-analysis.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 | FileCheck %s
+
+; Test that SCEV NSW flag preservation enables dependence analysis to work
+; correctly. Previously, SCEV would lose NSW flags when combining AddRec
+; expressions from GEP operations, causing dependence analysis to incorrectly
+; classify expressions as "wrapping" and fail analysis.
+
+define void @test_da_with_scev_flags(ptr %A) {
+; This test verifies that dependence analysis now correctly identifies
+; self-dependences when SCEV preserves NSW flags from GEP index computations.
+; CHECK-LABEL: 'test_da_with_scev_flags'
+; CHECK-NEXT:  Src: %val = load i32, ptr %gep, align 4 --> Dst: %val = load i32, ptr %gep, align 4
+; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Src: %val = load i32, ptr %gep, align 4 --> Dst: store i32 %val, ptr %gep, align 4
+; CHECK-NEXT:    da analyze - consistent anti [0|<]!
+; CHECK-NEXT:  Src: store i32 %val, ptr %gep, align 4 --> Dst: store i32 %val, ptr %gep, align 4
+; CHECK-NEXT:    da analyze - none!
+;
+
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+
+  ; Create NSW-flagged index computation
+  %mul = mul nsw i64 %i, 3
+  %sub = add nsw i64 %mul, -6
+
+  ; GEP that should result in SCEV: {(-2424 + %A),+,1212}<nw>
+  ; The <nw> flag should prevent false "wrapping" detection in DA
+  %gep = getelementptr inbounds [100 x i32], ptr %A, i64 %sub, i64 %sub
+
+  ; Self-dependence: should be detected as "none" (no dependence)
+  %val = load i32, ptr %gep
+  store i32 %val, ptr %gep
+
+  %i.next = add nsw i64 %i, 1
+  %cond = icmp ult i64 %i.next, 50
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S -disable-output -passes='print<access-info>' < %s 2>&1 | FileCheck %s
 
 
@@ -449,9 +449,10 @@ exit:
 define void @different_type_sizes_forward(ptr %dst) {
 ; CHECK-LABEL: 'different_type_sizes_forward'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unknown data dependence.
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Forward:
+; CHECK-NEXT:        Unknown:
 ; CHECK-NEXT:            store i32 0, ptr %gep.10.iv, align 4 ->
 ; CHECK-NEXT:            store i16 1, ptr %gep.iv, align 2
 ; CHECK-EMPTY:

diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
@@ -70,7 +70,7 @@ define void @forward_different_access_sizes(ptr readnone %end, ptr %start) {
 ; CHECK-NEXT:            store i32 0, ptr %gep.2, align 4 ->
 ; CHECK-NEXT:            %l = load i24, ptr %gep.1, align 1
 ; CHECK-EMPTY:
-; CHECK-NEXT:        Forward:
+; CHECK-NEXT:        Unknown:
 ; CHECK-NEXT:            store i32 0, ptr %gep.2, align 4 ->
 ; CHECK-NEXT:            store i24 %l, ptr %ptr.iv, align 1
 ; CHECK-EMPTY:

diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll
@@ -35,7 +35,7 @@ define void @f(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) {
 ; CHECK-NEXT:            store i32 %b_p2, ptr %Aidx_next, align 4 ->
 ; CHECK-NEXT:            %a = load i32, ptr %Aidx, align 4
 ; CHECK-EMPTY:
-; CHECK-NEXT:        Forward:
+; CHECK-NEXT:        Unknown:
 ; CHECK-NEXT:            store i32 %b_p2, ptr %Aidx_next, align 4 ->
 ; CHECK-NEXT:            store i32 %b_p1, ptr %Aidx, align 4
 ; CHECK-EMPTY:

diff --git a/llvm/test/Analysis/LoopAccessAnalysis/waw-negative-dependence.ll b/llvm/test/Analysis/LoopAccessAnalysis/waw-negative-dependence.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
+
+; Test that LAA correctly identifies Write-After-Write dependencies with negative
+; distances as unsafe. Previously, LAA would incorrectly classify negative distance
+; WAW dependencies as safe Forward dependencies, allowing inappropriate vectorization.
+;
+; This corresponds to the loop:
+;   for(int i = 0; i < n; ++i) {
+;       A[(i+1)*4] = 10;   // First store: A[4, 8, 12, 16, ...]
+;       A[i] = 100;        // Second store: A[0, 1, 2, 3, 4, ...]
+;   }
+;
+; The dependence distance from first store to second store is negative:
+; A[i] - A[(i+1)*4] = {0,+,4} - {16,+,16} = {-16,+,-12}
+; However, the dependence from second store to first store in the next iteration
+; would be positive: A[(i+1)*4] - A[i] = {16,+,16} - {0,+,4} = {16,+,12}
+;
+; This bidirectional dependence pattern (negative in one direction, positive in the
+; other) creates a Write-After-Write dependency that is unsafe for vectorization.
+; DependenceAnalysis would report this as "output [<>]!" indicating the complex
+; dependence direction. LAA must detect this as unsafe even when only checking
+; the negative distance direction.
+
+define void @test_waw_negative_dependence(i64 %n, ptr nocapture %A) {
+; CHECK-LABEL: 'test_waw_negative_dependence'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:            store i32 10, ptr %arrayidx1, align 4 ->
+; CHECK-NEXT:            store i32 100, ptr %arrayidx2, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %cmp8 = icmp sgt i64 %n, 0
+  br i1 %cmp8, label %loop, label %exit
+
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+  ; First store: A[(i+1)*4] = 10
+  %0 = shl nsw i64 %indvars.iv.next, 2  ; (i+1)*4
+  %arrayidx1 = getelementptr inbounds i32, ptr %A, i64 %0
+  store i32 10, ptr %arrayidx1, align 4
+
+  ; Second store: A[i] = 100
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  store i32 100, ptr %arrayidx2, align 4
+
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Test a similar case but with different stride to ensure the fix is general.
+define void @test_waw_negative_dependence_different_stride(i64 %n, ptr nocapture %A) {
+; CHECK-LABEL: 'test_waw_negative_dependence_different_stride'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:            store i32 10, ptr %arrayidx1, align 4 ->
+; CHECK-NEXT:            store i32 100, ptr %arrayidx2, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %cmp8 = icmp sgt i64 %n, 0
+  br i1 %cmp8, label %loop, label %exit
+
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+  ; First store: A[(i+2)*2] = 10
+  %0 = add nsw i64 %indvars.iv, 2       ; i+2
+  %1 = shl nsw i64 %0, 1                ; (i+2)*2
+  %arrayidx1 = getelementptr inbounds i32, ptr %A, i64 %1
+  store i32 10, ptr %arrayidx1, align 4
+
+  ; Second store: A[i] = 100
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  store i32 100, ptr %arrayidx2, align 4
+
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}