Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2196,6 +2196,21 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::Unknown;
}

// For WAW (Write-After-Write) dependencies, negative distances in one
// direction can still represent unsafe dependencies. Since we only check
// dependencies in program order (AIdx < BIdx), a negative distance means
// the later write accesses memory locations before the earlier write.
// However, in a vectorized loop, both writes could execute simultaneously,
// potentially causing incorrect behavior. Therefore, WAW with negative
// distances should be treated as unsafe.
bool IsWriteAfterWrite = (AIsWrite && BIsWrite);
if (IsWriteAfterWrite) {
LLVM_DEBUG(
dbgs() << "LAA: WAW dependence with negative distance is unsafe\n");
return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
: Dependence::Unknown;
}

bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
// Check if the first access writes to a location that is read in a later
// iteration, where the distance between them is not a multiple of a vector
Expand Down
31 changes: 29 additions & 2 deletions llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2951,25 +2951,52 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
// Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());

// Track flags: start with the flags from the first AddRec.
bool AllHaveNSW = AddRec->hasNoSignedWrap();
bool AllHaveNUW = AddRec->hasNoUnsignedWrap();

for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
if (OtherAddRec->getLoop() == AddRecLoop) {
// Update flags based on this AddRec
if (!OtherAddRec->hasNoSignedWrap())
AllHaveNSW = false;
if (!OtherAddRec->hasNoUnsignedWrap())
AllHaveNUW = false;
for (unsigned i = 0, e = OtherAddRec->getNumOperands();
i != e; ++i) {
if (i >= AddRecOps.size()) {
append_range(AddRecOps, OtherAddRec->operands().drop_front(i));
break;
}
// Preserve no-wrap flags when combining AddRec operands.
SCEV::NoWrapFlags CombineFlags = SCEV::FlagAnyWrap;
if (auto *AR1 = dyn_cast<SCEVAddRecExpr>(AddRecOps[i]))
if (auto *AR2 =
dyn_cast<SCEVAddRecExpr>(OtherAddRec->getOperand(i))) {
if (AR1->hasNoSignedWrap() && AR2->hasNoSignedWrap())
CombineFlags = setFlags(CombineFlags, SCEV::FlagNSW);
if (AR1->hasNoUnsignedWrap() && AR2->hasNoUnsignedWrap())
CombineFlags = setFlags(CombineFlags, SCEV::FlagNUW);
}
SmallVector<const SCEV *, 2> TwoOps = {
AddRecOps[i], OtherAddRec->getOperand(i)};
AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
AddRecOps[i] = getAddExpr(TwoOps, CombineFlags, Depth + 1);
}
Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
}
}
// Step size has changed, so we cannot guarantee no self-wraparound.
Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
// However, preserve NSW/NUW flags if all combined AddRecs had them.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I understand the logic here. Adding together two nsw addrecs doesn't necessarily result in an nsw addrec.

SCEV::NoWrapFlags FinalFlags = SCEV::FlagAnyWrap;
if (AllHaveNSW)
FinalFlags = setFlags(FinalFlags, SCEV::FlagNSW);
if (AllHaveNUW)
FinalFlags = setFlags(FinalFlags, SCEV::FlagNUW);

Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, FinalFlags);
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/Delinearization/fixed_size_array.ll
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ define void @a_i_j2k_i(ptr %a) {
; CHECK-LABEL: 'a_i_j2k_i'
; CHECK-NEXT: Inst: store i32 1, ptr %idx, align 4
; CHECK-NEXT: In Loop with Header: for.k
; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1028}<%for.i.header>,+,256}<nw><%for.j.header>,+,128}<nw><%for.k>
; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1028}<nuw><nsw><%for.i.header>,+,256}<nw><%for.j.header>,+,128}<nw><%for.k>
; CHECK-NEXT: failed to delinearize
;
entry:
Expand Down Expand Up @@ -391,7 +391,7 @@ define void @a_i_i_jk(ptr %a) {
; CHECK-LABEL: 'a_i_i_jk'
; CHECK-NEXT: Inst: store i32 1, ptr %idx, align 4
; CHECK-NEXT: In Loop with Header: for.k
; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1152}<%for.i.header>,+,4}<nw><%for.j.header>,+,4}<nw><%for.k>
; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1152}<nuw><nsw><%for.i.header>,+,4}<nw><%for.j.header>,+,4}<nw><%for.k>
; CHECK-NEXT: Base offset: %a
; CHECK-NEXT: ArrayDecl[UnknownSize][288] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{{\{\{}}0,+,1}<nuw><nsw><%for.j.header>,+,1}<nuw><nsw><%for.k>]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 | FileCheck %s

; Test that SCEV NSW flag preservation enables dependence analysis to work
; correctly. Previously, SCEV would lose NSW flags when combining AddRec
; expressions from GEP operations, causing dependence analysis to incorrectly
; classify expressions as "wrapping" and fail analysis.

define void @test_da_with_scev_flags(ptr %A) {
; This test verifies that dependence analysis now correctly identifies
; self-dependences when SCEV preserves NSW flags from GEP index computations.
; CHECK-LABEL: 'test_da_with_scev_flags'
; CHECK-NEXT: Src: %val = load i32, ptr %gep, align 4 --> Dst: %val = load i32, ptr %gep, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %val = load i32, ptr %gep, align 4 --> Dst: store i32 %val, ptr %gep, align 4
; CHECK-NEXT: da analyze - consistent anti [0|<]!
; CHECK-NEXT: Src: store i32 %val, ptr %gep, align 4 --> Dst: store i32 %val, ptr %gep, align 4
; CHECK-NEXT: da analyze - none!
;

entry:
br label %loop

loop:
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]

; Create NSW-flagged index computation
%mul = mul nsw i64 %i, 3
%sub = add nsw i64 %mul, -6

; GEP that should result in SCEV: {(-2424 + %A),+,1212}<nw>
; The <nw> flag should prevent false "wrapping" detection in DA
%gep = getelementptr inbounds [100 x i32], ptr %A, i64 %sub, i64 %sub

; Self-dependence: should be detected as "none" (no dependence)
%val = load i32, ptr %gep
store i32 %val, ptr %gep

%i.next = add nsw i64 %i, 1
%cond = icmp ult i64 %i.next, 50
br i1 %cond, label %loop, label %exit

exit:
ret void
}
7 changes: 4 additions & 3 deletions llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -disable-output -passes='print<access-info>' < %s 2>&1 | FileCheck %s


Expand Down Expand Up @@ -449,9 +449,10 @@ exit:
define void @different_type_sizes_forward(ptr %dst) {
; CHECK-LABEL: 'different_type_sizes_forward'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Forward:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: store i32 0, ptr %gep.10.iv, align 4 ->
; CHECK-NEXT: store i16 1, ptr %gep.iv, align 2
; CHECK-EMPTY:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ define void @forward_different_access_sizes(ptr readnone %end, ptr %start) {
; CHECK-NEXT: store i32 0, ptr %gep.2, align 4 ->
; CHECK-NEXT: %l = load i24, ptr %gep.1, align 1
; CHECK-EMPTY:
; CHECK-NEXT: Forward:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: store i32 0, ptr %gep.2, align 4 ->
; CHECK-NEXT: store i24 %l, ptr %ptr.iv, align 1
; CHECK-EMPTY:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ define void @f(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) {
; CHECK-NEXT: store i32 %b_p2, ptr %Aidx_next, align 4 ->
; CHECK-NEXT: %a = load i32, ptr %Aidx, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Forward:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: store i32 %b_p2, ptr %Aidx_next, align 4 ->
; CHECK-NEXT: store i32 %b_p1, ptr %Aidx, align 4
; CHECK-EMPTY:
Expand Down
109 changes: 109 additions & 0 deletions llvm/test/Analysis/LoopAccessAnalysis/waw-negative-dependence.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s

; Test that LAA correctly identifies Write-After-Write dependencies with negative
; distances as unsafe. Previously, LAA would incorrectly classify negative distance
; WAW dependencies as safe Forward dependencies, allowing inappropriate vectorization.
;
; This corresponds to the loop:
; for(int i = 0; i < n; ++i) {
; A[(i+1)*4] = 10; // First store: A[4, 8, 12, 16, ...]
; A[i] = 100; // Second store: A[0, 1, 2, 3, 4, ...]
; }
;
; The dependence distance from first store to second store is negative:
; A[i] - A[(i+1)*4] = {0,+,4} - {16,+,16} = {-16,+,-12}
; However, the dependence from second store to first store in the next iteration
; would be positive: A[(i+1)*4] - A[i] = {16,+,16} - {0,+,4} = {16,+,12}
;
; This bidirectional dependence pattern (negative in one direction, positive in the
; other) creates a Write-After-Write dependency that is unsafe for vectorization.
; DependenceAnalysis would report this as "output [<>]!" indicating the complex
; dependence direction. LAA must detect this as unsafe even when only checking
; the negative distance direction.

define void @test_waw_negative_dependence(i64 %n, ptr nocapture %A) {
; CHECK-LABEL: 'test_waw_negative_dependence'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: store i32 10, ptr %arrayidx1, align 4 ->
; CHECK-NEXT: store i32 100, ptr %arrayidx2, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
%cmp8 = icmp sgt i64 %n, 0
br i1 %cmp8, label %loop, label %exit

loop:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1

; First store: A[(i+1)*4] = 10
%0 = shl nsw i64 %indvars.iv.next, 2 ; (i+1)*4
%arrayidx1 = getelementptr inbounds i32, ptr %A, i64 %0
store i32 10, ptr %arrayidx1, align 4

; Second store: A[i] = 100
%arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
store i32 100, ptr %arrayidx2, align 4

%exitcond.not = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond.not, label %exit, label %loop

exit:
ret void
}

; Test a similar case but with different stride to ensure the fix is general.
define void @test_waw_negative_dependence_different_stride(i64 %n, ptr nocapture %A) {
; CHECK-LABEL: 'test_waw_negative_dependence_different_stride'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: store i32 10, ptr %arrayidx1, align 4 ->
; CHECK-NEXT: store i32 100, ptr %arrayidx2, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
%cmp8 = icmp sgt i64 %n, 0
br i1 %cmp8, label %loop, label %exit

loop:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1

; First store: A[(i+2)*2] = 10
%0 = add nsw i64 %indvars.iv, 2 ; i+2
%1 = shl nsw i64 %0, 1 ; (i+2)*2
%arrayidx1 = getelementptr inbounds i32, ptr %A, i64 %1
store i32 10, ptr %arrayidx1, align 4

; Second store: A[i] = 100
%arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
store i32 100, ptr %arrayidx2, align 4

%exitcond.not = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond.not, label %exit, label %loop

exit:
ret void
}
Loading
Loading