Skip to content

Commit b5ca793

Browse files
committed
fix gep-based inferrence
1 parent 72b2902 commit b5ca793

File tree

3 files changed

+74
-17
lines changed

3 files changed

+74
-17
lines changed

llvm/lib/Analysis/DependenceAnalysis.cpp

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3414,9 +3414,24 @@ SCEVSignedMonotonicityChecker::SCEVSignedMonotonicityChecker(
34143414
ScalarEvolution *SE, const Loop *OutermostLoop, const Value *Ptr)
34153415
: SE(SE), OutermostLoop(OutermostLoop) {
34163416
if (Ptr) {
3417-
// TODO: This seems incorrect. Maybe we should check the reachability from
3418-
// the GEP to the target instruction. E.g., in the following case, maybe
3419-
// no-wrap is not guaranteed:
3417+
// Perform reasoning similar to LoopAccessAnalysis. If an AddRec would wrap
3418+
// and the GEP would have nusw, the wrapped memory location would become
3419+
// like as follows (in the mathmatical sense, assuming the step recurrence
3420+
// is positive):
3421+
//
3422+
// (previously accessed location) + (step recurrence) - 2^N
3423+
//
3424+
// where N is the size of the pointer index type. Since the value of step
3425+
// recurrence is less than 2^(N-1), the distance between the previously
3426+
// accessed location and the wrapped location will be greater than 2^(N-1),
3427+
// which is larger than half the pointer index type space. The size of
3428+
// allocated object must not exceed the largest signed integer that fits
3429+
// into the index type, so the GEP value would be poison and any memory
3430+
// access using it would be immediate UB when executed.
3431+
//
3432+
// TODO: We don't check if the result of the GEP is always used. Maybe we
3433+
// should check the reachability from the GEP to the target instruction.
3434+
// E.g., in the following case, no-wrap would not trigger immediate UB:
34203435
//
34213436
// entry:
34223437
// ...
@@ -3441,8 +3456,6 @@ MonotonicityType SCEVSignedMonotonicityChecker::checkMonotonicity(
34413456
const Value *Ptr) {
34423457
SCEVSignedMonotonicityChecker Checker(SE, OutermostLoop, Ptr);
34433458
MonotonicityType MT = Checker.visit(Expr);
3444-
if (MT == MonotonicityType::Unknown && Checker.NoWrapFromGEP)
3445-
MT = MonotonicityType::NoSignedWrap;
34463459

34473460
#ifndef NDEBUG
34483461
switch (MT) {
@@ -3532,8 +3545,6 @@ SCEVSignedMonotonicityChecker::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
35323545
const SCEV *Start = Expr->getStart();
35333546
const SCEV *Step = Expr->getStepRecurrence(*SE);
35343547

3535-
bool IsNSW = Expr->hasNoSignedWrap();
3536-
35373548
MonotonicityType StartRes = visit(Start);
35383549
if (StartRes == MonotonicityType::Unknown)
35393550
return unknownMonotonicity(Expr);
@@ -3543,7 +3554,7 @@ SCEVSignedMonotonicityChecker::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
35433554
return unknownMonotonicity(Expr);
35443555

35453556
// TODO: Enhance the inference here.
3546-
if (!IsNSW) {
3557+
if (!Expr->hasNoSignedWrap() && !NoWrapFromGEP) {
35473558
if (!SE->isKnownNegative(Step))
35483559
// If the coefficient can be positive value, ensure that the AddRec is
35493560
// monotonically increasing.
@@ -3787,9 +3798,10 @@ bool DependenceInfo::tryDelinearizeParametricSize(
37873798
LI->getLoopFor(Src->getParent())->getOutermostLoop();
37883799

37893800
// TODO: In general, reasoning about monotonicity of a subscript from the
3790-
// base pointer would not be allowed. Probably we need to check the loops
3791-
// associated with this subscript are disjoint from those associated with
3792-
// the other subscripts. The validation would be something like:
3801+
// base pointer would lead incorrect result. Probably we need to check
3802+
// the loops associated with this subscript are disjoint from those
3803+
// associated with the other subscripts. The validation would be
3804+
// something like:
37933805
//
37943806
// LoopsI = collectCommonLoops(SrcSubscripts[I])
37953807
// LoopsOthers = collectCommonLoops(SrcSCEV - SrcSubscripts[I])

llvm/test/Analysis/DependenceAnalysis/GCD.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -465,13 +465,13 @@ for.end12: ; preds = %for.end12.loopexit,
465465
define void @gcd7(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp {
466466
; CHECK-LABEL: 'gcd7'
467467
; CHECK-NEXT: Src: store i32 %7, ptr %arrayidx6, align 4 --> Dst: store i32 %7, ptr %arrayidx6, align 4
468-
; CHECK-NEXT: da analyze - output [* *]!
468+
; CHECK-NEXT: da analyze - confused!
469469
; CHECK-NEXT: Src: store i32 %7, ptr %arrayidx6, align 4 --> Dst: %11 = load i32, ptr %arrayidx12, align 4
470-
; CHECK-NEXT: da analyze - flow [* *|<]!
470+
; CHECK-NEXT: da analyze - confused!
471471
; CHECK-NEXT: Src: store i32 %7, ptr %arrayidx6, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
472472
; CHECK-NEXT: da analyze - confused!
473473
; CHECK-NEXT: Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: %11 = load i32, ptr %arrayidx12, align 4
474-
; CHECK-NEXT: da analyze - input [* *]!
474+
; CHECK-NEXT: da analyze - confused!
475475
; CHECK-NEXT: Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
476476
; CHECK-NEXT: da analyze - confused!
477477
; CHECK-NEXT: Src: store i32 %11, ptr %B.addr.12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
@@ -640,13 +640,13 @@ for.end15: ; preds = %for.end15.loopexit,
640640
define void @gcd9(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp {
641641
; CHECK-LABEL: 'gcd9'
642642
; CHECK-NEXT: Src: store i32 %7, ptr %arrayidx6, align 4 --> Dst: store i32 %7, ptr %arrayidx6, align 4
643-
; CHECK-NEXT: da analyze - output [* *]!
643+
; CHECK-NEXT: da analyze - confused!
644644
; CHECK-NEXT: Src: store i32 %7, ptr %arrayidx6, align 4 --> Dst: %11 = load i32, ptr %arrayidx12, align 4
645-
; CHECK-NEXT: da analyze - flow [* *|<]!
645+
; CHECK-NEXT: da analyze - confused!
646646
; CHECK-NEXT: Src: store i32 %7, ptr %arrayidx6, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
647647
; CHECK-NEXT: da analyze - confused!
648648
; CHECK-NEXT: Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: %11 = load i32, ptr %arrayidx12, align 4
649-
; CHECK-NEXT: da analyze - input [* *]!
649+
; CHECK-NEXT: da analyze - confused!
650650
; CHECK-NEXT: Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
651651
; CHECK-NEXT: da analyze - confused!
652652
; CHECK-NEXT: Src: store i32 %11, ptr %B.addr.12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4

llvm/test/Analysis/DependenceAnalysis/monotonic.ll

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,3 +332,48 @@ loop:
332332
exit:
333333
ret void
334334
}
335+
336+
; The value of step reccurence is not invariant with respect to the outer most
337+
; loop (the i-loop).
338+
;
339+
; offset_i = 0;
340+
; for (int i = 0; i < 100; i++) {
341+
; for (int j = 0; j < 100; j++)
342+
; a[offset_i + j] = 0;
343+
; offset_i += (i % 2 == 0) ? 0 : 3;
344+
; }
345+
;
346+
define void @step_is_variant(ptr %a) {
347+
; CHECK-LABEL: 'step_is_variant'
348+
; CHECK: Failed to prove monotonicity for: %offset.i
349+
; CHECK: Failed to prove monotonicity for: {%offset.i,+,1}<nuw><nsw><%loop.j>
350+
; CHECK: Monotonicity: Unknown expr: {%offset.i,+,1}<nuw><nsw><%loop.j>
351+
;
352+
entry:
353+
br label %loop.i.header
354+
355+
loop.i.header:
356+
%i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ]
357+
%offset.i = phi i64 [ 0, %entry ], [ %offset.i.next, %loop.i.latch ]
358+
%step.i.0 = phi i64 [ 0, %entry ], [ %step.i.1, %loop.i.latch ]
359+
%step.i.1 = phi i64 [ 3, %entry ], [ %step.i.0, %loop.i.latch ]
360+
br label %loop.j
361+
362+
loop.j:
363+
%j = phi i64 [ 0, %loop.i.header ], [ %j.inc, %loop.j ]
364+
%offset = add nsw i64 %offset.i, %j
365+
%idx = getelementptr inbounds i8, ptr %a, i64 %offset
366+
store i8 0, ptr %idx
367+
%j.inc = add nsw i64 %j, 1
368+
%exitcond.j = icmp eq i64 %j.inc, 100
369+
br i1 %exitcond.j, label %loop.i.latch, label %loop.j
370+
371+
loop.i.latch:
372+
%i.inc = add nsw i64 %i, 1
373+
%offset.i.next = add nsw i64 %offset.i, %step.i.0
374+
%exitcond.i = icmp eq i64 %i.inc, 100
375+
br i1 %exitcond.i, label %exit, label %loop.i.header
376+
377+
exit:
378+
ret void
379+
}

0 commit comments

Comments
 (0)