Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12778,10 +12778,23 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
// The positive stride case is the same as isKnownPositive(Stride) returning
// true (original behavior of the function).
//
if (PredicatedIV || !NoWrap || !loopIsFiniteByAssumption(L) ||
if (PredicatedIV || !loopIsFiniteByAssumption(L) ||
!loopHasNoAbnormalExits(L))
return getCouldNotCompute();

// When there are no no-wrap flags, add a predicate that Stride is equal to
// one. This might enable strided access versioning in LAA and allows BECount
// to be calculated with Stride = 1.
if (!NoWrap) {
if (AllowPredicates) {
const auto *One =
static_cast<const SCEVConstant *>(getOne(Stride->getType()));
Predicates.insert(getEqualPredicate(Stride, One));
Stride = One;
} else
return getCouldNotCompute();
}

if (!isKnownNonZero(Stride)) {
// If we have a step of zero, and RHS isn't invariant in L, we don't know
// if it might eventually be greater than start and if so, on which
Expand Down
54 changes: 54 additions & 0 deletions llvm/test/Transforms/LoopVectorize/version-mem-access.ll
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,57 @@ for.end.loopexit:
for.end:
ret void
}

; We can vectorize the loop by assuming stride = 1 to calculate the iteration
; count and generating a runtime check to guard the vectorized loop.

; CHECK-LABEL: s172
; CHECK: vector.scevcheck:
; CHECK: [[CHECK:%.*]] = icmp ne i32 %xb, 1
; CHECK: br i1 [[CHECK]], label %scalar.ph, label %vector.ph
; CHECK: vector.body
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: the checks are a bit minimal. Personally, I would like to see a bit more context, but I see there's precedent in this file for just checking for the stride-1 compare.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Sjoerdmeijer,

I added more check lines to bring more context.
Thanks for the review. :-)


@b = global [32000 x float] zeroinitializer, align 64
@a = global [32000 x float] zeroinitializer, align 64

; for (int i = xa - 1; i < 32000; i += xb)
; a[i] += b[i];
;
; Test function: a loop nest whose inner loop walks @a and @b starting at
; index (%xa - 1) and advancing by the runtime stride %xb.
;   %xa - start position plus one (i32, sign-extended)
;   %xb - loop stride (i32, sign-extended)
; The returned value is undef; only the loop structure matters here.
define float @s172(i32 signext %xa, i32 signext %xb) mustprogress {
entry:
; Skip the loop nest entirely unless %xa < 32001, i.e. the start index
; (%xa - 1) is below the inner loop bound of 32000.
%cmp214 = icmp slt i32 %xa, 32001
br i1 %cmp214, label %for.body.us.preheader, label %for.cond.cleanup

for.body.us.preheader: ; preds = %entry
; %0 is the 64-bit start index (%xa - 1); %1 is the 64-bit stride (%xb).
%sub = add i32 %xa, -1
%0 = sext i32 %sub to i64
%1 = sext i32 %xb to i64
br label %for.body.us

; Outer loop header: %nl.016.us counts from 0 and the nest exits once the
; incremented counter equals 100000.
for.body.us: ; preds = %for.body.us.preheader, %for.cond1.for.cond.cleanup3_crit_edge.us
%nl.016.us = phi i32 [ %inc.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.body.us.preheader ]
br label %for.body4.us

; Inner loop: a[iv] = a[iv] + b[iv], with iv starting at %0.
for.body4.us: ; preds = %for.body.us, %for.body4.us
%indvars.iv = phi i64 [ %0, %for.body.us ], [ %indvars.iv.next, %for.body4.us ]
%arrayidx.us = getelementptr inbounds [32000 x float], ptr @b, i64 0, i64 %indvars.iv
%2 = load float, ptr %arrayidx.us, align 4
%arrayidx6.us = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %indvars.iv
%3 = load float, ptr %arrayidx6.us, align 4
%add.us = fadd fast float %3, %2
store float %add.us, ptr %arrayidx6.us, align 4
; The induction variable is advanced by the runtime stride %1; note that this
; add carries no nuw/nsw flags.
%indvars.iv.next = add i64 %indvars.iv, %1
; Stay in the inner loop while the next index is (signed) below 32000.
%cmp2.us = icmp slt i64 %indvars.iv.next, 32000
br i1 %cmp2.us, label %for.body4.us, label %for.cond1.for.cond.cleanup3_crit_edge.us

; Outer loop latch: bump the outer counter and test for the final iteration.
for.cond1.for.cond.cleanup3_crit_edge.us: ; preds = %for.body4.us
%inc.us = add nuw nsw i32 %nl.016.us, 1
%exitcond.not = icmp eq i32 %inc.us, 100000
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body.us

for.cond.cleanup.loopexit: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us
br label %for.cond.cleanup

; Common exit for both the guarded-out path and the finished loop nest.
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret float undef
}