Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3020,10 +3020,14 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
{ValNumActiveBitsOffset, Start},
/*FMFSource=*/nullptr, "iv.final");

// Check if the offset was added with NUW flag
bool OffsetAddHasNUW = OffsetIsZero;
if (auto *OffsetAddInst = dyn_cast<BinaryOperator>(ValNumActiveBitsOffset))
OffsetAddHasNUW |= OffsetAddInst->hasNoUnsignedWrap();

auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
/*HasNUW=*/OffsetIsZero, /*HasNSW=*/true));
// FIXME: or when the offset was `add nuw`
/*HasNUW=*/OffsetAddHasNUW, /*HasNSW=*/true));

// We know loop's backedge-taken count, but what's loop's trip count?
Value *LoopTripCount =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck %s

declare void @escape_inner(i8, i8, i8, i1, i8)
declare void @escape_outer(i8, i8, i8, i1, i8)

; Test cases where the offset addition has the nuw flag and this property
; should be preserved in the backedge taken count calculation.

; Basic case: add nuw - check that HasNUW is preserved
define i8 @preserve_nuw_flag(i8 %val, i8 %start, i8 %extraoffset) {
; CHECK-LABEL: @preserve_nuw_flag(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.ctlz.i8(i8 [[VAL:%.*]], i1 false)
; CHECK-NEXT: [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]]
; CHECK-NEXT: [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]]
; CHECK-NEXT: [[VAL_NUMACTIVEBITS_OFFSET:%.*]] = add nsw i8 [[VAL_NUMACTIVEBITS]], [[TMP0]]
; CHECK-NEXT: [[IV_FINAL:%.*]] = call i8 @llvm.smax.i8(i8 [[VAL_NUMACTIVEBITS_OFFSET]], i8 [[START:%.*]])
; CHECK-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub nsw i8 [[IV_FINAL]], [[START]]
; CHECK-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw nsw i8 [[LOOP_BACKEDGETAKENCOUNT]], 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LOOP_IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1
; CHECK-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]]
; CHECK-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]]
; CHECK-NEXT: [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET]]
; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = lshr i8 [[VAL]], [[NBITS]]
; CHECK-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1
; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]])
; CHECK-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]]
; CHECK: end:
; CHECK-NEXT: [[IV_RES:%.*]] = phi i8 [ [[IV_FINAL]], [[LOOP]] ]
; CHECK-NEXT: [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
; CHECK-NEXT: ret i8 [[IV_RES]]
;
entry:
br label %loop

loop:
%iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
%nbits = add nuw i8 %iv, %extraoffset
%val.shifted = lshr i8 %val, %nbits
%val.shifted.iszero = icmp eq i8 %val.shifted, 0
%iv.next = add i8 %iv, 1

call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)

br i1 %val.shifted.iszero, label %end, label %loop

end:
%iv.res = phi i8 [ %iv, %loop ]
%nbits.res = phi i8 [ %nbits, %loop ]
%val.shifted.res = phi i8 [ %val.shifted, %loop ]
%val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
%iv.next.res = phi i8 [ %iv.next, %loop ]

call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)

ret i8 %iv.res
}

; Test with both nuw and nsw flags on the offset addition
define i8 @preserve_nuw_nsw_flags(i8 %val, i8 %start, i8 %extraoffset) {
; CHECK-LABEL: @preserve_nuw_nsw_flags(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.ctlz.i8(i8 [[VAL:%.*]], i1 false)
; CHECK-NEXT: [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]]
; CHECK-NEXT: [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]]
; CHECK-NEXT: [[VAL_NUMACTIVEBITS_OFFSET:%.*]] = add nsw i8 [[VAL_NUMACTIVEBITS]], [[TMP0]]
; CHECK-NEXT: [[IV_FINAL:%.*]] = call i8 @llvm.smax.i8(i8 [[VAL_NUMACTIVEBITS_OFFSET]], i8 [[START:%.*]])
; CHECK-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub nsw i8 [[IV_FINAL]], [[START]]
; CHECK-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw nsw i8 [[LOOP_BACKEDGETAKENCOUNT]], 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LOOP_IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1
; CHECK-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]]
; CHECK-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]]
; CHECK-NEXT: [[NBITS:%.*]] = add nuw nsw i8 [[IV]], [[EXTRAOFFSET]]
; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = lshr i8 [[VAL]], [[NBITS]]
; CHECK-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1
; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]])
; CHECK-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]]
; CHECK: end:
; CHECK-NEXT: [[IV_RES:%.*]] = phi i8 [ [[IV_FINAL]], [[LOOP]] ]
; CHECK-NEXT: [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
; CHECK-NEXT: ret i8 [[IV_RES]]
;
entry:
br label %loop

loop:
%iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
%nbits = add nuw nsw i8 %iv, %extraoffset
%val.shifted = lshr i8 %val, %nbits
%val.shifted.iszero = icmp eq i8 %val.shifted, 0
%iv.next = add i8 %iv, 1

call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)

br i1 %val.shifted.iszero, label %end, label %loop

end:
%iv.res = phi i8 [ %iv, %loop ]
%nbits.res = phi i8 [ %nbits, %loop ]
%val.shifted.res = phi i8 [ %val.shifted, %loop ]
%val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
%iv.next.res = phi i8 [ %iv.next, %loop ]

call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)

ret i8 %iv.res
}

; Test with left shift instead of logical right shift
define i8 @preserve_nuw_flag_shl(i8 %val, i8 %start, i8 %extraoffset) {
; CHECK-LABEL: @preserve_nuw_flag_shl(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.cttz.i8(i8 [[VAL:%.*]], i1 false)
; CHECK-NEXT: [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]]
; CHECK-NEXT: [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]]
; CHECK-NEXT: [[VAL_NUMACTIVEBITS_OFFSET:%.*]] = add nsw i8 [[VAL_NUMACTIVEBITS]], [[TMP0]]
; CHECK-NEXT: [[IV_FINAL:%.*]] = call i8 @llvm.smax.i8(i8 [[VAL_NUMACTIVEBITS_OFFSET]], i8 [[START:%.*]])
; CHECK-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub nsw i8 [[IV_FINAL]], [[START]]
; CHECK-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw nsw i8 [[LOOP_BACKEDGETAKENCOUNT]], 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LOOP_IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1
; CHECK-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]]
; CHECK-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]]
; CHECK-NEXT: [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET]]
; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = shl i8 [[VAL]], [[NBITS]]
; CHECK-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1
; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]])
; CHECK-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]]
; CHECK: end:
; CHECK-NEXT: [[IV_RES:%.*]] = phi i8 [ [[IV_FINAL]], [[LOOP]] ]
; CHECK-NEXT: [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
; CHECK-NEXT: ret i8 [[IV_RES]]
;
entry:
br label %loop

loop:
%iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
%nbits = add nuw i8 %iv, %extraoffset
%val.shifted = shl i8 %val, %nbits
%val.shifted.iszero = icmp eq i8 %val.shifted, 0
%iv.next = add i8 %iv, 1

call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)

br i1 %val.shifted.iszero, label %end, label %loop

end:
%iv.res = phi i8 [ %iv, %loop ]
%nbits.res = phi i8 [ %nbits, %loop ]
%val.shifted.res = phi i8 [ %val.shifted, %loop ]
%val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
%iv.next.res = phi i8 [ %iv.next, %loop ]

call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)

ret i8 %iv.res
}

; Test with arithmetic right shift
define i8 @preserve_nuw_flag_ashr(i8 %val, i8 %start, i8 %extraoffset) {
; CHECK-LABEL: @preserve_nuw_flag_ashr(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET:%.*]]
; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]]
; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]])
; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]]
; CHECK: end:
; CHECK-NEXT: [[IV_RES:%.*]] = phi i8 [ [[IV]], [[LOOP]] ]
; CHECK-NEXT: [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
; CHECK-NEXT: ret i8 [[IV_RES]]
;
entry:
br label %loop

loop:
%iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
%nbits = add nuw i8 %iv, %extraoffset
%val.shifted = ashr i8 %val, %nbits
%val.shifted.iszero = icmp eq i8 %val.shifted, 0
%iv.next = add i8 %iv, 1

call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)

br i1 %val.shifted.iszero, label %end, label %loop

end:
%iv.res = phi i8 [ %iv, %loop ]
%nbits.res = phi i8 [ %nbits, %loop ]
%val.shifted.res = phi i8 [ %val.shifted, %loop ]
%val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
%iv.next.res = phi i8 [ %iv.next, %loop ]

call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)

ret i8 %iv.res
}