Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 19 additions & 9 deletions llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3492,17 +3492,27 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
/// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
// We can currently only fold X%N if X is constant.
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
if (StartC && !DivInt.urem(StepInt) &&
getZeroExtendExpr(AR, ExtTy) ==
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop(), SCEV::FlagAnyWrap)) {
if (StartC && !DivInt.urem(StepInt)) {
const APInt &StartInt = StartC->getAPInt();
const APInt &StartRem = StartInt.urem(StepInt);
if (StartRem != 0) {
const SCEV *NewLHS =
getAddRecExpr(getConstant(StartInt - StartRem), Step,
AR->getLoop(), SCEV::FlagNW);
bool NoWrap =
getZeroExtendExpr(AR, ExtTy) ==
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy), AR->getLoop(),
SCEV::FlagAnyWrap);

// With X < N <= C and both N, C as powers-of-2, the transformation
// {X,+,N}/C => {(X - X%N),+,N}/C preserves division results even
// if wrapping occurs, as the division results remain equivalent for
// all offsets in [(X - X%N), X].
bool CanFoldWithWrap = StepInt.isStrictlyPositive() &&
StartInt.ult(StepInt) && // X < N
StepInt.sle(DivInt) && // N <= C
StepInt.isPowerOf2() && DivInt.isPowerOf2();
if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) {
const SCEV *NewLHS = getAddRecExpr(
getConstant(StartInt - StartRem), Step, AR->getLoop(),
NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
if (LHS != NewLHS) {
LHS = NewLHS;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define void @test_step2_div4(i64 %n) {
; CHECK-NEXT: %iv.1 = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4
; CHECK-NEXT: --> ({1,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: --> ({0,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.2 = add i64 %iv, 2
; CHECK-NEXT: --> {2,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4
Expand Down Expand Up @@ -114,15 +114,15 @@ define void @test_step4_div4(i64 %n) {
; CHECK-NEXT: %iv.1 = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4
; CHECK-NEXT: --> ({1,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.2 = add i64 %iv, 2
; CHECK-NEXT: --> {2,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4
; CHECK-NEXT: --> ({2,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.3 = add i64 %iv, 3
; CHECK-NEXT: --> {3,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4
; CHECK-NEXT: --> ({3,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.4 = add i64 %iv, 4
; CHECK-NEXT: --> {4,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4
Expand Down
28 changes: 27 additions & 1 deletion llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %shift = ashr i32 %val, %k
; CHECK: Cost of 2 for VF 2: WIDEN ir<%shift> = ashr ir<%val>, ir<%k>
; CHECK: Cost of 2 for VF 4: WIDEN ir<%shift> = ashr ir<%val>, ir<%k>
define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr #0 {
define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr {
entry:
br label %body

Expand All @@ -21,5 +21,31 @@ body:

exit:
ret void
}

; CHECK: 'shift_and_masked_load_store'
; CHECK: Cost of 1 for VF 2: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2>
; CHECK: Cost of 1 for VF 4: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2>
; CHECK: Cost of 4 for VF 8: WIDEN ir<%shifted> = lshr ir<%iv>, ir<2>
; Test input: a single loop whose induction variable is shifted right to form
; a load index and shifted left to form a store index. The cost expectations
; placed directly above this function refer to the %shifted instruction.
; Takes one i64 argument, %trip.count, and carries attribute group #0.
define void @shift_and_masked_load_store(i64 %trip.count) #0 {
entry:
br label %loop

loop:
; %iv is 0 when entered from %entry and %iv.next on the back edge.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; Logical shift right by 2 of the induction variable.
%shifted = lshr i64 %iv, 2
; Keep only the lowest bit of the shifted value, so the load index is 0 or 1.
%masked.idx = and i64 %shifted, 1
; The base pointer is poison; the index is scaled by the i16 element type.
%load.ptr = getelementptr i16, ptr poison, i64 %masked.idx
%val = load i16, ptr %load.ptr, align 2
; Shift left by 2 of the induction variable, flagged nuw (no unsigned wrap).
%store.idx = shl nuw i64 %iv, 2
; Byte-granular (i8) offset from a poison base pointer.
%store.ptr = getelementptr i8, ptr poison, i64 %store.idx
store i16 %val, ptr %store.ptr, align 2
%iv.next = add i64 %iv, 1
; The exit test compares the pre-increment %iv (not %iv.next) with %trip.count.
%cmp = icmp eq i64 %iv, %trip.count
br i1 %cmp, label %exit, label %loop

exit:
ret void
}

attributes #0 = { "target-features"="+avx2" "tune-cpu"="alderlake" }