Skip to content

Commit 5d87609

Browse files
authored
[SCEV] Allow udiv canonicalization of potentially-wrapping AddRecs (#169576)
Extend the {X,+,N}/C => {(X - X%N),+,N}/C canonicalization to handle AddRecs that may wrap, when X < N <= C and both N,C are powers of 2. The alignment and power-of-2 properties ensure division results remain equivalent for all offsets [(X - X%N), X). Alive2 Proof: https://alive2.llvm.org/ce/z/iu2tav Fixes #168709 PR: #169576
1 parent 8a40d08 commit 5d87609

File tree

3 files changed

+50
-16
lines changed

3 files changed

+50
-16
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3492,17 +3492,25 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
34923492
/// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
34933493
// We can currently only fold X%N if X is constant.
34943494
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
3495-
if (StartC && !DivInt.urem(StepInt) &&
3496-
getZeroExtendExpr(AR, ExtTy) ==
3497-
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
3498-
getZeroExtendExpr(Step, ExtTy),
3499-
AR->getLoop(), SCEV::FlagAnyWrap)) {
3495+
if (StartC && !DivInt.urem(StepInt)) {
35003496
const APInt &StartInt = StartC->getAPInt();
35013497
const APInt &StartRem = StartInt.urem(StepInt);
3502-
if (StartRem != 0) {
3503-
const SCEV *NewLHS =
3504-
getAddRecExpr(getConstant(StartInt - StartRem), Step,
3505-
AR->getLoop(), SCEV::FlagNW);
3498+
bool NoWrap =
3499+
getZeroExtendExpr(AR, ExtTy) ==
3500+
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
3501+
getZeroExtendExpr(Step, ExtTy), AR->getLoop(),
3502+
SCEV::FlagAnyWrap);
3503+
3504+
// With N <= C and both N, C as powers-of-2, the transformation
3505+
// {X,+,N}/C => {(X - X%N),+,N}/C preserves division results even
3506+
// if wrapping occurs, as the division results remain equivalent for
3507+
// all offsets in [[(X - X%N), X).
3508+
bool CanFoldWithWrap = StepInt.ule(DivInt) && // N <= C
3509+
StepInt.isPowerOf2() && DivInt.isPowerOf2();
3510+
if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) {
3511+
const SCEV *NewLHS = getAddRecExpr(
3512+
getConstant(StartInt - StartRem), Step, AR->getLoop(),
3513+
NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
35063514
if (LHS != NewLHS) {
35073515
LHS = NewLHS;
35083516

llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@ define void @test_step2_div4(i64 %n) {
1313
; CHECK-NEXT: %iv.1 = add i64 %iv, 1
1414
; CHECK-NEXT: --> {1,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
1515
; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4
16-
; CHECK-NEXT: --> ({1,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
16+
; CHECK-NEXT: --> ({0,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
1717
; CHECK-NEXT: %iv.2 = add i64 %iv, 2
1818
; CHECK-NEXT: --> {2,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
1919
; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4
2020
; CHECK-NEXT: --> ({2,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
2121
; CHECK-NEXT: %iv.neg.1 = add i64 %iv, -1
2222
; CHECK-NEXT: --> {-1,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
2323
; CHECK-NEXT: %div.neg.1 = udiv i64 %iv.neg.1, 4
24-
; CHECK-NEXT: --> ({-1,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
24+
; CHECK-NEXT: --> ({-2,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
2525
; CHECK-NEXT: %iv.next = add i64 %iv, 2
2626
; CHECK-NEXT: --> {2,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
2727
; CHECK-NEXT: Determining loop execution counts for: @test_step2_div4
@@ -114,23 +114,23 @@ define void @test_step4_div4(i64 %n) {
114114
; CHECK-NEXT: %iv.1 = add i64 %iv, 1
115115
; CHECK-NEXT: --> {1,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
116116
; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4
117-
; CHECK-NEXT: --> ({1,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
117+
; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
118118
; CHECK-NEXT: %iv.2 = add i64 %iv, 2
119119
; CHECK-NEXT: --> {2,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
120120
; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4
121-
; CHECK-NEXT: --> ({2,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
121+
; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
122122
; CHECK-NEXT: %iv.3 = add i64 %iv, 3
123123
; CHECK-NEXT: --> {3,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
124124
; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4
125-
; CHECK-NEXT: --> ({3,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
125+
; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
126126
; CHECK-NEXT: %iv.4 = add i64 %iv, 4
127127
; CHECK-NEXT: --> {4,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
128128
; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4
129129
; CHECK-NEXT: --> ({4,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
130130
; CHECK-NEXT: %iv.5 = add i64 %iv, 5
131131
; CHECK-NEXT: --> {5,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
132132
; CHECK-NEXT: %div.5 = udiv i64 %iv.5, 4
133-
; CHECK-NEXT: --> ({5,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
133+
; CHECK-NEXT: --> ({4,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
134134
; CHECK-NEXT: %iv.next = add i64 %iv, 4
135135
; CHECK-NEXT: --> {4,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
136136
; CHECK-NEXT: Determining loop execution counts for: @test_step4_div4

llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %shift = ashr i32 %val, %k
66
; CHECK: Cost of 2 for VF 2: WIDEN ir<%shift> = ashr ir<%val>, ir<%k>
77
; CHECK: Cost of 2 for VF 4: WIDEN ir<%shift> = ashr ir<%val>, ir<%k>
8-
define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr #0 {
8+
define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr {
99
entry:
1010
br label %body
1111

@@ -21,5 +21,31 @@ body:
2121

2222
exit:
2323
ret void
24+
}
25+
26+
; CHECK: 'shift_and_masked_load_store'
27+
; CHECK: Cost of 1 for VF 2: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2>
28+
; CHECK: Cost of 1 for VF 4: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2>
29+
; CHECK: Cost of 4 for VF 8: WIDEN ir<%shifted> = lshr ir<%iv>, ir<2>
30+
define void @shift_and_masked_load_store(i64 %trip.count) #0 {
31+
entry:
32+
br label %loop
33+
34+
loop:
35+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
36+
%shifted = lshr i64 %iv, 2
37+
%masked.idx = and i64 %shifted, 1
38+
%load.ptr = getelementptr i16, ptr poison, i64 %masked.idx
39+
%val = load i16, ptr %load.ptr, align 2
40+
%store.idx = shl nuw i64 %iv, 2
41+
%store.ptr = getelementptr i8, ptr poison, i64 %store.idx
42+
store i16 %val, ptr %store.ptr, align 2
43+
%iv.next = add i64 %iv, 1
44+
%cmp = icmp eq i64 %iv, %trip.count
45+
br i1 %cmp, label %exit, label %loop
2446

47+
exit:
48+
ret void
2549
}
50+
51+
attributes #0 = { "target-features"="+avx2" "tune-cpu"="alderlake" }

0 commit comments

Comments
 (0)