Skip to content

Commit 34a6be4

Browse files
committed
[SCEV] Allow udiv canonicalization of potentially-wrapping AddRecs
Extend the {X,+,N}/C => {(X - X%N),+,N}/C canonicalization to handle AddRecs that may wrap, when X < N <= C and both N,C are powers of 2. The alignment and power-of-2 properties ensure division results remain equivalent for all offsets [(X - X%N), X). Alive2 Proof: https://alive2.llvm.org/ce/z/VGUZo3 Fixes #168709
1 parent 5f777b2 commit 34a6be4

File tree

3 files changed

+50
-14
lines changed

3 files changed

+50
-14
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3492,17 +3492,27 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
34923492
/// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
34933493
// We can currently only fold X%N if X is constant.
34943494
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
3495-
if (StartC && !DivInt.urem(StepInt) &&
3496-
getZeroExtendExpr(AR, ExtTy) ==
3497-
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
3498-
getZeroExtendExpr(Step, ExtTy),
3499-
AR->getLoop(), SCEV::FlagAnyWrap)) {
3495+
if (StartC && !DivInt.urem(StepInt)) {
35003496
const APInt &StartInt = StartC->getAPInt();
35013497
const APInt &StartRem = StartInt.urem(StepInt);
3502-
if (StartRem != 0) {
3503-
const SCEV *NewLHS =
3504-
getAddRecExpr(getConstant(StartInt - StartRem), Step,
3505-
AR->getLoop(), SCEV::FlagNW);
3498+
bool NoWrap =
3499+
getZeroExtendExpr(AR, ExtTy) ==
3500+
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
3501+
getZeroExtendExpr(Step, ExtTy), AR->getLoop(),
3502+
SCEV::FlagAnyWrap);
3503+
3504+
// With X < N <= C and both N, C as powers-of-2, the transformation
3505+
// {X,+,N}/C => {(X - X%N),+,N}/C preserves division results even
3506+
// if wrapping occurs, as the division results remain equivalent for
3507+
// all offsets in [[(X - X%N), X).
3508+
bool CanFoldWithWrap = StepInt.isStrictlyPositive() &&
3509+
StartInt.ult(StepInt) && // X < N
3510+
StepInt.sle(DivInt) && // N <= C
3511+
StepInt.isPowerOf2() && DivInt.isPowerOf2();
3512+
if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) {
3513+
const SCEV *NewLHS = getAddRecExpr(
3514+
getConstant(StartInt - StartRem), Step, AR->getLoop(),
3515+
NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
35063516
if (LHS != NewLHS) {
35073517
LHS = NewLHS;
35083518

llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define void @test_step2_div4(i64 %n) {
1313
; CHECK-NEXT: %iv.1 = add i64 %iv, 1
1414
; CHECK-NEXT: --> {1,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
1515
; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4
16-
; CHECK-NEXT: --> ({1,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
16+
; CHECK-NEXT: --> ({0,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
1717
; CHECK-NEXT: %iv.2 = add i64 %iv, 2
1818
; CHECK-NEXT: --> {2,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
1919
; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4
@@ -114,15 +114,15 @@ define void @test_step4_div4(i64 %n) {
114114
; CHECK-NEXT: %iv.1 = add i64 %iv, 1
115115
; CHECK-NEXT: --> {1,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
116116
; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4
117-
; CHECK-NEXT: --> ({1,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
117+
; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
118118
; CHECK-NEXT: %iv.2 = add i64 %iv, 2
119119
; CHECK-NEXT: --> {2,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
120120
; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4
121-
; CHECK-NEXT: --> ({2,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
121+
; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
122122
; CHECK-NEXT: %iv.3 = add i64 %iv, 3
123123
; CHECK-NEXT: --> {3,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
124124
; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4
125-
; CHECK-NEXT: --> ({3,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
125+
; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
126126
; CHECK-NEXT: %iv.4 = add i64 %iv, 4
127127
; CHECK-NEXT: --> {4,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
128128
; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4

llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %shift = ashr i32 %val, %k
66
; CHECK: Cost of 2 for VF 2: WIDEN ir<%shift> = ashr ir<%val>, ir<%k>
77
; CHECK: Cost of 2 for VF 4: WIDEN ir<%shift> = ashr ir<%val>, ir<%k>
8-
define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr #0 {
8+
define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr {
99
entry:
1010
br label %body
1111

@@ -21,5 +21,31 @@ body:
2121

2222
exit:
2323
ret void
24+
}
25+
26+
; CHECK: 'shift_and_masked_load_store'
27+
; CHECK: Cost of 1 for VF 2: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2>
28+
; CHECK: Cost of 1 for VF 4: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2>
29+
; CHECK: Cost of 4 for VF 8: WIDEN ir<%shifted> = lshr ir<%iv>, ir<2>
30+
define void @shift_and_masked_load_store(i64 %trip.count) #0 {
31+
entry:
32+
br label %loop
33+
34+
loop:
35+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
36+
%shifted = lshr i64 %iv, 2
37+
%masked.idx = and i64 %shifted, 1
38+
%load.ptr = getelementptr i16, ptr poison, i64 %masked.idx
39+
%val = load i16, ptr %load.ptr, align 2
40+
%store.idx = shl nuw i64 %iv, 2
41+
%store.ptr = getelementptr i8, ptr poison, i64 %store.idx
42+
store i16 %val, ptr %store.ptr, align 2
43+
%iv.next = add i64 %iv, 1
44+
%cmp = icmp eq i64 %iv, %trip.count
45+
br i1 %cmp, label %exit, label %loop
2446

47+
exit:
48+
ret void
2549
}
50+
51+
attributes #0 = { "target-features"="+avx2" "tune-cpu"="alderlake" }

0 commit comments

Comments
 (0)