[SCEV] Allow udiv canonicalization of potentially-wrapping AddRecs #169576
Conversation
Extend the {X,+,N}/C => {(X - X%N),+,N}/C canonicalization to handle
AddRecs that may wrap, when X < N <= C and both N, C are powers of 2.
The alignment and power-of-2 properties ensure that the division results
remain equivalent for all offsets in [(X - X%N), X).
Alive2 Proof: https://alive2.llvm.org/ce/z/VGUZo3
Fixes llvm#168709
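
For intuition, the equivalence can be checked exhaustively at a small bit width. The sketch below is not part of the patch; it is a standalone brute-force check over i8 (names and bounds are illustrative) that, whenever step N and divisor C are powers of 2 with start X < N <= C, the wrapping AddRec {X,+,N} and its canonicalized form {X - X%N,+,N} produce identical unsigned quotients when divided by C:

```cpp
// Standalone brute-force check of the canonicalization claim over i8.
// Not part of the patch; illustrative only.
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned N = 1; N <= 128; N *= 2) {      // power-of-2 step N
    for (unsigned C = N; C <= 128; C *= 2) {    // power-of-2 divisor C, N <= C
      for (unsigned X = 0; X < N; ++X) {        // start X < N
        unsigned Y = X - X % N;                 // canonicalized start (0 when X < N)
        for (unsigned K = 0; K < 256; ++K) {    // enough steps to wrap around i8
          // Evaluate both AddRecs at step K, with i8 wraparound.
          std::uint8_t Orig = static_cast<std::uint8_t>(X + K * N);  // {X,+,N}
          std::uint8_t Canon = static_cast<std::uint8_t>(Y + K * N); // {Y,+,N}
          if (Orig / C != Canon / C) {
            std::printf("mismatch: N=%u C=%u X=%u K=%u\n", N, C, X, K);
            return 1;
          }
        }
      }
    }
  }
  std::puts("all i8 cases agree");
  return 0;
}
```

Note that since X < N, the canonicalized start X - X%N is simply 0, which is why the updated CHECK lines below rewrite the starts to {0,+,2} and {0,+,4}.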
@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

Changes

Extend the {X,+,N}/C => {(X - X%N),+,N}/C canonicalization to handle AddRecs that may wrap, when X < N <= C and both N, C are powers of 2. The alignment and power-of-2 properties ensure that the division results remain equivalent for all offsets in [(X - X%N), X).

Alive2 Proof: https://alive2.llvm.org/ce/z/VGUZo3

Fixes #168709

Full diff: https://github.com/llvm/llvm-project/pull/169576.diff

3 Files Affected:
- llvm/lib/Analysis/ScalarEvolution.cpp
- llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
- llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index a31f17b1936d6..e6be22a8e4148 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3492,17 +3492,27 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
/// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
// We can currently only fold X%N if X is constant.
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
- if (StartC && !DivInt.urem(StepInt) &&
- getZeroExtendExpr(AR, ExtTy) ==
- getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
- getZeroExtendExpr(Step, ExtTy),
- AR->getLoop(), SCEV::FlagAnyWrap)) {
+ if (StartC && !DivInt.urem(StepInt)) {
const APInt &StartInt = StartC->getAPInt();
const APInt &StartRem = StartInt.urem(StepInt);
- if (StartRem != 0) {
- const SCEV *NewLHS =
- getAddRecExpr(getConstant(StartInt - StartRem), Step,
- AR->getLoop(), SCEV::FlagNW);
+ bool NoWrap =
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy), AR->getLoop(),
+ SCEV::FlagAnyWrap);
+
+ // With X < N <= C and both N, C as powers-of-2, the transformation
+ // {X,+,N}/C => {(X - X%N),+,N}/C preserves division results even
+ // if wrapping occurs, as the division results remain equivalent for
+ // all offsets in [(X - X%N), X).
+ bool CanFoldWithWrap = StepInt.isStrictlyPositive() &&
+ StartInt.ult(StepInt) && // X < N
+ StepInt.sle(DivInt) && // N <= C
+ StepInt.isPowerOf2() && DivInt.isPowerOf2();
+ if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) {
+ const SCEV *NewLHS = getAddRecExpr(
+ getConstant(StartInt - StartRem), Step, AR->getLoop(),
+ NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
if (LHS != NewLHS) {
LHS = NewLHS;
diff --git a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
index 0a6ef0dad4569..bb0744f8c4550 100644
--- a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
+++ b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
@@ -13,7 +13,7 @@ define void @test_step2_div4(i64 %n) {
; CHECK-NEXT: %iv.1 = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4
-; CHECK-NEXT: --> ({1,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ({0,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.2 = add i64 %iv, 2
; CHECK-NEXT: --> {2,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4
@@ -114,15 +114,15 @@ define void @test_step4_div4(i64 %n) {
; CHECK-NEXT: %iv.1 = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4
-; CHECK-NEXT: --> ({1,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.2 = add i64 %iv, 2
; CHECK-NEXT: --> {2,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4
-; CHECK-NEXT: --> ({2,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.3 = add i64 %iv, 3
; CHECK-NEXT: --> {3,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4
-; CHECK-NEXT: --> ({3,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.4 = add i64 %iv, 4
; CHECK-NEXT: --> {4,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
index 166875dd55aae..b612bfb88198e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
@@ -5,7 +5,7 @@
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %shift = ashr i32 %val, %k
; CHECK: Cost of 2 for VF 2: WIDEN ir<%shift> = ashr ir<%val>, ir<%k>
; CHECK: Cost of 2 for VF 4: WIDEN ir<%shift> = ashr ir<%val>, ir<%k>
-define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr #0 {
+define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr {
entry:
br label %body
@@ -21,5 +21,31 @@ body:
exit:
ret void
+}
+
+; CHECK: 'shift_and_masked_load_store'
+; CHECK: Cost of 1 for VF 2: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2>
+; CHECK: Cost of 1 for VF 4: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2>
+; CHECK: Cost of 4 for VF 8: WIDEN ir<%shifted> = lshr ir<%iv>, ir<2>
+define void @shift_and_masked_load_store(i64 %trip.count) #0 {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %shifted = lshr i64 %iv, 2
+ %masked.idx = and i64 %shifted, 1
+ %load.ptr = getelementptr i16, ptr poison, i64 %masked.idx
+ %val = load i16, ptr %load.ptr, align 2
+ %store.idx = shl nuw i64 %iv, 2
+ %store.ptr = getelementptr i8, ptr poison, i64 %store.idx
+ store i16 %val, ptr %store.ptr, align 2
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp eq i64 %iv, %trip.count
+ br i1 %cmp, label %exit, label %loop
+exit:
+ ret void
}
+
+attributes #0 = { "target-features"="+avx2" "tune-cpu"="alderlake" }