diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index a31f17b1936d6..e6be22a8e4148 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3492,17 +3492,27 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0. // We can currently only fold X%N if X is constant. const SCEVConstant *StartC = dyn_cast(AR->getStart()); - if (StartC && !DivInt.urem(StepInt) && - getZeroExtendExpr(AR, ExtTy) == - getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), - getZeroExtendExpr(Step, ExtTy), - AR->getLoop(), SCEV::FlagAnyWrap)) { + if (StartC && !DivInt.urem(StepInt)) { const APInt &StartInt = StartC->getAPInt(); const APInt &StartRem = StartInt.urem(StepInt); - if (StartRem != 0) { - const SCEV *NewLHS = - getAddRecExpr(getConstant(StartInt - StartRem), Step, - AR->getLoop(), SCEV::FlagNW); + bool NoWrap = + getZeroExtendExpr(AR, ExtTy) == + getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), + getZeroExtendExpr(Step, ExtTy), AR->getLoop(), + SCEV::FlagAnyWrap); + + // With X < N <= C and both N, C as powers-of-2, the transformation + // {X,+,N}/C => {(X - X%N),+,N}/C preserves division results even + // if wrapping occurs, as the division results remain equivalent for + // all offsets in [[(X - X%N), X). + bool CanFoldWithWrap = StepInt.isStrictlyPositive() && + StartInt.ult(StepInt) && // X < N + StepInt.sle(DivInt) && // N <= C + StepInt.isPowerOf2() && DivInt.isPowerOf2(); + if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) { + const SCEV *NewLHS = getAddRecExpr( + getConstant(StartInt - StartRem), Step, AR->getLoop(), + NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap); if (LHS != NewLHS) { LHS = NewLHS; diff --git a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll index 0a6ef0dad4569..bb0744f8c4550 100644 --- a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll +++ b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll @@ -13,7 +13,7 @@ define void @test_step2_div4(i64 %n) { ; CHECK-NEXT: %iv.1 = add i64 %iv, 1 ; CHECK-NEXT: --> {1,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4 -; CHECK-NEXT: --> ({1,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> ({0,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.2 = add i64 %iv, 2 ; CHECK-NEXT: --> {2,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4 @@ -114,15 +114,15 @@ define void @test_step4_div4(i64 %n) { ; CHECK-NEXT: %iv.1 = add i64 %iv, 1 ; CHECK-NEXT: --> {1,+,4}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4 -; CHECK-NEXT: --> ({1,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.2 = add i64 %iv, 2 ; CHECK-NEXT: --> {2,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4 -; CHECK-NEXT: --> ({2,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.3 = add i64 %iv, 3 ; CHECK-NEXT: --> {3,+,4}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4 -; CHECK-NEXT: --> ({3,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.4 = add i64 %iv, 4 ; CHECK-NEXT: --> {4,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll index 166875dd55aae..b612bfb88198e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll @@ -5,7 +5,7 @@ ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %shift = ashr i32 %val, %k ; CHECK: Cost of 2 for VF 2: WIDEN ir<%shift> = ashr ir<%val>, ir<%k> ; CHECK: Cost of 2 for VF 4: WIDEN ir<%shift> = ashr ir<%val>, ir<%k> -define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr #0 { +define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr { entry: br label %body @@ -21,5 +21,31 @@ body: exit: ret void +} + +; CHECK: 'shift_and_masked_load_store' +; CHECK: Cost of 1 for VF 2: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2> +; CHECK: Cost of 1 for VF 4: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2> +; CHECK: Cost of 4 for VF 8: WIDEN ir<%shifted> = lshr ir<%iv>, ir<2> +define void @shift_and_masked_load_store(i64 %trip.count) #0 { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %shifted = lshr i64 %iv, 2 + %masked.idx = and i64 %shifted, 1 + %load.ptr = getelementptr i16, ptr poison, i64 %masked.idx + %val = load i16, ptr %load.ptr, align 2 + %store.idx = shl nuw i64 %iv, 2 + %store.ptr = getelementptr i8, ptr poison, i64 %store.idx + store i16 %val, ptr %store.ptr, align 2 + %iv.next = add i64 %iv, 1 + %cmp = icmp eq i64 %iv, %trip.count + br i1 %cmp, label %exit, label %loop +exit: + ret void } + +attributes #0 = { "target-features"="+avx2" "tune-cpu"="alderlake" }