-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[SCEV] Try to push op into ZExt: C * zext (A + B) -> zext (A*C + B*C) #155300
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-analysis Author: Florian Hahn (fhahn) ChangesTry to push constant multiply operand into a ZExt containing an add, if possible. In general we are trying to push down ops through ZExt if possible. This is similar to #151227 which did the same for additions. For now this is restricted to adds with a constant operand, which is similar to some of the logic above. This enables some additional simplifications. Alive2 Proof: https://alive2.llvm.org/ce/z/97pbSL Full diff: https://github.com/llvm/llvm-project/pull/155300.diff 6 Files Affected:
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index f60a1e9f22704..40e263efecec6 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1818,7 +1818,7 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
if (auto *SM = dyn_cast<SCEVMulExpr>(Op)) {
// zext((A * B * ...)<nuw>) --> (zext(A) * zext(B) * ...)<nuw>
- if (SM->hasNoUnsignedWrap()) {
+ if (SM->hasNoUnsignedWrap() /*|| getUnsignedRange(SM->getOperand(0)).unsignedMulMayOverflow(getUnsignedRange()) ==ConstantRange::OverflowResult::NeverOverflows*/) {
// If the multiply does not unsign overflow then we can, by definition,
// commute the zero extension with the multiply operation.
SmallVector<const SCEV *, 4> Ops;
@@ -3199,6 +3199,22 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
AddRec->getNoWrapFlags(FlagsMask));
}
}
+
+ // Try to push the constant operand into a ZExt: C + zext (A + B) ->
+ // zext (C*A + C*B) if trunc (C) * (A + B) does not unsigned-wrap.
+ const SCEVAddExpr *InnerAdd;
+ if (match(Ops[1], m_scev_ZExt(m_scev_Add(InnerAdd)))) {
+ const SCEV *NarrowC = getTruncateExpr(LHSC, InnerAdd->getType());
+ if (isa<SCEVConstant>(InnerAdd->getOperand(0)) &&
+ getZeroExtendExpr(NarrowC, Ops[1]->getType()) == LHSC &&
+ hasFlags(StrengthenNoWrapFlags(this, scMulExpr, {NarrowC, InnerAdd},
+ SCEV::FlagAnyWrap),
+ SCEV::FlagNUW)) {
+ auto *Res =
+ getMulExpr(NarrowC, InnerAdd, SCEV::FlagAnyWrap, Depth + 1);
+ return getZeroExtendExpr(Res, Ops[1]->getType(), Depth + 1);
+ };
+ }
}
}
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
index 9bf2427eddb9c..7cdf3a2d5fd58 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@@ -1231,7 +1231,7 @@ define void @optimized_range_check_unsigned3(ptr %pred, i1 %c) {
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,3) S: [0,3) Exits: (-1 + %N)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i16, ptr %pred, i32 %iv
-; CHECK-NEXT: --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N)<nsw> to i64))<nuw><nsw> + %pred) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: ((zext i32 (-2 + (2 * %N)<nuw><nsw>)<nsw> to i64) + %pred) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,4) S: [1,4) Exits: %N LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @optimized_range_check_unsigned3
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
index e1a042747e0ac..84ae79d53e25e 100644
--- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
+++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
@@ -76,18 +76,18 @@ define i64 @narow_canonical_iv_wide_multiplied_iv(i32 %x, i64 %y, ptr %0) {
; CHECK-LABEL: @narow_canonical_iv_wide_multiplied_iv(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 1)
-; CHECK-NEXT: [[MUL_Y:%.*]] = shl i64 [[Y:%.*]], 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[IV_MUL:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[IV_MUL_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[IV_MUL_NEXT]] = add i64 [[IV_MUL]], [[MUL_Y]]
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
; CHECK-NEXT: br i1 [[EC]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ [[IV_MUL_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[SMAX]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP3]], 1
; CHECK-NEXT: ret i64 [[TMP6]]
;
entry:
diff --git a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
index ea2cfe74be264..0123f15334281 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
@@ -16,10 +16,10 @@ define void @test(ptr %ptr) {
; CHECK-NEXT: [[LIM_0:%.*]] = phi i32 [ 65, [[ENTRY:%.*]] ], [ 1, [[DEAD:%.*]] ]
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 8
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[LIM_0]], i32 2)
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[UMAX]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[UMAX]], 3
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[TMP2]], -8
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP2]], i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP1]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[FOR_BODY_PREHEADER]] ]
diff --git a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
index df32e60d5065a..fc2b5571250ac 100644
--- a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
+++ b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
@@ -9,10 +9,10 @@ define void @fold_add_zext_to_sext(ptr %dst, i1 %start) {
; CHECK-NEXT: [[TMP0:%.*]] = zext i1 [[START]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 25, [[START_EXT]]
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i32 [[START_EXT]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 100, [[TMP4]]
; CHECK-NEXT: [[TMP3:%.*]] = zext nneg i32 [[TMP2]] to i64
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[SCEVGEP]], i8 0, i64 [[TMP4]], i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[SCEVGEP]], i8 0, i64 [[TMP3]], i1 false)
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START_EXT]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
index f4f29689d9cfb..5f92a281ed7c6 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
@@ -492,14 +492,12 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 12
-; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[LEN]], -7
-; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i32 [[LEN]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[OUT]], i64 [[TMP3]]
-; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TMP4]], i64 14
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP2]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[IN]], [[SCEVGEP1]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[IN]], [[TMP4]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -511,11 +509,11 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], 6
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[OFFSET_IDX]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[OUT]], i64 [[TMP6]]
-; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2, !alias.scope [[META42:![0-9]+]], !noalias [[META45:![0-9]+]]
-; CHECK-NEXT: store i32 0, ptr [[IN]], align 4, !alias.scope [[META45]]
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2, !alias.scope [[META37:![0-9]+]], !noalias [[META40:![0-9]+]]
+; CHECK-NEXT: store i32 0, ptr [[IN]], align 4, !alias.scope [[META40]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -530,7 +528,7 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: store i32 0, ptr [[IN]], align 4
; CHECK-NEXT: [[CMP7_NOT:%.*]] = icmp sgt i32 [[LEN]], [[IV_NEXT]]
-; CHECK-NEXT: br i1 [[CMP7_NOT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP48:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP7_NOT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP43:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
|
|
@llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesTry to push constant multiply operand into a ZExt containing an add, if possible. In general we are trying to push down ops through ZExt if possible. This is similar to #151227 which did the same for additions. For now this is restricted to adds with a constant operand, which is similar to some of the logic above. This enables some additional simplifications. Alive2 Proof: https://alive2.llvm.org/ce/z/97pbSL Full diff: https://github.com/llvm/llvm-project/pull/155300.diff 6 Files Affected:
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index f60a1e9f22704..40e263efecec6 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1818,7 +1818,7 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
if (auto *SM = dyn_cast<SCEVMulExpr>(Op)) {
// zext((A * B * ...)<nuw>) --> (zext(A) * zext(B) * ...)<nuw>
- if (SM->hasNoUnsignedWrap()) {
+ if (SM->hasNoUnsignedWrap() /*|| getUnsignedRange(SM->getOperand(0)).unsignedMulMayOverflow(getUnsignedRange()) ==ConstantRange::OverflowResult::NeverOverflows*/) {
// If the multiply does not unsign overflow then we can, by definition,
// commute the zero extension with the multiply operation.
SmallVector<const SCEV *, 4> Ops;
@@ -3199,6 +3199,22 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
AddRec->getNoWrapFlags(FlagsMask));
}
}
+
+ // Try to push the constant operand into a ZExt: C + zext (A + B) ->
+ // zext (C*A + C*B) if trunc (C) * (A + B) does not unsigned-wrap.
+ const SCEVAddExpr *InnerAdd;
+ if (match(Ops[1], m_scev_ZExt(m_scev_Add(InnerAdd)))) {
+ const SCEV *NarrowC = getTruncateExpr(LHSC, InnerAdd->getType());
+ if (isa<SCEVConstant>(InnerAdd->getOperand(0)) &&
+ getZeroExtendExpr(NarrowC, Ops[1]->getType()) == LHSC &&
+ hasFlags(StrengthenNoWrapFlags(this, scMulExpr, {NarrowC, InnerAdd},
+ SCEV::FlagAnyWrap),
+ SCEV::FlagNUW)) {
+ auto *Res =
+ getMulExpr(NarrowC, InnerAdd, SCEV::FlagAnyWrap, Depth + 1);
+ return getZeroExtendExpr(Res, Ops[1]->getType(), Depth + 1);
+ };
+ }
}
}
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
index 9bf2427eddb9c..7cdf3a2d5fd58 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@@ -1231,7 +1231,7 @@ define void @optimized_range_check_unsigned3(ptr %pred, i1 %c) {
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,3) S: [0,3) Exits: (-1 + %N)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i16, ptr %pred, i32 %iv
-; CHECK-NEXT: --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N)<nsw> to i64))<nuw><nsw> + %pred) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: ((zext i32 (-2 + (2 * %N)<nuw><nsw>)<nsw> to i64) + %pred) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,4) S: [1,4) Exits: %N LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @optimized_range_check_unsigned3
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
index e1a042747e0ac..84ae79d53e25e 100644
--- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
+++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
@@ -76,18 +76,18 @@ define i64 @narow_canonical_iv_wide_multiplied_iv(i32 %x, i64 %y, ptr %0) {
; CHECK-LABEL: @narow_canonical_iv_wide_multiplied_iv(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 1)
-; CHECK-NEXT: [[MUL_Y:%.*]] = shl i64 [[Y:%.*]], 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[IV_MUL:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[IV_MUL_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[IV_MUL_NEXT]] = add i64 [[IV_MUL]], [[MUL_Y]]
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
; CHECK-NEXT: br i1 [[EC]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ [[IV_MUL_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[SMAX]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP3]], 1
; CHECK-NEXT: ret i64 [[TMP6]]
;
entry:
diff --git a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
index ea2cfe74be264..0123f15334281 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
@@ -16,10 +16,10 @@ define void @test(ptr %ptr) {
; CHECK-NEXT: [[LIM_0:%.*]] = phi i32 [ 65, [[ENTRY:%.*]] ], [ 1, [[DEAD:%.*]] ]
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 8
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[LIM_0]], i32 2)
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[UMAX]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[UMAX]], 3
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[TMP2]], -8
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP2]], i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP1]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[FOR_BODY_PREHEADER]] ]
diff --git a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
index df32e60d5065a..fc2b5571250ac 100644
--- a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
+++ b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
@@ -9,10 +9,10 @@ define void @fold_add_zext_to_sext(ptr %dst, i1 %start) {
; CHECK-NEXT: [[TMP0:%.*]] = zext i1 [[START]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 25, [[START_EXT]]
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i32 [[START_EXT]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 100, [[TMP4]]
; CHECK-NEXT: [[TMP3:%.*]] = zext nneg i32 [[TMP2]] to i64
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[SCEVGEP]], i8 0, i64 [[TMP4]], i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[SCEVGEP]], i8 0, i64 [[TMP3]], i1 false)
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START_EXT]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
index f4f29689d9cfb..5f92a281ed7c6 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
@@ -492,14 +492,12 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 12
-; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[LEN]], -7
-; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i32 [[LEN]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[OUT]], i64 [[TMP3]]
-; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TMP4]], i64 14
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP2]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[IN]], [[SCEVGEP1]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[IN]], [[TMP4]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -511,11 +509,11 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], 6
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[OFFSET_IDX]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[OUT]], i64 [[TMP6]]
-; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2, !alias.scope [[META42:![0-9]+]], !noalias [[META45:![0-9]+]]
-; CHECK-NEXT: store i32 0, ptr [[IN]], align 4, !alias.scope [[META45]]
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2, !alias.scope [[META37:![0-9]+]], !noalias [[META40:![0-9]+]]
+; CHECK-NEXT: store i32 0, ptr [[IN]], align 4, !alias.scope [[META40]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -530,7 +528,7 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: store i32 0, ptr [[IN]], align 4
; CHECK-NEXT: [[CMP7_NOT:%.*]] = icmp sgt i32 [[LEN]], [[IV_NEXT]]
-; CHECK-NEXT: br i1 [[CMP7_NOT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP48:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP7_NOT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP43:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
|
nikic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM assuming no significant compile-time impact.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Leftover?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Woops, thought I removed that already. Stripped now, thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // Try to push the constant operand into a ZExt: C + zext (A + B) -> | |
| // Try to push the constant operand into a ZExt: C * zext (A + B) -> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
May as well pass NUW here, as you already proved it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
Try to push constant multiply operand into a ZExt containing an add, if possible. In general we are trying to push down ops through ZExt if possible. This is similar to llvm#151227 which did the same for additions. For now this is restricted to adds with a constant operand, which is similar to some of the logic above. This enables some additional simplifications. Alive2 Proof: https://alive2.llvm.org/ce/z/97pbSL
3e8ded2 to
f1fc8ab
Compare
fhahn
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Compile-time results look neutral overall http://llvm-compile-time-tracker.com/compare.php?from=bac8c8784c848f1dd7ddf44a9a463e6c15c6e594&to=89977e0dec139faae854c94fe932b7b768e60537&stat=instructions:u
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Woops, thought I removed that already. Stripped now, thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
…(A*C + B*C) (#155300) Try to push constant multiply operand into a ZExt containing an add, if possible. In general we are trying to push down ops through ZExt if possible. This is similar to llvm/llvm-project#151227 which did the same for additions. For now this is restricted to adds with a constant operand, which is similar to some of the logic above. This enables some additional simplifications. Alive2 Proof: https://alive2.llvm.org/ce/z/97pbSL PR: llvm/llvm-project#155300
Remove the fall-back to constant max BTC if the backedge-taken-count cannot be computed. The constant max backedge-taken count is computed considering loop guards, so to avoid regressions we need to apply loop guards as needed. Also remove the special handling for Mul in willNotOverflow, as this should not longer be needed after 9143746 (llvm#155300).
Remove the fall-back to constant max BTC if the backedge-taken-count cannot be computed. The constant max backedge-taken count is computed considering loop guards, so to avoid regressions we need to apply loop guards as needed. Also remove the special handling for Mul in willNotOverflow, as this should not longer be needed after 9143746 (llvm#155300).
#155672) Remove the fall-back to constant max BTC if the backedge-taken-count cannot be computed. The constant max backedge-taken count is computed considering loop guards, so to avoid regressions we need to apply loop guards as needed. Also remove the special handling for Mul in willNotOverflow, as this should not longer be needed after 9143746 (#155300). PR: #155672
…cking deref. (#155672) Remove the fall-back to constant max BTC if the backedge-taken-count cannot be computed. The constant max backedge-taken count is computed considering loop guards, so to avoid regressions we need to apply loop guards as needed. Also remove the special handling for Mul in willNotOverflow, as this should not longer be needed after 9143746 (llvm/llvm-project#155300). PR: llvm/llvm-project#155672
…ing deref. (#155672)" This reverts commit f0df1e3. Recommit with extra check for SCEVCouldNotCompute. Test has been added in b169302. Original message: Remove the fall-back to constant max BTC if the backedge-taken-count cannot be computed. The constant max backedge-taken count is computed considering loop guards, so to avoid regressions we need to apply loop guards as needed. Also remove the special handling for Mul in willNotOverflow, as this should not longer be needed after 9143746 (#155300). PR: #155672
… when checking deref. (#155672)" This reverts commit f0df1e3. Recommit with extra check for SCEVCouldNotCompute. Test has been added in b169302. Original message: Remove the fall-back to constant max BTC if the backedge-taken-count cannot be computed. The constant max backedge-taken count is computed considering loop guards, so to avoid regressions we need to apply loop guards as needed. Also remove the special handling for Mul in willNotOverflow, as this should not longer be needed after 9143746 (llvm/llvm-project#155300). PR: llvm/llvm-project#155672
…llvm#155300) Try to push constant multiply operand into a ZExt containing an add, if possible. In general we are trying to push down ops through ZExt if possible. This is similar to llvm#151227 which did the same for additions. For now this is restricted to adds with a constant operand, which is similar to some of the logic above. This enables some additional simplifications. Alive2 Proof: https://alive2.llvm.org/ce/z/97pbSL PR: llvm#155300 (cherry-picked from 9143746)
…ing deref. (llvm#155672)" This reverts commit f0df1e3. Recommit with extra check for SCEVCouldNotCompute. Test has been added in b169302. Original message: Remove the fall-back to constant max BTC if the backedge-taken-count cannot be computed. The constant max backedge-taken count is computed considering loop guards, so to avoid regressions we need to apply loop guards as needed. Also remove the special handling for Mul in willNotOverflow, as this should not longer be needed after 9143746 (llvm#155300). PR: llvm#155672 (cherry picked from commit a434a7a)
Try to push constant multiply operand into a ZExt containing an add, if possible. In general we are trying to push down ops through ZExt if possible. This is similar to #151227 which did the same for additions.
For now this is restricted to adds with a constant operand, which is similar to some of the logic above.
This enables some additional simplifications.
Alive2 Proof: https://alive2.llvm.org/ce/z/97pbSL