From 9ef23464d0f51278c5aecdea36edb6a71b65172f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 3 Jun 2025 10:38:19 +0100 Subject: [PATCH 1/4] [SCEV] Fold zext(C+A) -> (sext(C) + zext(A)) if possible. Simplify zext(C+A) -> (sext(C) + zext(A)) if * zext (C + A) >=s 0 and * A >=s C. For now this is limited to cases where the first operand is a constant, so the SExt can be folded to a new constant. This can be relaxed in the future. Alive2 proof of the general pattern and the test changes in zext-nuw.ll (times out in the online instance but verifies locally) https://alive2.llvm.org/ce/z/_BtyGy --- llvm/lib/Analysis/ScalarEvolution.cpp | 12 +++++++++++ .../max-backedge-taken-count-guard-info.ll | 2 +- .../LoopIdiom/X86/memset-size-compute.ll | 8 ++++---- .../Transforms/LoopVectorize/reduction.ll | 20 +++++++++---------- 4 files changed, 27 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 13b9aa28c827b..9248bf007d025 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1795,6 +1795,18 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1); } + const SCEVConstant *C; + const SCEV *A; + // zext (C + A) -> (sext(C) + zext(A)) if zext (C + A) >=s 0 + // and A >=s V. + if (SA->hasNoSignedWrap() && isKnownNonNegative(SA) && + match(SA, m_scev_Add(m_SCEVConstant(C), m_SCEV(A))) && + isKnownPredicate(CmpInst::ICMP_SGE, A, C)) { + SmallVector Ops = {getSignExtendExpr(C, Ty, Depth + 1), + getZeroExtendExpr(A, Ty, Depth + 1)}; + return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1); + } + // zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...)) // if D + (C - D + x + y + ...) could be proven to not unsigned wrap // where D maximizes the number of trailing zeros of (C - D + x + y + ...) 
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index 9bf2427eddb9c..1a04b0c72cf2c 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -1231,7 +1231,7 @@ define void @optimized_range_check_unsigned3(ptr %pred, i1 %c) { ; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,3) S: [0,3) Exits: (-1 + %N) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i16, ptr %pred, i32 %iv -; CHECK-NEXT: --> {%pred,+,2}<%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N) to i64)) + %pred) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%pred,+,2}<%loop> U: full-set S: full-set Exits: (-2 + (2 * (zext i32 %N to i64)) + %pred) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4) S: [1,4) Exits: %N LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @optimized_range_check_unsigned3 diff --git a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll index ea2cfe74be264..feef268bc7412 100644 --- a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll @@ -15,11 +15,11 @@ define void @test(ptr %ptr) { ; CHECK: for.body.preheader: ; CHECK-NEXT: [[LIM_0:%.*]] = phi i32 [ 65, [[ENTRY:%.*]] ], [ 1, [[DEAD:%.*]] ] ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 8 -; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[LIM_0]], i32 2) -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 
[[LIM_0]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 2) ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP2]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], -8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP3]], i1 false) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[FOR_BODY_PREHEADER]] ] diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index aa1ac25182bb5..28bdc77409927 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -1199,13 +1199,13 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) { ; CHECK-SAME: i16 [[N:%.*]], ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2) -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i16 [[SMAX]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[TMP0]], -1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i16 [[N]], 5 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], 32764 -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw nsw i32 [[N_VEC]] to i16 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], -4 +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nsw i32 [[N_VEC]] to i16 ; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i16 [[DOTCAST]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1222,7 +1222,7 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] @@ -1277,13 +1277,13 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) { ; CHECK-SAME: i16 [[N:%.*]], ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2) -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i16 [[SMAX]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[TMP0]], -1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i16 [[N]], 5 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], 32764 -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw nsw i32 [[N_VEC]] to i16 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], -4 +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nsw i32 [[N_VEC]] to i16 ; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i16 [[DOTCAST]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1300,7 +1300,7 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label 
[[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] From a6c01871a4f0f31ca54a03a6716964460b4df836 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 3 Jun 2025 16:37:46 +0100 Subject: [PATCH 2/4] !fixup drop A >= C requirement --- llvm/lib/Analysis/ScalarEvolution.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 9248bf007d025..a14598bb702e8 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1797,13 +1797,11 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, const SCEVConstant *C; const SCEV *A; - // zext (C + A) -> (sext(C) + zext(A)) if zext (C + A) >=s 0 - // and A >=s V. + // zext (C + A) -> (sext(C) + sext(A)) if zext (C + A) >=s 0. if (SA->hasNoSignedWrap() && isKnownNonNegative(SA) && - match(SA, m_scev_Add(m_SCEVConstant(C), m_SCEV(A))) && - isKnownPredicate(CmpInst::ICMP_SGE, A, C)) { + match(SA, m_scev_Add(m_SCEVConstant(C), m_SCEV(A)))) { SmallVector Ops = {getSignExtendExpr(C, Ty, Depth + 1), - getZeroExtendExpr(A, Ty, Depth + 1)}; + getSignExtendExpr(A, Ty, Depth + 1)}; return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1); } From 9a6bedbc99bcd0dca0cddb93e35e551b53a4246c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 3 Jun 2025 19:58:47 +0100 Subject: [PATCH 3/4] !fixup add test showing regression. 
--- .../Transforms/IndVarSimplify/add-nsw-zext-fold.ll | 13 +++++++------ llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll | 7 ++++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/llvm/test/Transforms/IndVarSimplify/add-nsw-zext-fold.ll b/llvm/test/Transforms/IndVarSimplify/add-nsw-zext-fold.ll index 1de41e47a8569..c263b3f2d060b 100644 --- a/llvm/test/Transforms/IndVarSimplify/add-nsw-zext-fold.ll +++ b/llvm/test/Transforms/IndVarSimplify/add-nsw-zext-fold.ll @@ -12,14 +12,15 @@ define void @add_nsw_zext_fold_results_in_sext(i64 %len) { ; CHECK-NEXT: [[LEN_TRUNC:%.*]] = trunc i64 [[LEN]] to i32 ; CHECK-NEXT: [[LZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[LEN_TRUNC]], i1 false) ; CHECK-NEXT: [[SUB_I:%.*]] = lshr i32 [[LZ]], 3 -; CHECK-NEXT: [[ADD_I:%.*]] = sub i32 5, [[SUB_I]] ; CHECK-NEXT: [[PRECOND:%.*]] = icmp eq i32 [[SUB_I]], 5 ; CHECK-NEXT: br i1 [[PRECOND]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]] ; CHECK: [[LOOP_PREHEADER]]: -; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[ADD_I]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[SUB_I]] +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], 5 ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP1]], %[[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP2]], %[[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[IV:%.*]] = trunc nuw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[SH_PROM:%.*]] = zext nneg i32 [[IV_NEXT]] to i64 @@ -65,9 +66,9 @@ define void @add_nsw_zext_fold_results_in_sext_known_positive(i32 %mask, ptr %sr ; CHECK-NEXT: [[PRECOND:%.*]] = icmp slt i32 [[ADD]], 0 ; CHECK-NEXT: br i1 [[PRECOND]], label %[[EXIT:.*]], label %[[PH:.*]] ; CHECK: [[PH]]: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 78, [[SPEC_SELECT]] -; CHECK-NEXT: 
[[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 0, [[SPEC_SELECT]] +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], 79 ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[TMP2]] diff --git a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll index df32e60d5065a..2efb72a017899 100644 --- a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll +++ b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll @@ -9,9 +9,10 @@ define void @fold_add_zext_to_sext(ptr %dst, i1 %start) { ; CHECK-NEXT: [[TMP0:%.*]] = zext i1 [[START]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 25, [[START_EXT]] -; CHECK-NEXT: [[TMP3:%.*]] = zext nneg i32 [[TMP2]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[START_EXT]] +; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = shl nsw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP5]], 100 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[SCEVGEP]], i8 0, i64 [[TMP4]], i1 false) ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: From 6923420f57d7b7bae7fe2edb62a0b42e0fab21c8 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 5 Jun 2025 20:09:55 +0100 Subject: [PATCH 4/4] !fixup use getAddExpr taking 2 SCEVs. 
--- llvm/lib/Analysis/ScalarEvolution.cpp | 6 +++--- .../LoopVectorize/AArch64/predicated-costs.ll | 4 ++-- .../Transforms/LoopVectorize/runtime-check.ll | 16 +++++++--------- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index a14598bb702e8..350425eeb7c45 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1800,9 +1800,9 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, // zext (C + A) -> (sext(C) + sext(A)) if zext (C + A) >=s 0. if (SA->hasNoSignedWrap() && isKnownNonNegative(SA) && match(SA, m_scev_Add(m_SCEVConstant(C), m_SCEV(A)))) { - SmallVector Ops = {getSignExtendExpr(C, Ty, Depth + 1), - getSignExtendExpr(A, Ty, Depth + 1)}; - return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1); + return getAddExpr(getSignExtendExpr(C, Ty, Depth + 1), + getSignExtendExpr(A, Ty, Depth + 1), SCEV::FlagNSW, + Depth + 1); } // zext(C + x + y + ...) 
--> (zext(D) + zext((C - D) + x + y + ...)) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll index 019d2ee9886a6..a2ee1f3323489 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll @@ -47,8 +47,8 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8 ; CHECK-NEXT: [[TMP18:%.*]] = shl i64 [[OFF]], 3 ; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[DST_1]], i64 [[TMP18]] ; CHECK-NEXT: [[SMAX7:%.*]] = call i32 @llvm.smax.i32(i32 [[N_SUB]], i32 4) -; CHECK-NEXT: [[TMP19:%.*]] = add nsw i32 [[SMAX7]], -1 -; CHECK-NEXT: [[TMP20:%.*]] = zext nneg i32 [[TMP19]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = zext nneg i32 [[SMAX7]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[TMP19]], -1 ; CHECK-NEXT: [[TMP21:%.*]] = lshr i64 [[TMP20]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[TMP21]], 9 ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[TMP18]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll index f4f29689d9cfb..5f92a281ed7c6 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -492,14 +492,12 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 12 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[LEN]], -7 -; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i32 [[LEN]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[OUT]], i64 
[[TMP3]] -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TMP4]], i64 14 ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP2]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[IN]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[IN]], [[TMP4]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -511,11 +509,11 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], 6 ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[OFFSET_IDX]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[OUT]], i64 [[TMP6]] -; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2, !alias.scope [[META42:![0-9]+]], !noalias [[META45:![0-9]+]] -; CHECK-NEXT: store i32 0, ptr [[IN]], align 4, !alias.scope [[META45]] +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2, !alias.scope [[META37:![0-9]+]], !noalias [[META40:![0-9]+]] +; CHECK-NEXT: store i32 0, ptr [[IN]], align 4, !alias.scope [[META40]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -530,7 +528,7 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: store i32 0, ptr [[IN]], align 4 ; CHECK-NEXT: [[CMP7_NOT:%.*]] = icmp sgt i32 [[LEN]], [[IV_NEXT]] 
-; CHECK-NEXT: br i1 [[CMP7_NOT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP48:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP7_NOT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP43:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ;