From e5374bdb9f893a22785fa72b2bc002e92afb9c2b Mon Sep 17 00:00:00 2001
From: ShihPo Hung
Date: Wed, 13 Aug 2025 18:18:57 -0700
Subject: [PATCH 1/4] [VPlan] optimize BranchCond for canonical
 WidenIntOrFpInduction recipe

---
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  17 +-
 .../AArch64/conditional-branches-cost.ll      | 180 ++++--------
 ...eave-to-widen-memory-remove-loop-region.ll |  35 +--
 .../LoopVectorize/X86/constant-fold.ll        |  50 ++--
 .../X86/drop-poison-generating-flags.ll       | 259 +++++-------------
 .../LoopVectorize/X86/interleave-cost.ll      |  32 +--
 ...outer_loop_test1_no_explicit_vect_width.ll |  15 +-
 .../LoopVectorize/cast-induction.ll           |   6 +-
 .../Transforms/LoopVectorize/uniform-blend.ll |  48 +---
 9 files changed, 201 insertions(+), 441 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c999ef2d666ba..5d0225508f259 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1498,10 +1498,21 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
   // the region, otherwise replace the terminator controlling the latch with
   // (BranchOnCond true).
   auto *Header = cast<VPBasicBlock>(VectorRegion->getEntry());
-  if (all_of(Header->phis(),
-             IsaPred)) {
+  if (all_of(Header->phis(), [](VPRecipeBase &Phi) {
+        if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
+          return R->isCanonical();
+        return isa(&Phi);
+      })) {
     for (VPRecipeBase &HeaderR : make_early_inc_range(Header->phis())) {
+      if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&HeaderR)) {
+        VPBuilder Builder(Plan.getVectorPreheader());
+        VPValue *StepV = Builder.createNaryOp(VPInstruction::StepVector, {},
+                                              R->getScalarType());
+        HeaderR.getVPSingleValue()->replaceAllUsesWith(StepV);
+        HeaderR.eraseFromParent();
+        continue;
+      }
       auto *Phi = cast(&HeaderR);
       HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
       HeaderR.eraseFromParent();
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index a2c6a21796e8f..62c1b7b1fcf2f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -771,85 +771,55 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; DEFAULT: [[VECTOR_PH]]:
 ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
 ; DEFAULT: [[VECTOR_BODY]]:
-; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; DEFAULT-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8
-; DEFAULT-NEXT: [[TMP1:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 6)
-; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
-; DEFAULT-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; DEFAULT: [[PRED_STORE_IF]]:
-; DEFAULT-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
-; DEFAULT-NEXT: [[TMP5:%.*]] = add i8 [[TMP0]], 0
-; DEFAULT-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
+; DEFAULT-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 0
+; DEFAULT-NEXT: store i8 0, ptr [[TMP0]], align 1
 ; DEFAULT-NEXT: br label 
%[[PRED_STORE_CONTINUE]] ; DEFAULT: [[PRED_STORE_CONTINUE]]: -; DEFAULT-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 -; DEFAULT-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] ; DEFAULT: [[PRED_STORE_IF1]]: -; DEFAULT-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 -; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] -; DEFAULT-NEXT: [[TMP9:%.*]] = add i8 [[TMP0]], 1 -; DEFAULT-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1 +; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1 +; DEFAULT-NEXT: store i8 1, ptr [[TMP1]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; DEFAULT: [[PRED_STORE_CONTINUE2]]: -; DEFAULT-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 -; DEFAULT-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] ; DEFAULT: [[PRED_STORE_IF3]]: -; DEFAULT-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2 -; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]] -; DEFAULT-NEXT: [[TMP13:%.*]] = add i8 [[TMP0]], 2 -; DEFAULT-NEXT: store i8 [[TMP13]], ptr [[TMP12]], align 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 2 +; DEFAULT-NEXT: store i8 2, ptr [[TMP2]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE4]] ; DEFAULT: [[PRED_STORE_CONTINUE4]]: -; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 -; DEFAULT-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] ; DEFAULT: [[PRED_STORE_IF5]]: -; DEFAULT-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 3 -; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]] -; DEFAULT-NEXT: [[TMP17:%.*]] = add i8 [[TMP0]], 3 -; DEFAULT-NEXT: store i8 [[TMP17]], ptr [[TMP16]], align 1 +; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 3 +; DEFAULT-NEXT: store i8 3, ptr [[TMP3]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; DEFAULT: [[PRED_STORE_CONTINUE6]]: -; DEFAULT-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 -; DEFAULT-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] ; DEFAULT: [[PRED_STORE_IF7]]: -; DEFAULT-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 4 -; DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP19]] -; DEFAULT-NEXT: [[TMP21:%.*]] = add i8 [[TMP0]], 4 -; DEFAULT-NEXT: store i8 [[TMP21]], ptr [[TMP20]], align 1 +; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 4 +; DEFAULT-NEXT: store i8 4, ptr [[TMP4]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE8]] ; DEFAULT: [[PRED_STORE_CONTINUE8]]: -; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 -; DEFAULT-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] ; DEFAULT: [[PRED_STORE_IF9]]: -; DEFAULT-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 5 -; DEFAULT-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]] -; DEFAULT-NEXT: [[TMP25:%.*]] = add i8 [[TMP0]], 5 -; DEFAULT-NEXT: store 
i8 [[TMP25]], ptr [[TMP24]], align 1 +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 5 +; DEFAULT-NEXT: store i8 5, ptr [[TMP5]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE10]] ; DEFAULT: [[PRED_STORE_CONTINUE10]]: -; DEFAULT-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 -; DEFAULT-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] ; DEFAULT: [[PRED_STORE_IF11]]: -; DEFAULT-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 6 -; DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]] -; DEFAULT-NEXT: [[TMP29:%.*]] = add i8 [[TMP0]], 6 -; DEFAULT-NEXT: store i8 [[TMP29]], ptr [[TMP28]], align 1 +; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 6 +; DEFAULT-NEXT: store i8 6, ptr [[TMP6]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; DEFAULT: [[PRED_STORE_CONTINUE12]]: -; DEFAULT-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 -; DEFAULT-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]] +; DEFAULT-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] ; DEFAULT: [[PRED_STORE_IF13]]: -; DEFAULT-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 7 -; DEFAULT-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP31]] -; DEFAULT-NEXT: [[TMP33:%.*]] = add i8 [[TMP0]], 7 -; DEFAULT-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 +; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7 +; DEFAULT-NEXT: store i8 7, ptr [[TMP7]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE14]] ; DEFAULT: [[PRED_STORE_CONTINUE14]]: -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) -; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; DEFAULT-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[EXIT:.*]] ; DEFAULT: [[SCALAR_PH]]: @@ -861,7 +831,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1 ; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 7 -; DEFAULT-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] +; DEFAULT-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP24:![0-9]+]] ; DEFAULT: [[EXIT]]: ; DEFAULT-NEXT: ret void ; @@ -872,85 +842,55 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: br label %[[VECTOR_BODY:.*]] ; PRED: [[VECTOR_BODY]]: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ] -; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ] -; PRED-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8 -; PRED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 6) -; PRED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 -; PRED-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; PRED: [[PRED_STORE_IF]]: -; PRED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; PRED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr 
[[DST]], i64 [[TMP3]] -; PRED-NEXT: [[TMP5:%.*]] = add i8 [[TMP0]], 0 -; PRED-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1 +; PRED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 0 +; PRED-NEXT: store i8 0, ptr [[TMP0]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] ; PRED: [[PRED_STORE_CONTINUE]]: -; PRED-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 -; PRED-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] +; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] ; PRED: [[PRED_STORE_IF1]]: -; PRED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 -; PRED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] -; PRED-NEXT: [[TMP9:%.*]] = add i8 [[TMP0]], 1 -; PRED-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1 +; PRED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1 +; PRED-NEXT: store i8 1, ptr [[TMP1]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; PRED: [[PRED_STORE_CONTINUE2]]: -; PRED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 -; PRED-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] +; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] ; PRED: [[PRED_STORE_IF3]]: -; PRED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2 -; PRED-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]] -; PRED-NEXT: [[TMP13:%.*]] = add i8 [[TMP0]], 2 -; PRED-NEXT: store i8 [[TMP13]], ptr [[TMP12]], align 1 +; PRED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 2 +; PRED-NEXT: store i8 2, ptr [[TMP2]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE4]] ; PRED: [[PRED_STORE_CONTINUE4]]: -; PRED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 -; PRED-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] +; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] ; PRED: [[PRED_STORE_IF5]]: -; PRED-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 3 -; PRED-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]] -; PRED-NEXT: [[TMP17:%.*]] = add i8 [[TMP0]], 3 -; PRED-NEXT: store i8 [[TMP17]], ptr [[TMP16]], align 1 +; PRED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 3 +; PRED-NEXT: store i8 3, ptr [[TMP3]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; PRED: [[PRED_STORE_CONTINUE6]]: -; PRED-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 -; PRED-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] ; PRED: [[PRED_STORE_IF7]]: -; PRED-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 4 -; PRED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP19]] -; PRED-NEXT: [[TMP21:%.*]] = add i8 [[TMP0]], 4 -; PRED-NEXT: store i8 [[TMP21]], ptr [[TMP20]], align 1 +; PRED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 4 +; PRED-NEXT: store i8 4, ptr [[TMP4]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE8]] ; PRED: [[PRED_STORE_CONTINUE8]]: -; PRED-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 -; PRED-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] ; PRED: [[PRED_STORE_IF9]]: -; PRED-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 5 -; PRED-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr 
[[DST]], i64 [[TMP23]] -; PRED-NEXT: [[TMP25:%.*]] = add i8 [[TMP0]], 5 -; PRED-NEXT: store i8 [[TMP25]], ptr [[TMP24]], align 1 +; PRED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 5 +; PRED-NEXT: store i8 5, ptr [[TMP5]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE10]] ; PRED: [[PRED_STORE_CONTINUE10]]: -; PRED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 -; PRED-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] +; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] ; PRED: [[PRED_STORE_IF11]]: -; PRED-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 6 -; PRED-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]] -; PRED-NEXT: [[TMP29:%.*]] = add i8 [[TMP0]], 6 -; PRED-NEXT: store i8 [[TMP29]], ptr [[TMP28]], align 1 +; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 6 +; PRED-NEXT: store i8 6, ptr [[TMP6]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; PRED: [[PRED_STORE_CONTINUE12]]: -; PRED-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 -; PRED-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]] +; PRED-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] ; PRED: [[PRED_STORE_IF13]]: -; PRED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 7 -; PRED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP31]] -; PRED-NEXT: [[TMP33:%.*]] = add i8 [[TMP0]], 7 -; PRED-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 +; PRED-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7 +; PRED-NEXT: store i8 7, ptr [[TMP7]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE14]] ; PRED: [[PRED_STORE_CONTINUE14]]: -; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) -; PRED-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; PRED-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] ; PRED: [[SCALAR_PH]]: @@ -962,7 +902,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1 ; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 7 -; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]] ; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; @@ -1163,7 +1103,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; DEFAULT-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP80]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP80]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -1194,7 +1134,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT: [[LOOP_LATCH]]: ; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; DEFAULT-NEXT: br i1 
[[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP27:![0-9]+]] +; DEFAULT-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP26:![0-9]+]] ; DEFAULT: [[EXIT]]: ; DEFAULT-NEXT: ret void ; @@ -1383,7 +1323,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: [[TMP84:%.*]] = extractelement <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP85:%.*]] = xor i1 [[TMP84]], true ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) -; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] ; PRED: [[SCALAR_PH]]: @@ -1413,7 +1353,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED: [[LOOP_LATCH]]: ; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]] +; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP8:![0-9]+]] ; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; @@ -1470,7 +1410,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) { ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; DEFAULT-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[SCALAR_PH]] ; DEFAULT: [[SCALAR_PH]]: @@ -1486,7 +1426,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) { ; DEFAULT-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32 ; DEFAULT-NEXT: store i32 [[T]], ptr [[DST]], align 4 ; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 21 -; DEFAULT-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP29:![0-9]+]] +; DEFAULT-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP28:![0-9]+]] ; DEFAULT: [[EXIT]]: ; DEFAULT-NEXT: ret void ; @@ -1533,7 +1473,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) { ; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; PRED-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; PRED-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] ; PRED: [[SCALAR_PH]]: @@ -1548,7 +1488,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) { ; PRED-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32 ; PRED-NEXT: store i32 [[T]], ptr [[DST]], align 4 ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 21 -; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP10:![0-9]+]] ; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll index 000e09004368e..764ac9a103170 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll @@ -48,14 +48,9 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF4: [[VECTOR_PH]]: ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] ; VF4: [[VECTOR_BODY]]: -; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] -; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] -; VF4-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1) -; VF4-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 -; VF4-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; VF4-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF4: [[PRED_STORE_IF]]: -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VF4-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 1 +; VF4-NEXT: [[TMP3:%.*]] = shl nsw i64 0, 1 ; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP3]] ; VF4-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 ; VF4-NEXT: store i64 [[TMP5]], ptr [[TMP4]], align 8 @@ -65,11 +60,9 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF4-NEXT: store i64 [[TMP8]], ptr [[TMP7]], align 8 ; VF4-NEXT: br label %[[PRED_STORE_CONTINUE]] ; VF4: [[PRED_STORE_CONTINUE]]: -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 -; VF4-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] +; VF4-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] ; VF4: [[PRED_STORE_IF1]]: -; VF4-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 -; VF4-NEXT: [[TMP11:%.*]] = shl nsw i64 [[TMP10]], 1 +; VF4-NEXT: [[TMP11:%.*]] = shl nsw i64 1, 1 ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP11]] ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 @@ -79,11 +72,9 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF4-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8 ; VF4-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; VF4: [[PRED_STORE_CONTINUE2]]: -; VF4-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 -; VF4-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] +; VF4-NEXT: br i1 false, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] ; VF4: [[PRED_STORE_IF3]]: -; VF4-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 2 -; VF4-NEXT: [[TMP19:%.*]] = shl nsw i64 [[TMP18]], 1 +; VF4-NEXT: [[TMP19:%.*]] = shl nsw i64 2, 1 ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP19]] ; VF4-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8 ; VF4-NEXT: store i64 [[TMP21]], ptr [[TMP20]], align 8 @@ -93,11 +84,9 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 ; VF4-NEXT: br label %[[PRED_STORE_CONTINUE4]] ; VF4: [[PRED_STORE_CONTINUE4]]: -; VF4-NEXT: [[TMP25:%.*]] = extractelement 
<4 x i1> [[TMP0]], i32 3 -; VF4-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]] +; VF4-NEXT: br i1 false, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] ; VF4: [[PRED_STORE_IF5]]: -; VF4-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP27:%.*]] = shl nsw i64 [[TMP26]], 1 +; VF4-NEXT: [[TMP27:%.*]] = shl nsw i64 3, 1 ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP27]] ; VF4-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP28]], align 8 @@ -107,9 +96,7 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8 ; VF4-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; VF4: [[PRED_STORE_CONTINUE6]]: -; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) -; VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF4-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF4: [[MIDDLE_BLOCK]]: ; VF4-NEXT: br label %[[EXIT:.*]] ; VF4: [[SCALAR_PH]]: @@ -126,7 +113,7 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF4-NEXT: store i64 [[L_1]], ptr [[DATA_1]], align 8 ; VF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 2 -; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] ; VF4: [[EXIT]]: ; VF4-NEXT: ret void ; @@ -318,7 +305,7 @@ define void @test_complex_add_float_tc_4(ptr %res, ptr noalias %A, ptr noalias % ; VF4-NEXT: store float [[ADD_1]], ptr [[GEP_RES_1]], align 4 ; VF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 4 -; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; VF4: [[EXIT]]: ; VF4-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index c7a0bcb71d112..b06119cb0c7cc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -71,46 +71,37 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 2) -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: 
[[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 ; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 -; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]] +; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: -; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 ; CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) -; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: @@ -130,7 +121,7 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -173,46 +164,37 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 2) -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> 
zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 ; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP13]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]] +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) -; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: @@ -232,7 +214,7 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index 84e36cbb33552..22667ae324d03 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -27,19 +27,11 @@ define void 
@drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ptr %out ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0:![0-9]+]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[INPUT]], i64 -1 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP0]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0:![0-9]+]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; entry: @@ -77,19 +69,11 @@ define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input, ptr %ou ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw float, ptr [[OUTPUT]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[INPUT]], i64 -1 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP0]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, 
<4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; entry: @@ -127,19 +111,11 @@ define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input, ptr %out ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nuw float, ptr [[OUTPUT]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[INPUT]], i64 -1 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP0]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; entry: @@ -178,19 +154,11 @@ define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true) -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP1]], i32 4, <4 x i1> [[TMP3]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[INPUT]], i64 -1 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = 
call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP0]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; entry: @@ -228,20 +196,11 @@ define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) -; CHECK-NEXT: [[TMP2:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <4 x i64> [[TMP2]], splat (i64 2) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[INPUT]], <4 x i64> [[TMP3]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[INPUT]], <4 x i64> +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; entry: @@ -280,22 +239,13 @@ define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr %out ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 1) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], <4 x i64> [[TMP2]] -; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], <4 x i64> +; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[PTRS]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x 
float> @llvm.masked.load.v4f32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[TMP5]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP6]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; entry: @@ -337,18 +287,11 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP2]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 0, 1 +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 1, 1 +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 2, 1 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 3, 1 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP8]] @@ -358,14 +301,11 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3 ; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP10]], i32 4, <4 x i1> [[TMP19]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[TMP0]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP10]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> 
zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 0 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP21]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; entry: @@ -406,16 +346,8 @@ define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: store <4 x i64> , ptr [[OUTPUT]], align 4 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; entry: @@ -451,22 +383,10 @@ define void @drop_scalar_exact(ptr noalias nocapture readonly %input, ptr %outpu ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP0]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = sdiv i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP5]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[TMP4]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[INPUT]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, 
<4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; entry: @@ -519,7 +439,7 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; entry: @@ -558,22 +478,11 @@ define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP0]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[INPUT]], <4 x i64> [[TMP5]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP6]], i32 4, <4 x i1> [[TMP4]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP7]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[INPUT]], <4 x i64> +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; entry: @@ -615,16 +524,8 @@ define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2) -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> 
[[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: store <4 x i64> , ptr [[OUTPUT]], align 4 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; entry: @@ -718,15 +619,12 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> , [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], poison +; CHECK-NEXT: [[TMP4:%.*]] = add i64 0, poison ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP23]], align 1 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i8> poison, i8 [[TMP6]], i32 0 @@ -736,8 +634,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] ; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], poison +; CHECK-NEXT: [[TMP11:%.*]] = add i64 1, poison ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP11]] ; CHECK-NEXT: [[TMP26:%.*]] = load i8, ptr [[TMP25]], align 1 ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP8]], i8 [[TMP26]], i32 1 @@ -747,8 +644,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 ; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] ; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], poison +; CHECK-NEXT: [[TMP18:%.*]] = add i64 2, poison ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP18]] ; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP19]], align 1 ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP29]], i8 [[TMP20]], i32 2 @@ -756,10 +652,9 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK: [[PRED_LOAD_CONTINUE4]]: ; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i8> [ [[TMP29]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], %[[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; 
CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] ; CHECK: [[PRED_LOAD_IF5]]: -; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP30]], poison +; CHECK-NEXT: [[TMP12:%.*]] = add i64 3, poison ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP12]] ; CHECK-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP13]], align 1 ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP27]], i32 3 @@ -767,11 +662,8 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK: [[PRED_LOAD_CONTINUE6]]: ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP15]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[DST]], align 4 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -814,15 +706,12 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> , [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP9]], poison +; CHECK-NEXT: [[TMP16:%.*]] = add i64 0, poison ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP16]] ; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP23]], align 1 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i8> poison, i8 [[TMP6]], i32 0 @@ -832,8 +721,7 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] ; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[TMP26]], poison +; CHECK-NEXT: [[TMP29:%.*]] = add i64 1, poison ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP29]] ; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP30]], align 1 ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP25]], i8 [[TMP13]], i32 1 @@ -843,8 +731,7 @@ define void 
@recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] ; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], poison +; CHECK-NEXT: [[TMP18:%.*]] = add i64 2, poison ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP18]] ; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP19]], align 1 ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP20]], i32 2 @@ -852,10 +739,9 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst ; CHECK: [[PRED_LOAD_CONTINUE4]]: ; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i8> [ [[TMP15]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], %[[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] ; CHECK: [[PRED_LOAD_IF5]]: -; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP31]], poison +; CHECK-NEXT: [[TMP7:%.*]] = add i64 3, poison ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP7]] ; CHECK-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP27]], i32 3 @@ -866,11 +752,8 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> poison ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[PREDPHI7]], i32 3 ; CHECK-NEXT: store i64 [[TMP12]], ptr [[AUX]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP10]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[DST]], align 4 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -926,7 +809,7 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index 6d562be03a9b0..74bb8415d0ebb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -512,16 +512,8 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: 
[[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VEC_IND]] to <2 x double> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: store <4 x double> , ptr [[DST]], align 8 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: @@ -535,7 +527,7 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_0]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -633,16 +625,8 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VEC_IND]] to <2 x double> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> zeroinitializer, <4 x i32> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: store <4 x double> , ptr [[DST]], align 8 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: @@ -656,7 +640,7 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_1]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -696,8 +680,6 @@ attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" } ; CHECK: [[META10]] = distinct !{[[META10]], [[META8]]} ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]} ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]]} -; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]} +; CHECK: [[LOOP13]] = 
distinct !{[[LOOP13]], [[META2]], [[META1]]} ; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]} -; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]], [[META2]]} -; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll index 1225e0fc583e1..732983a708c51 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll @@ -67,24 +67,19 @@ define void @foo(i32 %n) { ; AVX-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; AVX-NEXT: br label %[[VECTOR_BODY:.*]] ; AVX: [[VECTOR_BODY]]: -; AVX-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_LATCH:.*]] ] -; AVX-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <8 x i64> [[VEC_IND]] -; AVX-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[VEC_IND]] to <8 x i32> -; AVX-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP1]], <8 x ptr> [[TMP0]], i32 4, <8 x i1> splat (i1 true)) -; AVX-NEXT: [[TMP7:%.*]] = trunc <8 x i64> [[VEC_IND]] to <8 x i32> -; AVX-NEXT: [[TMP2:%.*]] = add nsw <8 x i32> [[TMP7]], [[BROADCAST_SPLAT]] +; AVX-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> , <8 x ptr> getelementptr inbounds ([8 x i32], ptr @arr2, <8 x i64> zeroinitializer, <8 x i64> ), i32 4, <8 x i1> splat (i1 true)) +; AVX-NEXT: [[TMP2:%.*]] = add nsw <8 x i32> , [[BROADCAST_SPLAT]] ; AVX-NEXT: br label %[[FOR_BODY31:.*]] ; AVX: [[FOR_BODY31]]: ; AVX-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ] -; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <8 x i64> [[VEC_PHI]], <8 x i64> [[VEC_IND]] +; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <8 x i64> [[VEC_PHI]], <8 x i64> ; AVX-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP2]], <8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true)) ; AVX-NEXT: [[TMP4]] = add nuw nsw <8 x i64> [[VEC_PHI]], splat (i64 1) ; AVX-NEXT: [[TMP5:%.*]] = icmp eq <8 x i64> [[TMP4]], splat (i64 8) ; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i32 0 -; AVX-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[FOR_BODY31]] +; AVX-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH:.*]], label %[[FOR_BODY31]], !llvm.loop [[LOOP0:![0-9]+]] ; AVX: [[VECTOR_LATCH]]: -; AVX-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) -; AVX-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; AVX-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; AVX: [[MIDDLE_BLOCK]]: ; AVX-NEXT: br i1 true, [[FOR_END10:label %.*]], label %[[SCALAR_PH]] ; AVX: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll index 937bc1807d8a8..0b58bb65e2bc5 100644 --- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll @@ -114,10 +114,8 @@ exit: define void @cast_induction_tail_folding(ptr %A) { ; VF4-LABEL: @cast_induction_tail_folding( -; VF4: [[INDEX:%.+]] = phi i32 [ 0, %vector.ph ] -; VF4-NEXT: [[VEC_IND:%.+]] = phi <4 x i32> [ , %vector.ph ] -; VF4-NEXT: 
= icmp ule <4 x i32> [[VEC_IND]], splat (i32 2) -; VF4-NEXT: = sext <4 x i32> [[VEC_IND]] to <4 x i64> +; VF4-LABEL: vector.body: +; VF4-NEXT: br i1 true, label %pred.store.if, label %pred.store.continue ; IC2-LABEL: @cast_induction_tail_folding( ; IC2: [[INDEX:%.+]] = phi i32 [ 0, %vector.ph ] diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index b500acb797828..ac530d2058ff1 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -216,47 +216,36 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 3 ; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x ptr> [[TMP35]], ptr [[TMP6]], i32 1 ; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x ptr> [[TMP36]], ptr [[TMP7]], i32 2 ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x ptr> [[TMP37]], ptr [[TMP8]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] ; CHECK: [[PRED_LOAD_CONTINUE]]: ; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 -; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] +; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] ; CHECK: [[PRED_LOAD_IF1]]: ; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP14]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 -; 
CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] ; CHECK: [[PRED_LOAD_IF3]]: ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP18]], i32 2 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] ; CHECK: [[PRED_LOAD_CONTINUE4]]: ; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP16]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP19]], %[[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 -; CHECK-NEXT: br i1 [[TMP21]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] +; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] ; CHECK: [[PRED_LOAD_IF5]]: ; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP22]], i32 3 @@ -265,37 +254,31 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) { ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP20]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], %[[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP25:%.*]] = add <4 x i32> [[TMP24]], splat (i32 10) ; CHECK-NEXT: [[TMP26:%.*]] = add <4 x i32> [[TMP24]], [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 -; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP26]], i32 0 ; CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP5]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 -; CHECK-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] ; CHECK: [[PRED_STORE_IF7]]: ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP26]], i32 1 ; CHECK-NEXT: store i32 [[TMP30]], ptr [[TMP6]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]] ; CHECK: [[PRED_STORE_CONTINUE8]]: -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 -; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] ; CHECK: [[PRED_STORE_IF9]]: ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP26]], i32 2 ; CHECK-NEXT: store i32 [[TMP32]], ptr [[TMP7]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]] ; CHECK: [[PRED_STORE_CONTINUE10]]: -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 -; CHECK-NEXT: br i1 [[TMP33]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12]] +; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] ; CHECK: [[PRED_STORE_IF11]]: ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i32> [[TMP26]], i32 3 ; CHECK-NEXT: store i32 [[TMP34]], ptr [[TMP8]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; CHECK: [[PRED_STORE_CONTINUE12]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], 
!llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: @@ -313,7 +296,7 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) { ; CHECK-NEXT: store i32 [[RES]], ptr [[GEP_IV]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -349,6 +332,5 @@ exit: ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]} ;. From 851cf773896e8a8be73c505e2b8a3f380fcac7ef Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Fri, 15 Aug 2025 01:57:22 -0700 Subject: [PATCH 2/4] Add TODO --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 5d0225508f259..be7a7c1db4d0c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1497,6 +1497,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, // The vector loop region only executes once. If possible, completely remove // the region, otherwise replace the terminator controlling the latch with // (BranchOnCond true). + // TODO: Support all widen induction recipes (e.g., + // VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe). auto *Header = cast(VectorRegion->getEntry()); if (all_of(Header->phis(), [](VPRecipeBase &Phi) { if (auto *R = dyn_cast(&Phi)) From 12608094df734ee7e268485a28e3f86e65549869 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Tue, 19 Aug 2025 00:04:36 -0700 Subject: [PATCH 3/4] Update TODO --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index be7a7c1db4d0c..ef863b6f55d7b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1497,8 +1497,9 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, // The vector loop region only executes once. If possible, completely remove // the region, otherwise replace the terminator controlling the latch with // (BranchOnCond true). - // TODO: Support all widen induction recipes (e.g., - // VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe). + // TODO: VPWidenIntOrFpInductionRecipe is only partially supported; add + // support for other non-canonical widen induction recipes (e.g., + // VPWidenPointerInductionRecipe). 
auto *Header = cast(VectorRegion->getEntry()); if (all_of(Header->phis(), [](VPRecipeBase &Phi) { if (auto *R = dyn_cast(&Phi)) From d90c9ec03ab55f567802fb5b72a8f6b6188fc98c Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Tue, 19 Aug 2025 16:24:06 -0700 Subject: [PATCH 4/4] Update tests after rebase --- .../AArch64/conditional-branches-cost.ll | 250 +++++------------- .../single-early-exit-cond-poison.ll | 30 +-- 2 files changed, 74 insertions(+), 206 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 01f2ea080377d..c914aa3582e5c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -609,185 +609,65 @@ exit: } define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { -; DEFAULT-LABEL: define void @low_trip_count_fold_tail_scalarized_store( -; DEFAULT-SAME: ptr [[DST:%.*]]) { -; DEFAULT-NEXT: [[ENTRY:.*:]] -; DEFAULT-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; DEFAULT: [[VECTOR_PH]]: -; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] -; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ] -; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ] -; DEFAULT-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8 -; DEFAULT-NEXT: [[TMP1:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 6) -; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 -; DEFAULT-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; DEFAULT: [[PRED_STORE_IF]]: -; DEFAULT-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] -; DEFAULT-NEXT: [[TMP5:%.*]] = add i8 [[TMP0]], 0 -; DEFAULT-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]] -; DEFAULT: [[PRED_STORE_CONTINUE]]: -; DEFAULT-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 -; DEFAULT-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] -; DEFAULT: [[PRED_STORE_IF1]]: -; DEFAULT-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 -; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] -; DEFAULT-NEXT: [[TMP9:%.*]] = add i8 [[TMP0]], 1 -; DEFAULT-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE2]] -; DEFAULT: [[PRED_STORE_CONTINUE2]]: -; DEFAULT-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 -; DEFAULT-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] -; DEFAULT: [[PRED_STORE_IF3]]: -; DEFAULT-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2 -; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]] -; DEFAULT-NEXT: [[TMP13:%.*]] = add i8 [[TMP0]], 2 -; DEFAULT-NEXT: store i8 [[TMP13]], ptr [[TMP12]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE4]] -; DEFAULT: [[PRED_STORE_CONTINUE4]]: -; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 -; DEFAULT-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] -; DEFAULT: [[PRED_STORE_IF5]]: -; DEFAULT-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 3 -; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]] -; 
DEFAULT-NEXT: [[TMP17:%.*]] = add i8 [[TMP0]], 3 -; DEFAULT-NEXT: store i8 [[TMP17]], ptr [[TMP16]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE6]] -; DEFAULT: [[PRED_STORE_CONTINUE6]]: -; DEFAULT-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 -; DEFAULT-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] -; DEFAULT: [[PRED_STORE_IF7]]: -; DEFAULT-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 4 -; DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP19]] -; DEFAULT-NEXT: [[TMP21:%.*]] = add i8 [[TMP0]], 4 -; DEFAULT-NEXT: store i8 [[TMP21]], ptr [[TMP20]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE8]] -; DEFAULT: [[PRED_STORE_CONTINUE8]]: -; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 -; DEFAULT-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] -; DEFAULT: [[PRED_STORE_IF9]]: -; DEFAULT-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 5 -; DEFAULT-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]] -; DEFAULT-NEXT: [[TMP25:%.*]] = add i8 [[TMP0]], 5 -; DEFAULT-NEXT: store i8 [[TMP25]], ptr [[TMP24]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE10]] -; DEFAULT: [[PRED_STORE_CONTINUE10]]: -; DEFAULT-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 -; DEFAULT-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] -; DEFAULT: [[PRED_STORE_IF11]]: -; DEFAULT-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 6 -; DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]] -; DEFAULT-NEXT: [[TMP29:%.*]] = add i8 [[TMP0]], 6 -; DEFAULT-NEXT: store i8 [[TMP29]], ptr [[TMP28]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE12]] -; DEFAULT: [[PRED_STORE_CONTINUE12]]: -; DEFAULT-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 -; DEFAULT-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]] -; DEFAULT: [[PRED_STORE_IF13]]: -; DEFAULT-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 7 -; DEFAULT-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP31]] -; DEFAULT-NEXT: [[TMP33:%.*]] = add i8 [[TMP0]], 7 -; DEFAULT-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE14]] -; DEFAULT: [[PRED_STORE_CONTINUE14]]: -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) -; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] -; DEFAULT: [[MIDDLE_BLOCK]]: -; DEFAULT-NEXT: br [[EXIT:label %.*]] -; DEFAULT: [[SCALAR_PH]]: -; -; PRED-LABEL: define void @low_trip_count_fold_tail_scalarized_store( -; PRED-SAME: ptr [[DST:%.*]]) { -; PRED-NEXT: [[ENTRY:.*:]] -; PRED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; PRED: [[VECTOR_PH]]: -; PRED-NEXT: br label %[[VECTOR_BODY:.*]] -; PRED: [[VECTOR_BODY]]: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ] -; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ] -; PRED-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8 -; PRED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 6) -; PRED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 -; PRED-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; PRED: 
[[PRED_STORE_IF]]: -; PRED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; PRED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] -; PRED-NEXT: [[TMP5:%.*]] = add i8 [[TMP0]], 0 -; PRED-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] -; PRED: [[PRED_STORE_CONTINUE]]: -; PRED-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 -; PRED-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] -; PRED: [[PRED_STORE_IF1]]: -; PRED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 -; PRED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] -; PRED-NEXT: [[TMP9:%.*]] = add i8 [[TMP0]], 1 -; PRED-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE2]] -; PRED: [[PRED_STORE_CONTINUE2]]: -; PRED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 -; PRED-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] -; PRED: [[PRED_STORE_IF3]]: -; PRED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2 -; PRED-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]] -; PRED-NEXT: [[TMP13:%.*]] = add i8 [[TMP0]], 2 -; PRED-NEXT: store i8 [[TMP13]], ptr [[TMP12]], align 1 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE4]] -; PRED: [[PRED_STORE_CONTINUE4]]: -; PRED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 -; PRED-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] -; PRED: [[PRED_STORE_IF5]]: -; PRED-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 3 -; PRED-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]] -; PRED-NEXT: [[TMP17:%.*]] = add i8 [[TMP0]], 3 -; PRED-NEXT: store i8 [[TMP17]], ptr [[TMP16]], align 1 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE6]] -; PRED: [[PRED_STORE_CONTINUE6]]: -; PRED-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 -; PRED-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] -; PRED: [[PRED_STORE_IF7]]: -; PRED-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 4 -; PRED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP19]] -; PRED-NEXT: [[TMP21:%.*]] = add i8 [[TMP0]], 4 -; PRED-NEXT: store i8 [[TMP21]], ptr [[TMP20]], align 1 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE8]] -; PRED: [[PRED_STORE_CONTINUE8]]: -; PRED-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 -; PRED-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] -; PRED: [[PRED_STORE_IF9]]: -; PRED-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 5 -; PRED-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]] -; PRED-NEXT: [[TMP25:%.*]] = add i8 [[TMP0]], 5 -; PRED-NEXT: store i8 [[TMP25]], ptr [[TMP24]], align 1 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE10]] -; PRED: [[PRED_STORE_CONTINUE10]]: -; PRED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 -; PRED-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] -; PRED: [[PRED_STORE_IF11]]: -; PRED-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 6 -; PRED-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]] -; PRED-NEXT: [[TMP29:%.*]] = add i8 [[TMP0]], 6 -; PRED-NEXT: store i8 [[TMP29]], ptr [[TMP28]], align 1 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE12]] -; PRED: [[PRED_STORE_CONTINUE12]]: -; PRED-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 -; PRED-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF13:.*]], label 
%[[PRED_STORE_CONTINUE14]] -; PRED: [[PRED_STORE_IF13]]: -; PRED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 7 -; PRED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP31]] -; PRED-NEXT: [[TMP33:%.*]] = add i8 [[TMP0]], 7 -; PRED-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE14]] -; PRED: [[PRED_STORE_CONTINUE14]]: -; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) -; PRED-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; PRED: [[MIDDLE_BLOCK]]: -; PRED-NEXT: br [[EXIT:label %.*]] -; PRED: [[SCALAR_PH]]: +; COMMON-LABEL: define void @low_trip_count_fold_tail_scalarized_store( +; COMMON-SAME: ptr [[DST:%.*]]) { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; COMMON: [[VECTOR_PH]]: +; COMMON-NEXT: br label %[[VECTOR_BODY:.*]] +; COMMON: [[VECTOR_BODY]]: +; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; COMMON: [[PRED_STORE_IF]]: +; COMMON-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 0 +; COMMON-NEXT: store i8 0, ptr [[TMP0]], align 1 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE]] +; COMMON: [[PRED_STORE_CONTINUE]]: +; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] +; COMMON: [[PRED_STORE_IF1]]: +; COMMON-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1 +; COMMON-NEXT: store i8 1, ptr [[TMP1]], align 1 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE2]] +; COMMON: [[PRED_STORE_CONTINUE2]]: +; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] +; COMMON: [[PRED_STORE_IF3]]: +; COMMON-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 2 +; COMMON-NEXT: store i8 2, ptr [[TMP2]], align 1 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE4]] +; COMMON: [[PRED_STORE_CONTINUE4]]: +; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] +; COMMON: [[PRED_STORE_IF5]]: +; COMMON-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 3 +; COMMON-NEXT: store i8 3, ptr [[TMP3]], align 1 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE6]] +; COMMON: [[PRED_STORE_CONTINUE6]]: +; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; COMMON: [[PRED_STORE_IF7]]: +; COMMON-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 4 +; COMMON-NEXT: store i8 4, ptr [[TMP4]], align 1 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE8]] +; COMMON: [[PRED_STORE_CONTINUE8]]: +; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; COMMON: [[PRED_STORE_IF9]]: +; COMMON-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 5 +; COMMON-NEXT: store i8 5, ptr [[TMP5]], align 1 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE10]] +; COMMON: [[PRED_STORE_CONTINUE10]]: +; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] +; COMMON: [[PRED_STORE_IF11]]: +; COMMON-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 6 +; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]] +; COMMON: [[PRED_STORE_CONTINUE12]]: +; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] +; COMMON: [[PRED_STORE_IF13]]: +; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7 +; COMMON-NEXT: store i8 7, ptr 
[[TMP7]], align 1 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE14]] +; COMMON: [[PRED_STORE_CONTINUE14]]: +; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; COMMON: [[MIDDLE_BLOCK]]: +; COMMON-NEXT: br [[EXIT:label %.*]] +; COMMON: [[SCALAR_PH]]: ; entry: br label %loop @@ -986,7 +866,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; DEFAULT-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP80]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP80]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] @@ -1177,7 +1057,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: [[TMP84:%.*]] = extractelement <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP85:%.*]] = xor i1 [[TMP84]], true ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) -; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br [[EXIT:label %.*]] ; PRED: [[SCALAR_PH]]: @@ -1235,7 +1115,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) { ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; DEFAULT-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[SCALAR_PH]] ; DEFAULT: [[SCALAR_PH]]: @@ -1283,7 +1163,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) { ; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; PRED-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; PRED-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br [[EXIT:label %.*]] ; PRED: [[SCALAR_PH]]: @@ -1409,7 +1289,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; DEFAULT-NEXT: store [[TMP23]], ptr [[TMP24]], align 1 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; DEFAULT-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] @@ -1471,7 +1351,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, 
ptr %C, i64 %n, i8 %y) #1 { ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]]) ; PRED-NEXT: [[TMP28:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP29:%.*]] = xor i1 [[TMP28]], true -; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br [[EXIT:label %.*]] ; PRED: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll index 660212378ae69..ad702595fdcfe 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll @@ -16,20 +16,13 @@ define noundef i32 @f(i32 noundef %g) { ; VF4IC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; VF4IC2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF4IC2: [[VECTOR_BODY]]: -; VF4IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF4IC2-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF4IC2-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; VF4IC2-NEXT: [[TMP0:%.*]] = shl nuw nsw <4 x i32> [[VEC_IND]], splat (i32 3) -; VF4IC2-NEXT: [[TMP1:%.*]] = shl nuw nsw <4 x i32> [[STEP_ADD]], splat (i32 3) -; VF4IC2-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], [[TMP0]] -; VF4IC2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], [[TMP1]] +; VF4IC2-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], +; VF4IC2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], ; VF4IC2-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer ; VF4IC2-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer -; VF4IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; VF4IC2-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP4]], [[TMP5]] ; VF4IC2-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) -; VF4IC2-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) -; VF4IC2-NEXT: br i1 true, label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF4IC2-NEXT: br label %[[MIDDLE_SPLIT:.*]] ; VF4IC2: [[MIDDLE_SPLIT]]: ; VF4IC2-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 ; VF4IC2-NEXT: br i1 [[TMP7]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] @@ -43,7 +36,7 @@ define noundef i32 @f(i32 noundef %g) { ; VF4IC2-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP11]], 4 ; VF4IC2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 [[TMP10]] ; VF4IC2-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; VF4IC2-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], [[TMP15]] +; VF4IC2-NEXT: [[TMP16:%.*]] = add i32 0, [[TMP15]] ; VF4IC2-NEXT: br label %[[RETURN]] ; VF4IC2: [[SCALAR_PH]]: ; VF4IC2-NEXT: br label %[[LOOP_HEADER:.*]] @@ -56,7 +49,7 @@ define noundef i32 @f(i32 noundef %g) { ; VF4IC2: [[LOOP_LATCH]]: ; VF4IC2-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; VF4IC2-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 8 -; VF4IC2-NEXT: br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; VF4IC2-NEXT: br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]], 
!llvm.loop [[LOOP0:![0-9]+]] ; VF4IC2: [[RETURN]]: ; VF4IC2-NEXT: [[RES:%.*]] = phi i32 [ [[SHR]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ [[TMP16]], %[[VECTOR_EARLY_EXIT]] ] ; VF4IC2-NEXT: ret i32 [[RES]] @@ -70,15 +63,10 @@ define noundef i32 @f(i32 noundef %g) { ; VF8IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; VF8IC1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8IC1: [[VECTOR_BODY]]: -; VF8IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF8IC1-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF8IC1-NEXT: [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 3) -; VF8IC1-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[BROADCAST_SPLAT]], [[TMP0]] +; VF8IC1-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[BROADCAST_SPLAT]], ; VF8IC1-NEXT: [[TMP2:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer -; VF8IC1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; VF8IC1-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]]) -; VF8IC1-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) -; VF8IC1-NEXT: br i1 true, label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF8IC1-NEXT: br label %[[MIDDLE_SPLIT:.*]] ; VF8IC1: [[MIDDLE_SPLIT]]: ; VF8IC1-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 ; VF8IC1-NEXT: br i1 [[TMP3]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] @@ -87,7 +75,7 @@ define noundef i32 @f(i32 noundef %g) { ; VF8IC1: [[VECTOR_EARLY_EXIT]]: ; VF8IC1-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 true) ; VF8IC1-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; VF8IC1-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], [[TMP6]] +; VF8IC1-NEXT: [[TMP7:%.*]] = add i32 0, [[TMP6]] ; VF8IC1-NEXT: br label %[[RETURN]] ; VF8IC1: [[SCALAR_PH]]: ; VF8IC1-NEXT: br label %[[LOOP_HEADER:.*]] @@ -100,7 +88,7 @@ define noundef i32 @f(i32 noundef %g) { ; VF8IC1: [[LOOP_LATCH]]: ; VF8IC1-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; VF8IC1-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 8 -; VF8IC1-NEXT: br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; VF8IC1-NEXT: br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] ; VF8IC1: [[RETURN]]: ; VF8IC1-NEXT: [[RES:%.*]] = phi i32 [ [[SHR]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VECTOR_EARLY_EXIT]] ] ; VF8IC1-NEXT: ret i32 [[RES]]