diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6c382549ee099..8eb2d21dfeb25 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -27,8 +27,8 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Analysis/IVDescriptors.h" +#include "llvm/Analysis/InstSimplifyFolder.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PatternMatch.h" @@ -939,12 +939,12 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) { } } -/// Try to fold \p R using TargetFolder to a constant. Will succeed and return a -/// non-nullptr Value for a handled \p Opcode if all \p Operands are constant. -static Value *tryToConstantFold(const VPRecipeBase &R, unsigned Opcode, - ArrayRef Operands, - const DataLayout &DL, - VPTypeAnalysis &TypeInfo) { +/// Try to fold \p R using InstSimplifyFolder. Will succeed and return a +/// non-nullptr Value for a handled \p Opcode if corresponding \p Operands are +/// foldable live-ins. +static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode, + ArrayRef Operands, + const DataLayout &DL, VPTypeAnalysis &TypeInfo) { SmallVector Ops; for (VPValue *Op : Operands) { if (!Op->isLiveIn() || !Op->getLiveInIRValue()) @@ -952,7 +952,7 @@ static Value *tryToConstantFold(const VPRecipeBase &R, unsigned Opcode, Ops.push_back(Op->getLiveInIRValue()); } - TargetFolder Folder(DL); + InstSimplifyFolder Folder(DL); if (Instruction::isBinaryOp(Opcode)) return Folder.FoldBinOp(static_cast(Opcode), Ops[0], Ops[1]); @@ -994,15 +994,16 @@ static Value *tryToConstantFold(const VPRecipeBase &R, unsigned Opcode, static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { using namespace llvm::VPlanPatternMatch; - // Constant folding. + // Simplification of live-in IR values for SingleDef recipes using + // InstSimplifyFolder. if (TypeSwitch(&R) .Case([&](auto *I) { VPlan *Plan = R.getParent()->getPlan(); const DataLayout &DL = Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout(); - Value *V = tryToConstantFold(*I, I->getOpcode(), I->operands(), DL, - TypeInfo); + Value *V = tryToFoldLiveIns(*I, I->getOpcode(), I->operands(), DL, + TypeInfo); if (V) I->replaceAllUsesWith(Plan->getOrAddLiveIn(V)); return V; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll index 1c91b4956b309..8c2a48aa38695 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll @@ -79,11 +79,10 @@ define void @powi_call(ptr %P) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[P]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[WIDE_LOAD]], i32 3) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[P]], i32 0 ; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TMP4]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll index 3e3d88e3cea2c..73ef8534bb66c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll @@ -160,45 +160,45 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B]], i64 0 ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[A]] to i16 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i16> poison, i16 [[B]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = lshr <16 x i16> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = trunc nuw <16 x i16> [[TMP4]] to <16 x i8> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = mul <16 x i16> [[TMP1]], [[BROADCAST_SPLATINSERT]] +; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = trunc nuw <16 x i16> [[TMP3]] to <16 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[INDEX]] to i64 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 16 -; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1 -; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[INDEX]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 16 +; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP7]], align 1 +; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[A]] to i16 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x i16> poison, i16 [[TMP11]], i64 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i16> [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = lshr <8 x i16> [[TMP14]], -; CHECK-NEXT: [[TMP16:%.*]] = trunc nuw <8 x i16> [[TMP15]] to <8 x i8> -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[A]] to i16 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i16> poison, i16 [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = mul <8 x i16> [[TMP11]], [[BROADCAST_SPLATINSERT3]] +; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[TMP12]], +; CHECK-NEXT: [[TMP14:%.*]] = trunc nuw <8 x i16> [[TMP13]] to <8 x i8> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <8 x i8> [[TMP14]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX7:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[INDEX7]] to i64 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP18]] -; CHECK-NEXT: store <8 x i8> [[TMP17]], ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[INDEX7]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP16]] +; CHECK-NEXT: store <8 x i8> [[TMP15]], ptr [[TMP17]], align 1 ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i32 [[INDEX7]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT8]], 1000 -; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT8]], 1000 +; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index cc972d3040adb..e97bb857fdba3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -23,16 +23,16 @@ define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i8> [[WIDE_LOAD]], splat (i8 2) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i8> [[WIDE_LOAD]], splat (i8 2) +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 +; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -47,16 +47,16 @@ define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX4]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i8> [[WIDE_LOAD5]], splat (i8 2) -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX4]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 -; CHECK-NEXT: store <4 x i8> [[TMP11]], ptr [[TMP13]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX4]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[WIDE_LOAD5]], splat (i8 2) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX4]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store <4 x i8> [[TMP9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -70,8 +70,8 @@ define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP15]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP13]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]] @@ -124,16 +124,16 @@ define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <16 x i8> [[WIDE_LOAD]], splat (i8 2) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw <16 x i8> [[WIDE_LOAD]], splat (i8 2) +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 +; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -148,16 +148,16 @@ define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX4]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw <4 x i8> [[WIDE_LOAD5]], splat (i8 2) -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX4]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 -; CHECK-NEXT: store <4 x i8> [[TMP11]], ptr [[TMP13]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX4]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw <4 x i8> [[WIDE_LOAD5]], splat (i8 2) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX4]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store <4 x i8> [[TMP9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -171,8 +171,8 @@ define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i8 [[TMP15]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i8 [[TMP13]], 2 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX3]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -216,16 +216,16 @@ define void @add_b(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[WIDE_LOAD]], splat (i16 2) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[TMP6]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[WIDE_LOAD]], splat (i16 2) +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0 +; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[TMP5]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -239,8 +239,8 @@ define void @add_b(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[CONV8:%.*]] = zext i16 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[CONV8:%.*]] = zext i16 [[TMP7]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV8]], 2 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]] @@ -291,17 +291,17 @@ define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i16> [[TMP4]], splat (i16 2) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP7]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i16> [[TMP3]], splat (i16 2) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 +; CHECK-NEXT: store <16 x i16> [[TMP4]], ptr [[TMP6]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -316,17 +316,17 @@ define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX4]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i8> [[WIDE_LOAD5]] to <4 x i16> -; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i16> [[TMP12]], splat (i16 2) -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP14]], i32 0 -; CHECK-NEXT: store <4 x i16> [[TMP13]], ptr [[TMP15]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX4]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[WIDE_LOAD5]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i16> [[TMP10]], splat (i16 2) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX4]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i32 0 +; CHECK-NEXT: store <4 x i16> [[TMP11]], ptr [[TMP13]], align 2 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4 -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -340,8 +340,8 @@ define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP17]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP15]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]] @@ -389,17 +389,17 @@ define void @add_d(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[TMP4]], splat (i32 2) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <8 x i32> [[TMP3]], splat (i32 2) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 +; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -413,8 +413,8 @@ define void @add_d(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 @@ -462,32 +462,30 @@ define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV13]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i32> poison, i32 [[CONV11]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT2]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT3]] to <16 x i8> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[ARG1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i8> poison, i8 [[ARG2]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT2]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 -; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 4) -; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[TMP6]], splat (i8 32) -; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i8> [[WIDE_LOAD]], splat (i8 51) -; CHECK-NEXT: [[TMP9:%.*]] = mul <16 x i8> [[TMP8]], splat (i8 60) -; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i8> [[TMP7]], [[TMP1]] -; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP9]], splat (i8 -4) -; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i8> [[TMP11]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = mul <16 x i8> [[TMP12]], [[TMP10]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0 -; CHECK-NEXT: store <16 x i8> [[TMP13]], ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 4) +; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i8> [[TMP3]], splat (i8 32) +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i8> [[WIDE_LOAD]], splat (i8 51) +; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i8> [[TMP5]], splat (i8 60) +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i8> [[TMP4]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = and <16 x i8> [[TMP6]], splat (i8 -4) +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i8> [[TMP8]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP10:%.*]] = mul <16 x i8> [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 +; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP12]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -499,32 +497,30 @@ define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[CONV13]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[CONV11]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT7]] to <4 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT9]] to <4 x i8> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[ARG1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[ARG2]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX10]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP21]], align 1 -; CHECK-NEXT: [[TMP22:%.*]] = shl <4 x i8> [[WIDE_LOAD11]], splat (i8 4) -; CHECK-NEXT: [[TMP23:%.*]] = add <4 x i8> [[TMP22]], splat (i8 32) -; CHECK-NEXT: [[TMP24:%.*]] = or <4 x i8> [[WIDE_LOAD11]], splat (i8 51) -; CHECK-NEXT: [[TMP25:%.*]] = mul <4 x i8> [[TMP24]], splat (i8 60) -; CHECK-NEXT: [[TMP26:%.*]] = and <4 x i8> [[TMP23]], [[TMP17]] -; CHECK-NEXT: [[TMP27:%.*]] = and <4 x i8> [[TMP25]], splat (i8 -4) -; CHECK-NEXT: [[TMP28:%.*]] = xor <4 x i8> [[TMP27]], [[TMP18]] -; CHECK-NEXT: [[TMP29:%.*]] = mul <4 x i8> [[TMP28]], [[TMP26]] -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX10]] -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i32 0 -; CHECK-NEXT: store <4 x i8> [[TMP29]], ptr [[TMP31]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX10]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = shl <4 x i8> [[WIDE_LOAD11]], splat (i8 4) +; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i8> [[TMP16]], splat (i8 32) +; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i8> [[WIDE_LOAD11]], splat (i8 51) +; CHECK-NEXT: [[TMP19:%.*]] = mul <4 x i8> [[TMP18]], splat (i8 60) +; CHECK-NEXT: [[TMP20:%.*]] = and <4 x i8> [[TMP17]], [[BROADCAST_SPLAT7]] +; CHECK-NEXT: [[TMP21:%.*]] = and <4 x i8> [[TMP19]], splat (i8 -4) +; CHECK-NEXT: [[TMP22:%.*]] = xor <4 x i8> [[TMP21]], [[BROADCAST_SPLAT9]] +; CHECK-NEXT: [[TMP23:%.*]] = mul <4 x i8> [[TMP22]], [[TMP20]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX10]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP24]], i32 0 +; CHECK-NEXT: store <4 x i8> [[TMP23]], ptr [[TMP25]], align 1 ; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], 4 -; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP32]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -538,8 +534,8 @@ define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP33]] to i32 +; CHECK-NEXT: [[TMP27:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP27]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = shl i32 [[CONV]], 4 ; CHECK-NEXT: [[CONV2:%.*]] = add nuw nsw i32 [[ADD]], 32 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[CONV]], 51 @@ -608,34 +604,32 @@ define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV13]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i32> poison, i32 [[CONV11]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT2]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT3]] to <16 x i8> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[ARG1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i8> poison, i8 [[ARG2]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT2]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[TMP5]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i16> [[WIDE_LOAD]] to <16 x i8> -; CHECK-NEXT: [[TMP7:%.*]] = shl <16 x i8> [[TMP6]], splat (i8 4) -; CHECK-NEXT: [[TMP8:%.*]] = add <16 x i8> [[TMP7]], splat (i8 32) -; CHECK-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP6]], splat (i8 -52) -; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], splat (i8 51) -; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i8> [[TMP10]], splat (i8 60) -; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i8> [[TMP8]], [[TMP1]] -; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i8> [[TMP11]], splat (i8 -4) -; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i8> [[TMP13]], [[TMP2]] -; CHECK-NEXT: [[TMP15:%.*]] = mul <16 x i8> [[TMP14]], [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; CHECK-NEXT: store <16 x i8> [[TMP15]], ptr [[TMP17]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[TMP2]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i16> [[WIDE_LOAD]] to <16 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i8> [[TMP3]], splat (i8 4) +; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[TMP4]], splat (i8 32) +; CHECK-NEXT: [[TMP6:%.*]] = and <16 x i8> [[TMP3]], splat (i8 -52) +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i8> [[TMP6]], splat (i8 51) +; CHECK-NEXT: [[TMP8:%.*]] = mul <16 x i8> [[TMP7]], splat (i8 60) +; CHECK-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i8> [[TMP8]], splat (i8 -4) +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i8> [[TMP10]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP12:%.*]] = mul <16 x i8> [[TMP11]], [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 +; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -647,34 +641,32 @@ define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[CONV13]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[CONV11]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT7]] to <4 x i8> -; CHECK-NEXT: [[TMP20:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT9]] to <4 x i8> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[ARG1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[ARG2]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX10]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[TMP22]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i16>, ptr [[TMP23]], align 2 -; CHECK-NEXT: [[TMP24:%.*]] = trunc <4 x i16> [[WIDE_LOAD11]] to <4 x i8> -; CHECK-NEXT: [[TMP25:%.*]] = shl <4 x i8> [[TMP24]], splat (i8 4) -; CHECK-NEXT: [[TMP26:%.*]] = add <4 x i8> [[TMP25]], splat (i8 32) -; CHECK-NEXT: [[TMP27:%.*]] = and <4 x i8> [[TMP24]], splat (i8 -52) -; CHECK-NEXT: [[TMP28:%.*]] = or <4 x i8> [[TMP27]], splat (i8 51) -; CHECK-NEXT: [[TMP29:%.*]] = mul <4 x i8> [[TMP28]], splat (i8 60) -; CHECK-NEXT: [[TMP30:%.*]] = and <4 x i8> [[TMP26]], [[TMP19]] -; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i8> [[TMP29]], splat (i8 -4) -; CHECK-NEXT: [[TMP32:%.*]] = xor <4 x i8> [[TMP31]], [[TMP20]] -; CHECK-NEXT: [[TMP33:%.*]] = mul <4 x i8> [[TMP32]], [[TMP30]] -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX10]] -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i32 0 -; CHECK-NEXT: store <4 x i8> [[TMP33]], ptr [[TMP35]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX10]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i16>, ptr [[TMP17]], align 2 +; CHECK-NEXT: [[TMP18:%.*]] = trunc <4 x i16> [[WIDE_LOAD11]] to <4 x i8> +; CHECK-NEXT: [[TMP19:%.*]] = shl <4 x i8> [[TMP18]], splat (i8 4) +; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i8> [[TMP19]], splat (i8 32) +; CHECK-NEXT: [[TMP21:%.*]] = and <4 x i8> [[TMP18]], splat (i8 -52) +; CHECK-NEXT: [[TMP22:%.*]] = or <4 x i8> [[TMP21]], splat (i8 51) +; CHECK-NEXT: [[TMP23:%.*]] = mul <4 x i8> [[TMP22]], splat (i8 60) +; CHECK-NEXT: [[TMP24:%.*]] = and <4 x i8> [[TMP20]], [[BROADCAST_SPLAT7]] +; CHECK-NEXT: [[TMP25:%.*]] = and <4 x i8> [[TMP23]], splat (i8 -4) +; CHECK-NEXT: [[TMP26:%.*]] = xor <4 x i8> [[TMP25]], [[BROADCAST_SPLAT9]] +; CHECK-NEXT: [[TMP27:%.*]] = mul <4 x i8> [[TMP26]], [[TMP24]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX10]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP28]], i32 0 +; CHECK-NEXT: store <4 x i8> [[TMP27]], ptr [[TMP29]], align 1 ; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], 4 -; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP36]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -688,8 +680,8 @@ define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP37:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP37]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = shl i32 [[CONV]], 4 ; CHECK-NEXT: [[CONV2:%.*]] = add nsw i32 [[ADD]], 32 ; CHECK-NEXT: [[OR:%.*]] = and i32 [[CONV]], 204 @@ -758,21 +750,21 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <16 x i32> [[TMP4]], splat (i32 2) -; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i32> [[TMP5]] to <16 x i8> -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 -; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1 +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <16 x i32> [[TMP3]], splat (i32 2) +; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[TMP4]] to <16 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 +; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP7]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP3]], i32 15 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -787,8 +779,8 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[A_PHI:%.*]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV]] = zext i8 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV]] = zext i8 [[TMP9]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]] @@ -840,22 +832,22 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 -; CHECK-NEXT: [[TMP6]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <16 x i32> [[TMP6]], splat (i32 2) -; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i8> -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP10]], align 1 +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <16 x i32> [[TMP5]], splat (i32 2) +; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8> +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 +; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP5]], i32 14 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP5]], i32 15 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -870,8 +862,8 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[A_PHI]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV]] = zext i8 [[TMP12]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV]] = zext i8 [[TMP11]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index 04b859337e663..4d3afd71921d6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -24,11 +24,10 @@ define void @always_vectorize(ptr %p, i32 %x) { ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] ; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 0 -; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; DEFAULT-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0 ; DEFAULT-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP5]], align 4 ; DEFAULT-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; DEFAULT: [[MIDDLE_BLOCK]]: @@ -57,11 +56,10 @@ define void @always_vectorize(ptr %p, i32 %x) { ; OPTSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; OPTSIZE-NEXT: br label %[[VECTOR_BODY:.*]] ; OPTSIZE: [[VECTOR_BODY]]: -; OPTSIZE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 0 -; OPTSIZE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; OPTSIZE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0 ; OPTSIZE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; OPTSIZE-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; OPTSIZE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; OPTSIZE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0 ; OPTSIZE-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP5]], align 4 ; OPTSIZE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; OPTSIZE: [[MIDDLE_BLOCK]]: @@ -90,11 +88,10 @@ define void @always_vectorize(ptr %p, i32 %x) { ; MINSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; MINSIZE-NEXT: br label %[[VECTOR_BODY:.*]] ; MINSIZE: [[VECTOR_BODY]]: -; MINSIZE-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 0 -; MINSIZE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; MINSIZE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0 ; MINSIZE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; MINSIZE-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; MINSIZE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; MINSIZE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0 ; MINSIZE-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4 ; MINSIZE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; MINSIZE: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll index 72a1bb2c00e54..b92d8406f614a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -18,7 +18,6 @@ define void @test_invar_gep(ptr %dst) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -36,7 +35,7 @@ define void @test_invar_gep(ptr %dst) #0 { ; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 4 ; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], 1 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP9]], i32 [[TMP17]] -; CHECK-NEXT: store i64 [[TMP18]], ptr [[TMP14]], align 1 +; CHECK-NEXT: store i64 [[TMP18]], ptr [[TMP14:%.*]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -48,7 +47,7 @@ define void @test_invar_gep(ptr %dst) #0 { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP_INVAR:%.*]] = getelementptr i8, ptr [[DST]], i64 0 +; CHECK-NEXT: [[GEP_INVAR:%.*]] = getelementptr i8, ptr [[TMP14]], i64 0 ; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_INVAR]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll index 220e450cf30d5..022789ad9de70 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll @@ -13,13 +13,12 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF2: [[VECTOR_PH]]: ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF2: [[VECTOR_BODY]]: -; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 0 -; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[DATA]], align 8 ; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> ; VF2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC]], <2 x i64> [[STRIDED_VEC1]], <4 x i32> ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <4 x i32> -; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[DATA]], align 8 ; VF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF2: [[MIDDLE_BLOCK]]: ; VF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -285,20 +284,17 @@ define void @test_complex_add_float_tc_4(ptr %res, ptr noalias %A, ptr noalias % ; VF4: [[VECTOR_PH]]: ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] ; VF4: [[VECTOR_BODY]]: -; VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[A]], i64 0 -; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[B]], i64 0 -; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, ptr [[TMP0]], align 4 +; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, ptr [[A]], align 4 ; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> poison, <4 x i32> ; VF4-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> poison, <4 x i32> -; VF4-NEXT: [[WIDE_VEC2:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 +; VF4-NEXT: [[WIDE_VEC2:%.*]] = load <8 x float>, ptr [[B]], align 4 ; VF4-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x float> [[WIDE_VEC2]], <8 x float> poison, <4 x i32> ; VF4-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x float> [[WIDE_VEC2]], <8 x float> poison, <4 x i32> ; VF4-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[STRIDED_VEC]], [[STRIDED_VEC3]] ; VF4-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[STRIDED_VEC1]], [[STRIDED_VEC4]] -; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[RES]], i64 0 ; VF4-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <8 x i32> ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> -; VF4-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 4 +; VF4-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[RES]], align 4 ; VF4-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF4: [[MIDDLE_BLOCK]]: ; VF4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll index 2b2ab8da6638b..cb966e4088dbb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll @@ -18,11 +18,10 @@ define void @test_remove_vector_loop_region_epilogue(ptr %dst, i1 %c) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 32 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 48 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i32 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i32 32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i32 48 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP1]], align 4 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 4 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP3]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll index 1ce272bcfb783..a7a7b1af5953b 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll @@ -24,11 +24,10 @@ define void @always_vectorize(ptr %p, i32 %x) { ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] ; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 0 -; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; DEFAULT-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0 ; DEFAULT-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP5]], align 4 ; DEFAULT-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; DEFAULT: [[MIDDLE_BLOCK]]: @@ -57,11 +56,10 @@ define void @always_vectorize(ptr %p, i32 %x) { ; OPTSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; OPTSIZE-NEXT: br label %[[VECTOR_BODY:.*]] ; OPTSIZE: [[VECTOR_BODY]]: -; OPTSIZE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 0 -; OPTSIZE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; OPTSIZE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0 ; OPTSIZE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; OPTSIZE-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; OPTSIZE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; OPTSIZE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0 ; OPTSIZE-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP5]], align 4 ; OPTSIZE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; OPTSIZE: [[MIDDLE_BLOCK]]: @@ -90,11 +88,10 @@ define void @always_vectorize(ptr %p, i32 %x) { ; MINSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; MINSIZE-NEXT: br label %[[VECTOR_BODY:.*]] ; MINSIZE: [[VECTOR_BODY]]: -; MINSIZE-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 0 -; MINSIZE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; MINSIZE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0 ; MINSIZE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; MINSIZE-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; MINSIZE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; MINSIZE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0 ; MINSIZE-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4 ; MINSIZE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; MINSIZE: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index 0f2b6b30ae0e1..17c737157a56a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -136,12 +136,10 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8:%.*]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP10:%.*]] = shl [[WIDE_MASKED_LOAD]], splat (i8 1) -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11:%.*]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP10]], [[WIDE_MASKED_LOAD1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 @@ -154,10 +152,10 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[I_08]] ; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 [[I_08]] ; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]] ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 @@ -194,12 +192,10 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 1) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3:%.*]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[TMP2]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 @@ -212,10 +208,10 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 [[I_08]] ; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP7]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 [[I_08]] ; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP8]] ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 @@ -253,12 +249,10 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = shl <32 x i8> [[WIDE_LOAD]], splat (i8 1) -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3:%.*]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = add <32 x i8> [[TMP2]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 @@ -271,10 +265,10 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 [[I_08]] ; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP7]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 [[I_08]] ; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP8]] ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll index 29e32131a491f..d9598bc662ac9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll @@ -8,8 +8,7 @@ define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_ran ; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TMP4:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 @@ -22,7 +21,7 @@ define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_ran ; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[IV]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 [[IV]] ; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[V]], 1 ; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP]], align 4 @@ -56,8 +55,7 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang ; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TMP6:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 @@ -70,7 +68,7 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang ; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[IV]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 [[IV]] ; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[V]], 1 ; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll index 7816c4918761f..753ef69cb5f20 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll @@ -16,11 +16,9 @@ define void @small_tc(ptr noalias nocapture %A, ptr noalias nocapture readonly % ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP0:![0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2:%.*]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <8 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 @@ -33,9 +31,9 @@ define void @small_tc(ptr noalias nocapture %A, ptr noalias nocapture readonly % ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP6]], [[TMP7]] ; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP0]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll index 85bcd97d50c7d..e5883351ae30d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll @@ -12,11 +12,10 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d) ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], 10 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 0 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: store i16 0, ptr [[TMP11]], align 2 +; CHECK-NEXT: store i16 0, ptr [[DST]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll index 16eaf705f17de..ed1c67c082134 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll @@ -22,31 +22,25 @@ define void @smax_call_uniform(ptr %dst, i64 %x) { ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]] ; CHECK: [[PRED_UREM_IF]]: -; CHECK-NEXT: [[REM:%.*]] = urem i64 [[MUL]], [[X]] ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE]] ; CHECK: [[PRED_UREM_CONTINUE]]: -; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[REM]], %[[PRED_UREM_IF]] ] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UREM_IF1:.*]], label %[[PRED_UREM_CONTINUE2:.*]] ; CHECK: [[PRED_UREM_IF1]]: -; CHECK-NEXT: [[TMP6:%.*]] = urem i64 [[MUL]], [[X]] ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE2]] ; CHECK: [[PRED_UREM_CONTINUE2]]: ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_UREM_IF3:.*]], label %[[PRED_UREM_CONTINUE4:.*]] ; CHECK: [[PRED_UREM_IF3]]: -; CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[MUL]], [[X]] ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE4]] ; CHECK: [[PRED_UREM_CONTINUE4]]: -; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP8]], %[[PRED_UREM_IF3]] ] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_UREM_IF5:.*]], label %[[PRED_UREM_CONTINUE6]] ; CHECK: [[PRED_UREM_IF5]]: -; CHECK-NEXT: [[TMP11:%.*]] = urem i64 [[MUL]], [[X]] ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]] ; CHECK: [[PRED_UREM_CONTINUE6]]: -; CHECK-NEXT: [[TMP12:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP9]], i64 0) +; CHECK-NEXT: [[TMP12:%.*]] = tail call i64 @llvm.smax.i64(i64 0, i64 0) +; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 0, i64 0) ; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 1, i64 [[TMP12]] ; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C]], i64 1, i64 [[TMP13]] ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[P]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll index 6a7dc51700192..1bcaa21315dc4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll @@ -32,8 +32,7 @@ define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i64> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> zeroinitializer, ptr [[TMP2]], i32 1, <16 x i1> [[TMP0]]) ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll index a9a1f5c9b3d0a..074ea6b17e94f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll @@ -25,11 +25,10 @@ define void @foo(ptr nocapture noalias %A, i64 %N) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @inc, align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 16 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 24 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP5]], align 4 @@ -38,10 +37,10 @@ define void @foo(ptr nocapture noalias %A, i64 %N) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP10:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD4]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 16 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 24 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24 ; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[TMP14]], align 4 ; CHECK-NEXT: store <8 x float> [[TMP8]], ptr [[TMP11]], align 4 ; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[TMP12]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder.ll b/llvm/test/Transforms/LoopVectorize/constantfolder.ll index c4281360ee802..df926fa6d189c 100644 --- a/llvm/test/Transforms/LoopVectorize/constantfolder.ll +++ b/llvm/test/Transforms/LoopVectorize/constantfolder.ll @@ -7,11 +7,10 @@ define void @const_fold_ptradd(ptr %dst, i64 %d) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 0 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: store i16 0, ptr [[TMP0]], align 2 +; CHECK-NEXT: store i16 0, ptr [[DST]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -63,11 +62,10 @@ define void @const_fold_inbounds_ptradd(ptr %dst, i64 %d) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 0 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: store i16 0, ptr [[TMP0]], align 2 +; CHECK-NEXT: store i16 0, ptr [[DST]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll index 0ceb76af20c5d..36dbc96a336ac 100644 --- a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll @@ -41,7 +41,6 @@ define i16 @test(ptr %arg, i64 %N) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[L_2]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -50,7 +49,7 @@ define i16 @test(ptr %arg, i64 %N) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP6]], align 2, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 1 -; CHECK-NEXT: store i16 [[TMP8]], ptr [[TMP7]], align 2, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT: store i16 [[TMP8]], ptr [[L_2]], align 2, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll index e29b15b8991e0..1a8361ce2bac3 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll @@ -55,8 +55,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0 -; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0 +; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 0 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer ; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]]) @@ -91,8 +90,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF16UF1: [[VECTOR_PH]]: ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF16UF1: [[VECTOR_BODY]]: -; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0 -; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0 +; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 0 ; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer ; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) @@ -190,8 +188,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0 -; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0 +; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 0 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer ; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]]) @@ -228,8 +225,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF16UF1: [[VECTOR_PH]]: ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF16UF1: [[VECTOR_BODY]]: -; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0 -; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0 +; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 0 ; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer ; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll index 849b9f9f32e56..78f5cc7cb6bc5 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll @@ -11,9 +11,8 @@ define i64 @remove_loop_region_int_iv_used_outside(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[DST]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[TMP0]], i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[DST]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP1]], align 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -57,9 +56,8 @@ define i64 @remove_loop_region_int_iv_inc_used_outside(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[DST]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[TMP0]], i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[DST]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP1]], align 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -104,9 +102,8 @@ define ptr @remove_loop_region_ptr_iv_used_outside(ptr %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 128 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[DST]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP1]], align 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -155,9 +152,8 @@ define ptr @remove_loop_region_ptr_iv_inc_used_outside(ptr %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 128 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[DST]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP1]], align 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll index 476ba3d5d1f35..001bf0cb16d7b 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll @@ -62,15 +62,14 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]] ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 0 -; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 -; VF8UF2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8 +; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[A]], i32 0 +; VF8UF2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i32 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1 ; VF8UF2-NEXT: [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10) ; VF8UF2-NEXT: [[TMP5:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10) -; VF8UF2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 -; VF8UF2-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8 +; VF8UF2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i32 0 +; VF8UF2-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i32 8 ; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[TMP6]], align 1 ; VF8UF2-NEXT: store <8 x i8> [[TMP5]], ptr [[TMP7]], align 1 ; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -106,11 +105,10 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; VF16UF1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]] ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF16UF1: [[VECTOR_BODY]]: -; VF16UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 0 -; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 +; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[A]], i32 0 ; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; VF16UF1-NEXT: [[TMP3:%.*]] = add nsw <16 x i8> [[WIDE_LOAD]], splat (i8 10) -; VF16UF1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 +; VF16UF1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i32 0 ; VF16UF1-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1 ; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: @@ -594,7 +592,7 @@ define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias ; VF8UF2-NEXT: [[ENTRY:.*]]: ; VF8UF2-NEXT: br label %[[OUTER_HEADER:.*]] ; VF8UF2: [[OUTER_HEADER]]: -; VF8UF2-NEXT: [[OUTER_IV:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] +; VF8UF2-NEXT: [[TMP0:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] ; VF8UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 ; VF8UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF8UF2: [[VECTOR_PH]]: @@ -602,14 +600,12 @@ define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias ; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 0 ; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0 ; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 -; VF8UF2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 0 -; VF8UF2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i32 0 -; VF8UF2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP3]], i32 8 +; VF8UF2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i32 0 +; VF8UF2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i32 8 ; VF8UF2-NEXT: store <8 x i8> [[WIDE_LOAD]], ptr [[TMP4]], align 1 ; VF8UF2-NEXT: store <8 x i8> [[WIDE_LOAD1]], ptr [[TMP5]], align 1 ; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -621,7 +617,7 @@ define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias ; VF8UF2-NEXT: br label %[[INNER:.*]] ; VF8UF2: [[INNER]]: ; VF8UF2-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ] -; VF8UF2-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[INNER_IV]] +; VF8UF2-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[INNER_IV]] ; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1 ; VF8UF2-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INNER_IV]] ; VF8UF2-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1 @@ -629,7 +625,7 @@ define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias ; VF8UF2-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; VF8UF2-NEXT: br i1 [[C_1]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP4:![0-9]+]] ; VF8UF2: [[OUTER_LATCH]]: -; VF8UF2-NEXT: [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[OUTER_IV]], i64 1 +; VF8UF2-NEXT: [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[TMP0]], i64 1 ; VF8UF2-NEXT: [[C_2:%.*]] = call i1 @cond() ; VF8UF2-NEXT: br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[EXIT:.*]] ; VF8UF2: [[EXIT]]: @@ -640,7 +636,7 @@ define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias ; VF16UF1-NEXT: [[ENTRY:.*]]: ; VF16UF1-NEXT: br label %[[OUTER_HEADER:.*]] ; VF16UF1: [[OUTER_HEADER]]: -; VF16UF1-NEXT: [[OUTER_IV:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] +; VF16UF1-NEXT: [[TMP0:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] ; VF16UF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 ; VF16UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF16UF1: [[VECTOR_PH]]: @@ -648,11 +644,9 @@ define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias ; VF16UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF16UF1: [[VECTOR_BODY]]: -; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 0 ; VF16UF1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0 ; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 -; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 0 -; VF16UF1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i32 0 +; VF16UF1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i32 0 ; VF16UF1-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 1 ; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: @@ -663,7 +657,7 @@ define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias ; VF16UF1-NEXT: br label %[[INNER:.*]] ; VF16UF1: [[INNER]]: ; VF16UF1-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ] -; VF16UF1-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[INNER_IV]] +; VF16UF1-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[INNER_IV]] ; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1 ; VF16UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INNER_IV]] ; VF16UF1-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1 @@ -671,7 +665,7 @@ define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias ; VF16UF1-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; VF16UF1-NEXT: br i1 [[C_1]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP4:![0-9]+]] ; VF16UF1: [[OUTER_LATCH]]: -; VF16UF1-NEXT: [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[OUTER_IV]], i64 1 +; VF16UF1-NEXT: [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[TMP0]], i64 1 ; VF16UF1-NEXT: [[C_2:%.*]] = call i1 @cond() ; VF16UF1-NEXT: br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[EXIT:.*]] ; VF16UF1: [[EXIT]]: diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll index 30e0acb4d7bf6..8d854dd1164c3 100644 --- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll @@ -487,8 +487,7 @@ define void @sext_of_i1_stride(i1 %g, ptr %dst) mustprogress { ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[G_64]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 -3 ; CHECK-NEXT: store <4 x i16> splat (i16 -1), ptr [[TMP7]], align 2 ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index 083583786bd3d..5a3cfdd28cc6d 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -77,15 +77,14 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: Successor(s): vector.body ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[PADD1:%.+]]> = ptradd ir<%A>, ir<0> -; CHECK-NEXT: vp<[[VPTR1:%.]]> = vector-pointer vp<[[PADD1]]> -; CHECK-NEXT: vp<[[VPTR2:%.]]> = vector-pointer vp<[[PADD1]]>, ir<1> +; CHECK-NEXT: vp<[[VPTR1:%.]]> = vector-pointer ir<%A> +; CHECK-NEXT: vp<[[VPTR2:%.]]> = vector-pointer ir<%A>, ir<1> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VPTR1]]> ; CHECK-NEXT: WIDEN ir<%l>.1 = load vp<[[VPTR2]]> ; CHECK-NEXT: WIDEN ir<%add> = add nsw ir<%l>, ir<10> ; CHECK-NEXT: WIDEN ir<%add>.1 = add nsw ir<%l>.1, ir<10> -; CHECK-NEXT: vp<[[VPTR3:%.+]]> = vector-pointer vp<[[PADD1]]> -; CHECK-NEXT: vp<[[VPTR4:%.+]]> = vector-pointer vp<[[PADD1]]>, ir<1> +; CHECK-NEXT: vp<[[VPTR3:%.+]]> = vector-pointer ir<%A> +; CHECK-NEXT: vp<[[VPTR4:%.+]]> = vector-pointer ir<%A>, ir<1> ; CHECK-NEXT: WIDEN store vp<[[VPTR3]]>, ir<%add> ; CHECK-NEXT: WIDEN store vp<[[VPTR4]]>, ir<%add>.1 ; CHECK-NEXT: Successor(s): middle.block diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 12f4cf1ddcd11..d3003f4283661 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -262,7 +262,6 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: CLONE ir<%gep.A.uniform> = getelementptr inbounds ir<%A>, ir<0> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -272,7 +271,7 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<21> + vp<[[CAN_IV]]> * ir<1> ; CHECK-NEXT: EMIT vp<[[WIDE_CAN_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDE_CAN_IV]]>, vp<[[BTC]]> -; CHECK-NEXT: CLONE ir<%lv> = load ir<%gep.A.uniform> +; CHECK-NEXT: CLONE ir<%lv> = load ir<%A> ; CHECK-NEXT: WIDEN ir<%cmp> = icmp uge ir<%iv>, ir<%k> ; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = logical-and vp<[[MASK]]>, ir<%cmp> ; CHECK-NEXT: Successor(s): pred.store