From bdb7fae1acdd094b4abba7668798ea72295e953f Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 26 Nov 2025 09:57:36 +0000 Subject: [PATCH 1/6] [LV] Pre-commit vector-pointer-gep-idxty-addrspace test --- .../vector-pointer-gep-idxty-addrspace.ll | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll diff --git a/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll b/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll new file mode 100644 index 0000000000000..ed3b91725561f --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 6 +; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=4 -S %s | FileCheck %s + +target datalayout = "p1:16:16" + +define void @vector_pointer_gep_idxty_addrspace(ptr addrspace(1) noalias %a, ptr addrspace(1) noalias %b) { +; CHECK-LABEL: define void @vector_pointer_gep_idxty_addrspace( +; CHECK-SAME: ptr addrspace(1) noalias [[A:%.*]], ptr addrspace(1) noalias [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i32 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i32 6 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP0]], align 4 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[WIDE_LOAD1]], splat (i32 1) +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD2]], splat (i32 1) +; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD3]], splat (i32 1) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i32 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i32 6 +; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr addrspace(1) [[TMP8]], align 4 +; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr addrspace(1) [[TMP9]], align 4 +; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP10]], align 4 +; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr addrspace(1) [[TMP11]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.b.iv = getelementptr inbounds i32, ptr addrspace(1) %b, i64 %iv + %ld.b = load i32, ptr addrspace(1) %gep.b.iv + %add = add i32 %ld.b, 1 + %gep.a.iv = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %iv + store i32 %add, ptr addrspace(1) %gep.a.iv + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv, 1024 + br i1 %exitcond, label %end, label %loop + +end: + ret void +} From 83f85691e17c0a9bb9344b98ff2a9ac4254fc313 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 24 Nov 2025 19:49:23 +0000 Subject: [PATCH 2/6] [VPlan] Use DL index type consistently for GEPs In preparation to strip VPUnrollPartAccessor and unroll recipes directly, strip unnecessary complication in getGEPIndexTy, as the unroll part will no longer be available in follow-ups. Narrowing the GEP index type conditionally does not yield any benefit, and the change is non-functional in terms of emitted assembly. --- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 16 +- .../LoopVectorize/AArch64/call-costs.ll | 4 +- .../AArch64/conditional-branches-cost.ll | 4 +- .../AArch64/deterministic-type-shrinkage.ll | 17 +- .../AArch64/epilog-iv-select-cmp.ll | 2 +- .../AArch64/epilog-vectorization-factors.ll | 108 ++--- .../epilog-vectorization-widen-inductions.ll | 10 +- ...-vectorization-fix-scalar-resume-values.ll | 2 +- .../AArch64/f128-fmuladd-reduction.ll | 12 +- .../AArch64/fixed-order-recurrence.ll | 8 +- .../AArch64/fixed-wide-lane-mask.ll | 6 +- .../AArch64/fmax-without-fast-math-flags.ll | 6 +- .../AArch64/fmin-without-fast-math-flags.ll | 2 +- .../LoopVectorize/AArch64/fminimumnum.ll | 36 +- .../AArch64/force-target-instruction-cost.ll | 4 +- .../LoopVectorize/AArch64/induction-costs.ll | 10 +- .../AArch64/interleaving-load-store.ll | 151 +++++- .../AArch64/interleaving-reduction.ll | 8 +- .../LoopVectorize/AArch64/intrinsiccost.ll | 17 +- .../LoopVectorize/AArch64/licm-calls.ll | 2 +- .../LoopVectorize/AArch64/optsize_minsize.ll | 14 +- .../partial-reduce-dot-product-mixed.ll | 24 +- .../partial-reduce-dot-product-neon.ll | 30 +- .../AArch64/partial-reduce-dot-product.ll | 66 +-- .../AArch64/partial-reduce-interleave.ll | 8 +- .../AArch64/partial-reduce-no-dotprod.ll | 4 +- .../AArch64/partial-reduce-sub.ll | 4 +- .../LoopVectorize/AArch64/partial-reduce.ll | 32 +- .../pr151664-cost-hoisted-vector-scalable.ll | 2 +- .../AArch64/replicating-load-store-costs.ll | 6 +- .../LoopVectorize/AArch64/store-costs-sve.ll | 2 +- .../AArch64/sve-fixed-width-inorder-core.ll | 18 +- ...-narrow-interleave-to-widen-memory-cost.ll | 17 +- ...-interleave-to-widen-memory-multi-block.ll | 4 +- ...arrow-interleave-to-widen-memory-unroll.ll | 2 +- ...ctor-loop-backedge-elimination-epilogue.ll | 17 +- .../LoopVectorize/AArch64/vector-reverse.ll | 16 +- .../LoopVectorize/LoongArch/defaults.ll | 2 +- .../LoopVectorize/PowerPC/exit-branch-cost.ll | 25 +- .../PowerPC/optimal-epilog-vectorization.ll | 184 +++---- .../LoopVectorize/PowerPC/small-loop-rdx.ll | 14 +- .../RISCV/partial-reduce-dot-product.ll | 32 +- .../LoopVectorize/RISCV/reductions.ll | 72 +-- .../RISCV/tail-folding-inloop-reduction.ll | 58 +-- .../RISCV/tail-folding-reduction.ll | 58 +-- .../RISCV/tail-folding-reverse-load-store.ll | 12 +- .../LoopVectorize/RISCV/uniform-load-store.ll | 16 +- .../LoopVectorize/X86/conversion-cost.ll | 6 +- .../LoopVectorize/X86/cost-model.ll | 2 +- ...bounds-flags-for-reverse-vector-pointer.ll | 4 +- .../X86/epilog-vectorization-inductions.ll | 6 +- .../X86/fixed-order-recurrence.ll | 8 +- .../LoopVectorize/X86/float-induction-x86.ll | 18 +- .../LoopVectorize/X86/fminimumnum.ll | 24 +- .../X86/imprecise-through-phis.ll | 8 +- .../LoopVectorize/X86/induction-costs.ll | 16 +- .../LoopVectorize/X86/induction-step.ll | 4 +- .../LoopVectorize/X86/intrinsiccost.ll | 24 +- .../X86/invariant-store-vectorization.ll | 6 +- .../LoopVectorize/X86/iv-live-outs.ll | 6 +- .../X86/limit-vf-by-tripcount.ll | 12 +- .../LoopVectorize/X86/load-deref-pred.ll | 66 +-- .../LoopVectorize/X86/masked-store-cost.ll | 8 +- .../LoopVectorize/X86/masked_load_store.ll | 456 +++++++++--------- .../LoopVectorize/X86/metadata-enable.ll | 8 +- .../LoopVectorize/X86/multi-exit-cost.ll | 4 +- .../Transforms/LoopVectorize/X86/pr23997.ll | 12 +- .../Transforms/LoopVectorize/X86/pr35432.ll | 2 +- .../Transforms/LoopVectorize/X86/pr47437.ll | 8 +- .../Transforms/LoopVectorize/X86/pr81872.ll | 4 +- .../LoopVectorize/X86/predicate-switch.ll | 18 +- .../LoopVectorize/X86/reduction-fastmath.ll | 10 +- .../LoopVectorize/X86/strided_load_cost.ll | 24 +- .../LoopVectorize/X86/uniform_load.ll | 6 +- .../X86/vect.omp.force.small-tc.ll | 30 +- .../X86/vectorize-force-tail-with-evl.ll | 18 +- ...ned-value-used-as-scalar-and-first-lane.ll | 24 +- llvm/test/Transforms/LoopVectorize/assume.ll | 12 +- .../LoopVectorize/consecutive-ptr-uniforms.ll | 8 +- .../Transforms/LoopVectorize/cse-casts.ll | 2 +- .../cse-gep-source-element-type.ll | 12 +- .../LoopVectorize/dead_instructions.ll | 8 +- .../dont-fold-tail-for-const-TC.ll | 4 +- .../LoopVectorize/expand-scev-after-invoke.ll | 2 +- .../LoopVectorize/fcmp-uno-fold-interleave.ll | 18 +- ...irst-order-recurrence-dead-instructions.ll | 4 +- .../LoopVectorize/first-order-recurrence.ll | 26 +- ...fmax-without-fast-math-flags-interleave.ll | 6 +- .../Transforms/LoopVectorize/if-reduction.ll | 4 +- .../LoopVectorize/induction-wrapflags.ll | 4 +- .../Transforms/LoopVectorize/induction.ll | 32 +- .../interleave-with-i65-induction.ll | 8 +- .../LoopVectorize/iv-select-cmp-decreasing.ll | 48 +- .../iv-select-cmp-nested-loop.ll | 6 +- .../LoopVectorize/iv-select-cmp-trunc.ll | 24 +- .../Transforms/LoopVectorize/iv-select-cmp.ll | 78 +-- .../LoopVectorize/iv_outside_user.ll | 4 +- .../LoopVectorize/load-deref-pred-align.ll | 16 +- .../test/Transforms/LoopVectorize/metadata.ll | 40 +- .../minimumnum-maximumnum-reductions.ll | 8 +- .../LoopVectorize/narrow-to-single-scalar.ll | 4 +- .../nested-loops-scev-expansion.ll | 4 +- .../LoopVectorize/noalias-scope-decl.ll | 12 +- .../optimal-epilog-vectorization.ll | 8 +- .../LoopVectorize/pointer-induction.ll | 4 +- llvm/test/Transforms/LoopVectorize/pr37248.ll | 8 +- .../LoopVectorize/predicate-switch.ll | 26 +- .../LoopVectorize/reduction-inloop.ll | 146 +++--- .../reduction-odd-interleave-counts.ll | 12 +- .../reuse-lcssa-phi-scev-expansion.ll | 4 +- .../reverse-induction-gep-nowrap-flags.ll | 12 +- .../LoopVectorize/reverse_induction.ll | 40 +- .../LoopVectorize/runtime-check-known-true.ll | 8 +- .../runtime-check-needed-but-empty.ll | 4 +- .../LoopVectorize/runtime-checks-hoist.ll | 8 +- .../scalar_after_vectorization.ll | 2 +- .../LoopVectorize/select-cmp-multiuse.ll | 10 +- .../Transforms/LoopVectorize/select-cmp.ll | 30 +- .../single-early-exit-interleave-hint.ll | 6 +- .../single-early-exit-interleave.ll | 136 +++--- .../single_early_exit_live_outs.ll | 8 +- .../LoopVectorize/skeleton-lcssa-crash.ll | 4 +- .../LoopVectorize/struct-return-replicate.ll | 28 +- ...oop-backedge-elimination-branch-weights.ll | 6 +- ...or-loop-backedge-elimination-early-exit.ll | 6 +- ...p-backedge-elimination-outside-iv-users.ll | 8 +- .../vector-loop-backedge-elimination.ll | 6 +- 127 files changed, 1517 insertions(+), 1413 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index e41f67103e096..2b4cd75bbea19 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2593,22 +2593,15 @@ void VPWidenGEPRecipe::printRecipe(raw_ostream &O, const Twine &Indent, } #endif -static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride, - unsigned CurrentPart, IRBuilderBase &Builder) { - // Use i32 for the gep index type when the value is constant, - // or query DataLayout for a more suitable index type otherwise. +static Type *getGEPIndexTy(IRBuilderBase &Builder) { const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); - return !IsUnitStride || (IsScalable && (IsReverse || CurrentPart > 0)) - ? DL.getIndexType(Builder.getPtrTy(0)) - : Builder.getInt32Ty(); + return DL.getIndexType(Builder.getPtrTy(0)); } void VPVectorEndPointerRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; unsigned CurrentPart = getUnrollPart(*this); - bool IsUnitStride = Stride == 1 || Stride == -1; - Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true, - IsUnitStride, CurrentPart, Builder); + Type *IndexTy = getGEPIndexTy(Builder); // The wide store needs to start at the last vector element. Value *RunTimeVF = State.get(getVFValue(), VPLane(0)); @@ -2644,8 +2637,7 @@ void VPVectorEndPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent, void VPVectorPointerRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; unsigned CurrentPart = getUnrollPart(*this); - Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false, - /*IsUnitStride*/ true, CurrentPart, Builder); + Type *IndexTy = getGEPIndexTy(Builder); Value *Ptr = State.get(getOperand(0), VPLane(0)); Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll index 23918427e7003..95b4dcb23dd47 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll @@ -14,7 +14,7 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1]] = load <2 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[WIDE_LOAD]], <2 x i32> @@ -22,7 +22,7 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) { ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP6]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP7]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 9609982b2c68f..2e3ddc8899ec7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -63,7 +63,7 @@ define void @loop_dependent_cond(ptr %src, ptr noalias %dst, i64 %N) { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] ; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i32 2 +; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i64 2 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8 ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP6]], align 8 ; DEFAULT-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD]]) @@ -259,7 +259,7 @@ define void @latch_branch_cost(ptr %dst) { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 16 +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 16 ; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 1 ; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll index 0a433ec76acf4..f0664197dcb94 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll @@ -137,7 +137,7 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 @@ -210,7 +210,7 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 % ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 @@ -279,7 +279,7 @@ define void @trunc_invariant_sdiv_result(i32 %a, i32 %b, ptr noalias %src, ptr % ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16> @@ -287,7 +287,7 @@ define void @trunc_invariant_sdiv_result(i32 %a, i32 %b, ptr noalias %src, ptr % ; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP0]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i16> [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 16 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i64 16 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP7]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -413,7 +413,7 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[WIDE_LOAD]] to <4 x i1> @@ -427,15 +427,16 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP8]] to <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 4 ; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP12]], align 4 ; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index 2180f18750bf2..580c568c373f1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -27,7 +27,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i8> [[VEC_IND]], splat (i8 16) ; CHECK-NEXT: [[INDEX4:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[INDEX4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 16 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP8]], align 8 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 3) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll index a3b7392dd280f..549df337e6907 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll @@ -19,17 +19,17 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 32 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 48 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 48 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 16 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 32 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 48 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 16 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 32 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 48 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP9]], align 1 @@ -39,9 +39,9 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK-NEXT: [[TMP13:%.*]] = add <16 x i8> [[WIDE_LOAD7]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP14:%.*]] = add <16 x i8> [[WIDE_LOAD8]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 16 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 32 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 48 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 16 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 32 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 48 ; CHECK-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP15]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP17]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP13]], ptr [[TMP18]], align 1 @@ -54,7 +54,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 8 @@ -71,7 +71,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK-NEXT: store <8 x i8> [[TMP26]], ptr [[TMP27]], align 1 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 8 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]] -; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -89,7 +89,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX6]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -128,17 +128,17 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 8 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 16 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 24 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 16 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 24 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i16>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 8 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 16 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 24 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 16 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 24 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i16>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i16>, ptr [[TMP9]], align 1 @@ -148,22 +148,22 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: [[TMP13:%.*]] = add <8 x i16> [[WIDE_LOAD7]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP14:%.*]] = add <8 x i16> [[WIDE_LOAD8]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 8 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 16 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 24 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 8 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 16 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 24 ; CHECK-NEXT: store <8 x i16> [[TMP11]], ptr [[TMP15]], align 1 ; CHECK-NEXT: store <8 x i16> [[TMP12]], ptr [[TMP17]], align 1 ; CHECK-NEXT: store <8 x i16> [[TMP13]], ptr [[TMP18]], align 1 ; CHECK-NEXT: store <8 x i16> [[TMP14]], ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 4 @@ -180,7 +180,7 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store <4 x i16> [[TMP26]], ptr [[TMP27]], align 1 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]] -; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -198,7 +198,7 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX6]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -237,17 +237,17 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 12 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP9]], align 1 @@ -257,22 +257,22 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD3]] ; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD8]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 4 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 8 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 12 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 8 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 12 ; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP15]], align 1 ; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP17]], align 1 ; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP18]], align 1 ; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 4 @@ -289,7 +289,7 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store <4 x i32> [[TMP26]], ptr [[TMP27]], align 1 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]] -; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -307,7 +307,7 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX6]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -347,9 +347,9 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 16 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 32 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 48 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 32 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 48 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[ARG]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 @@ -358,9 +358,9 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[WIDE_LOAD4]], splat (i8 10) ; CHECK-NEXT: [[TMP6:%.*]] = add <16 x i8> [[WIDE_LOAD5]], splat (i8 10) ; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[WIDE_LOAD6]], splat (i8 10) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 16 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 32 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 48 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 16 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 32 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 48 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[ARG2]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP8]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1 @@ -369,7 +369,7 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -382,7 +382,7 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: @@ -397,7 +397,7 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK-NEXT: store i8 [[SELECT]], ptr [[GEP_B]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 20 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -447,16 +447,16 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[INDEX]], 4 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 12 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 12 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP8]], align 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP9]], align 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -464,7 +464,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 4 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP12]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF1:%.*]] = urem i64 [[TMP2]], 4 @@ -479,7 +479,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[NEXT_GEP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX3]], 4 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC2]] -; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC2]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -491,7 +491,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: store i32 0, ptr [[IV]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = getelementptr i8, ptr [[IV]], i64 4 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV]], [[PTR_END]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index 5e92123891b31..85726c161cc54 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -29,7 +29,7 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]]) ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]]) ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]]) -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2 ; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -117,7 +117,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2 ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -201,7 +201,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2 ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -285,7 +285,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 10) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 10) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2 ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP1]], align 4 ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -399,7 +399,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 ; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP1]], align 1 ; CHECK-NEXT: store <2 x i8> [[STEP_ADD]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll index cb4e99332c04b..4eacc55a99f72 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll @@ -20,7 +20,7 @@ define void @epilogue_vectorization_fix_scalar_resume_values(ptr %dst, i64 %n) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP0]], align 1 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll index 35d7e2cc8c586..feb0175e75542 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll @@ -21,16 +21,16 @@ define double @fp128_fmuladd_reduction(ptr %start0, ptr %start1, ptr %end0, ptr ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[START0]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[START1]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr fp128, ptr [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr fp128, ptr [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr fp128, ptr [[TMP1]], i32 6 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr fp128, ptr [[TMP1]], i64 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr fp128, ptr [[TMP1]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr fp128, ptr [[TMP1]], i64 6 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x fp128>, ptr [[TMP1]], align 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x fp128>, ptr [[TMP24]], align 16 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x fp128>, ptr [[TMP4]], align 16 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x fp128>, ptr [[TMP5]], align 16 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP3]], i32 4 -; CHECK-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP3]], i32 6 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP3]], i64 6 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP3]], align 16 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP28]], align 16 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x double>, ptr [[TMP35]], align 16 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll index c94b3a4c49555..c692ba5b06690 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll @@ -26,7 +26,7 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i8> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD1]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32> @@ -34,7 +34,7 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP9:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = add <16 x i8> [[WIDE_LOAD1]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -119,7 +119,7 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[VECTOR_RECUR4:%.*]] = phi <16 x i8> [ [[VECTOR_RECUR_INIT3]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD5]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32> @@ -135,7 +135,7 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP17:%.*]] = add <16 x i8> [[TMP15]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18:%.*]] = add <16 x i8> [[TMP16]], [[WIDE_LOAD5]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i32 16 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP19]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP18]], ptr [[TMP22]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll index faee4c1194018..591bdabca65e7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll @@ -56,9 +56,9 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly ; CHECK-UF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 ; CHECK-UF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-UF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4 -; CHECK-UF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 8 -; CHECK-UF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 12 +; CHECK-UF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 4 +; CHECK-UF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 8 +; CHECK-UF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 12 ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP8]], <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP17]], <4 x i1> [[ACTIVE_LANE_MASK4]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP18]], <4 x i1> [[ACTIVE_LANE_MASK5]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll index 6902dd990509e..a04367f32dd01 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll @@ -53,7 +53,7 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) @@ -128,10 +128,10 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV]] ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC_0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i32 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[GEP_SRC_1]], align 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI2]], <4 x float> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll index 193424d3eb70a..0bddc498f9e83 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll @@ -53,7 +53,7 @@ define float @fminnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll index f15f04fe5f6f2..3a9d5c34bacab 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll @@ -20,17 +20,17 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD5]], <4 x float> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -97,17 +97,17 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD5]], <4 x float> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -174,17 +174,17 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD5]], <2 x double> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -251,17 +251,17 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD5]], <2 x double> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -328,17 +328,17 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw half, ptr [[TMP2]], i32 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw half, ptr [[TMP2]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[TMP2]], align 2 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x half>, ptr [[TMP6]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw half, ptr [[TMP4]], i32 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw half, ptr [[TMP4]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x half>, ptr [[TMP4]], align 2 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x half>, ptr [[TMP10]], align 2 ; CHECK-NEXT: [[TMP11:%.*]] = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> [[WIDE_LOAD]], <8 x half> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> [[WIDE_LOAD5]], <8 x half> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw half, ptr [[TMP7]], i32 8 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw half, ptr [[TMP7]], i64 8 ; CHECK-NEXT: store <8 x half> [[TMP11]], ptr [[TMP7]], align 2 ; CHECK-NEXT: store <8 x half> [[TMP13]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -405,17 +405,17 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw half, ptr [[TMP2]], i32 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw half, ptr [[TMP2]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[TMP2]], align 2 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x half>, ptr [[TMP6]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw half, ptr [[TMP4]], i32 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw half, ptr [[TMP4]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x half>, ptr [[TMP4]], align 2 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x half>, ptr [[TMP10]], align 2 ; CHECK-NEXT: [[TMP11:%.*]] = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> [[WIDE_LOAD]], <8 x half> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> [[WIDE_LOAD5]], <8 x half> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw half, ptr [[TMP7]], i32 8 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw half, ptr [[TMP7]], i64 8 ; CHECK-NEXT: store <8 x half> [[TMP11]], ptr [[TMP7]], align 2 ; CHECK-NEXT: store <8 x half> [[TMP13]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 56edee44fe3b1..21b21774d18cf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -62,7 +62,7 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) { ; COST1: [[VECTOR_BODY]]: ; COST1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; COST1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[INDEX]] -; COST1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 +; COST1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 ; COST1-NEXT: store <16 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1 ; COST1-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP0]], align 1 ; COST1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -328,7 +328,7 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 { ; COST1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP1]], i64 0 ; COST1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer ; COST1-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]] -; COST1-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[TMP2]], i32 2 +; COST1-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[TMP2]], i64 2 ; COST1-NEXT: store <2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; COST1-NEXT: store <2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; COST1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 42a1940925968..7b42e565e127d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -25,7 +25,7 @@ define i32 @multi_exit_iv_uniform(i32 %a, i64 %N, ptr %dst) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP5]], i64 4 ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8 ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP9]], align 8 ; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI]], splat (i32 -1) @@ -106,7 +106,7 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i32>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i32> [[WIDE_LOAD4]] to <2 x i64> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -161,7 +161,7 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i32> [[WIDE_LOAD3]] to <4 x i64> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -205,7 +205,7 @@ define void @wide_truncated_iv(ptr %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ , [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 8 ; CHECK-NEXT: store <8 x i8> [[VEC_IND]], ptr [[TMP2]], align 1 ; CHECK-NEXT: store <8 x i8> [[STEP_ADD]], ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -287,7 +287,7 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) { ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i32> [[TMP7]] to <4 x i64> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll index 9b4151f30d640..dba32e350e0f5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll @@ -35,9 +35,9 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4: vector.body: ; INTERLEAVE-4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; INTERLEAVE-4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16 -; INTERLEAVE-4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 32 -; INTERLEAVE-4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 48 +; INTERLEAVE-4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16 +; INTERLEAVE-4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 32 +; INTERLEAVE-4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 48 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 @@ -55,9 +55,9 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP15]], <16 x i8> [[BROADCAST_SPLAT]], <16 x i8> [[TMP19]] ; INTERLEAVE-4-NEXT: [[TMP24:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> [[BROADCAST_SPLAT]], <16 x i8> [[TMP20]] ; INTERLEAVE-4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] -; INTERLEAVE-4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 16 -; INTERLEAVE-4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 32 -; INTERLEAVE-4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 48 +; INTERLEAVE-4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 16 +; INTERLEAVE-4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 32 +; INTERLEAVE-4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 48 ; INTERLEAVE-4-NEXT: store <16 x i8> [[TMP21]], ptr [[TMP25]], align 1 ; INTERLEAVE-4-NEXT: store <16 x i8> [[TMP22]], ptr [[TMP30]], align 1 ; INTERLEAVE-4-NEXT: store <16 x i8> [[TMP23]], ptr [[TMP31]], align 1 @@ -70,7 +70,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; INTERLEAVE-4: vec.epilog.iter.check: ; INTERLEAVE-4-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; INTERLEAVE-4-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; INTERLEAVE-4-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; INTERLEAVE-4: vec.epilog.ph: ; INTERLEAVE-4-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; INTERLEAVE-4-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[N]], 8 @@ -91,7 +91,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: store <8 x i8> [[TMP39]], ptr [[TMP40]], align 1 ; INTERLEAVE-4-NEXT: [[INDEX_NEXT18]] = add nuw i64 [[INDEX12]], 8 ; INTERLEAVE-4-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT18]], [[N_VEC10]] -; INTERLEAVE-4-NEXT: br i1 [[TMP42]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; INTERLEAVE-4-NEXT: br i1 [[TMP42]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; INTERLEAVE-4: vec.epilog.middle.block: ; INTERLEAVE-4-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[N_VEC10]] ; INTERLEAVE-4-NEXT: br i1 [[CMP_N11]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -109,7 +109,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: store i8 [[SEL]], ptr [[GEP_DST]], align 1 ; INTERLEAVE-4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; INTERLEAVE-4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; INTERLEAVE-4-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; INTERLEAVE-4-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; INTERLEAVE-4: exit: ; INTERLEAVE-4-NEXT: ret void ; @@ -137,7 +137,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2: vector.body: ; INTERLEAVE-2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; INTERLEAVE-2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; INTERLEAVE-2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; INTERLEAVE-2-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; INTERLEAVE-2-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; INTERLEAVE-2-NEXT: [[TMP7:%.*]] = icmp sgt <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -147,7 +147,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> [[BROADCAST_SPLAT]], <16 x i8> [[TMP9]] ; INTERLEAVE-2-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP8]], <16 x i8> [[BROADCAST_SPLAT]], <16 x i8> [[TMP10]] ; INTERLEAVE-2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] -; INTERLEAVE-2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 16 +; INTERLEAVE-2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i64 16 ; INTERLEAVE-2-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP13]], align 1 ; INTERLEAVE-2-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP16]], align 1 ; INTERLEAVE-2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -158,7 +158,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; INTERLEAVE-2: vec.epilog.iter.check: ; INTERLEAVE-2-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; INTERLEAVE-2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; INTERLEAVE-2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; INTERLEAVE-2: vec.epilog.ph: ; INTERLEAVE-2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; INTERLEAVE-2-NEXT: [[N_MOD_VF7:%.*]] = urem i64 [[N]], 8 @@ -179,7 +179,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: store <8 x i8> [[TMP23]], ptr [[TMP24]], align 1 ; INTERLEAVE-2-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX10]], 8 ; INTERLEAVE-2-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC8]] -; INTERLEAVE-2-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; INTERLEAVE-2-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; INTERLEAVE-2: vec.epilog.middle.block: ; INTERLEAVE-2-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[N]], [[N_VEC8]] ; INTERLEAVE-2-NEXT: br i1 [[CMP_N9]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -197,15 +197,130 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: store i8 [[SEL]], ptr [[GEP_DST]], align 1 ; INTERLEAVE-2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; INTERLEAVE-2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; INTERLEAVE-2-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; INTERLEAVE-2-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; INTERLEAVE-2: exit: ; INTERLEAVE-2-NEXT: ret void ; ; INTERLEAVE-4-VLA-LABEL: @interleave_single_load_store( -; INTERLEAVE-4-VLA: call @llvm.smax.nxv16i8( -; INTERLEAVE-4-VLA-NEXT: call @llvm.smax.nxv16i8( -; INTERLEAVE-4-VLA-NEXT: call @llvm.smax.nxv16i8( -; INTERLEAVE-4-VLA-NEXT: call @llvm.smax.nxv16i8( +; INTERLEAVE-4-VLA-NEXT: iter.check: +; INTERLEAVE-4-VLA-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64 +; INTERLEAVE-4-VLA-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST:%.*]] to i64 +; INTERLEAVE-4-VLA-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 +; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; INTERLEAVE-4-VLA: vector.memcheck: +; INTERLEAVE-4-VLA-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; INTERLEAVE-4-VLA-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; INTERLEAVE-4-VLA-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 +; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = sub i64 [[DST1]], [[SRC2]] +; INTERLEAVE-4-VLA-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] +; INTERLEAVE-4-VLA-NEXT: br i1 [[DIFF_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; INTERLEAVE-4-VLA: vector.main.loop.iter.check: +; INTERLEAVE-4-VLA-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 6 +; INTERLEAVE-4-VLA-NEXT: [[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[N]], [[TMP5]] +; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK3]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; INTERLEAVE-4-VLA: vector.ph: +; INTERLEAVE-4-VLA-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 64 +; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]] +; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[B:%.*]], i64 0 +; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement poison, i8 [[A:%.*]], i64 0 +; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector [[BROADCAST_SPLATINSERT4]], poison, zeroinitializer +; INTERLEAVE-4-VLA-NEXT: br label [[VECTOR_BODY:%.*]] +; INTERLEAVE-4-VLA: vector.body: +; INTERLEAVE-4-VLA-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; INTERLEAVE-4-VLA-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] +; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 4 +; INTERLEAVE-4-VLA-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP10]] +; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; INTERLEAVE-4-VLA-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 5 +; INTERLEAVE-4-VLA-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP13]] +; INTERLEAVE-4-VLA-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; INTERLEAVE-4-VLA-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 48 +; INTERLEAVE-4-VLA-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP16]] +; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 1 +; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP11]], align 1 +; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP14]], align 1 +; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP17]], align 1 +; INTERLEAVE-4-VLA-NEXT: [[TMP18:%.*]] = icmp sgt [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; INTERLEAVE-4-VLA-NEXT: [[TMP19:%.*]] = icmp sgt [[WIDE_LOAD6]], [[BROADCAST_SPLAT]] +; INTERLEAVE-4-VLA-NEXT: [[TMP20:%.*]] = icmp sgt [[WIDE_LOAD7]], [[BROADCAST_SPLAT]] +; INTERLEAVE-4-VLA-NEXT: [[TMP21:%.*]] = icmp sgt [[WIDE_LOAD8]], [[BROADCAST_SPLAT]] +; INTERLEAVE-4-VLA-NEXT: [[TMP22:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_LOAD]], [[BROADCAST_SPLAT5]]) +; INTERLEAVE-4-VLA-NEXT: [[TMP23:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_LOAD6]], [[BROADCAST_SPLAT5]]) +; INTERLEAVE-4-VLA-NEXT: [[TMP24:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_LOAD7]], [[BROADCAST_SPLAT5]]) +; INTERLEAVE-4-VLA-NEXT: [[TMP25:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_LOAD8]], [[BROADCAST_SPLAT5]]) +; INTERLEAVE-4-VLA-NEXT: [[TMP26:%.*]] = select [[TMP18]], [[BROADCAST_SPLAT]], [[TMP22]] +; INTERLEAVE-4-VLA-NEXT: [[TMP27:%.*]] = select [[TMP19]], [[BROADCAST_SPLAT]], [[TMP23]] +; INTERLEAVE-4-VLA-NEXT: [[TMP28:%.*]] = select [[TMP20]], [[BROADCAST_SPLAT]], [[TMP24]] +; INTERLEAVE-4-VLA-NEXT: [[TMP29:%.*]] = select [[TMP21]], [[BROADCAST_SPLAT]], [[TMP25]] +; INTERLEAVE-4-VLA-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] +; INTERLEAVE-4-VLA-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() +; INTERLEAVE-4-VLA-NEXT: [[TMP32:%.*]] = shl nuw i64 [[TMP31]], 4 +; INTERLEAVE-4-VLA-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i64 [[TMP32]] +; INTERLEAVE-4-VLA-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() +; INTERLEAVE-4-VLA-NEXT: [[TMP35:%.*]] = shl nuw i64 [[TMP34]], 5 +; INTERLEAVE-4-VLA-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i64 [[TMP35]] +; INTERLEAVE-4-VLA-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() +; INTERLEAVE-4-VLA-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], 48 +; INTERLEAVE-4-VLA-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i64 [[TMP38]] +; INTERLEAVE-4-VLA-NEXT: store [[TMP26]], ptr [[TMP30]], align 1 +; INTERLEAVE-4-VLA-NEXT: store [[TMP27]], ptr [[TMP33]], align 1 +; INTERLEAVE-4-VLA-NEXT: store [[TMP28]], ptr [[TMP36]], align 1 +; INTERLEAVE-4-VLA-NEXT: store [[TMP29]], ptr [[TMP39]], align 1 +; INTERLEAVE-4-VLA-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; INTERLEAVE-4-VLA-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; INTERLEAVE-4-VLA-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; INTERLEAVE-4-VLA: middle.block: +; INTERLEAVE-4-VLA-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; INTERLEAVE-4-VLA-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; INTERLEAVE-4-VLA: vec.epilog.iter.check: +; INTERLEAVE-4-VLA-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 +; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] +; INTERLEAVE-4-VLA: vec.epilog.ph: +; INTERLEAVE-4-VLA-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[N]], 8 +; INTERLEAVE-4-VLA-NEXT: [[N_VEC10:%.*]] = sub i64 [[N]], [[N_MOD_VF9]] +; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0 +; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT11]], <8 x i8> poison, <8 x i32> zeroinitializer +; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0 +; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT13]], <8 x i8> poison, <8 x i32> zeroinitializer +; INTERLEAVE-4-VLA-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; INTERLEAVE-4-VLA: vec.epilog.vector.body: +; INTERLEAVE-4-VLA-NEXT: [[INDEX15:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; INTERLEAVE-4-VLA-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX15]] +; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD16:%.*]] = load <8 x i8>, ptr [[TMP41]], align 1 +; INTERLEAVE-4-VLA-NEXT: [[TMP42:%.*]] = icmp sgt <8 x i8> [[WIDE_LOAD16]], [[BROADCAST_SPLAT12]] +; INTERLEAVE-4-VLA-NEXT: [[TMP43:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[WIDE_LOAD16]], <8 x i8> [[BROADCAST_SPLAT14]]) +; INTERLEAVE-4-VLA-NEXT: [[TMP44:%.*]] = select <8 x i1> [[TMP42]], <8 x i8> [[BROADCAST_SPLAT12]], <8 x i8> [[TMP43]] +; INTERLEAVE-4-VLA-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX15]] +; INTERLEAVE-4-VLA-NEXT: store <8 x i8> [[TMP44]], ptr [[TMP45]], align 1 +; INTERLEAVE-4-VLA-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[INDEX15]], 8 +; INTERLEAVE-4-VLA-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT17]], [[N_VEC10]] +; INTERLEAVE-4-VLA-NEXT: br i1 [[TMP46]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; INTERLEAVE-4-VLA: vec.epilog.middle.block: +; INTERLEAVE-4-VLA-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[N]], [[N_VEC10]] +; INTERLEAVE-4-VLA-NEXT: br i1 [[CMP_N18]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; INTERLEAVE-4-VLA: vec.epilog.scalar.ph: +; INTERLEAVE-4-VLA-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; INTERLEAVE-4-VLA-NEXT: br label [[LOOP:%.*]] +; INTERLEAVE-4-VLA: loop: +; INTERLEAVE-4-VLA-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; INTERLEAVE-4-VLA-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[IV]] +; INTERLEAVE-4-VLA-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1 +; INTERLEAVE-4-VLA-NEXT: [[CMP:%.*]] = icmp sgt i8 [[L]], [[B]] +; INTERLEAVE-4-VLA-NEXT: [[MAX:%.*]] = tail call i8 @llvm.smax.i8(i8 [[L]], i8 [[A]]) +; INTERLEAVE-4-VLA-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[B]], i8 [[MAX]] +; INTERLEAVE-4-VLA-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] +; INTERLEAVE-4-VLA-NEXT: store i8 [[SEL]], ptr [[GEP_DST]], align 1 +; INTERLEAVE-4-VLA-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; INTERLEAVE-4-VLA-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; INTERLEAVE-4-VLA-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; INTERLEAVE-4-VLA: exit: +; INTERLEAVE-4-VLA-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll index aa94763b44a30..53cb0653fd241 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll @@ -29,9 +29,9 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; INTERLEAVE-4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -; INTERLEAVE-4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -; INTERLEAVE-4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12 +; INTERLEAVE-4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 +; INTERLEAVE-4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8 +; INTERLEAVE-4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 1 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP1]], align 1 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP2]], align 1 @@ -103,7 +103,7 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-2-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-2-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; INTERLEAVE-2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; INTERLEAVE-2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; INTERLEAVE-2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 1 ; INTERLEAVE-2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 1 ; INTERLEAVE-2-NEXT: [[TMP2]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll index ee3a4a04566c9..89cc8e3461031 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -7,10 +7,6 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" ; CHECK-COST-LABEL: sadd -; CHECK-COST: Found an estimated cost of 6 for VF 1 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) -; CHECK-COST: Cost of 4 for VF 2: WIDEN-INTRINSIC ir<%1> = call llvm.sadd.sat(ir<%0>, ir<%offset>) -; CHECK-COST: Cost of 1 for VF 4: WIDEN-INTRINSIC ir<%1> = call llvm.sadd.sat(ir<%0>, ir<%offset>) -; CHECK-COST: Cost of 1 for VF 8: WIDEN-INTRINSIC ir<%1> = call llvm.sadd.sat(ir<%0>, ir<%offset>) define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 { ; CHECK-LABEL: @saddsat( @@ -36,12 +32,12 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[OFFSET_IDX2]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD]], <8 x i16> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD4]], <8 x i16> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 8 ; CHECK-NEXT: store <8 x i16> [[TMP2]], ptr [[NEXT_GEP3]], align 2 ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[TMP4]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -131,11 +127,6 @@ while.end: ; preds = %while.body, %entry } ; CHECK-COST-LABEL: umin -; CHECK-COST: Found an estimated cost of 2 for VF 1 For instruction: %1 = tail call i8 @llvm.umin.i8(i8 %0, i8 %offset) -; CHECK-COST: Cost of 1 for VF 2: WIDEN-INTRINSIC ir<%1> = call llvm.umin(ir<%0>, ir<%offset>) -; CHECK-COST: Cost of 1 for VF 4: WIDEN-INTRINSIC ir<%1> = call llvm.umin(ir<%0>, ir<%offset>) -; CHECK-COST: Cost of 1 for VF 8: WIDEN-INTRINSIC ir<%1> = call llvm.umin(ir<%0>, ir<%offset>) -; CHECK-COST: Cost of 1 for VF 16: WIDEN-INTRINSIC ir<%1> = call llvm.umin(ir<%0>, ir<%offset>) define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 { @@ -160,12 +151,12 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD]], <16 x i8> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD3]], <16 x i8> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 16 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[NEXT_GEP2]], align 2 ; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll index 0a9494e4c7ade..c43d62404006d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll @@ -22,7 +22,7 @@ define void @licm_replicate_call(double %x, ptr %dst) { ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP6]], ptr [[TMP8]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP7]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index c768fec31a497..bdbf08aecf6b3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -103,7 +103,7 @@ define void @vectorize_without_optsize(ptr %p, i32 %x, i64 %n) { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; DEFAULT-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -621,17 +621,17 @@ define void @dont_vectorize_with_minsize() { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 8 +; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i32 8 +; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 ; DEFAULT-NEXT: [[TMP7:%.*]] = mul nsw <8 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; DEFAULT-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP9]], i32 8 +; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP9]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP9]], align 2 ; DEFAULT-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[TMP11]], align 2 ; DEFAULT-NEXT: [[TMP12:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16> @@ -737,17 +737,17 @@ define void @vectorization_forced_minsize_reduce_width() { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 8 +; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i32 8 +; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 ; DEFAULT-NEXT: [[TMP7:%.*]] = mul nsw <8 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; DEFAULT-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP9]], i32 8 +; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP9]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP9]], align 2 ; DEFAULT-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[TMP11]], align 2 ; DEFAULT-NEXT: [[TMP12:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll index 0ee6b52a2450b..3142227815383 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll @@ -61,13 +61,13 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP13]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP13]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP13]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP14:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -164,13 +164,13 @@ define i32 @usdot(ptr %a, ptr %b) #0 { ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP13]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP13]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP13]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP14:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -223,11 +223,11 @@ define i32 @sudot_neon(ptr %a, ptr %b) #1 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 16 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -259,13 +259,13 @@ define i32 @sudot_neon(ptr %a, ptr %b) #1 { ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP8:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -318,11 +318,11 @@ define i32 @usdot_neon(ptr %a, ptr %b) #1 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 16 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -354,13 +354,13 @@ define i32 @usdot_neon(ptr %a, ptr %b) #1 { ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP4:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP8:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll index c6c5c5105d540..b2be0e1d7a442 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll @@ -44,11 +44,11 @@ define i32 @dotp(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -249,7 +249,7 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 ; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] -; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -513,13 +513,13 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) { ; CHECK-INTERLEAVED: vector.body: ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> @@ -791,10 +791,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]] -; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP38]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = sext <16 x i8> [[WIDE_LOAD9]] to <16 x i32> @@ -805,10 +805,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = sext <16 x i8> [[WIDE_LOAD8]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = mul nsw <16 x i32> [[TMP19]], [[TMP17]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE11]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP21]]) -; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD13:%.*]] = load <16 x i8>, ptr [[TMP22]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD14:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x i8>, ptr [[TMP26]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = sext <16 x i8> [[WIDE_LOAD12]] to <16 x i32> @@ -819,10 +819,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sext <16 x i8> [[WIDE_LOAD15]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = mul nsw <16 x i32> [[TMP27]], [[TMP25]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE17]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP29]]) -; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD18:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD19:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD20:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD21:%.*]] = load <16 x i8>, ptr [[TMP36]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = sext <16 x i8> [[WIDE_LOAD18]] to <16 x i32> @@ -833,10 +833,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = sext <16 x i8> [[WIDE_LOAD21]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = mul nsw <16 x i32> [[TMP35]], [[TMP33]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP37]]) -; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD24:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD25:%.*]] = load <16 x i8>, ptr [[TMP42]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD26:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD27:%.*]] = load <16 x i8>, ptr [[TMP46]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = sext <16 x i8> [[WIDE_LOAD24]] to <16 x i32> @@ -1811,13 +1811,13 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index ab593f6f8bb6b..71eb5476b7ac5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -44,11 +44,11 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX1]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP20]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP20]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP20]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX1]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP28]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP28]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP28]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -162,16 +162,16 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE14:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 32 -; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 48 +; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32 +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 32 -; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 48 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 32 +; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[NEXT_GEP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i8>, ptr [[TMP18]], align 1 @@ -299,16 +299,16 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly % ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX2]] -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 8 -; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 24 +; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 8 +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 24 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP0]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i16>, ptr [[TMP10]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i16>, ptr [[TMP11]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 8 -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 24 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 8 +; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 24 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[NEXT_GEP3]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x i16>, ptr [[TMP18]], align 2 @@ -525,7 +525,7 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 ; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] -; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -789,13 +789,13 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED: vector.body: ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP10]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP10]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP17]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP17]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP17]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> @@ -1080,10 +1080,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]] -; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP43]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i8>, ptr [[TMP12]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = sext <16 x i8> [[WIDE_LOAD9]] to <16 x i32> @@ -1094,10 +1094,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = sext <16 x i8> [[WIDE_LOAD8]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = mul nsw <16 x i32> [[TMP16]], [[TMP17]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE11]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP18]]) -; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD13:%.*]] = load <16 x i8>, ptr [[TMP19]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD14:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x i8>, ptr [[TMP20]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = sext <16 x i8> [[WIDE_LOAD12]] to <16 x i32> @@ -1108,10 +1108,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sext <16 x i8> [[WIDE_LOAD15]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = mul nsw <16 x i32> [[TMP24]], [[TMP25]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE17]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP26]]) -; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD18:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD19:%.*]] = load <16 x i8>, ptr [[TMP27]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD20:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD21:%.*]] = load <16 x i8>, ptr [[TMP28]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = sext <16 x i8> [[WIDE_LOAD18]] to <16 x i32> @@ -1122,10 +1122,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = sext <16 x i8> [[WIDE_LOAD21]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = mul nsw <16 x i32> [[TMP48]], [[TMP33]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE23]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP34]]) -; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD24:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD25:%.*]] = load <16 x i8>, ptr [[TMP35]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD26:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD27:%.*]] = load <16 x i8>, ptr [[TMP36]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = sext <16 x i8> [[WIDE_LOAD24]] to <16 x i32> @@ -1450,13 +1450,13 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP3]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP8]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -1572,12 +1572,12 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP14]] -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP15]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i64> @@ -1880,7 +1880,7 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 8 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i64 8 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64> @@ -2009,7 +2009,7 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 8 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i64 8 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64> @@ -2146,10 +2146,10 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE6:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[NEXT_GEP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = zext <16 x i8> [[WIDE_LOAD4]] to <16 x i64> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll index bd9fae6cd610b..80edfb5f0b6ff 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll @@ -20,7 +20,7 @@ define i32 @partial_reduce_with_non_constant_start_value(ptr %src, i32 %rdx.star ; IC2-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], %[[VECTOR_BODY]] ] ; IC2-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], %[[VECTOR_BODY]] ] ; IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 +; IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; IC2-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; IC2-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -73,9 +73,9 @@ define i32 @partial_reduce_with_non_constant_start_value(ptr %src, i32 %rdx.star ; IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], %[[VECTOR_BODY]] ] ; IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], %[[VECTOR_BODY]] ] ; IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 -; IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 32 -; IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 48 +; IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 +; IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32 +; IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 48 ; IC4-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll index 672d19b1edeba..a439f5189794a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll @@ -16,13 +16,13 @@ define i32 @not_dotp(ptr %a, ptr %b) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll index 6dae09ef97e1c..1f5d5f0ea218e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll @@ -45,11 +45,11 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP7]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP7]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP14]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP14]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP14]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll index 46ec858d7455c..dd2fe09a9e593 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll @@ -41,7 +41,7 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -134,7 +134,7 @@ define i32 @zext_add_reduc_i8_i32_neon(ptr %a) #2 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -223,7 +223,7 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i64> @@ -317,7 +317,7 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i32 8 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i64 8 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64> @@ -413,9 +413,9 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 32 -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP1]], i64 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 @@ -703,9 +703,9 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 32 -; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32 +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP1]], i64 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 @@ -807,7 +807,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -921,7 +921,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6]] = add <16 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]] @@ -1042,7 +1042,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE2:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 ; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP6]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32> @@ -1156,9 +1156,9 @@ define i64 @sext_reduction_i32_to_i64(ptr %arr, i64 %n) #1 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 -; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 8 -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 12 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 4 +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 8 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 12 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll index 5355a9772ef10..73dbefeb10413 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll @@ -15,7 +15,7 @@ define void @cost_hoisted_vector_code(ptr %p, float %arg) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[P]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP1]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index 68cfc659e1e94..fceab6f823d5a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -229,9 +229,9 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2) ; CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8 ; CHECK-NEXT: store ptr null, ptr [[TMP17]], align 8 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr ptr, ptr [[DST2]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[TMP12]], i32 2 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr ptr, ptr [[TMP12]], i32 4 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr ptr, ptr [[TMP12]], i32 6 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 6 ; CHECK-NEXT: store <2 x ptr> [[TMP0]], ptr [[TMP12]], align 8 ; CHECK-NEXT: store <2 x ptr> [[TMP1]], ptr [[TMP13]], align 8 ; CHECK-NEXT: store <2 x ptr> [[TMP2]], ptr [[TMP14]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index 1596b60f48567..bf4ab32fbf9e4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -142,7 +142,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16 +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 16 ; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]] ; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1, !alias.scope [[META9]], !noalias [[META6]] ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll index 76a7536501bd6..389f91f878534 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll @@ -29,17 +29,17 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { ; CHECK-CA510: [[VECTOR_BODY]]: ; CHECK-CA510-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-CA510-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP2]] -; CHECK-CA510-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 4 +; CHECK-CA510-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 4 ; CHECK-CA510-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-CA510-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-CA510-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP2]] -; CHECK-CA510-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 4 +; CHECK-CA510-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 4 ; CHECK-CA510-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP6]], align 4 ; CHECK-CA510-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; CHECK-CA510-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD]] ; CHECK-CA510-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD5]] ; CHECK-CA510-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[TMP2]] -; CHECK-CA510-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i32 4 +; CHECK-CA510-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 4 ; CHECK-CA510-NEXT: store <4 x float> [[TMP9]], ptr [[TMP11]], align 4 ; CHECK-CA510-NEXT: store <4 x float> [[TMP10]], ptr [[TMP13]], align 4 ; CHECK-CA510-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 8 @@ -93,17 +93,17 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { ; CHECK-CA520: [[VECTOR_BODY]]: ; CHECK-CA520-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-CA520-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP2]] -; CHECK-CA520-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 4 +; CHECK-CA520-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 4 ; CHECK-CA520-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-CA520-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-CA520-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP2]] -; CHECK-CA520-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 4 +; CHECK-CA520-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 4 ; CHECK-CA520-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP6]], align 4 ; CHECK-CA520-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; CHECK-CA520-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD]] ; CHECK-CA520-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD5]] ; CHECK-CA520-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[TMP2]] -; CHECK-CA520-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i32 4 +; CHECK-CA520-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 4 ; CHECK-CA520-NEXT: store <4 x float> [[TMP9]], ptr [[TMP11]], align 4 ; CHECK-CA520-NEXT: store <4 x float> [[TMP10]], ptr [[TMP13]], align 4 ; CHECK-CA520-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 8 @@ -157,17 +157,17 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { ; CHECK-CA320: [[VECTOR_BODY]]: ; CHECK-CA320-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-CA320-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]] -; CHECK-CA320-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-CA320-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-CA320-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-CA320-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-CA320-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]] -; CHECK-CA320-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i32 4 +; CHECK-CA320-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 4 ; CHECK-CA320-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-CA320-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-CA320-NEXT: [[TMP6:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD]] ; CHECK-CA320-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD5]] ; CHECK-CA320-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX]] -; CHECK-CA320-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i32 4 +; CHECK-CA320-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 4 ; CHECK-CA320-NEXT: store <4 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-CA320-NEXT: store <4 x float> [[TMP7]], ptr [[TMP9]], align 4 ; CHECK-CA320-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll index 2a19402347e40..6eb8242bf7975 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll @@ -178,9 +178,9 @@ define void @test_interleave_store_one_constant(ptr noalias %src, ptr noalias %d ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP13]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP13]], i32 4 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP13]], i32 6 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP13]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP13]], i64 6 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP13]], align 8 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP15]], align 8 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP16]], align 8 @@ -323,9 +323,9 @@ define void @single_fmul_used_by_each_member(ptr noalias %A, ptr noalias %B, ptr ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 2 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 4 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 6 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i64 2 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i64 4 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i64 6 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP23]], align 8 ; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <2 x double>, ptr [[TMP25]], align 8 ; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <2 x double>, ptr [[TMP26]], align 8 @@ -456,8 +456,9 @@ define void @test_interleave_after_narrowing(i32 %n, ptr %x, ptr noalias %y) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll index 46b0ebdd2fa62..99c735f777b66 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll @@ -88,7 +88,7 @@ define void @load_store_interleave_group_block_invar_cond(ptr noalias %data, ptr ; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE11]] ; VF2IC2: [[PRED_STORE_CONTINUE11]]: ; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST_1]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 2 +; VF2IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 2 ; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP5]], align 1 ; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP6]], align 1 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -199,7 +199,7 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr % ; VF2IC2-NEXT: [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> ; VF2IC2-NEXT: store <4 x i64> [[INTERLEAVED_VEC5]], ptr [[TMP4]], align 8 ; VF2IC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 2 +; VF2IC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 2 ; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP7]], align 1 ; VF2IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP8]], align 1 ; VF2IC2-NEXT: [[TMP9:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll index d290f2d4f5bc3..b14b1783c97e3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll @@ -62,7 +62,7 @@ define void @test_2xi64_with_wide_load(ptr noalias %data, ptr noalias %factor) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = shl nsw i64 [[INDEX]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll index 44b4e5a8c2bc7..4ede21040f393 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll @@ -17,9 +17,9 @@ define void @test_remove_vector_loop_region_epilogue(ptr %dst, i1 %c) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i32 16 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i32 32 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i32 48 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 48 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[DST]], align 4 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 4 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP3]], align 4 @@ -30,7 +30,7 @@ define void @test_remove_vector_loop_region_epilogue(ptr %dst, i1 %c) { ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF0:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] @@ -49,7 +49,7 @@ define void @test_remove_vector_loop_region_epilogue(ptr %dst, i1 %c) { ; CHECK-NEXT: store i8 0, ptr [[GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[TC]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -69,7 +69,8 @@ exit: ret void } ;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[PROF0]] = !{!"branch_weights", i32 8, i32 56} +; CHECK: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll index 2abc787061b53..ec874d0b48030 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll @@ -11,14 +11,14 @@ define void @vector_reverse_f64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-LABEL: vector_reverse_f64 ; CHECK-LABEL: vector.body -; CHECK: %[[GEP:.*]] = getelementptr inbounds double, ptr %{{.*}}, i32 0 -; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds double, ptr %[[GEP]], i32 -7 +; CHECK: %[[GEP:.*]] = getelementptr inbounds double, ptr %{{.*}}, i64 0 +; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds double, ptr %[[GEP]], i64 -7 ; CHECK-NEXT: %[[WIDE:.*]] = load <8 x double>, ptr %[[GEP1]], align 8 ; CHECK-NEXT: %[[REVERSE:.*]] = shufflevector <8 x double> %[[WIDE]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: %[[FADD:.*]] = fadd <8 x double> %[[REVERSE]] ; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds double, ptr {{.*}}, i64 {{.*}} -; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds double, ptr %[[GEP2]], i32 0 -; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds double, ptr %[[GEP3]], i32 -7 +; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds double, ptr %[[GEP2]], i64 0 +; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds double, ptr %[[GEP3]], i64 -7 ; CHECK-NEXT: %[[REVERSE6:.*]] = shufflevector <8 x double> %[[FADD]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: store <8 x double> %[[REVERSE6]], ptr %[[GEP4]], align 8 @@ -44,14 +44,14 @@ for.body: ; preds = %entry, %for.body define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-LABEL: vector_reverse_i64 ; CHECK-LABEL: vector.body -; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, ptr %{{.*}}, i32 0 -; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds i64, ptr %[[GEP]], i32 -7 +; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, ptr %{{.*}}, i64 0 +; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds i64, ptr %[[GEP]], i64 -7 ; CHECK-NEXT: %[[WIDE:.*]] = load <8 x i64>, ptr %[[GEP1]], align 8 ; CHECK-NEXT: %[[REVERSE:.*]] = shufflevector <8 x i64> %[[WIDE]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: %[[FADD:.*]] = add <8 x i64> %[[REVERSE]] ; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i64, ptr {{.*}}, i64 {{.*}} -; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds i64, ptr %[[GEP2]], i32 0 -; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds i64, ptr %[[GEP3]], i32 -7 +; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds i64, ptr %[[GEP2]], i64 0 +; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds i64, ptr %[[GEP3]], i64 -7 ; CHECK-NEXT: %[[REVERSE6:.*]] = shufflevector <8 x i64> %[[FADD]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: store <8 x i64> %[[REVERSE6]], ptr %[[GEP4]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll index 7afa8ce998121..e05332abcee61 100644 --- a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll @@ -22,7 +22,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index abbd176a1df6e..478c9c1141949 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -51,17 +51,17 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[STEP_ADD_10:%.*]] = add <2 x i64> [[STEP_ADD_9]], splat (i64 2) ; CHECK-NEXT: [[STEP_ADD_11:%.*]] = add <2 x i64> [[STEP_ADD_10]], splat (i64 2) ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 6 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 10 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 12 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 14 -; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 18 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 20 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 22 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 6 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 8 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 10 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 12 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 14 +; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 18 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 20 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 22 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <2 x i8>, ptr [[TMP12]], align 1 ; CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <2 x i8>, ptr [[TMP13]], align 1 @@ -193,6 +193,7 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[CMP_I166_I:%.*]] = icmp ult ptr [[PTR_IV]], [[END]] ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[IV]], [[N]] ; CHECK-NEXT: [[AND:%.*]] = select i1 [[CMP_I166_I]], i1 [[CMP2]], i1 false +; CHECK-NEXT: br i1 [[AND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP52]], %[[MIDDLE_BLOCK]] ], [ [[TMP55]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[RES:%.*]] = icmp eq i64 [[RED_NEXT_LCSSA]], 0 @@ -226,4 +227,6 @@ exit: ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[PROF3]] = !{!"branch_weights", i32 2, i32 22} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index 7677c9666455a..f1fbf1dd5d942 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -22,13 +22,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-TWO-CHECK: [[VECTOR_BODY]]: ; VF-TWO-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[INDEX]] -; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 4 +; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 8 +; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 12 +; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 16 +; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 20 +; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 24 +; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 28 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP17]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP18]], align 4 @@ -38,13 +38,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP22]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP23]], align 4 ; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[INDEX]] -; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 4 +; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 8 +; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 12 +; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 16 +; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 20 +; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 24 +; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 28 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP24]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP33]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4 @@ -62,13 +62,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-TWO-CHECK-NEXT: [[TMP46:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD15]] ; VF-TWO-CHECK-NEXT: [[TMP47:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[WIDE_LOAD16]] ; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[INDEX]] -; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 4 +; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 8 +; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 12 +; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 16 +; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 20 +; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 24 +; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 28 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP40]], ptr [[TMP48]], align 4 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP41]], ptr [[TMP57]], align 4 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP42]], ptr [[TMP58]], align 4 @@ -124,13 +124,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-FOUR-CHECK: [[VECTOR_BODY]]: ; VF-FOUR-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[INDEX]] -; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 4 -; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 8 -; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 12 -; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 16 -; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 20 -; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 24 -; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 4 +; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 8 +; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 12 +; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 16 +; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 20 +; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 24 +; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 28 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP17]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP18]], align 4 @@ -140,13 +140,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP22]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP23]], align 4 ; VF-FOUR-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[INDEX]] -; VF-FOUR-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 4 -; VF-FOUR-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 8 -; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 12 -; VF-FOUR-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 16 -; VF-FOUR-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 20 -; VF-FOUR-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 24 -; VF-FOUR-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 4 +; VF-FOUR-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 8 +; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 12 +; VF-FOUR-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 16 +; VF-FOUR-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 20 +; VF-FOUR-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 24 +; VF-FOUR-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 28 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP24]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP33]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4 @@ -164,13 +164,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-FOUR-CHECK-NEXT: [[TMP46:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD15]] ; VF-FOUR-CHECK-NEXT: [[TMP47:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[WIDE_LOAD16]] ; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[INDEX]] -; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 4 -; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 8 -; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 12 -; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 16 -; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 20 -; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 24 -; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 4 +; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 8 +; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 12 +; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 16 +; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 20 +; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 24 +; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 28 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP40]], ptr [[TMP48]], align 4 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP41]], ptr [[TMP57]], align 4 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP42]], ptr [[TMP58]], align 4 @@ -261,22 +261,22 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP24]], [[N]] ; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = sext i32 [[TMP32]] to i64 ; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP40]] -; VF-TWO-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 0 -; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP56]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -4 -; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP58]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -8 -; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP60]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -12 -; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP62]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -16 -; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP64]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -20 -; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP66]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -24 -; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP68]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -28 -; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP70]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 0 +; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -4 +; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -8 +; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -12 +; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -16 +; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -20 +; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -24 +; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -28 +; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4 @@ -302,13 +302,13 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00) ; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00) ; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 4 +; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 8 +; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 12 +; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 16 +; VF-TWO-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 20 +; VF-TWO-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 24 +; VF-TWO-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 28 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP72]], ptr [[TMP80]], align 4 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP73]], ptr [[TMP89]], align 4 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP74]], ptr [[TMP90]], align 4 @@ -340,8 +340,8 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], [[N]] ; VF-TWO-CHECK-NEXT: [[TMP101:%.*]] = sext i32 [[TMP100]] to i64 ; VF-TWO-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP101]] -; VF-TWO-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i32 0 -; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -1 +; VF-TWO-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 0 +; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP50]], i64 -1 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <2 x float>, ptr [[TMP104]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <2 x float> [[WIDE_LOAD23]], <2 x float> poison, <2 x i32> ; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <2 x float> [[REVERSE24]], splat (float 1.000000e+00) @@ -384,22 +384,22 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-FOUR-CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP24]], [[N]] ; VF-FOUR-CHECK-NEXT: [[TMP40:%.*]] = sext i32 [[TMP32]] to i64 ; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP40]] -; VF-FOUR-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 0 -; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP56]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -4 -; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP58]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -8 -; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP60]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -12 -; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP62]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -16 -; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP64]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -20 -; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP66]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -24 -; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP68]], i32 -3 -; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -28 -; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP70]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 0 +; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -4 +; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -8 +; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -12 +; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -16 +; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -20 +; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -24 +; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 -3 +; VF-FOUR-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -28 +; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4 @@ -425,13 +425,13 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00) ; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00) ; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4 -; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 8 -; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 12 -; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 16 -; VF-FOUR-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 20 -; VF-FOUR-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 24 -; VF-FOUR-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 4 +; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 8 +; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 12 +; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 16 +; VF-FOUR-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 20 +; VF-FOUR-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 24 +; VF-FOUR-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 28 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP72]], ptr [[TMP80]], align 4 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP73]], ptr [[TMP89]], align 4 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP74]], ptr [[TMP90]], align 4 @@ -463,8 +463,8 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-FOUR-CHECK-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], [[N]] ; VF-FOUR-CHECK-NEXT: [[TMP101:%.*]] = sext i32 [[TMP100]] to i64 ; VF-FOUR-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP101]] -; VF-FOUR-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i32 0 -; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -3 +; VF-FOUR-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 0 +; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP50]], i64 -3 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, ptr [[TMP104]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x float> [[WIDE_LOAD23]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <4 x float> [[REVERSE24]], splat (float 1.000000e+00) diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll index d82a3cde4639a..dc9c154b3fe05 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll @@ -34,13 +34,13 @@ define void @test(ptr %arr, i32 %len) { ; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[ARR]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 6 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 8 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 10 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 12 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 14 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 14 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x double>, ptr [[TMP6]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll index 8d3026e63748a..1ae1ba6795c01 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll @@ -87,13 +87,13 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; FIXED-V-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-V-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[WIDE_LOAD]] to <8 x i32> ; FIXED-V-NEXT: [[TMP4:%.*]] = sext <8 x i8> [[WIDE_LOAD2]] to <8 x i32> ; FIXED-V-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-V-NEXT: [[TMP8:%.*]] = sext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -123,11 +123,11 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP8:%.*]] = sext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -280,13 +280,13 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; FIXED-V-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-V-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i32> ; FIXED-V-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD2]] to <8 x i32> ; FIXED-V-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-V-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -316,11 +316,11 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -473,13 +473,13 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; FIXED-V-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-V-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i32> ; FIXED-V-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD2]] to <8 x i32> ; FIXED-V-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-V-NEXT: [[TMP8:%.*]] = sext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -509,11 +509,11 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP9:%.*]] = sext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -665,13 +665,13 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 { ; FIXED-V-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; FIXED-V-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-V-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[WIDE_LOAD]] to <8 x i32> ; FIXED-V-NEXT: [[TMP4:%.*]] = sext <8 x i8> [[WIDE_LOAD2]] to <8 x i32> ; FIXED-V-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-V-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-V-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-V-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> @@ -701,11 +701,11 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; FIXED-ZVQDOTQ-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8 +; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 ; FIXED-ZVQDOTQ-NEXT: [[TMP9:%.*]] = zext <8 x i8> [[WIDE_LOAD3]] to <8 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll index 735fb769de8b9..671a929e6fa35 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll @@ -69,7 +69,7 @@ define i32 @sub(ptr %a, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP4]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP3]]) ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -116,7 +116,7 @@ define i32 @addsub(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP6]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP5]]) ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -166,7 +166,7 @@ define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -212,7 +212,7 @@ define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32( [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -258,7 +258,7 @@ define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -305,7 +305,7 @@ define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -353,7 +353,7 @@ define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -400,7 +400,7 @@ define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -444,7 +444,7 @@ define half @fadd_fast_half_zvfh(ptr noalias nocapture readonly %a, i64 %n) "tar ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -483,14 +483,14 @@ define half @fadd_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) " ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x half>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x half>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2]] = fadd fast <16 x half> [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP3]] = fadd fast <16 x half> [[WIDE_LOAD2]], [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x half> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[BIN_RDX]]) @@ -508,7 +508,7 @@ define half @fadd_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) " ; CHECK-NEXT: [[ADD]] = fadd fast half [[TMP6]], [[SUM_07]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi half [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret half [[ADD_LCSSA]] @@ -545,14 +545,14 @@ define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) "targ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x bfloat>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x bfloat>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2]] = fadd fast <16 x bfloat> [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP3]] = fadd fast <16 x bfloat> [[WIDE_LOAD2]], [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x bfloat> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> [[BIN_RDX]]) @@ -570,7 +570,7 @@ define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) "targ ; CHECK-NEXT: [[ADD]] = fadd fast bfloat [[TMP6]], [[SUM_07]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi bfloat [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret bfloat [[ADD_LCSSA]] @@ -615,7 +615,7 @@ define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call float @llvm.vector.reduce.fmin.nxv4f32( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -661,7 +661,7 @@ define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) # ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call half @llvm.vector.reduce.fmin.nxv8f16( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -707,7 +707,7 @@ define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call bfloat @llvm.vector.reduce.fmin.nxv8bf16( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -755,7 +755,7 @@ define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -801,7 +801,7 @@ define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) # ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call fast half @llvm.vector.reduce.fmax.nxv8f16( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -847,7 +847,7 @@ define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call fast bfloat @llvm.vector.reduce.fmax.nxv8bf16( [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -891,14 +891,14 @@ define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2]] = mul <8 x i32> [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP3]] = mul <8 x i32> [[WIDE_LOAD2]], [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <8 x i32> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[BIN_RDX]]) @@ -916,7 +916,7 @@ define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[MUL]] = mul nsw i32 [[TMP6]], [[SUM_07]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[MUL_LCSSA]] @@ -963,7 +963,7 @@ define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture re ; CHECK-NEXT: [[TMP5]] = mul <8 x i32> [[WIDE_LOAD1]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -986,7 +986,7 @@ define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture re ; CHECK-NEXT: [[MUL]] = mul nsw i32 [[TMP9]], [[SUM]] ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[MUL_LCSSA]] @@ -1036,7 +1036,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP16:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -1084,7 +1084,7 @@ define half @fmuladd_f16_zvfh(ptr %a, ptr %b, i64 %n) "target-features"="+zvfh" ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP16:%.*]] = call reassoc half @llvm.vector.reduce.fadd.nxv8f16(half 0xH8000, [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -1128,18 +1128,18 @@ define half @fmuladd_f16_zvfhmin(ptr %a, ptr %b, i64 %n) "target-features"="+zvf ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x half> [ , %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x half> [ splat (half 0xH8000), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x half>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x half>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds half, ptr [[TMP2]], i32 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds half, ptr [[TMP2]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x half>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x half>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD]], <16 x half> [[WIDE_LOAD3]], <16 x half> [[VEC_PHI]]) ; CHECK-NEXT: [[TMP5]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD2]], <16 x half> [[WIDE_LOAD4]], <16 x half> [[VEC_PHI1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <16 x half> [[TMP5]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> [[BIN_RDX]]) @@ -1159,7 +1159,7 @@ define half @fmuladd_f16_zvfhmin(ptr %a, ptr %b, i64 %n) "target-features"="+zvf ; CHECK-NEXT: [[MULADD]] = tail call reassoc half @llvm.fmuladd.f16(half [[TMP8]], half [[TMP9]], half [[SUM_07]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi half [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret half [[MULADD_LCSSA]] @@ -1198,18 +1198,18 @@ define bfloat @fmuladd_bf16(ptr %a, ptr %b, i64 %n) "target-features"="+zvfbfmin ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x bfloat> [ , %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x bfloat> [ splat (bfloat 0xR8000), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x bfloat>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x bfloat>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds bfloat, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds bfloat, ptr [[TMP2]], i32 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds bfloat, ptr [[TMP2]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x bfloat>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x bfloat>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD]], <16 x bfloat> [[WIDE_LOAD3]], <16 x bfloat> [[VEC_PHI]]) ; CHECK-NEXT: [[TMP5]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD2]], <16 x bfloat> [[WIDE_LOAD4]], <16 x bfloat> [[VEC_PHI1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <16 x bfloat> [[TMP5]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = call reassoc bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR8000, <16 x bfloat> [[BIN_RDX]]) @@ -1229,7 +1229,7 @@ define bfloat @fmuladd_bf16(ptr %a, ptr %b, i64 %n) "target-features"="+zvfbfmin ; CHECK-NEXT: [[MULADD]] = tail call reassoc bfloat @llvm.fmuladd.bf16(bfloat [[TMP8]], bfloat [[TMP9]], bfloat [[SUM_07]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi bfloat [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret bfloat [[MULADD_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll index b9a4e97cd9f24..cc1b2380bc532 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll @@ -108,7 +108,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD2]]) @@ -117,7 +117,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP5]] = mul i32 [[VEC_PHI1]], [[TMP4]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 8 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP5]], [[MUL]] ; IF-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_RND_UP]], [[N_VEC]] @@ -134,7 +134,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[MUL1]] = mul nsw i32 [[TMP0]], [[RDX1]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL1]], [[FOR_BODY1]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret i32 [[MUL_LCSSA]] @@ -152,7 +152,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 ; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]]) @@ -219,7 +219,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -303,7 +303,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -387,7 +387,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -471,7 +471,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -557,7 +557,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -643,7 +643,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -729,7 +729,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -815,7 +815,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -895,7 +895,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP8:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD2]]) @@ -904,7 +904,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP5]] = fmul reassoc float [[VEC_PHI1]], [[TMP4]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 8 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[BIN_RDX:%.*]] = fmul reassoc float [[TMP5]], [[MUL]] ; IF-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_RND_UP]], [[N_VEC]] @@ -921,7 +921,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MUL1]] = fmul reassoc float [[TMP0]], [[RDX1]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP15:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL1]], [[FOR_BODY1]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MUL_LCSSA]] @@ -939,7 +939,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; NO-VP-NEXT: [[TMP6:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]]) @@ -1007,7 +1007,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -1095,7 +1095,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -1179,14 +1179,14 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI2:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI2]], <8 x float> [[WIDE_LOAD3]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[TMP3]], <8 x float> [[TMP4]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]]) @@ -1204,7 +1204,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MIN_LCSSA]] @@ -1224,7 +1224,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP5]], align 4 ; NO-VP-NEXT: [[TMP6]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) @@ -1287,14 +1287,14 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI2:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI2]], <8 x float> [[WIDE_LOAD3]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[TMP3]], <8 x float> [[TMP4]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]]) @@ -1312,7 +1312,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP21:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MAX_LCSSA]] @@ -1332,7 +1332,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP5]], align 4 ; NO-VP-NEXT: [[TMP6]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) @@ -1401,7 +1401,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: for.end: @@ -1492,7 +1492,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP16]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] @@ -1584,7 +1584,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP16]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll index 7179e7dc48c8d..d1a2303e35e68 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll @@ -110,14 +110,14 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ [[TMP9]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 8 +; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 8 ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[TMP5]] = mul <8 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP4]] = mul <8 x i32> [[WIDE_LOAD2]], [[VEC_PHI1]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP6:%.*]] = mul <8 x i32> [[TMP4]], [[TMP5]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP6]]) @@ -135,7 +135,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[MUL]] = mul nsw i32 [[TMP0]], [[RDX]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], [[FOR_BODY1]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret i32 [[MUL_LCSSA]] @@ -154,7 +154,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 8 +; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 ; NO-VP-NEXT: [[TMP5]] = mul <8 x i32> [[WIDE_LOAD]], [[VEC_PHI]] @@ -221,7 +221,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -308,7 +308,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32( [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -395,7 +395,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -484,7 +484,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -577,7 +577,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -670,7 +670,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -763,7 +763,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -854,7 +854,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -937,14 +937,14 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[TMP9]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ splat (float 1.000000e+00), [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 8 +; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 8 ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[TMP5]] = fmul reassoc <8 x float> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP4]] = fmul reassoc <8 x float> [[WIDE_LOAD2]], [[VEC_PHI1]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP6:%.*]] = fmul reassoc <8 x float> [[TMP4]], [[TMP5]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> [[TMP6]]) @@ -962,7 +962,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MUL]] = fmul reassoc float [[TMP0]], [[RDX]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP15:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL]], [[FOR_BODY1]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MUL_LCSSA]] @@ -981,7 +981,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ splat (float 1.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP4]], align 4 ; NO-VP-NEXT: [[TMP5]] = fmul reassoc <8 x float> [[WIDE_LOAD]], [[VEC_PHI]] @@ -1050,7 +1050,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -1143,7 +1143,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -1230,14 +1230,14 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_MASKED_LOAD]]) ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP3]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]]) @@ -1255,7 +1255,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MIN_LCSSA]] @@ -1275,7 +1275,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 +; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; NO-VP-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) @@ -1338,14 +1338,14 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_MASKED_LOAD]]) ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP3]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]]) @@ -1363,7 +1363,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP21:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MAX_LCSSA]] @@ -1383,7 +1383,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 +; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; NO-VP-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) @@ -1452,7 +1452,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP20:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP17]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -1544,7 +1544,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP15]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] @@ -1636,7 +1636,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP15]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index 7b0ac78fb365c..13990000585ea 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -331,20 +331,20 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] ; NO-VP-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1024, [[INDEX]] ; NO-VP-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0 -; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 -15 +; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 +; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -15 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; NO-VP-NEXT: [[REVERSE:%.*]] = shufflevector <16 x i8> [[WIDE_LOAD]], <16 x i8> poison, <16 x i32> ; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B:%.*]], <16 x i8> [[REVERSE]] ; NO-VP-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> align 1 [[TMP3]], <16 x i1> splat (i1 true), <16 x i8> poison) ; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0 -; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 -15 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 0 +; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i64 -15 ; NO-VP-NEXT: [[REVERSE1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_GATHER]], <16 x i8> poison, <16 x i32> ; NO-VP-NEXT: store <16 x i8> [[REVERSE1]], ptr [[TMP6]], align 1 ; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0 -; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 -15 +; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 0 +; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -15 ; NO-VP-NEXT: store <16 x i8> [[REVERSE1]], ptr [[TMP9]], align 1 ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 0375f0a8fd132..03377f10c2283 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -44,7 +44,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -160,7 +160,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -294,7 +294,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer ; FIXEDLEN-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER1]], <4 x i64> zeroinitializer ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 +; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -417,7 +417,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -517,7 +517,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 ; FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 +; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -630,7 +630,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; FIXEDLEN-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 7 ; FIXEDLEN-NEXT: store i64 [[TMP4]], ptr [[B]], align 8 ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 4 +; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -756,7 +756,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> align 8 [[BROADCAST_SPLAT2]], <4 x i1> [[TMP1]]) ; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> align 8 [[BROADCAST_SPLAT2]], <4 x i1> [[TMP2]]) ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 +; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -878,7 +878,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 1 ; FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 +; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 0287645d9d7f9..94ebf01509ec2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -126,9 +126,9 @@ define void @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwi ; CHECK-NEXT: [[TMP19:%.*]] = sitofp <2 x i64> [[TMP10]] to <2 x float> ; CHECK-NEXT: [[TMP20:%.*]] = sitofp <2 x i64> [[TMP11]] to <2 x float> ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 4 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 6 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 6 ; CHECK-NEXT: store <2 x float> [[TMP12]], ptr [[TMP13]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP18]], ptr [[TMP15]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP19]], ptr [[TMP16]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index b3c45a565a8fe..c70a3aa249919 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -385,7 +385,7 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[SRC_3]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP25]], align 8, !alias.scope [[META9:![0-9]+]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq <2 x i64> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP27:%.*]] = and <2 x i1> [[TMP23]], [[TMP26]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll index 3165422dcc539..d19ae728cc913 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll @@ -23,8 +23,8 @@ define i1 @fn(ptr %nno) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i32, ptr [[NNO]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP23]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 -3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP23]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP6]], <4 x i1> [[REVERSE]], <4 x i32> poison) ; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index 6e940ee58fabe..a1b92e0658bd3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -161,9 +161,9 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[STEP_ADD_2]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[STEP_ADD_3]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 16 -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 32 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 48 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i64 16 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i64 32 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i64 48 ; CHECK-NEXT: store <16 x i16> [[TMP4]], ptr [[TMP8]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP10]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP21]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 12b8d1e15b523..84579d97b38e2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -26,7 +26,7 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i8> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD1]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32> @@ -34,7 +34,7 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP9:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = add <16 x i8> [[WIDE_LOAD1]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -119,7 +119,7 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[VECTOR_RECUR4:%.*]] = phi <16 x i8> [ [[VECTOR_RECUR_INIT3]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD5]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32> @@ -135,7 +135,7 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP17:%.*]] = add <16 x i8> [[TMP15]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18:%.*]] = add <16 x i8> [[TMP16]], [[WIDE_LOAD5]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i32 16 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP19]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP18]], ptr [[TMP22]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index 39217e51ab117..41249c595f9eb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -41,9 +41,9 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[STEP_ADD]], splat (float 4.000000e+00) ; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[STEP_ADD2]], splat (float 4.000000e+00) ; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 16 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 24 +; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 16 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 24 ; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND]], ptr [[TMP1]], align 4 ; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD]], ptr [[TMP2]], align 4 ; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD2]], ptr [[TMP3]], align 4 @@ -208,9 +208,9 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[STEP_ADD]], splat (double 1.200000e+01) ; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[STEP_ADD_2]], splat (double 1.200000e+01) ; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP1]], i32 4 -; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[TMP1]], i32 8 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP1]], i32 12 +; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP1]], i64 4 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 8 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP1]], i64 12 ; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], ptr [[TMP1]], align 8 ; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], ptr [[TMP2]], align 8 ; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], ptr [[TMP3]], align 8 @@ -326,9 +326,9 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], splat (float 3.360000e+02) ; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], splat (float 3.360000e+02) ; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDEX]] -; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 16 -; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 24 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 16 +; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 24 ; AUTO_VEC-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; AUTO_VEC-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; AUTO_VEC-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP4]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll index a0637ceb53cf2..137c09b653f2c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll @@ -22,17 +22,17 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD5]], <4 x float> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -99,17 +99,17 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD5]], <4 x float> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -176,17 +176,17 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD5]], <2 x double> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -253,17 +253,17 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD5]], <2 x double> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll index 877fcd4d638eb..34a99b07ee93e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll @@ -75,7 +75,7 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; SSE-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] ; SSE-NEXT: [[VEC_PHI1:%.*]] = phi <2 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ] ; SSE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[ARR:%.*]], i32 [[INDEX]] -; SSE-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i32 2 +; SSE-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i64 2 ; SSE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; SSE-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; SSE-NEXT: [[TMP6:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]], splat (double 4.200000e+01) @@ -106,9 +106,9 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; AVX-NEXT: [[VEC_PHI2:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI8:%.*]], [[VECTOR_BODY]] ] ; AVX-NEXT: [[VEC_PHI3:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI9:%.*]], [[VECTOR_BODY]] ] ; AVX-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[ARR:%.*]], i32 [[INDEX]] -; AVX-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[TMP4]], i32 4 -; AVX-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[TMP4]], i32 8 -; AVX-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP4]], i32 12 +; AVX-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[TMP4]], i64 4 +; AVX-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[TMP4]], i64 8 +; AVX-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP4]], i64 12 ; AVX-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP4]], align 8 ; AVX-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x double>, ptr [[TMP9]], align 8 ; AVX-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x double>, ptr [[TMP10]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 4028dd87e34b3..04bff3c393f62 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -125,13 +125,13 @@ define void @multiple_truncated_ivs_with_wide_uses(i1 %c, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C]], <4 x i16> [[VEC_IND]], <4 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C]], <4 x i16> [[STEP_ADD]], <4 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP4]], align 2, !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]] -; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP7]], align 2, !alias.scope [[META6]], !noalias [[META9]] +; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP3]], align 2, !alias.scope [[META6]], !noalias [[META9]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP8]], i64 4 ; CHECK-NEXT: store <4 x i32> [[VEC_IND3]], ptr [[TMP8]], align 4, !alias.scope [[META9]] -; CHECK-NEXT: store <4 x i32> [[STEP_ADD4]], ptr [[TMP11]], align 4, !alias.scope [[META9]] +; CHECK-NEXT: store <4 x i32> [[STEP_ADD4]], ptr [[TMP5]], align 4, !alias.scope [[META9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], splat (i32 4) @@ -192,7 +192,7 @@ define void @truncated_ivs_with_wide_and_scalar_uses(i1 %c, ptr %dst) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[VEC_IND]], <8 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[STEP_ADD]], <8 x i16> splat (i16 10) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[TMP3]], i32 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[TMP3]], i64 8 ; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP3]], align 2 ; CHECK-NEXT: store <8 x i16> [[TMP6]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -477,9 +477,9 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i64> [[TMP13]] to <8 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = trunc <8 x i64> [[TMP14]] to <8 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = trunc <8 x i64> [[TMP15]] to <8 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i32 8 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i32 16 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i32 24 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i64 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i64 16 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i64 24 ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP20]], ptr align 4 [[TMP16]], <8 x i1> [[TMP8]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP21]], ptr align 4 [[TMP25]], <8 x i1> [[TMP9]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr align 4 [[TMP26]], <8 x i1> [[TMP10]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll index 61f07eff768c1..d25d9f81de985 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll @@ -27,7 +27,7 @@ define i16 @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr [[TMP5]], align 2 ; CHECK-NEXT: store <4 x i16> [[TMP9]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -99,7 +99,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr [[TMP6]], align 2 ; CHECK-NEXT: store <4 x i16> [[TMP10]], ptr [[TMP9]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll index d75fd0e0023f7..ad6dfb054b726 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll @@ -32,9 +32,9 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[OFFSET_IDX2]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 16 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 32 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 48 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 32 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 48 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i16>, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i16>, ptr [[TMP2]], align 2 @@ -43,9 +43,9 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD4]], <16 x i16> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD5]], <16 x i16> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD6]], <16 x i16> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 16 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 32 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 48 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 16 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 32 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 48 ; CHECK-NEXT: store <16 x i16> [[TMP4]], ptr [[NEXT_GEP3]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP8]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP9]], align 2 @@ -160,9 +160,9 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 32 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 96 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 96 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <32 x i8>, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <32 x i8>, ptr [[TMP2]], align 2 @@ -171,9 +171,9 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD3]], <32 x i8> [[WIDE_LOAD3]], <32 x i8> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD4]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD5]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 32 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 64 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 96 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 32 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 96 ; CHECK-NEXT: store <32 x i8> [[TMP4]], ptr [[NEXT_GEP2]], align 2 ; CHECK-NEXT: store <32 x i8> [[TMP5]], ptr [[TMP8]], align 2 ; CHECK-NEXT: store <32 x i8> [[TMP6]], ptr [[TMP9]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll index b710236c026d2..751e885733f17 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -38,9 +38,9 @@ define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b) ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 16 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 32 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 48 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 48 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP2]], align 8, !alias.scope [[META0]] ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i32>, ptr [[TMP3]], align 8, !alias.scope [[META0]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll b/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll index bcb6b5c422343..a247285317a1e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll @@ -18,9 +18,9 @@ define i64 @test_pr98660(ptr %dst, i64 %N) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i32 8 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP9]], i32 16 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 24 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP9]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i64 24 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP14]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP15]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll index 6e3b2a5390948..ea3ec99cf46e1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll @@ -193,17 +193,17 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 64 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 64 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 64 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 12 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 12 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], ptr [[TMP6]], align 64 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD1]], ptr [[TMP8]], align 64 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD2]], ptr [[TMP9]], align 64 diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 6605338771c47..78363e13595cb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -39,9 +39,9 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP8]], i32 8 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP8]], i32 12 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP8]], i64 8 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP8]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4 @@ -171,9 +171,9 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP64]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP69]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP70]], align 4 @@ -718,9 +718,9 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3 ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i32 4 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i32 8 -; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i32 12 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8 +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP65]], <4 x i1> [[TMP40]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP48]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP71]], <4 x i1> [[TMP56]], <4 x i32> poison) @@ -877,9 +877,9 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP64]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP69]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP70]], align 4 @@ -1231,9 +1231,9 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -1362,9 +1362,9 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -1493,9 +1493,9 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -1633,9 +1633,9 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3 ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i32 4 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i32 8 -; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i32 12 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8 +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP65]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP70]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP71]], align 4 @@ -1793,9 +1793,9 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -1925,9 +1925,9 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -2067,9 +2067,9 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll index 2c172b2aecd16..1d0906902ad62 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll @@ -35,9 +35,9 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i64> [[TMP13]] to <8 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = trunc <8 x i64> [[TMP14]] to <8 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = trunc <8 x i64> [[TMP15]] to <8 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i32 8 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i32 16 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i32 24 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i64 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i64 16 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i64 24 ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP20]], ptr align 4 [[TMP16]], <8 x i1> [[TMP8]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP21]], ptr align 4 [[TMP25]], <8 x i1> [[TMP9]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr align 4 [[TMP26]], <8 x i1> [[TMP10]]) @@ -199,7 +199,7 @@ define void @test_scalar_cost_single_store_loop_varying_cond(ptr %dst, ptr noali ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i32> [[WIDE_VEC4]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC]], splat (i32 123) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC5]], splat (i32 123) -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[NEXT_GEP]], <4 x i1> [[TMP8]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[TMP11]], <4 x i1> [[TMP9]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index 932153a23bdbd..e4977ee642b09 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -72,9 +72,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 8 -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 -; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 24 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 8 +; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 16 +; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 24 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 @@ -84,9 +84,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 8 -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i32 16 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i32 24 +; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i64 8 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i64 16 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i64 24 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr align 4 [[TMP12]], <8 x i1> [[TMP8]], <8 x i32> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr align 4 [[TMP14]], <8 x i1> [[TMP9]], <8 x i32> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr align 4 [[TMP15]], <8 x i1> [[TMP10]], <8 x i32> poison) @@ -96,9 +96,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP19:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] ; AVX2-NEXT: [[TMP20:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] ; AVX2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i32 8 -; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i32 16 -; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i32 24 +; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i64 8 +; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 16 +; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i64 24 ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP17]], ptr align 4 [[TMP21]], <8 x i1> [[TMP8]]) ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP18]], ptr align 4 [[TMP23]], <8 x i1> [[TMP9]]) ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP19]], ptr align 4 [[TMP24]], <8 x i1> [[TMP10]]) @@ -151,9 +151,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 32 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 48 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 16 +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 32 +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 48 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4 ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 @@ -163,9 +163,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 16 -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i32 32 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i32 48 +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i64 16 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i64 32 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i64 48 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr align 4 [[TMP12]], <16 x i1> [[TMP8]], <16 x i32> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr align 4 [[TMP14]], <16 x i1> [[TMP9]], <16 x i32> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr align 4 [[TMP15]], <16 x i1> [[TMP10]], <16 x i32> poison) @@ -175,9 +175,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP19:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] ; AVX512-NEXT: [[TMP20:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] ; AVX512-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i32 16 -; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i32 32 -; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i32 48 +; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i64 16 +; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 32 +; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i64 48 ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP17]], ptr align 4 [[TMP21]], <16 x i1> [[TMP8]]) ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP18]], ptr align 4 [[TMP23]], <16 x i1> [[TMP9]]) ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP19]], ptr align 4 [[TMP24]], <16 x i1> [[TMP10]]) @@ -293,9 +293,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 8 -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 16 -; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 24 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 8 +; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 16 +; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 24 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP3]], align 4 ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP6]], align 4 @@ -305,9 +305,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 8 -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 16 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 24 +; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 8 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 16 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 24 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 4 [[TMP12]], <8 x i1> [[TMP8]], <8 x i32> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 4 [[TMP14]], <8 x i1> [[TMP9]], <8 x i32> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 4 [[TMP15]], <8 x i1> [[TMP10]], <8 x i32> poison) @@ -317,9 +317,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2-NEXT: [[TMP19:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] ; AVX2-NEXT: [[TMP20:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] ; AVX2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr addrspace(1) [[A]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 8 -; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 16 -; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 24 +; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 8 +; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 16 +; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 24 ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP17]], ptr addrspace(1) align 4 [[TMP21]], <8 x i1> [[TMP8]]) ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP18]], ptr addrspace(1) align 4 [[TMP23]], <8 x i1> [[TMP9]]) ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP19]], ptr addrspace(1) align 4 [[TMP24]], <8 x i1> [[TMP10]]) @@ -372,9 +372,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 16 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 32 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 48 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 16 +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 32 +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 48 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP3]], align 4 ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP6]], align 4 @@ -384,9 +384,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 16 -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 32 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 48 +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 16 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 32 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 48 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) align 4 [[TMP12]], <16 x i1> [[TMP8]], <16 x i32> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) align 4 [[TMP14]], <16 x i1> [[TMP9]], <16 x i32> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) align 4 [[TMP15]], <16 x i1> [[TMP10]], <16 x i32> poison) @@ -396,9 +396,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512-NEXT: [[TMP19:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] ; AVX512-NEXT: [[TMP20:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] ; AVX512-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr addrspace(1) [[A]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 16 -; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 32 -; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 48 +; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 16 +; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 32 +; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 48 ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP17]], ptr addrspace(1) align 4 [[TMP21]], <16 x i1> [[TMP8]]) ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP18]], ptr addrspace(1) align 4 [[TMP23]], <16 x i1> [[TMP9]]) ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP19]], ptr addrspace(1) align 4 [[TMP24]], <16 x i1> [[TMP10]]) @@ -524,9 +524,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 8 -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 -; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 24 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 8 +; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 16 +; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 24 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 @@ -536,9 +536,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 8 -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i32 16 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i32 24 +; AVX2-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i64 8 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i64 16 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i64 24 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr align 4 [[TMP12]], <8 x i1> [[TMP8]], <8 x float> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr align 4 [[TMP14]], <8 x i1> [[TMP9]], <8 x float> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr align 4 [[TMP15]], <8 x i1> [[TMP10]], <8 x float> poison) @@ -552,9 +552,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP23:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD9]], [[TMP19]] ; AVX2-NEXT: [[TMP24:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD10]], [[TMP20]] ; AVX2-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i32 8 -; AVX2-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i32 16 -; AVX2-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i32 24 +; AVX2-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i64 8 +; AVX2-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i64 16 +; AVX2-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i64 24 ; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP21]], ptr align 4 [[TMP25]], <8 x i1> [[TMP8]]) ; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP22]], ptr align 4 [[TMP27]], <8 x i1> [[TMP9]]) ; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP23]], ptr align 4 [[TMP28]], <8 x i1> [[TMP10]]) @@ -608,9 +608,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 32 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 48 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 16 +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 32 +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 48 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4 ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 @@ -620,9 +620,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 16 -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i32 32 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i32 48 +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i64 16 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i64 32 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i64 48 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr align 4 [[TMP12]], <16 x i1> [[TMP8]], <16 x float> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr align 4 [[TMP14]], <16 x i1> [[TMP9]], <16 x float> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr align 4 [[TMP15]], <16 x i1> [[TMP10]], <16 x float> poison) @@ -636,9 +636,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP23:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD9]], [[TMP19]] ; AVX512-NEXT: [[TMP24:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD10]], [[TMP20]] ; AVX512-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i32 16 -; AVX512-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i32 32 -; AVX512-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i32 48 +; AVX512-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i64 16 +; AVX512-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i64 32 +; AVX512-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i64 48 ; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP21]], ptr align 4 [[TMP25]], <16 x i1> [[TMP8]]) ; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP22]], ptr align 4 [[TMP27]], <16 x i1> [[TMP9]]) ; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP23]], ptr align 4 [[TMP28]], <16 x i1> [[TMP10]]) @@ -732,25 +732,25 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1: [[VECTOR_BODY]]: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; AVX1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META8:![0-9]+]] -; AVX1-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META8]] -; AVX1-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8]] -; AVX1-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META8]] +; AVX1-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8]] +; AVX1-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META8]] +; AVX1-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META8]] ; AVX1-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX1-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX1-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX1-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], splat (i32 100) ; AVX1-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 4 -; AVX1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 -; AVX1-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP10]], i32 12 +; AVX1-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i64 4 +; AVX1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 8 +; AVX1-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 12 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP10]], <4 x i1> [[TMP6]], <4 x double> poison), !alias.scope [[META11:![0-9]+]] ; AVX1-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP12]], <4 x i1> [[TMP7]], <4 x double> poison), !alias.scope [[META11]] ; AVX1-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP13]], <4 x i1> [[TMP8]], <4 x double> poison), !alias.scope [[META11]] -; AVX1-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP14]], <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META11]] +; AVX1-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP11]], <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META11]] ; AVX1-NEXT: [[TMP15:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double> ; AVX1-NEXT: [[TMP16:%.*]] = sitofp <4 x i32> [[WIDE_LOAD6]] to <4 x double> ; AVX1-NEXT: [[TMP17:%.*]] = sitofp <4 x i32> [[WIDE_LOAD7]] to <4 x double> @@ -760,13 +760,13 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1-NEXT: [[TMP21:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD10]], [[TMP17]] ; AVX1-NEXT: [[TMP22:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD11]], [[TMP18]] ; AVX1-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 4 -; AVX1-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 8 -; AVX1-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 12 +; AVX1-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i64 4 +; AVX1-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i64 8 +; AVX1-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i64 12 ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP19]], ptr align 8 [[TMP23]], <4 x i1> [[TMP6]]), !alias.scope [[META13:![0-9]+]], !noalias [[META15:![0-9]+]] -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr align 8 [[TMP25]], <4 x i1> [[TMP7]]), !alias.scope [[META13]], !noalias [[META15]] -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr align 8 [[TMP26]], <4 x i1> [[TMP8]]), !alias.scope [[META13]], !noalias [[META15]] -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr align 8 [[TMP27]], <4 x i1> [[TMP9]]), !alias.scope [[META13]], !noalias [[META15]] +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr align 8 [[TMP24]], <4 x i1> [[TMP7]]), !alias.scope [[META13]], !noalias [[META15]] +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr align 8 [[TMP25]], <4 x i1> [[TMP8]]), !alias.scope [[META13]], !noalias [[META15]] +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr align 8 [[TMP26]], <4 x i1> [[TMP9]]), !alias.scope [[META13]], !noalias [[META15]] ; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; AVX1-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -795,25 +795,25 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META12:![0-9]+]] -; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12]] -; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META12]] -; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META12]] +; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META12]] +; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META12]] +; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12]] ; AVX2-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX2-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], splat (i32 100) ; AVX2-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 4 -; AVX2-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP10]], i32 12 +; AVX2-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i64 4 +; AVX2-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 8 +; AVX2-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 12 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP10]], <4 x i1> [[TMP6]], <4 x double> poison), !alias.scope [[META15:![0-9]+]] ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP12]], <4 x i1> [[TMP7]], <4 x double> poison), !alias.scope [[META15]] ; AVX2-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP13]], <4 x i1> [[TMP8]], <4 x double> poison), !alias.scope [[META15]] -; AVX2-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP14]], <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META15]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP11]], <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META15]] ; AVX2-NEXT: [[TMP15:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double> ; AVX2-NEXT: [[TMP16:%.*]] = sitofp <4 x i32> [[WIDE_LOAD6]] to <4 x double> ; AVX2-NEXT: [[TMP17:%.*]] = sitofp <4 x i32> [[WIDE_LOAD7]] to <4 x double> @@ -823,13 +823,13 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP21:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD10]], [[TMP17]] ; AVX2-NEXT: [[TMP22:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD11]], [[TMP18]] ; AVX2-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 4 -; AVX2-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 8 -; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 12 +; AVX2-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i64 4 +; AVX2-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i64 8 +; AVX2-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i64 12 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP19]], ptr align 8 [[TMP23]], <4 x i1> [[TMP6]]), !alias.scope [[META17:![0-9]+]], !noalias [[META19:![0-9]+]] -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr align 8 [[TMP25]], <4 x i1> [[TMP7]]), !alias.scope [[META17]], !noalias [[META19]] -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr align 8 [[TMP26]], <4 x i1> [[TMP8]]), !alias.scope [[META17]], !noalias [[META19]] -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr align 8 [[TMP27]], <4 x i1> [[TMP9]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr align 8 [[TMP24]], <4 x i1> [[TMP7]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr align 8 [[TMP25]], <4 x i1> [[TMP8]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr align 8 [[TMP26]], <4 x i1> [[TMP9]]), !alias.scope [[META17]], !noalias [[META19]] ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; AVX2-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -860,25 +860,25 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 16 -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 24 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 16 +; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 24 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META12:![0-9]+]] -; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12]] -; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META12]] -; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META12]] +; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META12]] +; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META12]] +; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12]] ; AVX512-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX512-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD8]], splat (i32 100) ; AVX512-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 -; AVX512-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i32 16 -; AVX512-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP10]], i32 24 +; AVX512-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i64 8 +; AVX512-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 16 +; AVX512-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 24 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP10]], <8 x i1> [[TMP6]], <8 x double> poison), !alias.scope [[META15:![0-9]+]] ; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP12]], <8 x i1> [[TMP7]], <8 x double> poison), !alias.scope [[META15]] ; AVX512-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP13]], <8 x i1> [[TMP8]], <8 x double> poison), !alias.scope [[META15]] -; AVX512-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP14]], <8 x i1> [[TMP9]], <8 x double> poison), !alias.scope [[META15]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP11]], <8 x i1> [[TMP9]], <8 x double> poison), !alias.scope [[META15]] ; AVX512-NEXT: [[TMP15:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x double> ; AVX512-NEXT: [[TMP16:%.*]] = sitofp <8 x i32> [[WIDE_LOAD6]] to <8 x double> ; AVX512-NEXT: [[TMP17:%.*]] = sitofp <8 x i32> [[WIDE_LOAD7]] to <8 x double> @@ -888,13 +888,13 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP21:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD10]], [[TMP17]] ; AVX512-NEXT: [[TMP22:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD11]], [[TMP18]] ; AVX512-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 8 -; AVX512-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 16 -; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 24 +; AVX512-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i64 8 +; AVX512-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i64 16 +; AVX512-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i64 24 ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP19]], ptr align 8 [[TMP23]], <8 x i1> [[TMP6]]), !alias.scope [[META17:![0-9]+]], !noalias [[META19:![0-9]+]] -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP20]], ptr align 8 [[TMP25]], <8 x i1> [[TMP7]]), !alias.scope [[META17]], !noalias [[META19]] -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP21]], ptr align 8 [[TMP26]], <8 x i1> [[TMP8]]), !alias.scope [[META17]], !noalias [[META19]] -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP22]], ptr align 8 [[TMP27]], <8 x i1> [[TMP9]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP20]], ptr align 8 [[TMP24]], <8 x i1> [[TMP7]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP21]], ptr align 8 [[TMP25]], <8 x i1> [[TMP8]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP22]], ptr align 8 [[TMP26]], <8 x i1> [[TMP9]]), !alias.scope [[META17]], !noalias [[META19]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; AVX512-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 ; AVX512-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -1117,68 +1117,68 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] ; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[OFFSET_IDX]] -; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -3 -; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -4 -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -3 -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -8 -; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3 -; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -12 -; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 -; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META22:![0-9]+]] +; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -3 +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -4 +; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -3 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -8 +; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3 +; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -12 +; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3 +; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META22:![0-9]+]] ; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META22]] +; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META22]] ; AVX2-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD6]], <4 x i32> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META22]] +; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22]] ; AVX2-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD8]], <4 x i32> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META22]] +; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META22]] ; AVX2-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD10]], <4 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer ; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[REVERSE7]], zeroinitializer ; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[REVERSE9]], zeroinitializer ; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i32> [[REVERSE11]], zeroinitializer ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[IN]], i64 [[OFFSET_IDX]] -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP15]], i32 -3 -; AVX2-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP14]], i32 -4 -; AVX2-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP17]], i32 -3 -; AVX2-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP14]], i32 -8 -; AVX2-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP19]], i32 -3 -; AVX2-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP14]], i32 -12 -; AVX2-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP21]], i32 -3 +; AVX2-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP14]], i64 0 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP22]], i64 -3 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -4 +; AVX2-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i64 -3 +; AVX2-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -8 +; AVX2-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i64 -3 +; AVX2-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP14]], i64 -12 +; AVX2-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP20]], i64 -3 ; AVX2-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP10]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP16]], <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope [[META25:![0-9]+]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP15]], <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope [[META25:![0-9]+]] ; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP11]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP18]], <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope [[META25]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP17]], <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope [[META25]] ; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD15]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP12]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP20]], <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope [[META25]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP19]], <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope [[META25]] ; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD18]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP22]], <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope [[META25]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP21]], <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope [[META25]] ; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[TMP23:%.*]] = fadd <4 x double> [[REVERSE13]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[REVERSE16]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[REVERSE19]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[REVERSE22]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[OFFSET_IDX]] -; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0 -; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -3 -; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP27]], i32 -4 -; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -3 -; AVX2-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP27]], i32 -8 -; AVX2-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP32]], i32 -3 -; AVX2-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP27]], i32 -12 -; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP34]], i32 -3 +; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP27]], i64 0 +; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP35]], i64 -3 +; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP27]], i64 -4 +; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP29]], i64 -3 +; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP27]], i64 -8 +; AVX2-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP31]], i64 -3 +; AVX2-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP27]], i64 -12 +; AVX2-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP33]], i64 -3 ; AVX2-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE24]], ptr align 8 [[TMP29]], <4 x i1> [[REVERSE12]]), !alias.scope [[META27:![0-9]+]], !noalias [[META29:![0-9]+]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE24]], ptr align 8 [[TMP28]], <4 x i1> [[REVERSE12]]), !alias.scope [[META27:![0-9]+]], !noalias [[META29:![0-9]+]] ; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x double> [[TMP24]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE26]], ptr align 8 [[TMP31]], <4 x i1> [[REVERSE14]]), !alias.scope [[META27]], !noalias [[META29]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE26]], ptr align 8 [[TMP30]], <4 x i1> [[REVERSE14]]), !alias.scope [[META27]], !noalias [[META29]] ; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[TMP25]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE28]], ptr align 8 [[TMP33]], <4 x i1> [[REVERSE17]]), !alias.scope [[META27]], !noalias [[META29]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE28]], ptr align 8 [[TMP32]], <4 x i1> [[REVERSE17]]), !alias.scope [[META27]], !noalias [[META29]] ; AVX2-NEXT: [[REVERSE30:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE30]], ptr align 8 [[TMP35]], <4 x i1> [[REVERSE20]]), !alias.scope [[META27]], !noalias [[META29]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE30]], ptr align 8 [[TMP34]], <4 x i1> [[REVERSE20]]), !alias.scope [[META27]], !noalias [[META29]] ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; AVX2-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -1208,68 +1208,68 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] ; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[OFFSET_IDX]] -; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -7 -; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -8 -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -7 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -16 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -7 -; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -24 -; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -7 -; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META34:![0-9]+]] +; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -7 +; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -8 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -7 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -16 +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -7 +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -24 +; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -7 +; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META34:![0-9]+]] ; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META34]] +; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META34]] ; AVX512-NEXT: [[REVERSE7:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD6]], <8 x i32> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META34]] +; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META34]] ; AVX512-NEXT: [[REVERSE9:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD8]], <8 x i32> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META34]] +; AVX512-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META34]] ; AVX512-NEXT: [[REVERSE11:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD10]], <8 x i32> poison, <8 x i32> ; AVX512-NEXT: [[TMP10:%.*]] = icmp sgt <8 x i32> [[REVERSE]], zeroinitializer ; AVX512-NEXT: [[TMP11:%.*]] = icmp sgt <8 x i32> [[REVERSE7]], zeroinitializer ; AVX512-NEXT: [[TMP12:%.*]] = icmp sgt <8 x i32> [[REVERSE9]], zeroinitializer ; AVX512-NEXT: [[TMP13:%.*]] = icmp sgt <8 x i32> [[REVERSE11]], zeroinitializer ; AVX512-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[IN]], i64 [[OFFSET_IDX]] -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP15]], i32 -7 -; AVX512-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP14]], i32 -8 -; AVX512-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP17]], i32 -7 -; AVX512-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP14]], i32 -16 -; AVX512-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP19]], i32 -7 -; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP14]], i32 -24 -; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP21]], i32 -7 +; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP14]], i64 0 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP22]], i64 -7 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -8 +; AVX512-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i64 -7 +; AVX512-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -16 +; AVX512-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i64 -7 +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP14]], i64 -24 +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP20]], i64 -7 ; AVX512-NEXT: [[REVERSE12:%.*]] = shufflevector <8 x i1> [[TMP10]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP16]], <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope [[META37:![0-9]+]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP15]], <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope [[META37:![0-9]+]] ; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP18]], <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope [[META37]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP17]], <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope [[META37]] ; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD15]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP12]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP20]], <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope [[META37]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP19]], <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope [[META37]] ; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD18]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP13]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP22]], <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope [[META37]] +; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP21]], <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope [[META37]] ; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[TMP23:%.*]] = fadd <8 x double> [[REVERSE13]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP24:%.*]] = fadd <8 x double> [[REVERSE16]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP25:%.*]] = fadd <8 x double> [[REVERSE19]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP26:%.*]] = fadd <8 x double> [[REVERSE22]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[OFFSET_IDX]] -; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0 -; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -7 -; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP27]], i32 -8 -; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -7 -; AVX512-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP27]], i32 -16 -; AVX512-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP32]], i32 -7 -; AVX512-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP27]], i32 -24 -; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP34]], i32 -7 +; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP27]], i64 0 +; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP35]], i64 -7 +; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP27]], i64 -8 +; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP29]], i64 -7 +; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP27]], i64 -16 +; AVX512-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP31]], i64 -7 +; AVX512-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP27]], i64 -24 +; AVX512-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP33]], i64 -7 ; AVX512-NEXT: [[REVERSE24:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE24]], ptr align 8 [[TMP29]], <8 x i1> [[REVERSE12]]), !alias.scope [[META39:![0-9]+]], !noalias [[META41:![0-9]+]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE24]], ptr align 8 [[TMP28]], <8 x i1> [[REVERSE12]]), !alias.scope [[META39:![0-9]+]], !noalias [[META41:![0-9]+]] ; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE26]], ptr align 8 [[TMP31]], <8 x i1> [[REVERSE14]]), !alias.scope [[META39]], !noalias [[META41]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE26]], ptr align 8 [[TMP30]], <8 x i1> [[REVERSE14]]), !alias.scope [[META39]], !noalias [[META41]] ; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[TMP25]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE28]], ptr align 8 [[TMP33]], <8 x i1> [[REVERSE17]]), !alias.scope [[META39]], !noalias [[META41]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE28]], ptr align 8 [[TMP32]], <8 x i1> [[REVERSE17]]), !alias.scope [[META39]], !noalias [[META41]] ; AVX512-NEXT: [[REVERSE30:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE30]], ptr align 8 [[TMP35]], <8 x i1> [[REVERSE20]]), !alias.scope [[META39]], !noalias [[META41]] +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE30]], ptr align 8 [[TMP34]], <8 x i1> [[REVERSE20]]), !alias.scope [[META39]], !noalias [[META41]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; AVX512-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; AVX512-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] @@ -1332,9 +1332,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1: [[VECTOR_BODY]]: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 -; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 +; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 +; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 @@ -1348,9 +1348,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[TMP8]], zeroinitializer ; AVX1-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[TMP9]], zeroinitializer ; AVX1-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 4 -; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 8 -; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 12 +; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 4 +; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 8 +; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 12 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP13]], <4 x i1> [[TMP14]], <4 x ptr> poison) ; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP20]], <4 x i1> [[TMP15]], <4 x ptr> poison) ; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP21]], <4 x i1> [[TMP16]], <4 x ptr> poison) @@ -1364,9 +1364,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 -; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 -; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 +; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 4 +; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 8 +; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 12 ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <4 x i1> [[TMP31]]) ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <4 x i1> [[TMP32]]) ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <4 x i1> [[TMP33]]) @@ -1424,9 +1424,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 -; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 +; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 @@ -1440,9 +1440,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[TMP8]], zeroinitializer ; AVX2-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[TMP9]], zeroinitializer ; AVX2-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 -; AVX2-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 -; AVX2-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 12 +; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 4 +; AVX2-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 8 +; AVX2-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 12 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP18]], <4 x i1> [[TMP14]], <4 x ptr> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP20]], <4 x i1> [[TMP15]], <4 x ptr> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP21]], <4 x i1> [[TMP16]], <4 x ptr> poison) @@ -1456,9 +1456,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 -; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 -; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 +; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 4 +; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 8 +; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 12 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <4 x i1> [[TMP31]]) ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <4 x i1> [[TMP32]]) ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <4 x i1> [[TMP33]]) @@ -1516,9 +1516,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 24 +; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 ; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1 ; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1 @@ -1532,9 +1532,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP16:%.*]] = icmp ne <8 x i8> [[TMP8]], zeroinitializer ; AVX512-NEXT: [[TMP17:%.*]] = icmp ne <8 x i8> [[TMP9]], zeroinitializer ; AVX512-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 -; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 16 -; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 24 +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 8 +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 16 +; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 24 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP18]], <8 x i1> [[TMP14]], <8 x ptr> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP20]], <8 x i1> [[TMP15]], <8 x ptr> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP21]], <8 x i1> [[TMP16]], <8 x ptr> poison) @@ -1548,9 +1548,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP16]], <8 x i1> [[TMP29]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP34:%.*]] = select <8 x i1> [[TMP17]], <8 x i1> [[TMP30]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 -; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16 -; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24 +; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 8 +; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 16 +; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 24 ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <8 x i1> [[TMP31]]) ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <8 x i1> [[TMP32]]) ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <8 x i1> [[TMP33]]) @@ -1653,9 +1653,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1: [[VECTOR_BODY]]: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 -; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 +; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 +; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 @@ -1669,9 +1669,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[TMP8]], zeroinitializer ; AVX1-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[TMP9]], zeroinitializer ; AVX1-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 -; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 -; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 12 +; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 4 +; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 8 +; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 12 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP18]], <4 x i1> [[TMP14]], <4 x ptr> poison) ; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP20]], <4 x i1> [[TMP15]], <4 x ptr> poison) ; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP21]], <4 x i1> [[TMP16]], <4 x ptr> poison) @@ -1685,9 +1685,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 -; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 -; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 +; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 4 +; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 8 +; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 12 ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <4 x i1> [[TMP31]]) ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <4 x i1> [[TMP32]]) ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <4 x i1> [[TMP33]]) @@ -1745,9 +1745,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 -; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 +; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 @@ -1761,9 +1761,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[TMP8]], zeroinitializer ; AVX2-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[TMP9]], zeroinitializer ; AVX2-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 4 -; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 8 -; AVX2-NEXT: [[TMP23:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 12 +; AVX2-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 4 +; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 8 +; AVX2-NEXT: [[TMP23:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 12 ; AVX2-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP13]], <4 x i1> [[TMP17]], <4 x ptr> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP19]], <4 x i1> [[TMP15]], <4 x ptr> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP20]], <4 x i1> [[TMP16]], <4 x ptr> poison) @@ -1777,9 +1777,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> [[TMP21]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 -; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 -; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 +; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 4 +; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 8 +; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 12 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <4 x i1> [[TMP31]]) ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <4 x i1> [[TMP32]]) ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <4 x i1> [[TMP33]]) @@ -1837,9 +1837,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 24 +; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 ; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1 ; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1 @@ -1853,9 +1853,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP16:%.*]] = icmp ne <8 x i8> [[TMP8]], zeroinitializer ; AVX512-NEXT: [[TMP17:%.*]] = icmp ne <8 x i8> [[TMP9]], zeroinitializer ; AVX512-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 -; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 16 -; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 24 +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 8 +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 16 +; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 24 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP18]], <8 x i1> [[TMP14]], <8 x ptr> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP20]], <8 x i1> [[TMP15]], <8 x ptr> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP21]], <8 x i1> [[TMP16]], <8 x ptr> poison) @@ -1869,9 +1869,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP16]], <8 x i1> [[TMP29]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP34:%.*]] = select <8 x i1> [[TMP17]], <8 x i1> [[TMP30]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 -; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16 -; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24 +; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 8 +; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 16 +; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 24 ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <8 x i1> [[TMP31]]) ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <8 x i1> [[TMP32]]) ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <8 x i1> [[TMP33]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll index e23f8a9b63ef0..d514ab6bc72b7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -1186,13 +1186,13 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; O1VEC2: vector.body: ; O1VEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; O1VEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDEX]] -; O1VEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 4 +; O1VEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 4 ; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; O1VEC2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; O1VEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; O1VEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]] ; O1VEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDEX]] -; O1VEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 4 +; O1VEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i64 4 ; O1VEC2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4 ; O1VEC2-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP8]], align 4 ; O1VEC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -1214,13 +1214,13 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; OzVEC2: vector.body: ; OzVEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; OzVEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDEX]] -; OzVEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 4 +; OzVEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 4 ; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; OzVEC2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; OzVEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; OzVEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]] ; OzVEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDEX]] -; OzVEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 4 +; OzVEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i64 4 ; OzVEC2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4 ; OzVEC2-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP8]], align 4 ; OzVEC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll index de6418066dea0..2809a77b36f1a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll @@ -30,8 +30,8 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i8> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP27]], i32 -7 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP26]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP12]], i64 -7 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP28]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i8> [[WIDE_LOAD]], <8 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP29]] = xor <8 x i8> [[REVERSE]], [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll index 31269b1b8c221..85d77eaadc632 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll @@ -35,17 +35,17 @@ define void @foo(ptr addrspace(1) align 8 dereferenceable_or_null(16), ptr addrs ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT12]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i32 8 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i32 12 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP5]], align 8, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP6]], align 8, !alias.scope [[META0]] ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP7]], align 8, !alias.scope [[META0]] ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP8]], align 8, !alias.scope [[META0]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT10]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i32 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i32 8 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i32 12 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i64 12 ; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD]], ptr addrspace(1) [[TMP9]], align 8, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD4]], ptr addrspace(1) [[TMP10]], align 8, !alias.scope [[META3]], !noalias [[META0]] ; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD5]], ptr addrspace(1) [[TMP11]], align 8, !alias.scope [[META3]], !noalias [[META0]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll index 3c618d71fc974..9217c905945ac 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -67,7 +67,7 @@ define i32 @main(ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[DOTPROMOTED]], [[INDEX]] ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP20]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP22]], align 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP25]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll b/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll index 737bcf35fbd2c..38db41271d1f6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll @@ -124,7 +124,7 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon ; SSE41-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[TMP22]], [[TMP16]] ; SSE41-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[TMP23]], [[TMP17]] ; SSE41-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[D1:%.*]], i64 [[INDEX]] -; SSE41-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 4 +; SSE41-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 4 ; SSE41-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4 ; SSE41-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP29]], align 4 ; SSE41-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -250,9 +250,9 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon ; AVX1-NEXT: [[TMP69:%.*]] = add nsw <4 x i32> [[TMP67]], [[TMP46]] ; AVX1-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[TMP68]], [[TMP47]] ; AVX1-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[D1:%.*]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 4 -; AVX1-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 8 -; AVX1-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 12 +; AVX1-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 4 +; AVX1-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 8 +; AVX1-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 12 ; AVX1-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4 ; AVX1-NEXT: store <4 x i32> [[TMP20]], ptr [[TMP26]], align 4 ; AVX1-NEXT: store <4 x i32> [[TMP69]], ptr [[TMP71]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index 08855fe9ecba5..c756a54ec6d2b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -30,8 +30,8 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr align 8 [[TMP8]], <4 x i1> [[REVERSE]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll index 2aceb279d47db..5a396f88b1a64 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll @@ -76,7 +76,7 @@ define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) { ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -214,7 +214,7 @@ define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -337,7 +337,7 @@ define void @switch_all_dests_distinct(ptr %start, ptr %end) { ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -527,7 +527,7 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -687,7 +687,7 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) { ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -836,7 +836,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -1014,7 +1014,7 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end, ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -1167,7 +1167,7 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end, ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) @@ -1319,7 +1319,7 @@ define void @large_number_of_cases(ptr %start, ptr %end) { ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] -; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 +; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 1) diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll index 52e90e4475208..3afdf947081b6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll @@ -60,7 +60,7 @@ define float @reduction_sum_float_fastmath(i32 %n, ptr %array) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] @@ -111,7 +111,7 @@ define float @reduction_sum_float_only_reassoc(i32 %n, ptr %array) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6]] = fadd reassoc <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] @@ -162,7 +162,7 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, ptr %array) ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6]] = fadd reassoc contract <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] @@ -220,7 +220,7 @@ define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan ninf nsz oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]] @@ -301,7 +301,7 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll index 602a3921eb34c..da48f984cb329 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll @@ -59,13 +59,13 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 8 -; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 16 -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 24 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]] -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]] -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]] @@ -290,13 +290,13 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; MAX-BW-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 ; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]] -; MAX-BW-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 8 -; MAX-BW-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 16 -; MAX-BW-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 24 +; MAX-BW-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8 +; MAX-BW-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16 +; MAX-BW-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24 ; MAX-BW-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]] -; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]] -; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]] -; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll index 8081c0e17f865..692ab3db0aa42 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll @@ -25,9 +25,9 @@ define void @foo(ptr nocapture noalias %A, i64 %N) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @inc, align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 24 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[A]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP5]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index fda944e072d4a..714d01315e507 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -29,29 +29,29 @@ define void @vectorized(ptr noalias nocapture %A, ptr noalias nocapture readonly ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP0:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 12 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]] ; CHECK-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]] ; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]] ; CHECK-NEXT: store <4 x float> [[TMP11]], ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: store <4 x float> [[TMP12]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: store <4 x float> [[TMP14]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: store <4 x float> [[TMP12]], ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: store <4 x float> [[TMP14]], ptr [[TMP7]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll index c8e3766aa936e..a792d2463e647 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll @@ -56,17 +56,17 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 16 -; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 32 -; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 48 +; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 16 +; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 32 +; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 48 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i32>, ptr [[TMP9]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i32>, ptr [[TMP10]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i32>, ptr [[TMP11]], align 4 ; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 16 -; NO-VP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 32 -; NO-VP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 48 +; NO-VP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 16 +; NO-VP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 32 +; NO-VP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 48 ; NO-VP-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP12]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP17]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP18]], align 4 @@ -76,9 +76,9 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: [[TMP22:%.*]] = add nsw <16 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD3]] ; NO-VP-NEXT: [[TMP23:%.*]] = add nsw <16 x i32> [[WIDE_LOAD8]], [[WIDE_LOAD4]] ; NO-VP-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 16 -; NO-VP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 32 -; NO-VP-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 48 +; NO-VP-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 16 +; NO-VP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 32 +; NO-VP-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 48 ; NO-VP-NEXT: store <16 x i32> [[TMP20]], ptr [[TMP24]], align 4 ; NO-VP-NEXT: store <16 x i32> [[TMP21]], ptr [[TMP29]], align 4 ; NO-VP-NEXT: store <16 x i32> [[TMP22]], ptr [[TMP30]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll index 8184cad22ae8b..26268f1ff4e94 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll @@ -18,9 +18,9 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 8 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 @@ -36,9 +36,9 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD6]], splat (i64 128) ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP27]] -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i64, ptr [[TMP28]], i32 4 -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP28]], i32 8 -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP28]], i32 12 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i64, ptr [[TMP28]], i64 4 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP28]], i64 8 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP28]], i64 12 ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP12]], ptr align 4 [[TMP28]], <4 x i1> [[TMP16]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP13]], ptr align 4 [[TMP33]], <4 x i1> [[TMP17]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP14]], ptr align 4 [[TMP34]], <4 x i1> [[TMP18]]) @@ -88,9 +88,9 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 8 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 @@ -102,9 +102,9 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD3]], splat (i64 128) ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP15]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[TMP24]], i32 4 -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP24]], i32 8 -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[TMP24]], i32 12 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[TMP24]], i64 4 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP24]], i64 8 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[TMP24]], i64 12 ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD]], ptr align 4 [[TMP24]], <4 x i1> [[TMP16]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD1]], ptr align 4 [[TMP29]], <4 x i1> [[TMP17]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD2]], ptr align 4 [[TMP30]], <4 x i1> [[TMP18]]) diff --git a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll index a9a0b33f542af..eddd5f9ddc584 100644 --- a/llvm/test/Transforms/LoopVectorize/assume.ll +++ b/llvm/test/Transforms/LoopVectorize/assume.ll @@ -11,7 +11,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+02) @@ -27,7 +27,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP9]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -80,13 +80,13 @@ define void @test2(ptr noalias %a, ptr noalias %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 2 ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -151,13 +151,13 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x float> splat (float 2.300000e+01), <2 x float> splat (float 4.200000e+01) ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP2]], <2 x float> splat (float 2.300000e+01), <2 x float> splat (float 4.200000e+01) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[PREDPHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[PREDPHI1]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 2 ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 1fe3962dfd072..6c63b823b7666 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -130,8 +130,8 @@ define i32 @consecutive_ptr_reverse(ptr %a, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[REVERSE]] @@ -177,8 +177,8 @@ define i32 @consecutive_ptr_reverse(ptr %a, i64 %n) { ; INTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; INTER-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] ; INTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] -; INTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 +; INTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 +; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 ; INTER-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 8 ; INTER-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; INTER-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[REVERSE]] diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll index fb45745eff1cb..4737a56df2735 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll @@ -19,7 +19,7 @@ define i8 @preserve_flags_when_cloning_trunc(i8 %start, ptr noalias %src, ptr no ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP4]], align 2 ; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP5]], align 2 ; CHECK-NEXT: [[TMP6]] = mul <4 x i8> [[VEC_PHI]], splat (i8 3) diff --git a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll index 5d92c127aff93..901652537a5c5 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll @@ -16,19 +16,19 @@ define void @cse_replicate_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i32 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP8]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP8]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP8]], align 2 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i64 4 ; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP3]], align 4 ; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD1]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[C]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[TMP5]], i64 4 ; CHECK-NEXT: store <4 x i16> [[WIDE_LOAD2]], ptr [[TMP5]], align 2 ; CHECK-NEXT: store <4 x i16> [[WIDE_LOAD3]], ptr [[TMP6]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -79,11 +79,11 @@ define void @cse_wide_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %n ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[A]], <4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[A]], <4 x i64> [[STEP_ADD]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP4]], align 8 ; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX1]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr ptr, ptr [[TMP6]], i64 4 ; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[TMP6]], align 8 ; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll index 02e1d0e9e7004..6e5213568c735 100644 --- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll @@ -25,7 +25,7 @@ define i64 @dead_instructions_01(ptr %a, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] @@ -133,13 +133,13 @@ define void @dead_load_and_vector_pointer(ptr %a, ptr %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 8, !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP5]], align 8, !alias.scope [[META5]], !noalias [[META8]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META5]], !noalias [[META8]] ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD2]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr [[TMP2]], align 4, !alias.scope [[META5]], !noalias [[META8]] -; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP5]], align 4, !alias.scope [[META5]], !noalias [[META8]] +; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP1]], align 4, !alias.scope [[META5]], !noalias [[META8]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll index 274bd043cd86b..c23d28cdd0f3a 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll @@ -15,8 +15,8 @@ define dso_local void @constTC(ptr noalias nocapture %A) optsize { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 4 ; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP3]], align 1 ; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP7]], align 1 ; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP8]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll b/llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll index 4af9f4a13b62b..50e55f6051485 100644 --- a/llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll +++ b/llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll @@ -26,7 +26,7 @@ define void @test(ptr %dst) personality ptr null { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP4]], align 8 ; CHECK-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll b/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll index 22226a711bcf0..5edd83bd1e0d1 100644 --- a/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll @@ -19,8 +19,8 @@ define float @fmaxnum(ptr %src, i64 %n) { ; IC3-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; IC3-NEXT: [[VEC_PHI2:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; IC3-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]] -; IC3-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 4 -; IC3-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 8 +; IC3-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 4 +; IC3-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 8 ; IC3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; IC3-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; IC3-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 @@ -71,9 +71,9 @@ define float @fmaxnum(ptr %src, i64 %n) { ; IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]] -; IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 4 -; IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 8 -; IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 12 +; IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 4 +; IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 8 +; IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 12 ; IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 @@ -129,10 +129,10 @@ define float @fmaxnum(ptr %src, i64 %n) { ; IC5-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; IC5-NEXT: [[VEC_PHI4:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; IC5-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]] -; IC5-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 4 -; IC5-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 8 -; IC5-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 12 -; IC5-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 16 +; IC5-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 4 +; IC5-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 8 +; IC5-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 12 +; IC5-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 16 ; IC5-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; IC5-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; IC5-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll index eca39e6f0b6ba..cf2e7ccd1b2f0 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll @@ -98,7 +98,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i16> [[TMP0]], [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i64 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP3]], align 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -160,7 +160,7 @@ define void @sink_dead_inst(ptr %a) { ; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i16> [[TMP5]], splat (i16 10) ; CHECK-NEXT: [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], splat (i16 10) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[A]], i16 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP9]], i32 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP9]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP7]], ptr [[TMP9]], align 2 ; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP11]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index cebd52fa7f866..063f47ce2b32d 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -33,7 +33,7 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i32>, ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> @@ -41,7 +41,7 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[TMP9]] -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP10]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -231,7 +231,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2]] = load <4 x i32>, ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> @@ -485,7 +485,7 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP4]], align 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP6]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> @@ -499,7 +499,7 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = fsub fast <4 x double> [[TMP9]], [[TMP13]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = fsub fast <4 x double> [[TMP10]], [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP15]], ptr [[TMP17]], align 8 ; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP16]], ptr [[TMP19]], align 8 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -1700,7 +1700,7 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP4]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> @@ -1712,7 +1712,7 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP7]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = mul nsw <4 x i32> [[TMP10]], [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP13]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -1915,7 +1915,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP5]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP6]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP7]], i64 1 -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP8]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP18]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP9]], align 2 @@ -1943,7 +1943,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP37]] ; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = mul nsw <4 x i32> [[TMP40]], [[TMP38]] ; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] -; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP41]], ptr [[TMP43]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP45]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -2146,7 +2146,7 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP4]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> @@ -2160,7 +2160,7 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP11]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = mul nsw <4 x i32> [[TMP10]], [[TMP12]] ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP17]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -2449,7 +2449,7 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], splat (i16 10) ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = sub <4 x i16> [[TMP7]], splat (i16 10) ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP10]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP10]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP10]], align 2 ; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP9]], ptr [[TMP12]], align 2 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -3218,7 +3218,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll index 0745f286b2608..0d9d28d079b92 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll @@ -53,7 +53,7 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) @@ -127,10 +127,10 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV]] ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC_0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i32 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[GEP_SRC_1]], align 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI2]], <4 x float> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index 73a2203c3115b..eab9df558f608 100644 --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -1648,8 +1648,8 @@ define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 -3 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[REVERSE]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll b/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll index 4e51d6e9ba708..dee377d61ba30 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll @@ -81,8 +81,8 @@ define i32 @induction_trunc_wrapflags(ptr %p) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 326, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 -3 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[VEC_IND]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: store <4 x i8> [[REVERSE]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index bacb8eb10c755..b6fb378a042fd 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -155,7 +155,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -469,20 +469,20 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], [[OFFSET]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP7]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP9]], align 4, !alias.scope [[META4]], !noalias [[META7]] +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP8]], align 4, !alias.scope [[META4]], !noalias [[META7]] ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[OFFSET2]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP10]] -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x float>, ptr [[TMP11]], align 4, !alias.scope [[META7]] -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x float>, ptr [[TMP13]], align 4, !alias.scope [[META7]] +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x float>, ptr [[TMP12]], align 4, !alias.scope [[META7]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD5]] ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD6]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[WIDE_LOAD4]], [[TMP15]] ; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP16]], ptr [[TMP7]], align 4, !alias.scope [[META4]], !noalias [[META7]] -; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP17]], ptr [[TMP9]], align 4, !alias.scope [[META4]], !noalias [[META7]] +; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP17]], ptr [[TMP8]], align 4, !alias.scope [[META4]], !noalias [[META7]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -743,7 +743,7 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 ; UNROLL-NO-IC-NEXT: [[TMP4]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] @@ -2179,7 +2179,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[PRED_UDIV_CONTINUE8]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP33:%.*]], [[PRED_UDIV_CONTINUE8]] ] ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] @@ -3512,7 +3512,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i8 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP13]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -3897,7 +3897,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i8 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP14]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP16]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -4151,7 +4151,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -4373,7 +4373,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP5]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP9]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -4600,7 +4600,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -4815,7 +4815,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -5774,7 +5774,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add <2 x i32> [[STEP_ADD]], [[TMP6]] -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP7]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP7]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP9]], ptr [[TMP11]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -6131,7 +6131,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP20]], ptr [[TMP22]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP21]], ptr [[TMP24]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll index 2c97bb7622740..ffe9da09ca680 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll @@ -19,10 +19,10 @@ define void @i65_induction_with_negative_step(ptr %dst) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[VEC_IND]], <4 x i64> [[STEP_ADD]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 -4 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 -4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: store <4 x i64> [[REVERSE]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll index 69d2aa4c620c1..2200a7d0431d2 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll @@ -16,8 +16,8 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) @@ -52,14 +52,14 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4) ; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] ; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 -; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -4 -; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -3 -; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -8 -; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3 -; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -12 -; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3 +; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 +; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 +; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -4 +; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 -3 +; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -8 +; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 +; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -12 +; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3 ; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> ; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -172,8 +172,8 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 1 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> poison, <4 x i32> ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[REVERSE]], [[BROADCAST_SPLAT]] @@ -498,8 +498,8 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x half>, ptr [[TMP2]], align 1 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x half> [[WIDE_LOAD]], <4 x half> poison, <4 x i32> ; IC1VF4-NEXT: [[TMP3:%.*]] = fcmp ugt <4 x half> [[REVERSE]], [[BROADCAST_SPLAT]] @@ -822,8 +822,8 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) @@ -858,14 +858,14 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4) ; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]] ; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 -; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -4 -; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -3 -; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -8 -; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3 -; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -12 -; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3 +; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 +; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 +; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -4 +; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 -3 +; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -8 +; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 +; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -12 +; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3 ; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> ; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll index 24c5602a580da..ba326e254f3cd 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll @@ -82,9 +82,9 @@ define i64 @select_iv_def_from_outer_loop(ptr %a, i64 %start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 [[TMP1]] -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll index b991d58eb2b8d..45c2abd43c36a 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll @@ -85,9 +85,9 @@ define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -281,9 +281,9 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -424,9 +424,9 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 @@ -573,9 +573,9 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 2147483646, [[INDEX]] ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll index 8d3bd267b9482..a071949f82062 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll @@ -69,9 +69,9 @@ define i64 @select_icmp_const_1(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -262,9 +262,9 @@ define i64 @select_icmp_const_2(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -455,9 +455,9 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 % ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -648,9 +648,9 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 @@ -841,9 +841,9 @@ define i64 @select_fcmp_const(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 @@ -1038,17 +1038,17 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 @@ -1257,17 +1257,17 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP6]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP9]], align 4 @@ -1481,17 +1481,17 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 % ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 @@ -1706,17 +1706,17 @@ define i64 @select_icmp_unsigned_iv_range(ptr %a, ptr %b, i64 %rdx.start) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 162803a377bc0..b3368a46f706d 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -1248,8 +1248,8 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 3 -; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i32 0 -; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 -1 +; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i64 0 +; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i64 -1 ; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2 ; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1 ; VEC-NEXT: [[IV_1_NEXT_LCSSA1:%.*]] = add i64 [[TMP5]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 4b3f370f4cdaf..8d3d0ff7a6406 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -296,14 +296,14 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 -1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD1]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 @@ -388,8 +388,8 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 -1) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 -1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) @@ -544,8 +544,8 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 511, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) diff --git a/llvm/test/Transforms/LoopVectorize/metadata.ll b/llvm/test/Transforms/LoopVectorize/metadata.ll index ed027e8b9a895..fed5df2b65228 100644 --- a/llvm/test/Transforms/LoopVectorize/metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/metadata.ll @@ -62,20 +62,20 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) { ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0:![0-9]+]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP3]], align 4, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 9.900000e+01), !fpmath [[META3:![0-9]+]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD1]], splat (double 9.900000e+01), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp oge <2 x double> [[TMP4]], splat (double 1.000000e+01) ; INTERLEAVE-NEXT: [[TMP7:%.*]] = fcmp oge <2 x double> [[TMP5]], splat (double 1.000000e+01) -; INTERLEAVE-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP6]], <2 x double> [[WIDE_LOAD]], <2 x double> zeroinitializer, !fpmath [[META3]] +; INTERLEAVE-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP6]], <2 x double> [[WIDE_LOAD]], <2 x double> zeroinitializer, !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x double> [[WIDE_LOAD1]], <2 x double> zeroinitializer, !fpmath [[META3]] -; INTERLEAVE-NEXT: [[TMP9:%.*]] = fptrunc <2 x double> [[TMP11]] to <2 x float>, !fpmath [[META3]] +; INTERLEAVE-NEXT: [[TMP9:%.*]] = fptrunc <2 x double> [[TMP12]] to <2 x float>, !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP10:%.*]] = fptrunc <2 x double> [[TMP8]] to <2 x float>, !fpmath [[META3]] -; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 2 +; INTERLEAVE-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 2 ; INTERLEAVE-NEXT: store <2 x float> [[TMP9]], ptr [[TMP1]], align 4, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: store <2 x float> [[TMP10]], ptr [[TMP13]], align 4, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: store <2 x float> [[TMP10]], ptr [[TMP11]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -154,13 +154,13 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 4, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP1]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x i64> @foo_vector_fixed2_nomask(<2 x i64> [[WIDE_LOAD]]) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x i64> @foo_vector_fixed2_nomask(<2 x i64> [[WIDE_LOAD1]]) ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 2 ; INTERLEAVE-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4 ; INTERLEAVE-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP7]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -220,13 +220,13 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP1]], align 8, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x double> @bar_vector_fixed2_nomask(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x double> @bar_vector_fixed2_nomask(<2 x double> [[WIDE_LOAD1]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 2 ; INTERLEAVE-NEXT: store <2 x double> [[TMP3]], ptr [[TMP5]], align 8 ; INTERLEAVE-NEXT: store <2 x double> [[TMP4]], ptr [[TMP7]], align 8 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -286,13 +286,13 @@ define void @widen_intrinsic(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4 ; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 4 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[WIDE_LOAD]], i1 true) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[WIDE_LOAD1]], i1 true) ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 2 ; INTERLEAVE-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4 ; INTERLEAVE-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP7]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -352,13 +352,13 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP1]], align 8, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[WIDE_LOAD1]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 2 ; INTERLEAVE-NEXT: store <2 x double> [[TMP3]], ptr [[TMP5]], align 8 ; INTERLEAVE-NEXT: store <2 x double> [[TMP4]], ptr [[TMP7]], align 8 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -449,10 +449,10 @@ define void @unknown_metadata(ptr nocapture %a, ptr noalias %b, i64 %size) { ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], <2 x i64> [[VEC_IND]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], <2 x i64> [[STEP_ADD]] -; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2 +; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 2 ; INTERLEAVE-NEXT: store <2 x i32> [[VEC_IND1]], ptr [[TMP3]], align 4 ; INTERLEAVE-NEXT: store <2 x i32> [[STEP_ADD3]], ptr [[TMP5]], align 4 -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: store <2 x ptr> [[TMP1]], ptr [[TMP0]], align 8 ; INTERLEAVE-NEXT: store <2 x ptr> [[TMP2]], ptr [[TMP7]], align 8 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -582,7 +582,7 @@ define void @noalias_metadata(ptr align 8 %dst, ptr align 8 %src) { ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP26:%.*]] = mul i64 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP26]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP7]], align 8, !alias.scope [[META14:![0-9]+]] ; INTERLEAVE-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i32 1 ; INTERLEAVE-NEXT: store ptr [[TMP8]], ptr [[DST]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META19:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll b/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll index 47a2a84b44601..120307629c82e 100644 --- a/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll @@ -14,7 +14,7 @@ define float @maximumnum_intrinsic(ptr readonly %x) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3]] = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]]) @@ -58,7 +58,7 @@ define float @maximumnum_intrinsic_fast(ptr readonly %x) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3]] = call fast <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]]) @@ -102,7 +102,7 @@ define float @minimumnum_intrinsic(ptr readonly %x) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3]] = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]]) @@ -146,7 +146,7 @@ define float @minimumnum_intrinsic_fast(ptr readonly %x) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3]] = call fast <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll index 440309d246899..5385a83dfac65 100644 --- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll @@ -197,10 +197,10 @@ define void @narrow_widen_store_user(i32 %x, ptr noalias %A, ptr noalias %B) { ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]] ; VF2IC2-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]] -; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP2]], i32 2 +; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP2]], i64 2 ; VF2IC2-NEXT: store <2 x i32> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 4 ; VF2IC2-NEXT: store <2 x i32> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 4 -; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i32 2 +; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i64 2 ; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP3]], align 4 ; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP5]], align 4 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll index 3bf5c0d1d13a9..1f1bbf7c27ada 100644 --- a/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll @@ -254,7 +254,7 @@ define void @pr52024(ptr %dst, i16 %N) { ; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i16> [[TMP8]] to <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i16> [[TMP9]] to <2 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i64 2 ; CHECK-NEXT: store <2 x i32> [[TMP10]], ptr [[TMP12]], align 4 ; CHECK-NEXT: store <2 x i32> [[TMP11]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -350,7 +350,7 @@ define void @test_expand_secv_in_entry_before_gep(ptr %dst) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OUTER_IV]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[GEP_M]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 2 ; CHECK-NEXT: store <2 x double> zeroinitializer, ptr [[TMP3]], align 8 ; CHECK-NEXT: store <2 x double> zeroinitializer, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll index 30d01e8b790a7..4fab2995f14bd 100644 --- a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll +++ b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll @@ -11,14 +11,14 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]]) ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[TMP4]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -78,14 +78,14 @@ define void @test2(ptr nocapture readonly %d) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0]]) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -165,13 +165,13 @@ define void @predicated_noalias_scope_decl(ptr noalias nocapture readonly %a, pt ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x float> splat (float 2.300000e+01), <4 x float> splat (float 4.200000e+01) ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP2]], <4 x float> splat (float 2.300000e+01), <4 x float> splat (float 4.200000e+01) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[PREDPHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[PREDPHI1]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index b5d74687dc808..1a1c05187590e 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -173,8 +173,8 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[N]] ; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 -3 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00) @@ -203,8 +203,8 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], [[N]] ; CHECK-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 0 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 -3 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4 ; CHECK-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x float> [[WIDE_LOAD9]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <4 x float> [[REVERSE10]], splat (float 1.000000e+00) diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 5c62ca3ff3d01..d96134e8adf1d 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -38,8 +38,8 @@ define void @a(ptr readnone %b) { ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <4 x i8> [[REVERSE]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll index 28d5ef552482b..33b3d263e634a 100644 --- a/llvm/test/Transforms/LoopVectorize/pr37248.ll +++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll @@ -56,8 +56,8 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] ; CHECK: [[PRED_STORE_CONTINUE3]]: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i32 -1 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i64 -1 ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP17]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -119,8 +119,8 @@ define void @f2(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i32 -1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i64 -1 ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll index 565e203e68f72..3276528e54225 100644 --- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll @@ -125,7 +125,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; IC2-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] ; IC2-NEXT: [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0 ; IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1 -; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 +; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2 ; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1 ; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1 ; IC2-NEXT: [[TMP13:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12) @@ -344,21 +344,21 @@ define void @switch_to_header(ptr %start) { ; IC1-NEXT: [[ENTRY:.*]]: ; IC1-NEXT: br label %[[LOOP_HEADER:.*]] ; IC1: [[LOOP_HEADER]]: -; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] +; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ] ; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC1-NEXT: i64 120, label %[[IF_THEN1]] +; IC1-NEXT: i64 120, label %[[IF_THEN]] ; IC1-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC1-NEXT: ] -; IC1: [[IF_THEN1]]: +; IC1: [[IF_THEN]]: ; IC1-NEXT: br label %[[LOOP_HEADER]] -; IC1: [[IF_THEN:.*:]] +; IC1: [[IF_THEN1:.*:]] ; IC1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison ; IC1-NEXT: store i64 42, ptr [[GEP]], align 1 ; IC1-NEXT: unreachable ; IC1: [[LOOP_LATCH]]: ; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] +; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]] ; IC1: [[EXIT]]: ; IC1-NEXT: ret void ; @@ -367,21 +367,21 @@ define void @switch_to_header(ptr %start) { ; IC2-NEXT: [[ENTRY:.*]]: ; IC2-NEXT: br label %[[LOOP_HEADER:.*]] ; IC2: [[LOOP_HEADER]]: -; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] +; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ] ; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC2-NEXT: i64 120, label %[[IF_THEN1]] +; IC2-NEXT: i64 120, label %[[IF_THEN]] ; IC2-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC2-NEXT: ] -; IC2: [[IF_THEN1]]: +; IC2: [[IF_THEN]]: ; IC2-NEXT: br label %[[LOOP_HEADER]] -; IC2: [[IF_THEN:.*:]] +; IC2: [[IF_THEN1:.*:]] ; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison ; IC2-NEXT: store i64 42, ptr [[GEP]], align 1 ; IC2-NEXT: unreachable ; IC2: [[LOOP_LATCH]]: ; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] +; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]] ; IC2: [[EXIT]]: ; IC2-NEXT: ret void ; @@ -437,7 +437,7 @@ define void @switch_all_to_default(ptr %start) { ; IC2: [[VECTOR_BODY]]: ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[INDEX]] -; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 +; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2 ; IC2-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP2]], align 1 ; IC2-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP5]], align 1 ; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -497,7 +497,7 @@ define void @switch_unconditional(ptr %start) { ; IC2: [[VECTOR_BODY]]: ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IC2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[START]], i64 [[INDEX]] -; IC2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP2]], i32 2 +; IC2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP2]], i64 2 ; IC2-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP2]], align 4 ; IC2-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP1]], align 4 ; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index fafa82c211dc6..43dede0b612f3 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -37,7 +37,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]]) @@ -115,11 +115,11 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]]) @@ -203,7 +203,7 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]]) @@ -286,11 +286,11 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND]]) @@ -381,11 +381,11 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]]) @@ -469,11 +469,11 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 19, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 1, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]]) @@ -551,11 +551,11 @@ define i32 @start_at_non_zero(ptr nocapture %in, ptr nocapture %coeff, ptr nocap ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 120, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COEFF]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]] @@ -632,11 +632,11 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -1, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -1, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[WIDE_LOAD]]) @@ -714,11 +714,11 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]] @@ -794,11 +794,11 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]] @@ -875,11 +875,11 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[WIDE_LOAD]]) @@ -958,11 +958,11 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi float [ 1.000000e+00, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]]) @@ -1038,7 +1038,7 @@ define i32 @reduction_sub_lhs(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 3, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]]) @@ -1122,11 +1122,11 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[PREDPHI6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]] @@ -1207,11 +1207,11 @@ for.end: define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: define i32 @reduction_sum_multiuse( ; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-NEXT: [[_LR_PH:.*]]: +; CHECK-NEXT: [[_LR_PH1:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH1:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1231,11 +1231,11 @@ define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocaptu ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sum_multiuse( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-INTERLEAVED-NEXT: [[_LR_PH:.*]]: +; CHECK-INTERLEAVED-NEXT: [[_LR_PH1:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1322,11 +1322,11 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]]) @@ -1416,7 +1416,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255) ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI1]], splat (i32 255) ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> @@ -1502,7 +1502,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255) ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI1]], splat (i32 255) ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> @@ -1606,11 +1606,11 @@ define float @reduction_fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi float [ -0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]] @@ -1856,11 +1856,11 @@ define float @reduction_fmuladd_blend(ptr %a, ptr %b, i64 %n, i1 %c) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi float [ -0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]] @@ -2005,7 +2005,7 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H]], i32 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], zeroinitializer @@ -2162,7 +2162,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H]], i32 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], zeroinitializer @@ -2268,10 +2268,10 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4 @@ -2284,7 +2284,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP0]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP23]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -2293,7 +2293,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP19]], i32 1 ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] ; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]] ; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 ; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP29]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] @@ -2302,7 +2302,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP19]], i32 2 ; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] ; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP2]] +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]] ; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 ; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP35]], i32 2 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] @@ -2311,7 +2311,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP19]], i32 3 ; CHECK-NEXT: br i1 [[TMP38]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]] ; CHECK: [[PRED_LOAD_IF5]]: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP3]] +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]] ; CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 ; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP41]], i32 3 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] @@ -2350,14 +2350,14 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6 ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7 -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP0]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP1]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP2]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP3]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP4]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP5]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP6]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP7]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP4]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP5]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP6]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP7]], i32 1 ; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP8]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP10]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP12]], align 4 @@ -2379,7 +2379,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP41]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF]]: -; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP0]] +; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]] ; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = insertelement <4 x i32> poison, i32 [[TMP44]], i32 0 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -2388,7 +2388,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = extractelement <4 x i1> [[TMP39]], i32 1 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP47]], label %[[PRED_LOAD_IF2:.*]], label %[[PRED_LOAD_CONTINUE3:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF2]]: -; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP1]] +; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]] ; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = insertelement <4 x i32> [[TMP46]], i32 [[TMP50]], i32 1 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE3]] @@ -2397,7 +2397,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = extractelement <4 x i1> [[TMP39]], i32 2 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP53]], label %[[PRED_LOAD_IF4:.*]], label %[[PRED_LOAD_CONTINUE5:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF4]]: -; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP2]] +; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]] ; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP57:%.*]] = insertelement <4 x i32> [[TMP52]], i32 [[TMP56]], i32 2 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE5]] @@ -2406,7 +2406,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP59:%.*]] = extractelement <4 x i1> [[TMP39]], i32 3 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP59]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF6]]: -; CHECK-INTERLEAVED-NEXT: [[TMP61:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP3]] +; CHECK-INTERLEAVED-NEXT: [[TMP61:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]] ; CHECK-INTERLEAVED-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP61]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP63:%.*]] = insertelement <4 x i32> [[TMP58]], i32 [[TMP62]], i32 3 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE7]] @@ -2415,7 +2415,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP65:%.*]] = extractelement <4 x i1> [[TMP40]], i32 0 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP65]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF8]]: -; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP4]] +; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP4]] ; CHECK-INTERLEAVED-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP69:%.*]] = insertelement <4 x i32> poison, i32 [[TMP68]], i32 0 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE9]] @@ -2424,7 +2424,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = extractelement <4 x i1> [[TMP40]], i32 1 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP71]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF10]]: -; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP5]] +; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP5]] ; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = insertelement <4 x i32> [[TMP70]], i32 [[TMP74]], i32 1 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE11]] @@ -2433,7 +2433,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = extractelement <4 x i1> [[TMP40]], i32 2 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP77]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF12]]: -; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP6]] +; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP6]] ; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = insertelement <4 x i32> [[TMP76]], i32 [[TMP80]], i32 2 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE13]] @@ -2442,7 +2442,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = extractelement <4 x i1> [[TMP40]], i32 3 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP83]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF14]]: -; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP7]] +; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP7]] ; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = insertelement <4 x i32> [[TMP82]], i32 [[TMP86]], i32 3 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE15]] @@ -2543,10 +2543,10 @@ define i32 @reduction_add_sub(ptr noalias nocapture %A, ptr noalias nocapture %B ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sub <4 x i32> zeroinitializer, [[WIDE_LOAD3]] @@ -2630,10 +2630,10 @@ define i32 @reduction_sub_add(ptr noalias nocapture %A, ptr noalias nocapture %B ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sub <4 x i32> zeroinitializer, [[WIDE_LOAD]] @@ -2739,7 +2739,7 @@ define i64 @reduction_expression_same_operands(ptr nocapture readonly %x, ptr no ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i64> @@ -2840,7 +2840,7 @@ define i32 @reduction_expression_ext_mulacc_livein(ptr %a, i16 %c) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16> diff --git a/llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll b/llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll index daf4cba197cc2..6a8ea3756b6b0 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll @@ -8,8 +8,8 @@ define i32 @reduction_sum(i64 %n, ptr noalias nocapture %A) { ; UF3-NEXT: [[SUM1:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[SUM1_NEXT:%.+]], %vector.body ] ; UF3-NEXT: [[SUM2:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[SUM2_NEXT:%.+]], %vector.body ] ; UF3-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, ptr %A, i64 [[IV]] -; UF3-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 4 -; UF3-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 8 +; UF3-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 4 +; UF3-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 8 ; UF3-NEXT: [[L0:%.+]] = load <4 x i32>, ptr [[GEP0]], align 4 ; UF3-NEXT: [[L1:%.+]] = load <4 x i32>, ptr [[L_GEP1]], align 4 ; UF3-NEXT: [[L2:%.+]] = load <4 x i32>, ptr [[L_GEP2]], align 4 @@ -34,10 +34,10 @@ define i32 @reduction_sum(i64 %n, ptr noalias nocapture %A) { ; UF5-NEXT: [[SUM3:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[SUM3_NEXT:%.+]], %vector.body ] ; UF5-NEXT: [[SUM4:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[SUM4_NEXT:%.+]], %vector.body ] ; UF5-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, ptr %A, i64 [[IV]] -; UF5-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 4 -; UF5-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 8 -; UF5-NEXT: [[L_GEP3:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 12 -; UF5-NEXT: [[L_GEP4:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 16 +; UF5-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 4 +; UF5-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 8 +; UF5-NEXT: [[L_GEP3:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 12 +; UF5-NEXT: [[L_GEP4:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 16 ; UF5-NEXT: [[L0:%.+]] = load <4 x i32>, ptr [[GEP0]], align 4 ; UF5-NEXT: [[L1:%.+]] = load <4 x i32>, ptr [[L_GEP1]], align 4 ; UF5-NEXT: [[L2:%.+]] = load <4 x i32>, ptr [[L_GEP2]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll index faca86a41b023..a2649053680d2 100644 --- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll @@ -36,8 +36,8 @@ define void @reuse_lcssa_phi_for_add_rec1(ptr %head) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[IV_LCSSA]], [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[SRC_2]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 -1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x ptr> [[WIDE_LOAD]], <2 x ptr> poison, <2 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[REVERSE]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll index 0896848905c6c..71c75e52d4050 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll @@ -14,8 +14,8 @@ define i32 @preserve_inbounds(i64 %start, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]] @@ -60,8 +60,8 @@ define i32 @preserve_nusw(i64 %start, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i32, ptr [[PTR]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr nusw i32, ptr [[TMP2]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr nusw i32, ptr [[TMP2]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]] @@ -106,8 +106,8 @@ define i32 @drop_nuw(i64 %start, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i32, ptr [[PTR]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll index 31129d3bcc2f4..d3e291e4f3ed2 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -20,10 +20,10 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4 @@ -74,10 +74,10 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i128 [[STARTVAL]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i128 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4 @@ -134,10 +134,10 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 [[STARTVAL]], [[DOTCAST]] ; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 -4 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 -3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -4 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 @@ -221,10 +221,10 @@ define void @reverse_forward_induction_i64_i8() { ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[REVERSE]], ptr [[TMP9]], align 4 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> @@ -274,10 +274,10 @@ define void @reverse_forward_induction_i64_i8_signed() { ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[REVERSE]], ptr [[TMP9]], align 4 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll index 830bd92c70b16..5a1844ac450e7 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll @@ -39,12 +39,12 @@ define void @test_runtime_check_known_false_after_construction(ptr %start.1, ptr ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], -8 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START_2_DIFF]], i64 [[OFFSET_IDX2]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[NEXT_GEP3]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i32 -3 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[NEXT_GEP3]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP15]], i32 -3 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP15]], i64 -3 ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i64> [[REVERSE]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: store <4 x i64> [[REVERSE4]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll index 1c16ef1114371..af272955abbd2 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll @@ -99,8 +99,8 @@ define void @diff_memcheck_known_false_for_vf_4(ptr %B, ptr %A, ptr %end) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], -8 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i64 -3 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll index 25f40be238338..5be2b09a504c0 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll @@ -996,14 +996,14 @@ define void @decreasing_inner_iv(ptr nocapture noundef %dst, ptr nocapture nound ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[TMP0]], [[INDEX]] ; CHECK-NEXT: [[TMP21:%.*]] = add nsw i64 [[OFFSET_IDX]], [[TMP16]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 0 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 -3 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META46:![0-9]+]] ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = add nsw i64 [[OFFSET_IDX]], [[TMP17]] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 -3 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4, !alias.scope [[META49:![0-9]+]], !noalias [[META46]] ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD3]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP29:%.*]] = add nsw <4 x i32> [[REVERSE4]], [[REVERSE]] diff --git a/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll b/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll index 7811b17f1b7e1..306bcf336e5af 100644 --- a/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll @@ -24,7 +24,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; NO-IC: %[[T4:.+]] = add nuw nsw i64 [[OFFSET_IDX]], %tmp0 ; NO-IC: %[[T6:.+]] = sub nsw i64 %[[T4]], %x ; NO-IC: %[[T8:.+]] = getelementptr inbounds i32, ptr %a, i64 %[[T6]] -; NO-IC: %[[T12:.+]] = getelementptr inbounds i32, ptr %[[T8]], i32 4 +; NO-IC: %[[T12:.+]] = getelementptr inbounds i32, ptr %[[T8]], i64 4 ; NO-IC: load <4 x i32>, ptr %[[T8]], align 4 ; NO-IC: load <4 x i32>, ptr %[[T12]], align 4 ; NO-IC: br {{.*}}, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll index 64e12cc8c9cb8..fa03c62bb4927 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll @@ -88,7 +88,7 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer @@ -307,7 +307,7 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 ; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer @@ -593,9 +593,9 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE19]] ] ; CHECK-VF4-IC2-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE19]] ] ; CHECK-VF4-IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4 +; CHECK-VF4-IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]] -; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope [[META6]] +; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope [[META6]] ; CHECK-VF4-IC2-NEXT: [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer ; CHECK-VF4-IC2-NEXT: [[TMP8:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD5]], zeroinitializer ; CHECK-VF4-IC2-NEXT: [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]] @@ -947,7 +947,7 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no ; CHECK-VF4-IC2-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4 +; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll index e4922d3e4f627..2b352abe9f7a1 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll @@ -63,9 +63,9 @@ define i32 @select_const_i32_from_icmp(ptr %v, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 @@ -249,9 +249,9 @@ define i32 @select_const_i32_from_icmp2(ptr %v, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 @@ -435,9 +435,9 @@ define i32 @select_i32_from_icmp(ptr %v, i32 %a, i32 %b, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 @@ -621,9 +621,9 @@ define i32 @select_const_i32_from_fcmp_fast(ptr %v, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 @@ -807,9 +807,9 @@ define i32 @select_const_i32_from_fcmp(ptr %v, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll index d8e62c7b3b8d4..2183c520d9e81 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll @@ -14,9 +14,9 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; VF4IC4: [[VECTOR_BODY]]: ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll index 053863117bdc8..bdf73d6a52c22 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll @@ -14,9 +14,9 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; VF4IC4: vector.body: ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 @@ -88,17 +88,17 @@ define i64 @same_exit_block_pre_inc_use1() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8 -; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4 +; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8 +; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1 @@ -183,9 +183,9 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; VF4IC4: vector.body: ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 [[INDEX]] -; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 4 -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 12 +; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 4 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 8 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[NEXT_GEP]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 @@ -268,17 +268,17 @@ define i64 @same_exit_block_post_inc_use() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8 -; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4 +; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8 +; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1 @@ -365,17 +365,17 @@ define i64 @diff_exit_block_pre_inc_use1() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8 -; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4 +; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8 +; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1 @@ -467,17 +467,17 @@ define i64 @diff_exit_block_post_inc_use1() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8 -; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4 +; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8 +; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1 @@ -569,14 +569,14 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -3 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -4 -; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 -3 -; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -8 -; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -12 -; VF4IC4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 -3 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 0 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -3 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -4 +; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3 +; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -8 +; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3 +; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -12 +; VF4IC4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i64 -3 ; VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 @@ -586,14 +586,14 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP27]], align 1 ; VF4IC4-NEXT: [[REVERSE6:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 0 -; VF4IC4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 -3 -; VF4IC4-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 -4 -; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP41]], i32 -3 -; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 -8 -; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 -3 -; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 -12 -; VF4IC4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 -3 +; VF4IC4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 0 +; VF4IC4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i64 -3 +; VF4IC4-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -4 +; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP41]], i64 -3 +; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -8 +; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3 +; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -12 +; VF4IC4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 -3 ; VF4IC4-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i8>, ptr [[TMP46]], align 1 ; VF4IC4-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD14]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 @@ -698,17 +698,17 @@ define i8 @same_exit_block_use_loaded_value() { ; VF4IC4: vector.body: ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4 -; VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8 -; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12 +; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4 +; VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8 +; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12 ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 ; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1 @@ -807,14 +807,14 @@ define i8 @same_exit_block_reverse_use_loaded_value() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -3 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -4 -; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 -3 -; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -8 -; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3 -; VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -12 -; VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 -3 +; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 0 +; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -3 +; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -4 +; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3 +; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -8 +; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3 +; VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -12 +; VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 -3 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 ; VF4IC4-NEXT: [[REVERSE6:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 @@ -824,14 +824,14 @@ define i8 @same_exit_block_reverse_use_loaded_value() { ; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1 ; VF4IC4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD6]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; VF4IC4-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i32 -3 -; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 -4 -; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 -3 -; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 -8 -; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 -3 -; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 -12 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 -3 +; VF4IC4-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 0 +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 -3 +; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -4 +; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 -3 +; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -8 +; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3 +; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -12 +; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 -3 ; VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index 55682bc410527..f76634d954dd3 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -1573,13 +1573,13 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX1]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD2]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i8> [[REVERSE]], [[REVERSE3]] diff --git a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll index 1e4598e756645..e10cb2794aadb 100644 --- a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll @@ -153,8 +153,8 @@ define void @test2(ptr %dst) { ; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4294967295 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 -1 ; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll index 8123092df1ccc..b9be77ff224ff 100644 --- a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll +++ b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll @@ -53,7 +53,7 @@ define void @struct_return_1xi64_replicate(ptr noalias %in, ptr noalias writeonl ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2 +; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 2 ; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4 ; VF2IC2-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0 ; VF2IC2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1 @@ -81,7 +81,7 @@ define void @struct_return_1xi64_replicate(ptr noalias %in, ptr noalias writeonl ; VF2IC2-NEXT: [[TMP25:%.*]] = extractvalue { <2 x i64> } [[TMP13]], 0 ; VF2IC2-NEXT: [[TMP26:%.*]] = extractvalue { <2 x i64> } [[TMP24]], 0 ; VF2IC2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[OUT_A]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[TMP27]], i32 2 +; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[TMP27]], i64 2 ; VF2IC2-NEXT: store <2 x i64> [[TMP25]], ptr [[TMP27]], align 4 ; VF2IC2-NEXT: store <2 x i64> [[TMP26]], ptr [[TMP29]], align 4 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -166,7 +166,7 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl ; VF4-NEXT: store <4 x float> [[TMP42]], ptr [[TMP45]], align 4 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF4: [[MIDDLE_BLOCK]]: ; ; VF2IC2-LABEL: define void @struct_return_2xf32_replicate( @@ -178,7 +178,7 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2 +; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 2 ; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4 ; VF2IC2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0 ; VF2IC2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1 @@ -224,16 +224,16 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl ; VF2IC2-NEXT: [[TMP43:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP21]], 1 ; VF2IC2-NEXT: [[TMP44:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP40]], 1 ; VF2IC2-NEXT: [[TMP45:%.*]] = getelementptr inbounds float, ptr [[OUT_A]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP45]], i32 2 +; VF2IC2-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP45]], i64 2 ; VF2IC2-NEXT: store <2 x float> [[TMP41]], ptr [[TMP45]], align 4 ; VF2IC2-NEXT: store <2 x float> [[TMP42]], ptr [[TMP47]], align 4 ; VF2IC2-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[OUT_B]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 2 +; VF2IC2-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 2 ; VF2IC2-NEXT: store <2 x float> [[TMP43]], ptr [[TMP48]], align 4 ; VF2IC2-NEXT: store <2 x float> [[TMP44]], ptr [[TMP50]], align 4 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF2IC2-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; VF2IC2-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2IC2-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF2IC2: [[MIDDLE_BLOCK]]: ; entry: @@ -336,7 +336,7 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl ; VF4-NEXT: store <4 x i32> [[TMP63]], ptr [[TMP64]], align 4 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP66:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; VF4-NEXT: br i1 [[TMP66]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF4-NEXT: br i1 [[TMP66]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: [[MIDDLE_BLOCK]]: ; ; VF2IC2-LABEL: define void @struct_return_3xi32_replicate( @@ -348,7 +348,7 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 ; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 ; VF2IC2-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 @@ -408,24 +408,24 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl ; VF2IC2-NEXT: [[TMP57:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP29]], 0 ; VF2IC2-NEXT: [[TMP58:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP56]], 0 ; VF2IC2-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[DST_A]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[TMP59]], i32 2 +; VF2IC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[TMP59]], i64 2 ; VF2IC2-NEXT: store <2 x i32> [[TMP57]], ptr [[TMP59]], align 4 ; VF2IC2-NEXT: store <2 x i32> [[TMP58]], ptr [[TMP61]], align 4 ; VF2IC2-NEXT: [[TMP62:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP29]], 1 ; VF2IC2-NEXT: [[TMP63:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP56]], 1 ; VF2IC2-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[DST_B]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP64]], i32 2 +; VF2IC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP64]], i64 2 ; VF2IC2-NEXT: store <2 x i32> [[TMP62]], ptr [[TMP64]], align 4 ; VF2IC2-NEXT: store <2 x i32> [[TMP63]], ptr [[TMP66]], align 4 ; VF2IC2-NEXT: [[TMP67:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP29]], 2 ; VF2IC2-NEXT: [[TMP68:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP56]], 2 ; VF2IC2-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[DST_C]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i32 2 +; VF2IC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i64 2 ; VF2IC2-NEXT: store <2 x i32> [[TMP67]], ptr [[TMP69]], align 4 ; VF2IC2-NEXT: store <2 x i32> [[TMP68]], ptr [[TMP71]], align 4 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF2IC2-NEXT: [[TMP72:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; VF2IC2-NEXT: br i1 [[TMP72]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2IC2-NEXT: br i1 [[TMP72]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF2IC2: [[MIDDLE_BLOCK]]: ; entry: @@ -580,7 +580,7 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) { ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] ; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2 +; VF2IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 2 ; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 8 ; VF2IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP1]], align 8 ; VF2IC2-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll index 9adcba3c0d024..31c37a8abe845 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll @@ -26,7 +26,7 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) { ; VF8UF1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0:![0-9]+]], !llvm.loop [[LOOP1:![0-9]+]] ; VF8UF1: [[MIDDLE_BLOCK]]: ; VF8UF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; VF8UF1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH:.*]], !prof [[PROF4:![0-9]+]] +; VF8UF1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH:.*]], !prof [[PROF5:![0-9]+]] ; VF8UF1: [[SCALAR_PH]]: ; VF8UF1-NEXT: br label %[[LOOP:.*]] ; VF8UF1: [[LOOP]]: @@ -38,7 +38,7 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) { ; VF8UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1 ; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !prof [[PROF5:![0-9]+]], !llvm.loop [[LOOP6:![0-9]+]] +; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] ; VF8UF1: [[EXIT]]: ; VF8UF1-NEXT: ret void ; @@ -53,7 +53,7 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) { ; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]] ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[A]], i32 8 +; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[A]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10) diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll index 2f6c2cc275b72..b1dc6bdcb1d38 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll @@ -42,7 +42,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8 +; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; VF8UF2-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer @@ -141,7 +141,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8 +; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; VF8UF2-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer @@ -257,7 +257,7 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8 +; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 ; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll index 2317af5619749..cce9ed2783e4e 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll @@ -11,7 +11,7 @@ define i64 @remove_loop_region_int_iv_used_outside(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -44,7 +44,7 @@ define i64 @remove_loop_region_int_iv_inc_used_outside(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -78,7 +78,7 @@ define ptr @remove_loop_region_ptr_iv_used_outside(ptr %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 128 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -114,7 +114,7 @@ define ptr @remove_loop_region_ptr_iv_inc_used_outside(ptr %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 128 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8 ; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll index bba459f776050..5da6fc3179043 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll @@ -512,10 +512,10 @@ define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias ; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8 +; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 -; VF8UF2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i32 8 +; VF8UF2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 8 ; VF8UF2-NEXT: store <8 x i8> [[WIDE_LOAD]], ptr [[DST]], align 1 ; VF8UF2-NEXT: store <8 x i8> [[WIDE_LOAD1]], ptr [[TMP5]], align 1 ; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -1140,7 +1140,7 @@ define void @test_vector_tc_eq_16(ptr %A) { ; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16 ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i32 8 +; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 ; VF8UF2-NEXT: [[TMP2:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10) From 3282c265bfc7ca44f0dfc3602a0b0663f690c085 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 26 Nov 2025 08:27:21 +0000 Subject: [PATCH 3/6] [VPlan] Inline getGEPIndexTy --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 2b4cd75bbea19..be938056ea50c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2593,15 +2593,12 @@ void VPWidenGEPRecipe::printRecipe(raw_ostream &O, const Twine &Indent, } #endif -static Type *getGEPIndexTy(IRBuilderBase &Builder) { - const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); - return DL.getIndexType(Builder.getPtrTy(0)); -} - void VPVectorEndPointerRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; unsigned CurrentPart = getUnrollPart(*this); - Type *IndexTy = getGEPIndexTy(Builder); + Value *Ptr = State.get(getOperand(0), VPLane(0)); + const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); + Type *IndexTy = DL.getIndexType(Ptr->getType()); // The wide store needs to start at the last vector element. Value *RunTimeVF = State.get(getVFValue(), VPLane(0)); @@ -2614,7 +2611,6 @@ void VPVectorEndPointerRecipe::execute(VPTransformState &State) { Value *LastLane = Builder.CreateSub(RunTimeVF, ConstantInt::get(IndexTy, 1)); if (Stride != 1) LastLane = Builder.CreateMul(ConstantInt::get(IndexTy, Stride), LastLane); - Value *Ptr = State.get(getOperand(0), VPLane(0)); Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags()); ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", @@ -2637,8 +2633,9 @@ void VPVectorEndPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent, void VPVectorPointerRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; unsigned CurrentPart = getUnrollPart(*this); - Type *IndexTy = getGEPIndexTy(Builder); Value *Ptr = State.get(getOperand(0), VPLane(0)); + const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); + Type *IndexTy = DL.getIndexType(Ptr->getType()); Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart); Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, Increment, From 9b4b63bcd1889b1b7a149f24cdd40ed6b330ecaf Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 26 Nov 2025 09:43:14 +0000 Subject: [PATCH 4/6] [LV] Revert bad test changes --- .../AArch64/interleaving-load-store.ll | 125 +----------------- .../LoopVectorize/AArch64/intrinsiccost.ll | 9 ++ 2 files changed, 14 insertions(+), 120 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll index dba32e350e0f5..f7060ec3512ac 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll @@ -197,130 +197,15 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: store i8 [[SEL]], ptr [[GEP_DST]], align 1 ; INTERLEAVE-2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; INTERLEAVE-2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; INTERLEAVE-2-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; INTERLEAVE-2-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] ; INTERLEAVE-2: exit: ; INTERLEAVE-2-NEXT: ret void ; ; INTERLEAVE-4-VLA-LABEL: @interleave_single_load_store( -; INTERLEAVE-4-VLA-NEXT: iter.check: -; INTERLEAVE-4-VLA-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64 -; INTERLEAVE-4-VLA-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST:%.*]] to i64 -; INTERLEAVE-4-VLA-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 -; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; INTERLEAVE-4-VLA: vector.memcheck: -; INTERLEAVE-4-VLA-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 -; INTERLEAVE-4-VLA-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 -; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = sub i64 [[DST1]], [[SRC2]] -; INTERLEAVE-4-VLA-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] -; INTERLEAVE-4-VLA-NEXT: br i1 [[DIFF_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; INTERLEAVE-4-VLA: vector.main.loop.iter.check: -; INTERLEAVE-4-VLA-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 6 -; INTERLEAVE-4-VLA-NEXT: [[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[N]], [[TMP5]] -; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK3]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] -; INTERLEAVE-4-VLA: vector.ph: -; INTERLEAVE-4-VLA-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 64 -; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]] -; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[B:%.*]], i64 0 -; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement poison, i8 [[A:%.*]], i64 0 -; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector [[BROADCAST_SPLATINSERT4]], poison, zeroinitializer -; INTERLEAVE-4-VLA-NEXT: br label [[VECTOR_BODY:%.*]] -; INTERLEAVE-4-VLA: vector.body: -; INTERLEAVE-4-VLA-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-4-VLA-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 4 -; INTERLEAVE-4-VLA-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP10]] -; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 5 -; INTERLEAVE-4-VLA-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP13]] -; INTERLEAVE-4-VLA-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 48 -; INTERLEAVE-4-VLA-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP16]] -; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 1 -; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP11]], align 1 -; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP14]], align 1 -; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP17]], align 1 -; INTERLEAVE-4-VLA-NEXT: [[TMP18:%.*]] = icmp sgt [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; INTERLEAVE-4-VLA-NEXT: [[TMP19:%.*]] = icmp sgt [[WIDE_LOAD6]], [[BROADCAST_SPLAT]] -; INTERLEAVE-4-VLA-NEXT: [[TMP20:%.*]] = icmp sgt [[WIDE_LOAD7]], [[BROADCAST_SPLAT]] -; INTERLEAVE-4-VLA-NEXT: [[TMP21:%.*]] = icmp sgt [[WIDE_LOAD8]], [[BROADCAST_SPLAT]] -; INTERLEAVE-4-VLA-NEXT: [[TMP22:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_LOAD]], [[BROADCAST_SPLAT5]]) -; INTERLEAVE-4-VLA-NEXT: [[TMP23:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_LOAD6]], [[BROADCAST_SPLAT5]]) -; INTERLEAVE-4-VLA-NEXT: [[TMP24:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_LOAD7]], [[BROADCAST_SPLAT5]]) -; INTERLEAVE-4-VLA-NEXT: [[TMP25:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_LOAD8]], [[BROADCAST_SPLAT5]]) -; INTERLEAVE-4-VLA-NEXT: [[TMP26:%.*]] = select [[TMP18]], [[BROADCAST_SPLAT]], [[TMP22]] -; INTERLEAVE-4-VLA-NEXT: [[TMP27:%.*]] = select [[TMP19]], [[BROADCAST_SPLAT]], [[TMP23]] -; INTERLEAVE-4-VLA-NEXT: [[TMP28:%.*]] = select [[TMP20]], [[BROADCAST_SPLAT]], [[TMP24]] -; INTERLEAVE-4-VLA-NEXT: [[TMP29:%.*]] = select [[TMP21]], [[BROADCAST_SPLAT]], [[TMP25]] -; INTERLEAVE-4-VLA-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] -; INTERLEAVE-4-VLA-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP32:%.*]] = shl nuw i64 [[TMP31]], 4 -; INTERLEAVE-4-VLA-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i64 [[TMP32]] -; INTERLEAVE-4-VLA-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP35:%.*]] = shl nuw i64 [[TMP34]], 5 -; INTERLEAVE-4-VLA-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i64 [[TMP35]] -; INTERLEAVE-4-VLA-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], 48 -; INTERLEAVE-4-VLA-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i64 [[TMP38]] -; INTERLEAVE-4-VLA-NEXT: store [[TMP26]], ptr [[TMP30]], align 1 -; INTERLEAVE-4-VLA-NEXT: store [[TMP27]], ptr [[TMP33]], align 1 -; INTERLEAVE-4-VLA-NEXT: store [[TMP28]], ptr [[TMP36]], align 1 -; INTERLEAVE-4-VLA-NEXT: store [[TMP29]], ptr [[TMP39]], align 1 -; INTERLEAVE-4-VLA-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; INTERLEAVE-4-VLA-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; INTERLEAVE-4-VLA-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; INTERLEAVE-4-VLA: middle.block: -; INTERLEAVE-4-VLA-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; INTERLEAVE-4-VLA-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; INTERLEAVE-4-VLA: vec.epilog.iter.check: -; INTERLEAVE-4-VLA-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] -; INTERLEAVE-4-VLA: vec.epilog.ph: -; INTERLEAVE-4-VLA-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[N]], 8 -; INTERLEAVE-4-VLA-NEXT: [[N_VEC10:%.*]] = sub i64 [[N]], [[N_MOD_VF9]] -; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0 -; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT11]], <8 x i8> poison, <8 x i32> zeroinitializer -; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0 -; INTERLEAVE-4-VLA-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT13]], <8 x i8> poison, <8 x i32> zeroinitializer -; INTERLEAVE-4-VLA-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; INTERLEAVE-4-VLA: vec.epilog.vector.body: -; INTERLEAVE-4-VLA-NEXT: [[INDEX15:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; INTERLEAVE-4-VLA-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX15]] -; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD16:%.*]] = load <8 x i8>, ptr [[TMP41]], align 1 -; INTERLEAVE-4-VLA-NEXT: [[TMP42:%.*]] = icmp sgt <8 x i8> [[WIDE_LOAD16]], [[BROADCAST_SPLAT12]] -; INTERLEAVE-4-VLA-NEXT: [[TMP43:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[WIDE_LOAD16]], <8 x i8> [[BROADCAST_SPLAT14]]) -; INTERLEAVE-4-VLA-NEXT: [[TMP44:%.*]] = select <8 x i1> [[TMP42]], <8 x i8> [[BROADCAST_SPLAT12]], <8 x i8> [[TMP43]] -; INTERLEAVE-4-VLA-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX15]] -; INTERLEAVE-4-VLA-NEXT: store <8 x i8> [[TMP44]], ptr [[TMP45]], align 1 -; INTERLEAVE-4-VLA-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[INDEX15]], 8 -; INTERLEAVE-4-VLA-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT17]], [[N_VEC10]] -; INTERLEAVE-4-VLA-NEXT: br i1 [[TMP46]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; INTERLEAVE-4-VLA: vec.epilog.middle.block: -; INTERLEAVE-4-VLA-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[N]], [[N_VEC10]] -; INTERLEAVE-4-VLA-NEXT: br i1 [[CMP_N18]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] -; INTERLEAVE-4-VLA: vec.epilog.scalar.ph: -; INTERLEAVE-4-VLA-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; INTERLEAVE-4-VLA-NEXT: br label [[LOOP:%.*]] -; INTERLEAVE-4-VLA: loop: -; INTERLEAVE-4-VLA-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; INTERLEAVE-4-VLA-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[IV]] -; INTERLEAVE-4-VLA-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1 -; INTERLEAVE-4-VLA-NEXT: [[CMP:%.*]] = icmp sgt i8 [[L]], [[B]] -; INTERLEAVE-4-VLA-NEXT: [[MAX:%.*]] = tail call i8 @llvm.smax.i8(i8 [[L]], i8 [[A]]) -; INTERLEAVE-4-VLA-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[B]], i8 [[MAX]] -; INTERLEAVE-4-VLA-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] -; INTERLEAVE-4-VLA-NEXT: store i8 [[SEL]], ptr [[GEP_DST]], align 1 -; INTERLEAVE-4-VLA-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; INTERLEAVE-4-VLA-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; INTERLEAVE-4-VLA-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] -; INTERLEAVE-4-VLA: exit: -; INTERLEAVE-4-VLA-NEXT: ret void +; INTERLEAVE-4-VLA: call @llvm.smax.nxv16i8( +; INTERLEAVE-4-VLA-NEXT: call @llvm.smax.nxv16i8( +; INTERLEAVE-4-VLA-NEXT: call @llvm.smax.nxv16i8( +; INTERLEAVE-4-VLA-NEXT: call @llvm.smax.nxv16i8( ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll index 89cc8e3461031..3eb42845bec4a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -7,6 +7,10 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" ; CHECK-COST-LABEL: sadd +; CHECK-COST: Found an estimated cost of 6 for VF 1 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) +; CHECK-COST: Cost of 4 for VF 2: WIDEN-INTRINSIC ir<%1> = call llvm.sadd.sat(ir<%0>, ir<%offset>) +; CHECK-COST: Cost of 1 for VF 4: WIDEN-INTRINSIC ir<%1> = call llvm.sadd.sat(ir<%0>, ir<%offset>) +; CHECK-COST: Cost of 1 for VF 8: WIDEN-INTRINSIC ir<%1> = call llvm.sadd.sat(ir<%0>, ir<%offset>) define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 { ; CHECK-LABEL: @saddsat( @@ -127,6 +131,11 @@ while.end: ; preds = %while.body, %entry } ; CHECK-COST-LABEL: umin +; CHECK-COST: Found an estimated cost of 2 for VF 1 For instruction: %1 = tail call i8 @llvm.umin.i8(i8 %0, i8 %offset) +; CHECK-COST: Cost of 1 for VF 2: WIDEN-INTRINSIC ir<%1> = call llvm.umin(ir<%0>, ir<%offset>) +; CHECK-COST: Cost of 1 for VF 4: WIDEN-INTRINSIC ir<%1> = call llvm.umin(ir<%0>, ir<%offset>) +; CHECK-COST: Cost of 1 for VF 8: WIDEN-INTRINSIC ir<%1> = call llvm.umin(ir<%0>, ir<%offset>) +; CHECK-COST: Cost of 1 for VF 16: WIDEN-INTRINSIC ir<%1> = call llvm.umin(ir<%0>, ir<%offset>) define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 { From b30f5547f45d315df9aa7e3d87bd9e030909ece2 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 26 Nov 2025 09:59:35 +0000 Subject: [PATCH 5/6] [LV] Update to pre-commit test --- .../vector-pointer-gep-idxty-addrspace.ll | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll b/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll index ed3b91725561f..4f459209b3683 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll @@ -13,9 +13,9 @@ define void @vector_pointer_gep_idxty_addrspace(ptr addrspace(1) noalias %a, ptr ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i32 4 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i16 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i16 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i16 6 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP1]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP2]], align 4 @@ -25,9 +25,9 @@ define void @vector_pointer_gep_idxty_addrspace(ptr addrspace(1) noalias %a, ptr ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD2]], splat (i32 1) ; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD3]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i32 2 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i32 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i32 6 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i16 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i16 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i16 6 ; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr addrspace(1) [[TMP8]], align 4 ; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr addrspace(1) [[TMP9]], align 4 ; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP10]], align 4 From 1fbd67ab64289750e1330b50d7ed357925c1f0fa Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 26 Nov 2025 11:32:34 +0000 Subject: [PATCH 6/6] [VPlan] Use VPTypeAnalysis::inferScalarType --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index be938056ea50c..1c88b56ca89dc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2596,9 +2596,8 @@ void VPWidenGEPRecipe::printRecipe(raw_ostream &O, const Twine &Indent, void VPVectorEndPointerRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; unsigned CurrentPart = getUnrollPart(*this); - Value *Ptr = State.get(getOperand(0), VPLane(0)); const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); - Type *IndexTy = DL.getIndexType(Ptr->getType()); + Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this)); // The wide store needs to start at the last vector element. Value *RunTimeVF = State.get(getVFValue(), VPLane(0)); @@ -2611,6 +2610,7 @@ void VPVectorEndPointerRecipe::execute(VPTransformState &State) { Value *LastLane = Builder.CreateSub(RunTimeVF, ConstantInt::get(IndexTy, 1)); if (Stride != 1) LastLane = Builder.CreateMul(ConstantInt::get(IndexTy, Stride), LastLane); + Value *Ptr = State.get(getOperand(0), VPLane(0)); Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags()); ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", @@ -2633,9 +2633,9 @@ void VPVectorEndPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent, void VPVectorPointerRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; unsigned CurrentPart = getUnrollPart(*this); - Value *Ptr = State.get(getOperand(0), VPLane(0)); const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); - Type *IndexTy = DL.getIndexType(Ptr->getType()); + Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this)); + Value *Ptr = State.get(getOperand(0), VPLane(0)); Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart); Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, Increment,