From 5b9843fcec8aaac492a91a8d7405c9dc15dd10dd Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Tue, 28 Jan 2025 13:40:25 +0000 Subject: [PATCH] [LoopVectorize][NFC] Disable output for tests that don't need it There are a lot of tests that do not depend upon the IR output for validation, relying instead on the debug output. For these tests we can add the -disable-output command line argument. --- .../LoopVectorize/AArch64/no_vector_instructions.ll | 2 +- .../AArch64/scalable-vectorization-cost-tuning.ll | 10 +++++----- .../LoopVectorize/AArch64/scalable-vectorization.ll | 6 +++--- .../LoopVectorize/AArch64/streaming-vectorization.ll | 4 ++-- .../AArch64/sve2-histcnt-outerloop-scevaddrec.ll | 2 +- .../LoopVectorize/AArch64/sve2-histcnt-vplan.ll | 2 +- .../LoopVectorize/ARM/arm-ieee-vectorize.ll | 8 ++++---- .../Transforms/LoopVectorize/RISCV/force-vect-msg.ll | 2 +- .../Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll | 2 +- .../Transforms/LoopVectorize/RISCV/reg-usage-f16.ll | 4 ++-- .../test/Transforms/LoopVectorize/RISCV/reg-usage.ll | 10 +++++----- .../LoopVectorize/RISCV/riscv-interleaved.ll | 2 +- .../vectorize-force-tail-with-evl-reduction-cost.ll | 2 +- .../X86/CostModel/gather-i16-with-i8-index.ll | 12 ++++++------ .../X86/CostModel/gather-i32-with-i8-index.ll | 12 ++++++------ .../X86/CostModel/gather-i64-with-i8-index.ll | 12 ++++++------ .../X86/CostModel/gather-i8-with-i8-index.ll | 12 ++++++------ .../X86/CostModel/interleaved-load-f32-stride-2.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f32-stride-3.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f32-stride-4.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f32-stride-5.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f32-stride-6.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f32-stride-7.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f32-stride-8.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f64-stride-2.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f64-stride-3.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f64-stride-4.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f64-stride-5.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f64-stride-6.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f64-stride-7.ll | 8 ++++---- .../X86/CostModel/interleaved-load-f64-stride-8.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i16-stride-2.ll | 10 +++++----- .../X86/CostModel/interleaved-load-i16-stride-3.ll | 10 +++++----- .../X86/CostModel/interleaved-load-i16-stride-4.ll | 10 +++++----- .../X86/CostModel/interleaved-load-i16-stride-5.ll | 10 +++++----- .../X86/CostModel/interleaved-load-i16-stride-6.ll | 10 +++++----- .../X86/CostModel/interleaved-load-i16-stride-7.ll | 10 +++++----- .../X86/CostModel/interleaved-load-i16-stride-8.ll | 10 +++++----- .../interleaved-load-i32-stride-2-indices-0u.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i32-stride-2.ll | 8 ++++---- .../interleaved-load-i32-stride-3-indices-01u.ll | 8 ++++---- .../interleaved-load-i32-stride-3-indices-0uu.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i32-stride-3.ll | 8 ++++---- .../interleaved-load-i32-stride-4-indices-012u.ll | 8 ++++---- .../interleaved-load-i32-stride-4-indices-01uu.ll | 8 ++++---- .../interleaved-load-i32-stride-4-indices-0uuu.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i32-stride-4.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i32-stride-5.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i32-stride-6.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i32-stride-7.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i32-stride-8.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i64-stride-2.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i64-stride-3.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i64-stride-4.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i64-stride-5.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i64-stride-6.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i64-stride-7.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i64-stride-8.ll | 8 ++++---- .../X86/CostModel/interleaved-load-i8-stride-2.ll | 10 +++++----- .../X86/CostModel/interleaved-load-i8-stride-3.ll | 10 +++++----- .../X86/CostModel/interleaved-load-i8-stride-4.ll | 10 +++++----- .../X86/CostModel/interleaved-load-i8-stride-5.ll | 10 +++++----- .../X86/CostModel/interleaved-load-i8-stride-6.ll | 10 +++++----- .../X86/CostModel/interleaved-load-i8-stride-7.ll | 10 +++++----- .../X86/CostModel/interleaved-load-i8-stride-8.ll | 10 +++++----- .../X86/CostModel/interleaved-store-f32-stride-2.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f32-stride-3.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f32-stride-4.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f32-stride-5.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f32-stride-6.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f32-stride-7.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f32-stride-8.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f64-stride-2.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f64-stride-3.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f64-stride-4.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f64-stride-5.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f64-stride-6.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f64-stride-7.ll | 8 ++++---- .../X86/CostModel/interleaved-store-f64-stride-8.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i16-stride-2.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i16-stride-3.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i16-stride-4.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i16-stride-5.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i16-stride-6.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i16-stride-7.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i16-stride-8.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i32-stride-2.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i32-stride-3.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i32-stride-4.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i32-stride-5.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i32-stride-6.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i32-stride-7.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i32-stride-8.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i64-stride-2.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i64-stride-3.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i64-stride-4.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i64-stride-5.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i64-stride-6.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i64-stride-7.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i64-stride-8.ll | 8 ++++---- .../X86/CostModel/interleaved-store-i8-stride-2.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i8-stride-3.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i8-stride-4.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i8-stride-5.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i8-stride-6.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i8-stride-7.ll | 10 +++++----- .../X86/CostModel/interleaved-store-i8-stride-8.ll | 10 +++++----- .../X86/CostModel/masked-gather-i32-with-i8-index.ll | 12 ++++++------ .../X86/CostModel/masked-gather-i64-with-i8-index.ll | 12 ++++++------ .../X86/CostModel/masked-interleaved-load-i16.ll | 4 ++-- .../X86/CostModel/masked-interleaved-store-i16.ll | 4 ++-- .../LoopVectorize/X86/CostModel/masked-load-i16.ll | 12 ++++++------ .../LoopVectorize/X86/CostModel/masked-load-i32.ll | 12 ++++++------ .../LoopVectorize/X86/CostModel/masked-load-i64.ll | 12 ++++++------ .../LoopVectorize/X86/CostModel/masked-load-i8.ll | 12 ++++++------ .../CostModel/masked-scatter-i32-with-i8-index.ll | 12 ++++++------ .../CostModel/masked-scatter-i64-with-i8-index.ll | 12 ++++++------ .../LoopVectorize/X86/CostModel/masked-store-i16.ll | 12 ++++++------ .../LoopVectorize/X86/CostModel/masked-store-i32.ll | 12 ++++++------ .../LoopVectorize/X86/CostModel/masked-store-i64.ll | 12 ++++++------ .../LoopVectorize/X86/CostModel/masked-store-i8.ll | 12 ++++++------ .../X86/CostModel/scatter-i16-with-i8-index.ll | 12 ++++++------ .../X86/CostModel/scatter-i32-with-i8-index.ll | 12 ++++++------ .../X86/CostModel/scatter-i64-with-i8-index.ll | 12 ++++++------ .../X86/CostModel/scatter-i8-with-i8-index.ll | 12 ++++++------ .../LoopVectorize/X86/CostModel/strided-load-i16.ll | 2 +- .../LoopVectorize/X86/CostModel/strided-load-i32.ll | 2 +- .../LoopVectorize/X86/CostModel/strided-load-i64.ll | 2 +- .../LoopVectorize/X86/CostModel/strided-load-i8.ll | 2 +- .../LoopVectorize/explicit_outer_detection.ll | 2 +- .../LoopVectorize/explicit_outer_nonuniform_inner.ll | 2 +- .../explicit_outer_uniform_diverg_branch.ll | 2 +- .../Transforms/LoopVectorize/loop-vect-memdep.ll | 2 +- llvm/test/Transforms/LoopVectorize/nounroll.ll | 2 +- .../optimal-epilog-vectorization-limitations.ll | 2 +- .../optimal-epilog-vectorization-scalable.ll | 2 +- .../Transforms/LoopVectorize/scalable-vf-hint.ll | 2 +- .../Transforms/LoopVectorize/scalarized-bitcast.ll | 2 +- .../LoopVectorize/uncountable-single-exit-loops.ll | 2 +- llvm/test/Transforms/LoopVectorize/vect.stats.ll | 2 +- .../LoopVectorize/vplan-unused-interleave-group.ll | 2 +- 141 files changed, 556 insertions(+), 556 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll index 089d279d15245..ebc1f719203de 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S -debug-only=loop-vectorize 2>&1 | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S -debug-only=loop-vectorize --disable-output 2>&1 | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll index 1565d1ce982e3..c4aee69db70b3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll @@ -1,22 +1,22 @@ ; REQUIRES: asserts ; RUN: opt -mtriple=aarch64 -mattr=+sve \ -; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ +; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output < %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=VSCALEFORTUNING1 ; RUN: opt -mtriple=aarch64 -mattr=+sve -mcpu=generic \ -; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ +; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output < %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=VSCALEFORTUNING1 ; RUN: opt -mtriple=aarch64 -mcpu=neoverse-v1 \ -; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ +; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output < %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=VSCALEFORTUNING2 ; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 \ -; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ +; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output < %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=VSCALEFORTUNING1 ; RUN: opt -mtriple=aarch64 -mcpu=neoverse-v2 \ -; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ +; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output < %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=NEOVERSEV2 ; VSCALEFORTUNING1: Cost for VF vscale x 2: 11 (Estimated cost per lane: 5. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll index e83eb729b521c..dcb2b9b08d1e9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll @@ -1,7 +1,7 @@ ; REQUIRES: asserts -; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED -; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON -; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW +; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED +; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON +; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -vectorizer-maximize-bandwidth -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW ; Test that the MaxVF for the following loop, that has no dependence distances, ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-vectorization.ll index 924d4bfb7836a..a26404eabedef 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-vectorization.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NOVEC -; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize -enable-scalable-autovec-in-streaming-mode < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,VEC +; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NOVEC +; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize --disable-output -enable-scalable-autovec-in-streaming-mode < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,VEC target triple = "aarch64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-outerloop-scevaddrec.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-outerloop-scevaddrec.ll index 02bbfa839e690..f118de1128a11 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-outerloop-scevaddrec.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-outerloop-scevaddrec.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mattr=+sve2 -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -debug-only=loop-vectorize -S 2>&1 | FileCheck %s +; RUN: opt < %s -mattr=+sve2 -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -debug-only=loop-vectorize --disable-output -S 2>&1 | FileCheck %s ; REQUIRES: asserts target triple = "aarch64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll index c119248c0be43..55f82fd55daf4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mattr=+sve2 -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -force-vector-interleave=1 -debug-only=loop-vectorize -S 2>&1 | FileCheck %s +; RUN: opt < %s -mattr=+sve2 -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -force-vector-interleave=1 -debug-only=loop-vectorize --disable-output -S 2>&1 | FileCheck %s ; REQUIRES: asserts target triple = "aarch64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll index aee0aa4b271f0..44a48a9c262f5 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll @@ -1,7 +1,7 @@ -; RUN: opt -mtriple armv7-linux-gnueabihf -passes=loop-vectorize -S %s -debug-only=loop-vectorize -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=LINUX -; RUN: opt -mtriple armv8-linux-gnu -passes=loop-vectorize -S %s -debug-only=loop-vectorize -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=LINUX -; RUN: opt -mtriple armv8.1.m-none-eabi -mattr=+mve.fp -passes=loop-vectorize -S %s -debug-only=loop-vectorize -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=MVE -; RUN: opt -mtriple armv7-unknwon-darwin -passes=loop-vectorize -S %s -debug-only=loop-vectorize -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=DARWIN +; RUN: opt -mtriple armv7-linux-gnueabihf -passes=loop-vectorize -S %s -debug-only=loop-vectorize --disable-output -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=LINUX +; RUN: opt -mtriple armv8-linux-gnu -passes=loop-vectorize -S %s -debug-only=loop-vectorize --disable-output -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=LINUX +; RUN: opt -mtriple armv8.1.m-none-eabi -mattr=+mve.fp -passes=loop-vectorize -S %s -debug-only=loop-vectorize --disable-output -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=MVE +; RUN: opt -mtriple armv7-unknwon-darwin -passes=loop-vectorize -S %s -debug-only=loop-vectorize --disable-output -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=DARWIN ; REQUIRES: asserts target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/force-vect-msg.ll b/llvm/test/Transforms/LoopVectorize/RISCV/force-vect-msg.ll index 09a1aaab6cc2d..f68ffd1aea3cd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/force-vect-msg.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/force-vect-msg.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -riscv-v-vector-bits-min=128 -mattr="+v" -debug-only=loop-vectorize -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -riscv-v-vector-bits-min=128 -mattr="+v" -debug-only=loop-vectorize --disable-output -S 2>&1 | FileCheck %s ; CHECK: LV: Loop hints: force=enabled ; CHECK: LV: Scalar loop costs: 4. diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll index 71ee00fbf3ec5..40d6e8bc33471 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s +; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) { ; CHECK-LABEL: add diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll index c58307d960786..e07c7b6b40729 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH -; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN +; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH +; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) { ; CHECK-LABEL: add diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll index 5e3de92bca885..a2f55e49c9e0e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll @@ -1,22 +1,22 @@ ; REQUIRES: asserts ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ -; RUN: -mattr=+v,+d -debug-only=loop-vectorize \ +; RUN: -mattr=+v,+d -debug-only=loop-vectorize --disable-output \ ; RUN: -riscv-v-vector-bits-min=128 -force-vector-width=1 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-SCALAR ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ -; RUN: -mattr=+v,+d -debug-only=loop-vectorize \ +; RUN: -mattr=+v,+d -debug-only=loop-vectorize --disable-output \ ; RUN: -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=1 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL1 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ -; RUN: -mattr=+v,+d -debug-only=loop-vectorize \ +; RUN: -mattr=+v,+d -debug-only=loop-vectorize --disable-output \ ; RUN: -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL2 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ -; RUN: -mattr=+v,+d -debug-only=loop-vectorize \ +; RUN: -mattr=+v,+d -debug-only=loop-vectorize --disable-output \ ; RUN: -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=4 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL4 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \ -; RUN: -mattr=+v,+d -debug-only=loop-vectorize \ +; RUN: -mattr=+v,+d -debug-only=loop-vectorize --disable-output \ ; RUN: -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=8 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL8 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll index 95eb67b3823ee..c332e2172b077 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple riscv64-linux-gnu \ -; RUN: -mattr=+v -debug-only=loop-vectorize \ +; RUN: -mattr=+v -debug-only=loop-vectorize --disable-output \ ; RUN: -riscv-v-vector-bits-min=128 -scalable-vectorization=off -S < %s 2>&1 | FileCheck %s ; CHECK-LABEL: foo diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll index 69418d9fea00e..15f99931d165f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \ +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize --disable-output \ ; RUN: -force-tail-folding-style=data-with-evl \ ; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ ; RUN: -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck %s diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i16-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i16-with-i8-index.ll index 1c427a0107df1..93d25a7ae9469 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i16-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i16-with-i8-index.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%valB = load i16, ptr %inB, align 2" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-SLOWGATHER -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-FASTGATHER -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-SLOWGATHER +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-FASTGATHER +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i32-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i32-with-i8-index.ll index 8cde13761bc54..a23d57cfd4448 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i32-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i32-with-i8-index.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%valB = load i32, ptr %inB, align 4" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-SLOWGATHER -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-FASTGATHER -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-SLOWGATHER +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-FASTGATHER +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i64-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i64-with-i8-index.ll index 3435df1b494c6..5e52483aa93b3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i64-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i64-with-i8-index.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%valB = load i64, ptr %inB, align 8" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-SLOWGATHER -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-FASTGATHER -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-SLOWGATHER +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-FASTGATHER +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i8-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i8-with-i8-index.ll index 7aeeff6518416..81d646715160a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i8-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i8-with-i8-index.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%valB = load i8, ptr %inB, align 1" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-SLOWGATHER -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-FASTGATHER -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-SLOWGATHER +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-FASTGATHER +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll index 41ad1ebcf6ffb..a3aab8274391a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll index 2f6d958e87530..ed51006a89543 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll index 4ae991958deea..efa8cd7357684 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll index 57ae02abc1199..5a17a00d16d96 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load float, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll index 155f42f8f23c8..46ff5001d6572 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll index 2cc5150f3c887..5a145dafc831d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load float, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll index 1899741b8a3b4..1e7dde431db25 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load float, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll index 2d4b300a8100a..d76e101033f6a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll index 5dfb25e25d6e6..a7d5f0ca881ce 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll index bd88ca810728b..e7619f14f63f1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll index 9c0798631fdba..243a476f174da 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll index 99a735d3f552c..9849062fcb48e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll index 168e9166ea1dd..8b4ac35bed12e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll index 919a17e8729e0..6050edec2900c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll index 9fcd04bd52def..56ed11d68b117 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll index 9a4f8417921e7..f996f35240747 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll index 4a7a5a2dbc9b9..7ee418e7b12ad 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll index 6737c722b46ff..60db7a55eedd5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i16, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll index 1e5b242433b3e..00367457ae3eb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll index 46d56a75f1c4d..b7f49ef1710f3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i16, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll index 4d65abdaf688c..1d99bcd5bec02 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i16, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll index 12ffe4b1632ed..312ed6d8536ef 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll index 9c055eabe0816..94d1e685ee670 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll index 382e5bfcde341..063bf0819f0a9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll index 3c6401715311e..2754efe4a85ab 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll index ee3c510991503..fd8620757835a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll index 17dad2598dc82..c095dcc458c0b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll index 28a6443efcfb9..7fc088bebf9c3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll index 55339f958bc2a..3ea6f71ee88ba 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll index d95ab28c1ff74..8d76e696ed615 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll index 5cad7bf662c5b..cd451d0cb70bc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll index 0f1fc532b6df4..58fba399064eb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll index cfb83d4a02365..26613caf76faa 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll index 775785462de47..46f4351dff0bf 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll index cf350cc9f8307..866db993c2ec0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll index 9ca0d8c9d7e33..7c575b6dc8f37 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll index 86ee6c8b30bda..4089026b957ad 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll index f6143d4ae9f3b..60fb3bf6ea959 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll index 43dc53d683de3..f2d5bc8eafe85 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll index 70ed74dcc26da..c741e473e7739 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll index 401e4de111d73..68370852ce85a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll index 66430532d863f..2799ab4b5e82b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i8, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll index 508a8a6acecec..74f4a959d3964 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i8, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll index 509562c19114d..d2d73d33df7ca 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i8, ptr %in0" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll index ef3c80c27550a..5207c5b2019fd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll index ed8bc84e771f8..fd267ddd19229 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll index 8e7c316b69b3a..bcf93dc2d005b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll index 752cc229922be..c03f09e6a003e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll index 6076aa274623a..e1698c4d0c6cf 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v1, ptr %out1" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll index ce0289998100b..787f448ad9651 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v2, ptr %out2" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll index e0fa19298c577..4b85900e031d3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v3, ptr %out3" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll index 940864a59a93a..0db57f73f5756 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v4, ptr %out4" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll index 3d947e13b3732..3693ac5771929 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v5, ptr %out5" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll index ffa4cab905cea..bb61c8810e292 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v6, ptr %out6" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll index 2ad37bee35bed..c1fe64e324378 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v7, ptr %out7" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll index 30faeb74644be..d73f73b826c88 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v1, ptr %out1" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll index a4405db7e8099..c8e6f78676d14 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v2, ptr %out2" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll index aea5871ec8b87..cf1aabbd5d877 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v3, ptr %out3" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll index 888ba09ea8452..bad099868de31 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v4, ptr %out4" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll index 6200f8fec3201..c3b552b0811ad 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v5, ptr %out5" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll index b8258ae845954..f195c6adf7743 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v6, ptr %out6" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll index ed2bb3f750b01..fb0f86e1db7a6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v., ptr %out." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll index 70aa1a017605e..d21798f5bb70e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v1, ptr %out1" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll index 03bffeb36eedc..a216313cf53ee 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v2, ptr %out2" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll index 58b742521cfaa..b4d57ca288256 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v3, ptr %out3" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll index b12b9ddf09529..bbe249e11501a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v4, ptr %out4" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll index c41493d916fb9..2b5ad53a13244 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v5, ptr %out5" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll index 2a4d1bbf77cd5..781aa39a2d961 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v6, ptr %out6" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll index ada56b16827b1..f524cee862167 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v7, ptr %out7" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll index cc697305e3b0e..e27e45c753dc8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v1, ptr %out1" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll index 47bed35fd42bc..0c0e746cb503d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v2, ptr %out2" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll index 6e19bc197e240..0d4823d965ca4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v3, ptr %out3" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll index d109b746efc42..4f04f41daec71 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v4, ptr %out4" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll index 93ab0b7e10369..9e7570850976b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v5, ptr %out5" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll index 8a01e2fbceaa2..70b60cb815fec 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v6, ptr %out6" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll index cf0efab954813..570431577cf46 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v7, ptr %out7" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll index 895d7a7623dba..8352b2f415708 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v1, ptr %out1" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll index c4cc65212ffa9..5b5d1ddb6a61c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v2, ptr %out2" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll index 6b7be24b54f6b..78dbca343a217 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v3, ptr %out3" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll index 52a4d4f9eb658..4b7fe9d580d0d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v4, ptr %out4" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll index 14f36529a13b3..e8f66d10499e7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v5, ptr %out5" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll index 41a45a3072c95..7e9e4347cd665 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v6, ptr %out6" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll index a539877104129..6bf88fce8df80 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v., ptr %out." -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512 ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll index 78105db5e5ad3..eac3d14df3c40 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v1, ptr %out1" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll index 8e6892d0158e7..e2fa7eb32fe75 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v2, ptr %out2" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll index 3083dde7ac66f..21f010df4ee0f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v3, ptr %out3" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll index d788a7b8d469e..0ea9060365f58 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v4, ptr %out4" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll index f3ec189573a77..318af2369c5f4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v5, ptr %out5" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll index ed04d806bf415..659d4308a0b5e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v6, ptr %out6" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll index 62f8734c487a0..c066a686efc43 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v7, ptr %out7" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512DQ +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512vl,+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX512BW ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-gather-i32-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-gather-i32-with-i8-index.ll index d3d439f37c440..caf8f10d1169c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-gather-i32-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-gather-i32-with-i8-index.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%valB.loaded = load i32, ptr %inB, align 4" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-SLOWGATHER -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-FASTGATHER -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-SLOWGATHER +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-FASTGATHER +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-gather-i64-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-gather-i64-with-i8-index.ll index f1dec928391eb..c70ab9962785b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-gather-i64-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-gather-i64-with-i8-index.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%valB.loaded = load i64, ptr %inB, align 8" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-SLOWGATHER -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-FASTGATHER -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-SLOWGATHER +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2-FASTGATHER +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll index e867e5f5bcfc7..665b0c3bac040 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%i[2,4] = load i16, ptr %[a-zA-Z0-7]+, align 2" -; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED -; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED +; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED +; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED ; REQUIRES: asserts target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll index 41dd266d0a87a..a286df9bc2fc7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %[0,2], ptr %[a-zA-Z0-7]+, align 2" -; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED -; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED +; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED +; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED ; REQUIRES: asserts target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i16.ll index 5f910a6dfaa61..4f3f141de3adb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i16.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i16.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%valB.loaded = load i16, ptr %inB, align 2" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i32.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i32.ll index 095444a727fe8..6d613871639bb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i32.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i32.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%valB.loaded = load i32, ptr %inB, align 4" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i64.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i64.ll index cd161fcdeaeae..c0d1144fab800 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i64.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i64.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%valB.loaded = load i64, ptr %inB, align 8" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i8.ll index 4c2cbc41e53ff..65075cd094527 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i8.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%valB.loaded = load i8, ptr %inB, align 1" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll index 95d6d457ee4d8..5e67bd57754e4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %valB, ptr %out, align 4" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll index 3ae93b14f0d62..629ee1d2711ae 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %valB, ptr %out, align 8" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll index fa7c3d9fcdcab..1d51a32a520a9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %valB, ptr %out, align 2" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll index 0efaefb1db5ce..f011d06d319bb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %valB, ptr %out, align 4" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll index 9bd452fffcd3d..c004b16ae207d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %valB, ptr %out, align 8" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll index 48130f0b57bec..8bbe624849783 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %valB, ptr %out, align 1" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll index 3fd22a89d3cd0..bd6b16831d09c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %valB, ptr %out, align 2" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll index 45732f3d76745..de76eb0782c0d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %valB, ptr %out, align 4" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll index 08033ccd200e2..c69711d1b71d5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %valB, ptr %out, align 8" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll index 344d39631ff70..4f62383d67927 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %valB, ptr %out, align 1" -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse4.2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,-fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2,+fast-gather --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefixes=AVX512 ; REQUIRES: asserts diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll index fee6e279877ed..a7ddbe2ed2c6b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -S -mattr=avx512bw --debug-only=loop-vectorize < %s 2>&1| FileCheck %s +; RUN: opt -passes=loop-vectorize -S -mattr=avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1| FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll index 1ef918c83c359..89920bb83eca9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -S -mattr=avx512f --debug-only=loop-vectorize < %s 2>&1| FileCheck %s +; RUN: opt -passes=loop-vectorize -S -mattr=avx512f --debug-only=loop-vectorize --disable-output < %s 2>&1| FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll index 9efc0737d5592..8428eadf0d280 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -S -mattr=avx512f --debug-only=loop-vectorize < %s 2>&1| FileCheck %s +; RUN: opt -passes=loop-vectorize -S -mattr=avx512f --debug-only=loop-vectorize --disable-output < %s 2>&1| FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll index 018f6e94e02d9..85e189c88aca2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -S -mattr=avx512bw --debug-only=loop-vectorize < %s 2>&1| FileCheck %s +; RUN: opt -passes=loop-vectorize -S -mattr=avx512bw --debug-only=loop-vectorize --disable-output < %s 2>&1| FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll b/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll index e01115888446a..35bd58ccbe00e 100644 --- a/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll +++ b/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=loop-vectorize -enable-vplan-native-path -debug-only=loop-vectorize -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -enable-vplan-native-path -debug-only=loop-vectorize --disable-output -S 2>&1 | FileCheck %s ; REQUIRES: asserts ; Verify that outer loops annotated only with the expected explicit diff --git a/llvm/test/Transforms/LoopVectorize/explicit_outer_nonuniform_inner.ll b/llvm/test/Transforms/LoopVectorize/explicit_outer_nonuniform_inner.ll index 1253bbff85fd4..3fddc2fc3f108 100644 --- a/llvm/test/Transforms/LoopVectorize/explicit_outer_nonuniform_inner.ll +++ b/llvm/test/Transforms/LoopVectorize/explicit_outer_nonuniform_inner.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=loop-vectorize -enable-vplan-native-path -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -enable-vplan-native-path -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize --disable-output -S 2>&1 | FileCheck %s ; REQUIRES: asserts ; Verify that LV bails out on explicit vectorization outer loops that contain diff --git a/llvm/test/Transforms/LoopVectorize/explicit_outer_uniform_diverg_branch.ll b/llvm/test/Transforms/LoopVectorize/explicit_outer_uniform_diverg_branch.ll index ac87c1500114b..2131beb2932c9 100644 --- a/llvm/test/Transforms/LoopVectorize/explicit_outer_uniform_diverg_branch.ll +++ b/llvm/test/Transforms/LoopVectorize/explicit_outer_uniform_diverg_branch.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=loop-vectorize -enable-vplan-native-path -debug-only=loop-vectorize -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -enable-vplan-native-path -debug-only=loop-vectorize --disable-output -S 2>&1 | FileCheck %s ; REQUIRES: asserts ; Verify that LV can handle explicit vectorization outer loops with uniform branches diff --git a/llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll b/llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll index cd4d63be4d1a8..789e9c0adb63f 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll @@ -1,6 +1,6 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -; RUN: opt < %s -S -passes=loop-vectorize -debug-only=loop-vectorize 2>&1 | FileCheck %s +; RUN: opt < %s -S -passes=loop-vectorize -debug-only=loop-vectorize --disable-output 2>&1 | FileCheck %s ; REQUIRES: asserts ; CHECK: LV: Can't vectorize due to memory conflicts diff --git a/llvm/test/Transforms/LoopVectorize/nounroll.ll b/llvm/test/Transforms/LoopVectorize/nounroll.ll index 7e4decb4dedb0..f6349404536b7 100644 --- a/llvm/test/Transforms/LoopVectorize/nounroll.ll +++ b/llvm/test/Transforms/LoopVectorize/nounroll.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt < %s -passes='loop-vectorize' -debug-only=loop-vectorize -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes='loop-vectorize' -debug-only=loop-vectorize --disable-output -S 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll index 331aaed602aa2..a4b96c96c62f3 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt < %s -passes='loop-vectorize' -force-vector-width=2 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes='loop-vectorize' -force-vector-width=2 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize --disable-output -S 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll index 336bba76000c3..05bc0f4addec5 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt < %s -passes='loop-vectorize' -force-target-supports-scalable-vectors=true -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize -S -scalable-vectorization=on 2>&1 | FileCheck %s +; RUN: opt < %s -passes='loop-vectorize' -force-target-supports-scalable-vectors=true -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize --disable-output -S -scalable-vectorization=on 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" diff --git a/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll index e51e49e1cde7c..d956a24e0e64a 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-vf-hint.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck %s +; RUN: opt -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize --disable-output -S -scalable-vectorization=on < %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll b/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll index 0a82c0a3e4cc1..1a36807c1c1ac 100644 --- a/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll +++ b/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -force-vector-width=2 -debug-only=loop-vectorize -S -o - < %s 2>&1 | FileCheck %s +; RUN: opt -passes=loop-vectorize -force-vector-width=2 -debug-only=loop-vectorize --disable-output -S -o - < %s 2>&1 | FileCheck %s %struct.foo = type { i32, i64 } diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll b/llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll index c3958262f0af8..de08e333192df 100644 --- a/llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll +++ b/llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -p loop-vectorize -debug-only=loop-vectorize -S %s 2>&1 | FileCheck %s +; RUN: opt -p loop-vectorize -debug-only=loop-vectorize --disable-output -S %s 2>&1 | FileCheck %s ; CHECK-LABEL: LV: Checking a loop in 'latch_exit_cannot_compute_btc_due_to_step' diff --git a/llvm/test/Transforms/LoopVectorize/vect.stats.ll b/llvm/test/Transforms/LoopVectorize/vect.stats.ll index 05aa13b22b6f4..9a55dc99c316b 100644 --- a/llvm/test/Transforms/LoopVectorize/vect.stats.ll +++ b/llvm/test/Transforms/LoopVectorize/vect.stats.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize --disable-output -stats -S 2>&1 | FileCheck %s ; REQUIRES: asserts ; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll b/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll index 9778ef6853a70..cd63ca3368e30 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -passes=loop-vectorize -S -force-vector-width=4 -enable-interleaved-mem-accesses=true -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s +; RUN: opt -passes=loop-vectorize -S -force-vector-width=4 -enable-interleaved-mem-accesses=true -debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s ; REQUIRES: asserts ; This test checks if an unused interleave group is removed by removeDeadRecipes.