From ddfb8a8dbbd253cfbae0dd74fdb5dd9dde784869 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Wed, 11 Dec 2024 15:35:05 +0000 Subject: [PATCH] [LoopVectorize] Make needsExtract notice scalarized instructions LoopVectorizationCostModel::needsExtract should recognise instructions that have been widened by scalarizing as scalar instructions, and thus not needing an extract when used by later scalarized instructions. This fixes an incorrect cost calculation in computePredInstDiscount, where we are adding a scalarization overhead cost when we shouldn't, though I haven't come up with a test case where it makes a difference. It will make a difference when the cost model switches to using the cost kind TCK_CodeSize for optsize, as not doing this causes the test LoopVectorize/X86/small-size.ll to get worse. --- .../Transforms/Vectorize/LoopVectorize.cpp | 3 +- .../LoopVectorize/AArch64/interleaved_cost.ll | 4 +- .../LoopVectorize/ARM/mve-interleaved-cost.ll | 320 +++++++++--------- 3 files changed, 164 insertions(+), 163 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 754f597b7536b..48566676168fc 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1744,7 +1744,8 @@ class LoopVectorizationCostModel { bool needsExtract(Value *V, ElementCount VF) const { Instruction *I = dyn_cast(V); if (VF.isScalar() || !I || !TheLoop->contains(I) || - TheLoop->isLoopInvariant(I)) + TheLoop->isLoopInvariant(I) || + getWideningDecision(I, VF) == CM_Scalarize) return false; // Assume we can vectorize V (and hence we need extraction) if the diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll index dec124b55cd4e..a550f1ca14c8b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll @@ -170,8 +170,8 @@ entry: ; VF_2-LABEL: Checking a loop in 'i64_factor_8' ; VF_2: Found an estimated cost of 8 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.8, ptr %data, i64 %i, i32 2 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll index 976c6a9a570af..551b85b7d0357 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll @@ -17,8 +17,8 @@ entry: ; VF_2-LABEL: Checking a loop in 'i8_factor_2' ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp2, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp3, ptr %tmp1, align 1 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp2, ptr %tmp0, align 1 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp3, ptr %tmp1, align 1 ; VF_4-LABEL: Checking a loop in 'i8_factor_2' ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 @@ -58,8 +58,8 @@ entry: ; VF_2-LABEL: Checking a loop in 'i16_factor_2' ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp2, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp3, ptr %tmp1, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp2, ptr %tmp0, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp3, ptr %tmp1, align 2 ; VF_4-LABEL: Checking a loop in 'i16_factor_2' ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 @@ -99,8 +99,8 @@ entry: ; VF_2-LABEL: Checking a loop in 'i32_factor_2' ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4 ; VF_4-LABEL: Checking a loop in 'i32_factor_2' ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 @@ -140,23 +140,23 @@ entry: ; VF_2-LABEL: Checking a loop in 'i64_factor_2' ; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8 ; VF_4-LABEL: Checking a loop in 'i64_factor_2' ; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp2, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp3, ptr %tmp1, align 8 ; VF_8-LABEL: Checking a loop in 'i64_factor_2' ; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp2, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp3, ptr %tmp1, align 8 ; VF_16-LABEL: Checking a loop in 'i64_factor_2' ; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp2, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp3, ptr %tmp1, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0 @@ -181,8 +181,8 @@ entry: ; VF_2-LABEL: Checking a loop in 'f16_factor_2' ; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load half, ptr %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load half, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp2, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp3, ptr %tmp1, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp2, ptr %tmp0, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp3, ptr %tmp1, align 2 ; VF_4-LABEL: Checking a loop in 'f16_factor_2' ; VF_4: Found an estimated cost of 18 for VF 4 For instruction: %tmp2 = load half, ptr %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, ptr %tmp1, align 2 @@ -263,23 +263,23 @@ entry: ; VF_2-LABEL: Checking a loop in 'f64_factor_2' ; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load double, ptr %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load double, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp2, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp3, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp2, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp3, ptr %tmp1, align 8 ; VF_4-LABEL: Checking a loop in 'f64_factor_2' ; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp2 = load double, ptr %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp3 = load double, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp2, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp3, ptr %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp2, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp3, ptr %tmp1, align 8 ; VF_8-LABEL: Checking a loop in 'f64_factor_2' ; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp2 = load double, ptr %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp3 = load double, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp2, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp3, ptr %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp2, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp3, ptr %tmp1, align 8 ; VF_16-LABEL: Checking a loop in 'f64_factor_2' ; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp2 = load double, ptr %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp3 = load double, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp2, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp3, ptr %tmp1, align 8 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp2, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp3, ptr %tmp1, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f64.2, ptr %data, i64 %i, i32 0 @@ -309,30 +309,30 @@ entry: ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp3, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp4, ptr %tmp1, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp5, ptr %tmp2, align 1 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp3, ptr %tmp0, align 1 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp4, ptr %tmp1, align 1 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp5, ptr %tmp2, align 1 ; VF_4-LABEL: Checking a loop in 'i8_factor_3' ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp3, ptr %tmp0, align 1 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp4, ptr %tmp1, align 1 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp5, ptr %tmp2, align 1 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp3, ptr %tmp0, align 1 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp4, ptr %tmp1, align 1 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp5, ptr %tmp2, align 1 ; VF_8-LABEL: Checking a loop in 'i8_factor_3' ; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp3, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp4, ptr %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp5, ptr %tmp2, align 1 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp3, ptr %tmp0, align 1 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp4, ptr %tmp1, align 1 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp5, ptr %tmp2, align 1 ; VF_16-LABEL: Checking a loop in 'i8_factor_3' ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp3, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp4, ptr %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp5, ptr %tmp2, align 1 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp3, ptr %tmp0, align 1 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp4, ptr %tmp1, align 1 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp5, ptr %tmp2, align 1 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 0 @@ -361,30 +361,30 @@ entry: ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp3, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp4, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp5, ptr %tmp2, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp3, ptr %tmp0, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp4, ptr %tmp1, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp5, ptr %tmp2, align 2 ; VF_4-LABEL: Checking a loop in 'i16_factor_3' ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp3, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp4, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp5, ptr %tmp2, align 2 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp3, ptr %tmp0, align 2 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp4, ptr %tmp1, align 2 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp5, ptr %tmp2, align 2 ; VF_8-LABEL: Checking a loop in 'i16_factor_3' ; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp3, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp4, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp5, ptr %tmp2, align 2 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp3, ptr %tmp0, align 2 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp4, ptr %tmp1, align 2 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp5, ptr %tmp2, align 2 ; VF_16-LABEL: Checking a loop in 'i16_factor_3' ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp3, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp4, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp5, ptr %tmp2, align 2 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp3, ptr %tmp0, align 2 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp4, ptr %tmp1, align 2 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp5, ptr %tmp2, align 2 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i16.3, ptr %data, i64 %i, i32 0 @@ -413,9 +413,9 @@ entry: ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp3, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp4, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp5, ptr %tmp2, align 4 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp3, ptr %tmp0, align 4 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp4, ptr %tmp1, align 4 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp5, ptr %tmp2, align 4 ; VF_4-LABEL: Checking a loop in 'i32_factor_3' ; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 @@ -427,16 +427,16 @@ entry: ; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp3, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp4, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp5, ptr %tmp2, align 4 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp3, ptr %tmp0, align 4 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp4, ptr %tmp1, align 4 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp5, ptr %tmp2, align 4 ; VF_16-LABEL: Checking a loop in 'i32_factor_3' ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp3, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp4, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp5, ptr %tmp2, align 4 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp3, ptr %tmp0, align 4 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp4, ptr %tmp1, align 4 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp5, ptr %tmp2, align 4 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i32.3, ptr %data, i64 %i, i32 0 @@ -465,30 +465,30 @@ entry: ; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp3, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp4, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp5, ptr %tmp2, align 8 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp3, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp4, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp5, ptr %tmp2, align 8 ; VF_4-LABEL: Checking a loop in 'i64_factor_3' ; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp3, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp4, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp5, ptr %tmp2, align 8 +; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp3, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp4, ptr %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp5, ptr %tmp2, align 8 ; VF_8-LABEL: Checking a loop in 'i64_factor_3' ; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp3, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp4, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp5, ptr %tmp2, align 8 +; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp3, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp4, ptr %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp5, ptr %tmp2, align 8 ; VF_16-LABEL: Checking a loop in 'i64_factor_3' ; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp3, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp4, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp5, ptr %tmp2, align 8 +; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp3, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp4, ptr %tmp1, align 8 +; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp5, ptr %tmp2, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.3, ptr %data, i64 %i, i32 0 @@ -517,9 +517,9 @@ entry: ; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load half, ptr %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load half, ptr %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load half, ptr %tmp2, align 2 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp3, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp4, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp5, ptr %tmp2, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp3, ptr %tmp0, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp4, ptr %tmp1, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp5, ptr %tmp2, align 2 ; VF_4-LABEL: Checking a loop in 'f16_factor_3' ; VF_4: Found an estimated cost of 28 for VF 4 For instruction: %tmp3 = load half, ptr %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load half, ptr %tmp1, align 2 @@ -621,30 +621,30 @@ entry: ; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load double, ptr %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load double, ptr %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load double, ptr %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp3, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp4, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp5, ptr %tmp2, align 8 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp3, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp4, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp5, ptr %tmp2, align 8 ; VF_4-LABEL: Checking a loop in 'f64_factor_3' ; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp3 = load double, ptr %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp4 = load double, ptr %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp5 = load double, ptr %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp3, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp4, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp5, ptr %tmp2, align 8 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp3, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp4, ptr %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp5, ptr %tmp2, align 8 ; VF_8-LABEL: Checking a loop in 'f64_factor_3' ; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp3 = load double, ptr %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp4 = load double, ptr %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp5 = load double, ptr %tmp2, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp3, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp4, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp5, ptr %tmp2, align 8 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp3, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp4, ptr %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp5, ptr %tmp2, align 8 ; VF_16-LABEL: Checking a loop in 'f64_factor_3' ; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp3 = load double, ptr %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp4 = load double, ptr %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp5 = load double, ptr %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp3, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp4, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp5, ptr %tmp2, align 8 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp3, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp4, ptr %tmp1, align 8 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp5, ptr %tmp2, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f64.3, ptr %data, i64 %i, i32 0 @@ -677,37 +677,37 @@ entry: ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp1, align 1 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i8, ptr %tmp2, align 1 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i8, ptr %tmp3, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp4, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp5, ptr %tmp1, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp6, ptr %tmp2, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp7, ptr %tmp3, align 1 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp4, ptr %tmp0, align 1 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp5, ptr %tmp1, align 1 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp6, ptr %tmp2, align 1 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp7, ptr %tmp3, align 1 ; VF_4-LABEL: Checking a loop in 'i8_factor_4' ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp1, align 1 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp6 = load i8, ptr %tmp2, align 1 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp7 = load i8, ptr %tmp3, align 1 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp4, ptr %tmp0, align 1 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp5, ptr %tmp1, align 1 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp6, ptr %tmp2, align 1 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp7, ptr %tmp3, align 1 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp4, ptr %tmp0, align 1 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp5, ptr %tmp1, align 1 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp6, ptr %tmp2, align 1 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp7, ptr %tmp3, align 1 ; VF_8-LABEL: Checking a loop in 'i8_factor_4' ; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp1, align 1 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i8, ptr %tmp2, align 1 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i8, ptr %tmp3, align 1 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp4, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp5, ptr %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp6, ptr %tmp2, align 1 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp7, ptr %tmp3, align 1 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp4, ptr %tmp0, align 1 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp5, ptr %tmp1, align 1 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp6, ptr %tmp2, align 1 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp7, ptr %tmp3, align 1 ; VF_16-LABEL: Checking a loop in 'i8_factor_4' ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp0, align 1 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp1, align 1 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i8, ptr %tmp2, align 1 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i8, ptr %tmp3, align 1 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp4, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp5, ptr %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp6, ptr %tmp2, align 1 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp7, ptr %tmp3, align 1 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp4, ptr %tmp0, align 1 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp5, ptr %tmp1, align 1 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp6, ptr %tmp2, align 1 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp7, ptr %tmp3, align 1 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 0 @@ -740,37 +740,37 @@ entry: ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i16, ptr %tmp2, align 2 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i16, ptr %tmp3, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp4, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp5, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp6, ptr %tmp2, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp7, ptr %tmp3, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp4, ptr %tmp0, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp5, ptr %tmp1, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp6, ptr %tmp2, align 2 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp7, ptr %tmp3, align 2 ; VF_4-LABEL: Checking a loop in 'i16_factor_4' ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp6 = load i16, ptr %tmp2, align 2 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp7 = load i16, ptr %tmp3, align 2 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp4, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp5, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp6, ptr %tmp2, align 2 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp7, ptr %tmp3, align 2 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp4, ptr %tmp0, align 2 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp5, ptr %tmp1, align 2 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp6, ptr %tmp2, align 2 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp7, ptr %tmp3, align 2 ; VF_8-LABEL: Checking a loop in 'i16_factor_4' ; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i16, ptr %tmp2, align 2 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i16, ptr %tmp3, align 2 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp4, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp5, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp6, ptr %tmp2, align 2 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp7, ptr %tmp3, align 2 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp4, ptr %tmp0, align 2 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp5, ptr %tmp1, align 2 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp6, ptr %tmp2, align 2 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp7, ptr %tmp3, align 2 ; VF_16-LABEL: Checking a loop in 'i16_factor_4' ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i16, ptr %tmp2, align 2 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i16, ptr %tmp3, align 2 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp4, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp5, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp6, ptr %tmp2, align 2 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp7, ptr %tmp3, align 2 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp4, ptr %tmp0, align 2 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp5, ptr %tmp1, align 2 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp6, ptr %tmp2, align 2 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp7, ptr %tmp3, align 2 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 0 @@ -803,10 +803,10 @@ entry: ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i32, ptr %tmp2, align 4 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i32, ptr %tmp3, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp4, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp5, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp6, ptr %tmp2, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp7, ptr %tmp3, align 4 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp4, ptr %tmp0, align 4 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp5, ptr %tmp1, align 4 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp6, ptr %tmp2, align 4 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp7, ptr %tmp3, align 4 ; VF_4-LABEL: Checking a loop in 'i32_factor_4' ; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp1, align 4 @@ -821,19 +821,19 @@ entry: ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i32, ptr %tmp2, align 4 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i32, ptr %tmp3, align 4 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp4, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp5, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp6, ptr %tmp2, align 4 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp7, ptr %tmp3, align 4 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp4, ptr %tmp0, align 4 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp5, ptr %tmp1, align 4 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp6, ptr %tmp2, align 4 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp7, ptr %tmp3, align 4 ; VF_16-LABEL: Checking a loop in 'i32_factor_4' ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i32, ptr %tmp2, align 4 ; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i32, ptr %tmp3, align 4 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp4, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp5, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp6, ptr %tmp2, align 4 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp7, ptr %tmp3, align 4 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp4, ptr %tmp0, align 4 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp5, ptr %tmp1, align 4 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp6, ptr %tmp2, align 4 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp7, ptr %tmp3, align 4 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 0 @@ -866,37 +866,37 @@ entry: ; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp6 = load i64, ptr %tmp2, align 8 ; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp7 = load i64, ptr %tmp3, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp4, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp5, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp6, ptr %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp7, ptr %tmp3, align 8 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp4, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp5, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp6, ptr %tmp2, align 8 +; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp7, ptr %tmp3, align 8 ; VF_4-LABEL: Checking a loop in 'i64_factor_4' ; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp6 = load i64, ptr %tmp2, align 8 ; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp7 = load i64, ptr %tmp3, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp4, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp5, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp6, ptr %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp7, ptr %tmp3, align 8 +; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp4, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp5, ptr %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp6, ptr %tmp2, align 8 +; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp7, ptr %tmp3, align 8 ; VF_8-LABEL: Checking a loop in 'i64_factor_4' ; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp6 = load i64, ptr %tmp2, align 8 ; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp7 = load i64, ptr %tmp3, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp4, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp5, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp6, ptr %tmp2, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp7, ptr %tmp3, align 8 +; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp4, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp5, ptr %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp6, ptr %tmp2, align 8 +; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp7, ptr %tmp3, align 8 ; VF_16-LABEL: Checking a loop in 'i64_factor_4' ; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp6 = load i64, ptr %tmp2, align 8 ; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp7 = load i64, ptr %tmp3, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp4, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp5, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp6, ptr %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp7, ptr %tmp3, align 8 +; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp4, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp5, ptr %tmp1, align 8 +; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp6, ptr %tmp2, align 8 +; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp7, ptr %tmp3, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 0 @@ -1055,37 +1055,37 @@ entry: ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load double, ptr %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp6 = load double, ptr %tmp2, align 8 ; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp7 = load double, ptr %tmp3, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp4, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp5, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp6, ptr %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp7, ptr %tmp3, align 8 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp4, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp5, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp6, ptr %tmp2, align 8 +; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp7, ptr %tmp3, align 8 ; VF_4-LABEL: Checking a loop in 'f64_factor_4' ; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp4 = load double, ptr %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp5 = load double, ptr %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp6 = load double, ptr %tmp2, align 8 ; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp7 = load double, ptr %tmp3, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp4, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp5, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp6, ptr %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp7, ptr %tmp3, align 8 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp4, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp5, ptr %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp6, ptr %tmp2, align 8 +; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp7, ptr %tmp3, align 8 ; VF_8-LABEL: Checking a loop in 'f64_factor_4' ; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp4 = load double, ptr %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp5 = load double, ptr %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp6 = load double, ptr %tmp2, align 8 ; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp7 = load double, ptr %tmp3, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp4, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp5, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp6, ptr %tmp2, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp7, ptr %tmp3, align 8 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp4, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp5, ptr %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp6, ptr %tmp2, align 8 +; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp7, ptr %tmp3, align 8 ; VF_16-LABEL: Checking a loop in 'f64_factor_4' ; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp4 = load double, ptr %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp5 = load double, ptr %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp6 = load double, ptr %tmp2, align 8 ; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp7 = load double, ptr %tmp3, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp4, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp5, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp6, ptr %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp7, ptr %tmp3, align 8 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp4, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp5, ptr %tmp1, align 8 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp6, ptr %tmp2, align 8 +; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp7, ptr %tmp3, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f64.4, ptr %data, i64 %i, i32 0