From 59278c514a02c9fb09e7e04f67aa4e32fe408b49 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 28 Jan 2025 16:50:15 +0000 Subject: [PATCH] [CostModel][X86] Reduce worst case v8i16/v16i8 SSE2 shuffle costs These were based off instruction count, not throughput - we can probably improve these further, but these throughput numbers match the worst expanded shuffles we see in the vector-shuffle-128-v* codegen tests. --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 14 +- .../test/Analysis/CostModel/X86/reduce-xor.ll | 10 +- llvm/test/Analysis/CostModel/X86/reduction.ll | 12 +- .../X86/shuffle-extract_subvector.ll | 12 +- .../CostModel/X86/shuffle-insert_subvector.ll | 46 +++--- .../CostModel/X86/shuffle-single-src.ll | 14 +- .../CostModel/X86/shuffle-transpose.ll | 12 +- .../Analysis/CostModel/X86/shuffle-two-src.ll | 14 +- .../X86/vector-insert-inseltpoison.ll | 104 ++++++------- .../CostModel/X86/vector-insert-value.ll | 144 +++++++++--------- .../Analysis/CostModel/X86/vector-insert.ll | 104 ++++++------- .../test/Transforms/PhaseOrdering/X86/hadd.ll | 11 +- .../test/Transforms/PhaseOrdering/X86/hsub.ll | 11 +- .../X86/load-partial-vector-shuffle.ll | 14 +- .../VectorCombine/X86/extract-cmp-binop.ll | 20 +-- 15 files changed, 260 insertions(+), 282 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 82523bb6557ad..f6552c500d7cf 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2204,19 +2204,19 @@ InstructionCost X86TTIImpl::getShuffleCost( {TTI::SK_PermuteSingleSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd {TTI::SK_PermuteSingleSrc, MVT::v2i64, {1, 1, 1, 1}}, // pshufd {TTI::SK_PermuteSingleSrc, MVT::v4i32, {1, 1, 1, 1}}, // pshufd - {TTI::SK_PermuteSingleSrc, MVT::v8i16, {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw + {TTI::SK_PermuteSingleSrc, MVT::v8i16, {3, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw // + pshufd/unpck - 
{TTI::SK_PermuteSingleSrc, MVT::v8f16, {5, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw + {TTI::SK_PermuteSingleSrc, MVT::v8f16, {3, 5, 5, 5}}, // 2*pshuflw + 2*pshufhw // + pshufd/unpck - {TTI::SK_PermuteSingleSrc, MVT::v16i8, {10, 10, 10, 10}}, // 2*pshuflw + 2*pshufhw - // + 2*pshufd + 2*unpck + 2*packus + {TTI::SK_PermuteSingleSrc, MVT::v16i8, {8, 10, 10, 10}}, // 2*pshuflw + 2*pshufhw + // + 2*pshufd + 2*unpck + 2*packus {TTI::SK_PermuteTwoSrc, MVT::v2f64, {1, 1, 1, 1}}, // shufpd {TTI::SK_PermuteTwoSrc, MVT::v2i64, {1, 1, 1, 1}}, // shufpd {TTI::SK_PermuteTwoSrc, MVT::v4i32, {2, 2, 2, 2}}, // 2*{unpck,movsd,pshufd} - {TTI::SK_PermuteTwoSrc, MVT::v8i16, {8, 8, 8, 8}}, // blend+permute - {TTI::SK_PermuteTwoSrc, MVT::v8f16, {8, 8, 8, 8}}, // blend+permute - {TTI::SK_PermuteTwoSrc, MVT::v16i8, {13, 13, 13, 13}}, // blend+permute + {TTI::SK_PermuteTwoSrc, MVT::v8i16, {6, 8, 8, 8}}, // blend+permute + {TTI::SK_PermuteTwoSrc, MVT::v8f16, {6, 8, 8, 8}}, // blend+permute + {TTI::SK_PermuteTwoSrc, MVT::v16i8, {11, 13, 13, 13}}, // blend+permute }; static const CostTblEntry SSE3BroadcastLoadTbl[] = { diff --git a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll index f1c8bcda65bd1..757db4a5a41be 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll @@ -157,11 +157,11 @@ define i32 @reduce_i1(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for 
instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i1' diff --git a/llvm/test/Analysis/CostModel/X86/reduction.ll b/llvm/test/Analysis/CostModel/X86/reduction.ll index 5ff3920c63874..4ad0887a27884 100644 --- a/llvm/test/Analysis/CostModel/X86/reduction.ll +++ b/llvm/test/Analysis/CostModel/X86/reduction.ll @@ -634,9 +634,9 @@ define fastcc i64 @no_pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) { define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { ; SSE2-LABEL: 'no_pairwise_reduction8i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> 
undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3 -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 @@ -1126,11 +1126,11 @@ define fastcc i64 @pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) { define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { ; SSE2-LABEL: 'pairwise_reduction8i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 
x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll index e91db1b0cb14a..f0536f96941ac 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll @@ -251,7 +251,7 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x 
i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> @@ -275,7 +275,7 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -899,9 +899,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> 
undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -923,9 +923,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x 
i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll index bc3af51914499..a9443cfe0f97b 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll @@ -527,23 +527,23 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <16 x i16> %src256, 
<16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 
x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -858,19 +858,19 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> 
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 11 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll index ed3ddc060c167..595826cd6e8d5 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll @@ -193,10 +193,10 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: 
Found an estimated cost of 24 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' @@ -285,9 +285,9 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x 
i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll index 5c50ec1feb058..8663659d89cb3 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll @@ -128,9 +128,9 @@ define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16 ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' @@ -202,9 +202,9 @@ define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, <4 x i8> %a32, <4 x i8> %b3 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll index efa0f2eb8dc94..67c753b77f134 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll @@ -237,10 +237,10 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 
x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' @@ -329,9 +329,9 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> -; 
SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' diff --git a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll index 9caaae0717fda..ed034d7bccc94 100644 --- a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll @@ -674,25 +674,25 @@ define i32 @insert_i8(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> poison, 
i8 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 
undef, i32 %arg ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 
undef, i32 63 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'insert_i8' @@ -707,25 +707,25 @@ define i32 @insert_i8(i32 %arg) { ; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement 
<32 x i8> poison, i8 undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_15 = insertelement <64 x 
i8> poison, i8 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'insert_i8' @@ -978,25 +978,25 @@ define i32 @insert_i1(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: 
%v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'insert_i1' @@ -1011,25 +1011,25 @@ define i32 @insert_i1(i32 %arg) { ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 ; SSE3-NEXT: Cost Model: Found an 
estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated 
cost of 12 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'insert_i1' diff --git a/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll b/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll index ee82e10f9ebb6..aff1de0bfde61 100644 --- a/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll +++ b/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll @@ -673,26 +673,26 @@ define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> %src64, i8 %val, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> %src64, i8 %val, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> %src128, i8 %val, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> %src128, i8 %val, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> %src128, i8 %val, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> %src128, i8 %val, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_0 = insertelement <16 x i8> %src128, i8 %val, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_8 = 
insertelement <16 x i8> %src128, i8 %val, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_15 = insertelement <16 x i8> %src128, i8 %val, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> %src256, i8 %val, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> %src256, i8 %val, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> %src256, i8 %val, i32 7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> %src256, i8 %val, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> %src256, i8 %val, i32 15 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> %src256, i8 %val, i32 24 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> %src256, i8 %val, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_0 = insertelement <32 x i8> %src256, i8 %val, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_7 = insertelement <32 x i8> %src256, i8 %val, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_8 = insertelement <32 x i8> %src256, i8 %val, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_15 = insertelement <32 x i8> %src256, i8 %val, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_24 = insertelement <32 x i8> %src256, i8 %val, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_31 = insertelement <32 x i8> %src256, i8 %val, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = 
insertelement <64 x i8> %src512, i8 %val, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> %src512, i8 %val, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> %src512, i8 %val, i32 7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> %src512, i8 %val, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> %src512, i8 %val, i32 15 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> %src512, i8 %val, i32 24 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> %src512, i8 %val, i32 31 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> %src512, i8 %val, i32 32 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> %src512, i8 %val, i32 48 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> %src512, i8 %val, i32 63 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_0 = insertelement <64 x i8> %src512, i8 %val, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_7 = insertelement <64 x i8> %src512, i8 %val, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_8 = insertelement <64 x i8> %src512, i8 %val, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_15 = insertelement <64 x i8> %src512, i8 %val, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_24 = insertelement <64 x i8> %src512, i8 %val, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_31 = 
insertelement <64 x i8> %src512, i8 %val, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_32 = insertelement <64 x i8> %src512, i8 %val, i32 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_48 = insertelement <64 x i8> %src512, i8 %val, i32 48 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_63 = insertelement <64 x i8> %src512, i8 %val, i32 63 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'insert_i8' @@ -706,26 +706,26 @@ define i32 @insert_i8(i32 %arg, i8 %val, <2 x i8> %src16, <4 x i8> %src32, <8 x ; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> %src64, i8 %val, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> %src64, i8 %val, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> %src128, i8 %val, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> %src128, i8 %val, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> %src128, i8 %val, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> %src128, i8 %val, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_0 = insertelement <16 x i8> %src128, i8 %val, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_8 = insertelement <16 x i8> %src128, i8 %val, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_15 = insertelement <16 x i8> %src128, i8 %val, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> %src256, i8 %val, i32 %arg -; SSE3-NEXT: 
Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> %src256, i8 %val, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> %src256, i8 %val, i32 7 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> %src256, i8 %val, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> %src256, i8 %val, i32 15 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> %src256, i8 %val, i32 24 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> %src256, i8 %val, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_0 = insertelement <32 x i8> %src256, i8 %val, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_7 = insertelement <32 x i8> %src256, i8 %val, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_8 = insertelement <32 x i8> %src256, i8 %val, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_15 = insertelement <32 x i8> %src256, i8 %val, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_24 = insertelement <32 x i8> %src256, i8 %val, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_31 = insertelement <32 x i8> %src256, i8 %val, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> %src512, i8 %val, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> %src512, i8 %val, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> %src512, i8 %val, i32 7 -; SSE3-NEXT: Cost 
Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> %src512, i8 %val, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> %src512, i8 %val, i32 15 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> %src512, i8 %val, i32 24 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> %src512, i8 %val, i32 31 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> %src512, i8 %val, i32 32 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> %src512, i8 %val, i32 48 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> %src512, i8 %val, i32 63 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_0 = insertelement <64 x i8> %src512, i8 %val, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_7 = insertelement <64 x i8> %src512, i8 %val, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_8 = insertelement <64 x i8> %src512, i8 %val, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_15 = insertelement <64 x i8> %src512, i8 %val, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_24 = insertelement <64 x i8> %src512, i8 %val, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_31 = insertelement <64 x i8> %src512, i8 %val, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_32 = insertelement <64 x i8> %src512, i8 %val, i32 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_48 = insertelement <64 x i8> %src512, i8 %val, i32 48 +; SSE3-NEXT: 
Cost Model: Found an estimated cost of 12 for instruction: %v64i8_63 = insertelement <64 x i8> %src512, i8 %val, i32 63 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'insert_i8' @@ -977,26 +977,26 @@ define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> %src32, i1 %val, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> %src32, i1 %val, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> %src32, i1 %val, i32 7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = 
insertelement <32 x i1> %src32, i1 %val, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> %src32, i1 %val, i32 15 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> %src32, i1 %val, i32 24 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> %src32, i1 %val, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_0 = insertelement <32 x i1> %src32, i1 %val, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_7 = insertelement <32 x i1> %src32, i1 %val, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_8 = insertelement <32 x i1> %src32, i1 %val, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_15 = insertelement <32 x i1> %src32, i1 %val, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_24 = insertelement <32 x i1> %src32, i1 %val, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_31 = insertelement <32 x i1> %src32, i1 %val, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> %src64, i1 %val, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> %src64, i1 %val, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> %src64, i1 %val, i32 7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> %src64, i1 %val, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> %src64, i1 %val, i32 15 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 
x i1> %src64, i1 %val, i32 24 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> %src64, i1 %val, i32 31 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> %src64, i1 %val, i32 32 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> %src64, i1 %val, i32 48 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> %src64, i1 %val, i32 63 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_0 = insertelement <64 x i1> %src64, i1 %val, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_7 = insertelement <64 x i1> %src64, i1 %val, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_8 = insertelement <64 x i1> %src64, i1 %val, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_15 = insertelement <64 x i1> %src64, i1 %val, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_24 = insertelement <64 x i1> %src64, i1 %val, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_31 = insertelement <64 x i1> %src64, i1 %val, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_32 = insertelement <64 x i1> %src64, i1 %val, i32 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_48 = insertelement <64 x i1> %src64, i1 %val, i32 48 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_63 = insertelement <64 x i1> %src64, i1 %val, i32 63 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'insert_i1' @@ -1010,26 +1010,26 @@ define i32 @insert_i1(i32 %arg, i1 %val, <2 x i1> %src2, <4 x i1> %src4, <8 x i1 ; SSE3-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> %src8, i1 %val, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> %src8, i1 %val, i32 4 ; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> %src16, i1 %val, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_0 = insertelement <16 x i1> %src16, i1 %val, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_8 = insertelement <16 x i1> %src16, i1 %val, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_15 = insertelement <16 x i1> %src16, i1 %val, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> %src32, i1 %val, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> %src32, i1 %val, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> %src32, i1 %val, i32 7 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> %src32, i1 %val, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> %src32, i1 %val, i32 15 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> %src32, i1 %val, i32 24 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for 
instruction: %v32i1_31 = insertelement <32 x i1> %src32, i1 %val, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_0 = insertelement <32 x i1> %src32, i1 %val, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_7 = insertelement <32 x i1> %src32, i1 %val, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_8 = insertelement <32 x i1> %src32, i1 %val, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_15 = insertelement <32 x i1> %src32, i1 %val, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_24 = insertelement <32 x i1> %src32, i1 %val, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_31 = insertelement <32 x i1> %src32, i1 %val, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> %src64, i1 %val, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> %src64, i1 %val, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> %src64, i1 %val, i32 7 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> %src64, i1 %val, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> %src64, i1 %val, i32 15 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> %src64, i1 %val, i32 24 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> %src64, i1 %val, i32 31 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> %src64, i1 %val, i32 32 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: 
%v64i1_48 = insertelement <64 x i1> %src64, i1 %val, i32 48 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> %src64, i1 %val, i32 63 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_0 = insertelement <64 x i1> %src64, i1 %val, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_7 = insertelement <64 x i1> %src64, i1 %val, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_8 = insertelement <64 x i1> %src64, i1 %val, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_15 = insertelement <64 x i1> %src64, i1 %val, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_24 = insertelement <64 x i1> %src64, i1 %val, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_31 = insertelement <64 x i1> %src64, i1 %val, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_32 = insertelement <64 x i1> %src64, i1 %val, i32 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_48 = insertelement <64 x i1> %src64, i1 %val, i32 48 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_63 = insertelement <64 x i1> %src64, i1 %val, i32 63 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'insert_i1' diff --git a/llvm/test/Analysis/CostModel/X86/vector-insert.ll b/llvm/test/Analysis/CostModel/X86/vector-insert.ll index 1d2e1f23b9b53..103b7be692025 100644 --- a/llvm/test/Analysis/CostModel/X86/vector-insert.ll +++ b/llvm/test/Analysis/CostModel/X86/vector-insert.ll @@ -674,25 +674,25 @@ define i32 @insert_i8(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost 
of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: 
%v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = 
insertelement <64 x i8> undef, i8 undef, i32 32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'insert_i8' @@ -707,25 +707,25 @@ define i32 @insert_i8(i32 %arg) { ; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: 
%v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = 
insertelement <64 x i8> undef, i8 undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'insert_i8' @@ -978,25 +978,25 @@ define i32 @insert_i1(i32 %arg) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: 
%v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = 
insertelement <64 x i1> undef, i1 undef, i32 %arg ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_63 = insertelement <64 
x i1> undef, i1 undef, i32 63 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'insert_i1' @@ -1011,25 +1011,25 @@ define i32 @insert_i1(i32 %arg) { ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4 ; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = 
insertelement <32 x i1> undef, i1 undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_15 = insertelement <64 x 
i1> undef, i1 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63 +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'insert_i1' diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll index 056d9d1fba141..0c9f279c01bae 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll @@ -59,15 +59,12 @@ define <8 x i16> @add_v8i16_01234567(<8 x i16> %a, <8 x i16> %b) { define <8 x i16> @add_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) { ; SSE2-LABEL: @add_v8i16_u1234567( -; SSE2-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = add <8 x i16> [[A]], [[SHIFT2]] -; SSE2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; SSE2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> ; SSE2-NEXT: [[TMP6:%.*]] = add <8 x i16> [[A]], [[SHIFT3]] -; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> -; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector 
<8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> ; SSE2-NEXT: [[HADD1:%.*]] = add <8 x i16> [[TMP7]], [[TMP4]] -; SSE2-NEXT: [[HADD2:%.*]] = shufflevector <8 x i16> [[HADD1]], <8 x i16> [[TMP5]], <8 x i32> -; SSE2-NEXT: [[HADD3:%.*]] = shufflevector <8 x i16> [[HADD2]], <8 x i16> [[TMP6]], <8 x i32> +; SSE2-NEXT: [[HADD3:%.*]] = shufflevector <8 x i16> [[HADD1]], <8 x i16> [[TMP6]], <8 x i32> ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> ; SSE2-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll index 572ec9efafe1a..ae05f6470e563 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll @@ -59,15 +59,12 @@ define <8 x i16> @sub_v8i16_01234567(<8 x i16> %a, <8 x i16> %b) { define <8 x i16> @sub_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) { ; SSE2-LABEL: @sub_v8i16_u1234567( -; SSE2-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[A]], [[SHIFT2]] -; SSE2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; SSE2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> ; SSE2-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[A]], [[SHIFT3]] -; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> -; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> ; SSE2-NEXT: [[HSUB1:%.*]] = sub <8 x i16> [[TMP7]], [[TMP4]] -; SSE2-NEXT: 
[[HSUB2:%.*]] = shufflevector <8 x i16> [[HSUB1]], <8 x i16> [[TMP5]], <8 x i32> -; SSE2-NEXT: [[HSUB3:%.*]] = shufflevector <8 x i16> [[HSUB2]], <8 x i16> [[TMP6]], <8 x i32> +; SSE2-NEXT: [[HSUB3:%.*]] = shufflevector <8 x i16> [[HSUB1]], <8 x i16> [[TMP6]], <8 x i32> ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> ; SSE2-NEXT: [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll index 8d37665224e7e..f18a72b0bf776 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll @@ -10,24 +10,16 @@ define <2 x i64> @load_00123456(ptr nocapture noundef readonly %data) { ; SSE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[DATA:%.*]], i64 1 ; SSE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i64 2 ; SSE-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i64 3 -; SSE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i64 4 -; SSE-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i64 5 -; SSE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i64 6 ; SSE-NEXT: [[T0:%.*]] = load i16, ptr [[DATA]], align 2 ; SSE-NEXT: [[T1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2 ; SSE-NEXT: [[T2:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2 -; SSE-NEXT: [[T3:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2 -; SSE-NEXT: [[T4:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; SSE-NEXT: [[T5:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2 -; SSE-NEXT: [[T6:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[ARRAYIDX3]], align 2 ; SSE-NEXT: [[VECINIT0_I_I:%.*]] = insertelement <8 x 
i16> undef, i16 [[T0]], i64 0 ; SSE-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x i16> [[VECINIT0_I_I]], i16 [[T0]], i64 1 ; SSE-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I_I]], i16 [[T1]], i64 2 ; SSE-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I_I]], i16 [[T2]], i64 3 -; SSE-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I_I]], i16 [[T3]], i64 4 -; SSE-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I_I]], i16 [[T4]], i64 5 -; SSE-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I_I]], i16 [[T5]], i64 6 -; SSE-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I_I]], i16 [[T6]], i64 7 +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <8 x i32> +; SSE-NEXT: [[VECINIT7_I_I:%.*]] = shufflevector <8 x i16> [[VECINIT3_I_I]], <8 x i16> [[TMP2]], <8 x i32> ; SSE-NEXT: [[T7:%.*]] = bitcast <8 x i16> [[VECINIT7_I_I]] to <2 x i64> ; SSE-NEXT: ret <2 x i64> [[T7]] ; diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll index 2b0855dda6dc2..6907e12158337 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll @@ -85,20 +85,12 @@ define i1 @icmp_samesign_xor_v4i32(<4 x i32> %a) { ; add is not canonical (should be xor), but that is ok. 
define i1 @icmp_add_v8i32(<8 x i32> %a) { -; SSE-LABEL: @icmp_add_v8i32( -; SSE-NEXT: [[E1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 7 -; SSE-NEXT: [[E2:%.*]] = extractelement <8 x i32> [[A]], i32 2 -; SSE-NEXT: [[CMP1:%.*]] = icmp eq i32 [[E1]], 42 -; SSE-NEXT: [[CMP2:%.*]] = icmp eq i32 [[E2]], -8 -; SSE-NEXT: [[R:%.*]] = add i1 [[CMP1]], [[CMP2]] -; SSE-NEXT: ret i1 [[R]] -; -; AVX-LABEL: @icmp_add_v8i32( -; AVX-NEXT: [[TMP1:%.*]] = icmp eq <8 x i32> [[A:%.*]], -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <8 x i32> -; AVX-NEXT: [[TMP2:%.*]] = add <8 x i1> [[SHIFT]], [[TMP1]] -; AVX-NEXT: [[R:%.*]] = extractelement <8 x i1> [[TMP2]], i64 2 -; AVX-NEXT: ret i1 [[R]] +; CHECK-LABEL: @icmp_add_v8i32( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i32> [[A:%.*]], +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i1> [[SHIFT]], [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = extractelement <8 x i1> [[TMP2]], i64 2 +; CHECK-NEXT: ret i1 [[R]] ; %e1 = extractelement <8 x i32> %a, i32 7 %e2 = extractelement <8 x i32> %a, i32 2