From eb61e3137286f6761b5b455bef99cc4bb57fccb1 Mon Sep 17 00:00:00 2001 From: hanbeom Date: Fri, 7 Mar 2025 19:22:33 +0900 Subject: [PATCH 1/2] [VectorCombine] Fix invalid shuffle cost argument of foldShuffleOfSelects The previous code passed the destination vector type as the getShuffleCost argument. Because the shuffle mask holds element indices into the two source vectors, its maximum value is twice the size of the source vector. This causes a problem if the destination vector is smaller than the source vector and the mask specifies an index that exceeds the size of the destination vector. Fix the problem by correcting the previous code, which was using the wrong argument in the cost calculation. Fixes https://github.com/llvm/llvm-project/issues/130250 --- .../Transforms/Vectorize/VectorCombine.cpp | 21 +- .../PhaseOrdering/X86/blendv-select.ll | 342 +++++++++++++----- .../AArch64/shuffletoidentity.ll | 6 +- .../VectorCombine/X86/shuffle-of-selects.ll | 255 ++++++++----- 4 files changed, 435 insertions(+), 189 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 4d4a1a6e04d32..776a733d86afc 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2037,7 +2037,6 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) { m_Mask(Mask)))) return false; - auto *DstVecTy = dyn_cast(I.getType()); auto *C1VecTy = dyn_cast(C1->getType()); auto *C2VecTy = dyn_cast(C2->getType()); if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy) @@ -2051,24 +2050,26 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) { (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags()))) return false; + auto *SrcVecTy = dyn_cast(T1->getType()); + auto *DstVecTy = dyn_cast(I.getType()); auto SK = TargetTransformInfo::SK_PermuteTwoSrc; auto SelOp = Instruction::Select; InstructionCost OldCost = TTI.getCmpSelInstrCost( - SelOp, 
T1->getType(), C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind); - OldCost += TTI.getCmpSelInstrCost(SelOp, T2->getType(), C2VecTy, + SelOp, DstVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind); + OldCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, C2VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind); - OldCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, + OldCost += TTI.getShuffleCost(SK, SrcVecTy, Mask, CostKind, 0, nullptr, {I.getOperand(0), I.getOperand(1)}, &I); - auto *C1C2VecTy = cast( - toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements())); InstructionCost NewCost = - TTI.getShuffleCost(SK, C1C2VecTy, Mask, CostKind, 0, nullptr, {C1, C2}); + TTI.getShuffleCost(SK, C1VecTy, Mask, CostKind, 0, nullptr, {C1, C2}); NewCost += - TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {T1, T2}); + TTI.getShuffleCost(SK, SrcVecTy, Mask, CostKind, 0, nullptr, {T1, T2}); NewCost += - TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {F1, F2}); - NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, DstVecTy, + TTI.getShuffleCost(SK, SrcVecTy, Mask, CostKind, 0, nullptr, {F1, F2}); + auto *C1C2ShuffledVecTy = cast( + toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements())); + NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, C1C2ShuffledVecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind); LLVM_DEBUG(dbgs() << "Found a shuffle feeding two selects: " << I diff --git a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll index c2ed7b9c84523..84edc6e90a91d 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s -; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s -; RUN: opt < %s -O3 -S 
-mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2 +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512 ; ; PR58895 - replace shuffled _mm_blendv_epi8+icmp with select+icmp @@ -12,10 +12,20 @@ ; define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) { -; CHECK-LABEL: @x86_pblendvb_v4f64_v2f64( -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[DOTV:%.*]] = select <4 x i1> [[CMP]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]] -; CHECK-NEXT: ret <4 x double> [[DOTV]] +; SSE-LABEL: @x86_pblendvb_v4f64_v2f64( +; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]] +; SSE-NEXT: [[DOTV:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]] +; SSE-NEXT: ret <4 x double> [[DOTV]] +; +; AVX2-LABEL: @x86_pblendvb_v4f64_v2f64( +; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]] +; AVX2-NEXT: [[DOTV:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]] +; AVX2-NEXT: ret <4 x double> [[DOTV]] +; +; AVX512-LABEL: @x86_pblendvb_v4f64_v2f64( +; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]] +; AVX512-NEXT: [[DOTV:%.*]] = select <4 x i1> [[CMP]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]] +; AVX512-NEXT: ret <4 x double> [[DOTV]] ; %a.bc = bitcast <4 x double> %a to <32 x i8> %b.bc = bitcast <4 x double> %b to <32 x i8> @@ -36,10 +46,20 @@ define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b, } define <8 x float> @x86_pblendvb_v8f32_v4f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) { -; CHECK-LABEL: @x86_pblendvb_v8f32_v4f32( -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> 
[[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]] -; CHECK-NEXT: ret <8 x float> [[DOTV]] +; SSE-LABEL: @x86_pblendvb_v8f32_v4f32( +; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]] +; SSE-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]] +; SSE-NEXT: ret <8 x float> [[DOTV]] +; +; AVX2-LABEL: @x86_pblendvb_v8f32_v4f32( +; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]] +; AVX2-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]] +; AVX2-NEXT: ret <8 x float> [[DOTV]] +; +; AVX512-LABEL: @x86_pblendvb_v8f32_v4f32( +; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]] +; AVX512-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]] +; AVX512-NEXT: ret <8 x float> [[DOTV]] ; %a.bc = bitcast <8 x float> %a to <32 x i8> %b.bc = bitcast <8 x float> %b to <32 x i8> @@ -60,10 +80,20 @@ define <8 x float> @x86_pblendvb_v8f32_v4f32(<8 x float> %a, <8 x float> %b, <8 } define <4 x i64> @x86_pblendvb_v4i64_v2i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) { -; CHECK-LABEL: @x86_pblendvb_v4i64_v2i64( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]] -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; SSE-LABEL: @x86_pblendvb_v4i64_v2i64( +; SSE-NEXT: [[TMP1:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]] +; SSE-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]] +; SSE-NEXT: ret <4 x i64> [[TMP2]] +; +; AVX2-LABEL: @x86_pblendvb_v4i64_v2i64( +; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]] +; AVX2-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]] +; AVX2-NEXT: ret <4 x i64> [[TMP2]] +; +; AVX512-LABEL: @x86_pblendvb_v4i64_v2i64( 
+; AVX512-NEXT: [[CMP:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]] +; AVX512-NEXT: [[TMP1:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]] +; AVX512-NEXT: ret <4 x i64> [[TMP1]] ; %a.bc = bitcast <4 x i64> %a to <32 x i8> %b.bc = bitcast <4 x i64> %b to <32 x i8> @@ -84,15 +114,35 @@ define <4 x i64> @x86_pblendvb_v4i64_v2i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> } define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) { -; CHECK-LABEL: @x86_pblendvb_v8i32_v4i32( -; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32> -; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32> -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]] -; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64> -; CHECK-NEXT: ret <4 x i64> [[RES]] +; SSE-LABEL: @x86_pblendvb_v8i32_v4i32( +; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32> +; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32> +; SSE-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]] +; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32> +; SSE-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> +; SSE-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]] +; SSE-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP4]] to <4 x i64> +; SSE-NEXT: ret <4 x i64> [[RES]] +; +; AVX2-LABEL: @x86_pblendvb_v8i32_v4i32( +; AVX2-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32> +; AVX2-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]] +; AVX2-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32> +; 
AVX2-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> +; AVX2-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]] +; AVX2-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP4]] to <4 x i64> +; AVX2-NEXT: ret <4 x i64> [[RES]] +; +; AVX512-LABEL: @x86_pblendvb_v8i32_v4i32( +; AVX512-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32> +; AVX512-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32> +; AVX512-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]] +; AVX512-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32> +; AVX512-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]] +; AVX512-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64> +; AVX512-NEXT: ret <4 x i64> [[RES]] ; %a.bc = bitcast <4 x i64> %a to <32 x i8> %b.bc = bitcast <4 x i64> %b to <32 x i8> @@ -115,15 +165,35 @@ define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64> } define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) { -; CHECK-LABEL: @x86_pblendvb_v16i16_v8i16( -; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16> -; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16> -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]] -; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64> -; CHECK-NEXT: ret <4 x i64> [[RES]] +; SSE-LABEL: @x86_pblendvb_v16i16_v8i16( +; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16> +; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16> +; SSE-NEXT: [[TMP1:%.*]] = icmp slt <16 x i16> 
[[C_BC]], [[D_BC]] +; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16> +; SSE-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> +; SSE-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]] +; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP4]] to <4 x i64> +; SSE-NEXT: ret <4 x i64> [[RES]] +; +; AVX2-LABEL: @x86_pblendvb_v16i16_v8i16( +; AVX2-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16> +; AVX2-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16> +; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]] +; AVX2-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16> +; AVX2-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> +; AVX2-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]] +; AVX2-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP4]] to <4 x i64> +; AVX2-NEXT: ret <4 x i64> [[RES]] +; +; AVX512-LABEL: @x86_pblendvb_v16i16_v8i16( +; AVX512-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16> +; AVX512-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16> +; AVX512-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]] +; AVX512-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> +; AVX512-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16> +; AVX512-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]] +; AVX512-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64> +; AVX512-NEXT: ret <4 x i64> [[RES]] ; %a.bc = bitcast <4 x i64> %a to <32 x i8> %b.bc = bitcast <4 x i64> %b to <32 x i8> @@ -146,15 +216,35 @@ define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64 } define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) { -; CHECK-LABEL: @x86_pblendvb_v32i8_v16i8( -; CHECK-NEXT: [[A_BC:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> -; CHECK-NEXT: 
[[B_BC:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8> -; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8> -; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8> -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[B_BC]], <32 x i8> [[A_BC]] -; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64> -; CHECK-NEXT: ret <4 x i64> [[RES]] +; SSE-LABEL: @x86_pblendvb_v32i8_v16i8( +; SSE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> +; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8> +; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8> +; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8> +; SSE-NEXT: [[TMP3:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]] +; SSE-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[TMP3]], <32 x i8> [[TMP2]], <32 x i8> [[TMP1]] +; SSE-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64> +; SSE-NEXT: ret <4 x i64> [[RES]] +; +; AVX2-LABEL: @x86_pblendvb_v32i8_v16i8( +; AVX2-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> +; AVX2-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8> +; AVX2-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8> +; AVX2-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8> +; AVX2-NEXT: [[TMP3:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]] +; AVX2-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[TMP3]], <32 x i8> [[TMP2]], <32 x i8> [[TMP1]] +; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64> +; AVX2-NEXT: ret <4 x i64> [[RES]] +; +; AVX512-LABEL: @x86_pblendvb_v32i8_v16i8( +; AVX512-NEXT: [[A_BC:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> +; AVX512-NEXT: [[B_BC:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8> +; AVX512-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8> +; AVX512-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8> +; AVX512-NEXT: 
[[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]] +; AVX512-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[B_BC]], <32 x i8> [[A_BC]] +; AVX512-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64> +; AVX512-NEXT: ret <4 x i64> [[RES]] ; %a.bc = bitcast <4 x i64> %a to <32 x i8> %b.bc = bitcast <4 x i64> %b to <32 x i8> @@ -180,10 +270,20 @@ define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64> ; define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) { -; CHECK-LABEL: @x86_pblendvb_v8f64_v4f64( -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]] -; CHECK-NEXT: ret <8 x double> [[DOTV]] +; SSE-LABEL: @x86_pblendvb_v8f64_v4f64( +; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]] +; SSE-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]] +; SSE-NEXT: ret <8 x double> [[DOTV]] +; +; AVX2-LABEL: @x86_pblendvb_v8f64_v4f64( +; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]] +; AVX2-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]] +; AVX2-NEXT: ret <8 x double> [[DOTV]] +; +; AVX512-LABEL: @x86_pblendvb_v8f64_v4f64( +; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]] +; AVX512-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]] +; AVX512-NEXT: ret <8 x double> [[DOTV]] ; %a.bc = bitcast <8 x double> %a to <64 x i8> %b.bc = bitcast <8 x double> %b to <64 x i8> @@ -204,10 +304,20 @@ define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b, } define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) { -; CHECK-LABEL: @x86_pblendvb_v16f32_v8f32( -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <16 
x float> [[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[DOTV:%.*]] = select <16 x i1> [[CMP]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]] -; CHECK-NEXT: ret <16 x float> [[DOTV]] +; SSE-LABEL: @x86_pblendvb_v16f32_v8f32( +; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]] +; SSE-NEXT: [[DOTV:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]] +; SSE-NEXT: ret <16 x float> [[DOTV]] +; +; AVX2-LABEL: @x86_pblendvb_v16f32_v8f32( +; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]] +; AVX2-NEXT: [[DOTV:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]] +; AVX2-NEXT: ret <16 x float> [[DOTV]] +; +; AVX512-LABEL: @x86_pblendvb_v16f32_v8f32( +; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]] +; AVX512-NEXT: [[DOTV:%.*]] = select <16 x i1> [[CMP]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]] +; AVX512-NEXT: ret <16 x float> [[DOTV]] ; %a.bc = bitcast <16 x float> %a to <64 x i8> %b.bc = bitcast <16 x float> %b to <64 x i8> @@ -228,10 +338,20 @@ define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b, } define <8 x i64> @x86_pblendvb_v8i64_v4i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) { -; CHECK-LABEL: @x86_pblendvb_v8i64_v4i64( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]] -; CHECK-NEXT: ret <8 x i64> [[TMP1]] +; SSE-LABEL: @x86_pblendvb_v8i64_v4i64( +; SSE-NEXT: [[TMP1:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]] +; SSE-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]] +; SSE-NEXT: ret <8 x i64> [[TMP2]] +; +; AVX2-LABEL: @x86_pblendvb_v8i64_v4i64( +; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]] +; AVX2-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]] +; AVX2-NEXT: ret <8 x i64> [[TMP2]] +; +; 
AVX512-LABEL: @x86_pblendvb_v8i64_v4i64( +; AVX512-NEXT: [[CMP:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]] +; AVX512-NEXT: [[TMP1:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]] +; AVX512-NEXT: ret <8 x i64> [[TMP1]] ; %a.bc = bitcast <8 x i64> %a to <64 x i8> %b.bc = bitcast <8 x i64> %b to <64 x i8> @@ -252,15 +372,35 @@ define <8 x i64> @x86_pblendvb_v8i64_v4i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> } define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) { -; CHECK-LABEL: @x86_pblendvb_v16i32_v8i32( -; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32> -; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32> -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]] -; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64> -; CHECK-NEXT: ret <8 x i64> [[RES]] +; SSE-LABEL: @x86_pblendvb_v16i32_v8i32( +; SSE-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32> +; SSE-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32> +; SSE-NEXT: [[TMP1:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]] +; SSE-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> +; SSE-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> +; SSE-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]] +; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP4]] to <8 x i64> +; SSE-NEXT: ret <8 x i64> [[RES]] +; +; AVX2-LABEL: @x86_pblendvb_v16i32_v8i32( +; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32> +; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]] +; 
AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> +; AVX2-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]] +; AVX2-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP4]] to <8 x i64> +; AVX2-NEXT: ret <8 x i64> [[RES]] +; +; AVX512-LABEL: @x86_pblendvb_v16i32_v8i32( +; AVX512-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32> +; AVX512-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32> +; AVX512-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]] +; AVX512-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> +; AVX512-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]] +; AVX512-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64> +; AVX512-NEXT: ret <8 x i64> [[RES]] ; %a.bc = bitcast <8 x i64> %a to <64 x i8> %b.bc = bitcast <8 x i64> %b to <64 x i8> @@ -283,15 +423,35 @@ define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64 } define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) { -; CHECK-LABEL: @x86_pblendvb_v32i16_v16i16( -; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16> -; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16> -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]] -; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64> -; CHECK-NEXT: ret <8 x i64> [[RES]] +; SSE-LABEL: @x86_pblendvb_v32i16_v16i16( +; SSE-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16> +; SSE-NEXT: [[D_BC:%.*]] = 
bitcast <8 x i64> [[D:%.*]] to <32 x i16> +; SSE-NEXT: [[TMP1:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]] +; SSE-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16> +; SSE-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16> +; SSE-NEXT: [[TMP4:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[TMP2]], <32 x i16> [[TMP3]] +; SSE-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP4]] to <8 x i64> +; SSE-NEXT: ret <8 x i64> [[RES]] +; +; AVX2-LABEL: @x86_pblendvb_v32i16_v16i16( +; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16> +; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16> +; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]] +; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16> +; AVX2-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16> +; AVX2-NEXT: [[TMP4:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[TMP2]], <32 x i16> [[TMP3]] +; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP4]] to <8 x i64> +; AVX2-NEXT: ret <8 x i64> [[RES]] +; +; AVX512-LABEL: @x86_pblendvb_v32i16_v16i16( +; AVX512-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16> +; AVX512-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16> +; AVX512-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]] +; AVX512-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16> +; AVX512-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16> +; AVX512-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]] +; AVX512-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64> +; AVX512-NEXT: ret <8 x i64> [[RES]] ; %a.bc = bitcast <8 x i64> %a to <64 x i8> %b.bc = bitcast <8 x i64> %b to <64 x i8> @@ -314,15 +474,35 @@ define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i6 } define <8 x i64> @x86_pblendvb_v64i8_v32i8(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) { -; CHECK-LABEL: 
@x86_pblendvb_v64i8_v32i8( -; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8> -; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8> -; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8> -; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8> -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[B_BC]], <64 x i8> [[A_BC]] -; CHECK-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64> -; CHECK-NEXT: ret <8 x i64> [[RES]] +; SSE-LABEL: @x86_pblendvb_v64i8_v32i8( +; SSE-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8> +; SSE-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8> +; SSE-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8> +; SSE-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8> +; SSE-NEXT: [[TMP3:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]] +; SSE-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[TMP3]], <64 x i8> [[TMP2]], <64 x i8> [[TMP1]] +; SSE-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64> +; SSE-NEXT: ret <8 x i64> [[RES]] +; +; AVX2-LABEL: @x86_pblendvb_v64i8_v32i8( +; AVX2-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8> +; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8> +; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8> +; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8> +; AVX2-NEXT: [[TMP3:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]] +; AVX2-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[TMP3]], <64 x i8> [[TMP2]], <64 x i8> [[TMP1]] +; AVX2-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64> +; AVX2-NEXT: ret <8 x i64> [[RES]] +; +; AVX512-LABEL: @x86_pblendvb_v64i8_v32i8( +; AVX512-NEXT: [[A_BC:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8> +; AVX512-NEXT: [[B_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8> +; AVX512-NEXT: [[C_BC:%.*]] = bitcast <8 x 
i64> [[C:%.*]] to <64 x i8> +; AVX512-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8> +; AVX512-NEXT: [[CMP:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]] +; AVX512-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[B_BC]], <64 x i8> [[A_BC]] +; AVX512-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64> +; AVX512-NEXT: ret <8 x i64> [[RES]] ; %a.bc = bitcast <8 x i64> %a to <64 x i8> %b.bc = bitcast <8 x i64> %b to <64 x i8> diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index 09875c5e0af40..1c128c8f56a03 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -997,8 +997,10 @@ define <4 x i64> @bitcast_smax_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b) { ; CHECK-NEXT: [[A_BC0:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> ; CHECK-NEXT: [[B_BC0:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[A_BC0]], [[B_BC0]] -; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> -; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[B]], <4 x i64> [[B]], <4 x i32> +; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[TMP2]] to <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[A]], <4 x i32> +; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[TMP4]] to <8 x i32> ; CHECK-NEXT: [[CONCAT:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[B_BC1]], <8 x i32> [[A_BC1]] ; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[CONCAT]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[RES]] diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll index 6653bf3375423..0bc87a9b03821 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll +++ 
b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll @@ -4,28 +4,12 @@ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512 define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i16> %y, <2 x i16> %z) { -; SSE-LABEL: define <4 x i16> @src_v2tov4_i16( -; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] { -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> -; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]] -; SSE-NEXT: ret <4 x i16> [[RES]] -; -; AVX2-LABEL: define <4 x i16> @src_v2tov4_i16( -; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] { -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> -; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> -; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]] -; AVX2-NEXT: ret <4 x i16> [[RES]] -; -; AVX512-LABEL: define <4 x i16> @src_v2tov4_i16( -; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] { -; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]] -; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]] -; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> -; AVX512-NEXT: ret <4 x i16> [[RES]] +; CHECK-LABEL: define <4 x 
i16> @src_v2tov4_i16( +; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x i16> [[RES]] ; %select.xz = select <2 x i1> %a, <2 x i16> %x, <2 x i16> %z %select.yx = select <2 x i1> %b, <2 x i16> %y, <2 x i16> %x @@ -34,28 +18,12 @@ define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i1 } define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i16> %y, <4 x i16> %z) { -; SSE-LABEL: define <8 x i16> @src_v4tov8_i16( -; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] { -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> -; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]] -; SSE-NEXT: ret <8 x i16> [[RES]] -; -; AVX2-LABEL: define <8 x i16> @src_v4tov8_i16( -; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] { -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> -; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> -; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]] -; AVX2-NEXT: ret <8 x i16> [[RES]] -; -; AVX512-LABEL: define <8 x i16> @src_v4tov8_i16( -; 
AVX512-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] { -; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]] -; AVX512-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]] -; AVX512-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> -; AVX512-NEXT: ret <8 x i16> [[RES]] +; CHECK-LABEL: define <8 x i16> @src_v4tov8_i16( +; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> +; CHECK-NEXT: ret <8 x i16> [[RES]] ; %select.xz = select <4 x i1> %a, <4 x i16> %x, <4 x i16> %z %select.yx = select <4 x i1> %b, <4 x i16> %y, <4 x i16> %x @@ -94,12 +62,28 @@ define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i1 } define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) { -; CHECK-LABEL: define <16 x i16> @src_v8tov16_i16( -; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]] -; CHECK-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]] -; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> -; CHECK-NEXT: ret <16 x i16> [[RES]] +; SSE-LABEL: define <16 x i16> @src_v8tov16_i16( +; SSE-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: 
[[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <16 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <16 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <16 x i32> +; SSE-NEXT: [[RES:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]] +; SSE-NEXT: ret <16 x i16> [[RES]] +; +; AVX2-LABEL: define <16 x i16> @src_v8tov16_i16( +; AVX2-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <16 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <16 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <16 x i32> +; AVX2-NEXT: [[RES:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]] +; AVX2-NEXT: ret <16 x i16> [[RES]] +; +; AVX512-LABEL: define <16 x i16> @src_v8tov16_i16( +; AVX512-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> +; AVX512-NEXT: ret <16 x i16> [[RES]] ; %select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z %select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x @@ -108,28 +92,12 @@ define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x } define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { -; SSE-LABEL: define <4 x i32> @src_v2tov4_i32( -; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] { -; SSE-NEXT: 
[[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> -; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] -; SSE-NEXT: ret <4 x i32> [[RES]] -; -; AVX2-LABEL: define <4 x i32> @src_v2tov4_i32( -; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] { -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> -; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> -; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] -; AVX2-NEXT: ret <4 x i32> [[RES]] -; -; AVX512-LABEL: define <4 x i32> @src_v2tov4_i32( -; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] { -; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]] -; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]] -; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> -; AVX512-NEXT: ret <4 x i32> [[RES]] +; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32( +; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[RES]] ; %select.xz = select <2 x i1> %a, <2 x i32> 
%x, <2 x i32> %z %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x @@ -138,12 +106,28 @@ define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i3 } define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { -; CHECK-LABEL: define <8 x i32> @src_v4tov8_i32( -; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]] -; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]] -; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[RES]] +; SSE-LABEL: define <8 x i32> @src_v4tov8_i32( +; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <8 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> [[X]], <8 x i32> +; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]] +; SSE-NEXT: ret <8 x i32> [[RES]] +; +; AVX2-LABEL: define <8 x i32> @src_v4tov8_i32( +; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <8 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> [[X]], <8 x i32> +; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]] +; AVX2-NEXT: ret <8 x i32> [[RES]] +; +; AVX512-LABEL: define <8 x i32> @src_v4tov8_i32( +; AVX512-SAME: <4 x i1> 
[[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> +; AVX512-NEXT: ret <8 x i32> [[RES]] ; %select.xz = select <4 x i1> %a, <4 x i32> %x, <4 x i32> %z %select.yx = select <4 x i1> %b, <4 x i32> %y, <4 x i32> %x @@ -182,12 +166,28 @@ define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i6 } define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { -; CHECK-LABEL: define <4 x i64> @src_v2tov4_i64( -; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]] -; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]] -; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> -; CHECK-NEXT: ret <4 x i64> [[RES]] +; SSE-LABEL: define <4 x i64> @src_v2tov4_i64( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <4 x i32> +; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]] +; SSE-NEXT: ret <4 x i64> [[RES]] +; +; AVX2-LABEL: define <4 x i64> @src_v2tov4_i64( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) 
#[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <4 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <4 x i32> +; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]] +; AVX2-NEXT: ret <4 x i64> [[RES]] +; +; AVX512-LABEL: define <4 x i64> @src_v2tov4_i64( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> +; AVX512-NEXT: ret <4 x i64> [[RES]] ; %select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z %select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x @@ -226,12 +226,28 @@ define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, < } define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) { -; CHECK-LABEL: define <8 x float> @src_v4tov8_float( -; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]] -; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]] -; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> -; CHECK-NEXT: ret <8 x float> [[RES]] +; SSE-LABEL: define <8 x float> @src_v4tov8_float( +; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = 
shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <8 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[Z]], <4 x float> [[X]], <8 x i32> +; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]] +; SSE-NEXT: ret <8 x float> [[RES]] +; +; AVX2-LABEL: define <8 x float> @src_v4tov8_float( +; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <8 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[Z]], <4 x float> [[X]], <8 x i32> +; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]] +; AVX2-NEXT: ret <8 x float> [[RES]] +; +; AVX512-LABEL: define <8 x float> @src_v4tov8_float( +; AVX512-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> +; AVX512-NEXT: ret <8 x float> [[RES]] ; %select.xz = select <4 x i1> %a, <4 x float> %x, <4 x float> %z %select.yx = select <4 x i1> %b, <4 x float> %y, <4 x float> %x @@ -270,12 +286,28 @@ define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x } define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) { -; CHECK-LABEL: define <4 x double> @src_v2tov4_double( -; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> 
[[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]] -; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]] -; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> -; CHECK-NEXT: ret <4 x double> [[RES]] +; SSE-LABEL: define <4 x double> @src_v2tov4_double( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <4 x i32> +; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]] +; SSE-NEXT: ret <4 x double> [[RES]] +; +; AVX2-LABEL: define <4 x double> @src_v2tov4_double( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <4 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <4 x i32> +; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]] +; AVX2-NEXT: ret <4 x double> [[RES]] +; +; AVX512-LABEL: define <4 x double> @src_v2tov4_double( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]] +; 
AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> +; AVX512-NEXT: ret <4 x double> [[RES]] ; %select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z %select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x @@ -283,6 +315,37 @@ define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x ret <4 x double> %res } +; ISSUE - https://github.com/llvm/llvm-project/issues/130250 +; There should be no issues when the mask elements are in the following range +; DestVectorSize * 2 < MaskEls < SrcVectorSize * 2 +define <2 x float> @test_mask0(<4 x i1> %c, <4 x float> %x, <4 x float> %y, <4 x float> %z) { +; CHECK-LABEL: define <2 x float> @test_mask0( +; CHECK-SAME: <4 x i1> [[C:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XY:%.*]] = select <4 x i1> [[C]], <4 x float> [[X]], <4 x float> [[Y]] +; CHECK-NEXT: [[SELECT_YZ:%.*]] = select <4 x i1> [[C]], <4 x float> [[Y]], <4 x float> [[Z]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XY]], <4 x float> [[SELECT_YZ]], <2 x i32> +; CHECK-NEXT: ret <2 x float> [[RES]] +; + %select.xy = select <4 x i1> %c, <4 x float> %x, <4 x float> %y + %select.yz = select <4 x i1> %c, <4 x float> %y, <4 x float> %z + %res = shufflevector <4 x float> %select.xy, <4 x float> %select.yz, <2 x i32> + ret <2 x float> %res +} + +define <2 x float> @test_mask1(<4 x i1> %c, <4 x float> %x, <4 x float> %y, <4 x float> %z) { +; CHECK-LABEL: define <2 x float> @test_mask1( +; CHECK-SAME: <4 x i1> [[C:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XY:%.*]] = select <4 x i1> [[C]], <4 x float> [[X]], <4 x float> [[Y]] +; CHECK-NEXT: [[SELECT_YZ:%.*]] = select <4 x i1> [[C]], <4 x float> [[Y]], <4 x float> [[Z]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XY]], <4 x float> [[SELECT_YZ]], <2 x i32> 
+; CHECK-NEXT: ret <2 x float> [[RES]] +; + %select.xy = select <4 x i1> %c, <4 x float> %x, <4 x float> %y + %select.yz = select <4 x i1> %c, <4 x float> %y, <4 x float> %z + %res = shufflevector <4 x float> %select.xy, <4 x float> %select.yz, <2 x i32> + ret <2 x float> %res +} + ; FMF Flags define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) { ; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan( From 602706350d7296f713ef107285734c4d0a722b96 Mon Sep 17 00:00:00 2001 From: hanbeom Date: Sat, 8 Mar 2025 00:37:44 +0900 Subject: [PATCH 2/2] Fix invalid argument in getCmpSelInstrCost The value type of the original SelectInsts is SrcVecTy. It was a mistake to specify DstVecTy as the value-type argument when computing OldCost. It is now fixed. --- .../Transforms/Vectorize/VectorCombine.cpp | 4 +- .../PhaseOrdering/X86/blendv-select.ll | 116 +++++------------- .../VectorCombine/X86/shuffle-of-selects.ll | 35 +++--- 3 files changed, 50 insertions(+), 105 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 776a733d86afc..019d79567b4ae 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2055,8 +2055,8 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) { auto SK = TargetTransformInfo::SK_PermuteTwoSrc; auto SelOp = Instruction::Select; InstructionCost OldCost = TTI.getCmpSelInstrCost( - SelOp, DstVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind); - OldCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, C2VecTy, + SelOp, SrcVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind); + OldCost += TTI.getCmpSelInstrCost(SelOp, SrcVecTy, C2VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind); OldCost += TTI.getShuffleCost(SK, SrcVecTy, Mask, CostKind, 0, nullptr, {I.getOperand(0), I.getOperand(1)}, &I); diff --git a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll 
b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll index 84edc6e90a91d..444e256f9854b 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll @@ -117,11 +117,11 @@ define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64> ; SSE-LABEL: @x86_pblendvb_v8i32_v4i32( ; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32> ; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32> -; SSE-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]] +; SSE-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]] +; SSE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> ; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32> -; SSE-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> -; SSE-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]] -; SSE-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP4]] to <4 x i64> +; SSE-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]] +; SSE-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64> ; SSE-NEXT: ret <4 x i64> [[RES]] ; ; AVX2-LABEL: @x86_pblendvb_v8i32_v4i32( @@ -168,11 +168,11 @@ define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64 ; SSE-LABEL: @x86_pblendvb_v16i16_v8i16( ; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16> ; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16> -; SSE-NEXT: [[TMP1:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]] +; SSE-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]] +; SSE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> ; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16> -; SSE-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> -; SSE-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]] -; SSE-NEXT: [[RES:%.*]] = bitcast <16 x 
i16> [[TMP4]] to <4 x i64> +; SSE-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]] +; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64> ; SSE-NEXT: ret <4 x i64> [[RES]] ; ; AVX2-LABEL: @x86_pblendvb_v16i16_v8i16( @@ -270,20 +270,10 @@ define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64> ; define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) { -; SSE-LABEL: @x86_pblendvb_v8f64_v4f64( -; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]] -; SSE-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]] -; SSE-NEXT: ret <8 x double> [[DOTV]] -; -; AVX2-LABEL: @x86_pblendvb_v8f64_v4f64( -; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]] -; AVX2-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]] -; AVX2-NEXT: ret <8 x double> [[DOTV]] -; -; AVX512-LABEL: @x86_pblendvb_v8f64_v4f64( -; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]] -; AVX512-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]] -; AVX512-NEXT: ret <8 x double> [[DOTV]] +; CHECK-LABEL: @x86_pblendvb_v8f64_v4f64( +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]] +; CHECK-NEXT: ret <8 x double> [[DOTV]] ; %a.bc = bitcast <8 x double> %a to <64 x i8> %b.bc = bitcast <8 x double> %b to <64 x i8> @@ -304,20 +294,10 @@ define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b, } define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) { -; SSE-LABEL: @x86_pblendvb_v16f32_v8f32( -; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]] -; SSE-NEXT: [[DOTV:%.*]] = select <16 x i1> 
[[TMP1]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]] -; SSE-NEXT: ret <16 x float> [[DOTV]] -; -; AVX2-LABEL: @x86_pblendvb_v16f32_v8f32( -; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]] -; AVX2-NEXT: [[DOTV:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]] -; AVX2-NEXT: ret <16 x float> [[DOTV]] -; -; AVX512-LABEL: @x86_pblendvb_v16f32_v8f32( -; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]] -; AVX512-NEXT: [[DOTV:%.*]] = select <16 x i1> [[CMP]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]] -; AVX512-NEXT: ret <16 x float> [[DOTV]] +; CHECK-LABEL: @x86_pblendvb_v16f32_v8f32( +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[DOTV:%.*]] = select <16 x i1> [[CMP]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]] +; CHECK-NEXT: ret <16 x float> [[DOTV]] ; %a.bc = bitcast <16 x float> %a to <64 x i8> %b.bc = bitcast <16 x float> %b to <64 x i8> @@ -338,20 +318,10 @@ define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b, } define <8 x i64> @x86_pblendvb_v8i64_v4i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) { -; SSE-LABEL: @x86_pblendvb_v8i64_v4i64( -; SSE-NEXT: [[TMP1:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]] -; SSE-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]] -; SSE-NEXT: ret <8 x i64> [[TMP2]] -; -; AVX2-LABEL: @x86_pblendvb_v8i64_v4i64( -; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]] -; AVX2-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]] -; AVX2-NEXT: ret <8 x i64> [[TMP2]] -; -; AVX512-LABEL: @x86_pblendvb_v8i64_v4i64( -; AVX512-NEXT: [[CMP:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]] -; AVX512-NEXT: [[TMP1:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]] -; AVX512-NEXT: ret <8 x i64> [[TMP1]] +; CHECK-LABEL: @x86_pblendvb_v8i64_v4i64( +; CHECK-NEXT: [[CMP:%.*]] = icmp 
slt <8 x i64> [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]] +; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %a.bc = bitcast <8 x i64> %a to <64 x i8> %b.bc = bitcast <8 x i64> %b to <64 x i8> @@ -372,35 +342,15 @@ define <8 x i64> @x86_pblendvb_v8i64_v4i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> } define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) { -; SSE-LABEL: @x86_pblendvb_v16i32_v8i32( -; SSE-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32> -; SSE-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32> -; SSE-NEXT: [[TMP1:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]] -; SSE-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> -; SSE-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> -; SSE-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]] -; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP4]] to <8 x i64> -; SSE-NEXT: ret <8 x i64> [[RES]] -; -; AVX2-LABEL: @x86_pblendvb_v16i32_v8i32( -; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32> -; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32> -; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]] -; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> -; AVX2-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> -; AVX2-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]] -; AVX2-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP4]] to <8 x i64> -; AVX2-NEXT: ret <8 x i64> [[RES]] -; -; AVX512-LABEL: @x86_pblendvb_v16i32_v8i32( -; AVX512-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32> -; AVX512-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32> -; AVX512-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]] -; AVX512-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> -; 
AVX512-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> -; AVX512-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]] -; AVX512-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64> -; AVX512-NEXT: ret <8 x i64> [[RES]] +; CHECK-LABEL: @x86_pblendvb_v16i32_v8i32( +; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32> +; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32> +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]] +; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64> +; CHECK-NEXT: ret <8 x i64> [[RES]] ; %a.bc = bitcast <8 x i64> %a to <64 x i8> %b.bc = bitcast <8 x i64> %b to <64 x i8> @@ -436,11 +386,11 @@ define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i6 ; AVX2-LABEL: @x86_pblendvb_v32i16_v16i16( ; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16> ; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16> -; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]] +; AVX2-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]] +; AVX2-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16> ; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16> -; AVX2-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16> -; AVX2-NEXT: [[TMP4:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[TMP2]], <32 x i16> [[TMP3]] -; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP4]] to <8 x i64> +; AVX2-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]] +; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64> ; AVX2-NEXT: ret <8 x i64> [[RES]] ; ; AVX512-LABEL: 
@x86_pblendvb_v32i16_v16i16( diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll index 0bc87a9b03821..2588f9116f322 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll @@ -64,10 +64,9 @@ define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i1 define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) { ; SSE-LABEL: define <16 x i16> @src_v8tov16_i16( ; SSE-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] { -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <16 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <16 x i32> -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <16 x i32> -; SSE-NEXT: [[RES:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]] +; SSE-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]] +; SSE-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]] +; SSE-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> ; SSE-NEXT: ret <16 x i16> [[RES]] ; ; AVX2-LABEL: define <16 x i16> @src_v8tov16_i16( @@ -108,10 +107,9 @@ define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i3 define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; SSE-LABEL: define <8 x i32> @src_v4tov8_i32( ; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <8 x i32> -; 
SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> [[X]], <8 x i32> -; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]] +; SSE-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]] +; SSE-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]] +; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[RES]] ; ; AVX2-LABEL: define <8 x i32> @src_v4tov8_i32( @@ -168,10 +166,9 @@ define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i6 define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { ; SSE-LABEL: define <4 x i64> @src_v2tov4_i64( ; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] { -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <4 x i32> -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <4 x i32> -; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]] +; SSE-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]] +; SSE-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]] +; SSE-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> ; SSE-NEXT: ret <4 x i64> [[RES]] ; ; AVX2-LABEL: define <4 x i64> @src_v2tov4_i64( @@ -228,10 +225,9 @@ define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, < define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) { ; SSE-LABEL: define <8 x float> @src_v4tov8_float( ; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x 
float> [[Z:%.*]]) #[[ATTR0]] { -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <8 x i32> -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[Z]], <4 x float> [[X]], <8 x i32> -; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]] +; SSE-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]] +; SSE-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]] +; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> ; SSE-NEXT: ret <8 x float> [[RES]] ; ; AVX2-LABEL: define <8 x float> @src_v4tov8_float( @@ -288,10 +284,9 @@ define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) { ; SSE-LABEL: define <4 x double> @src_v2tov4_double( ; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] { -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <4 x i32> -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <4 x i32> -; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]] +; SSE-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]] +; SSE-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]] +; SSE-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> ; SSE-NEXT: ret <4 x double> [[RES]] ; ; AVX2-LABEL: define <4 x double> @src_v2tov4_double(