diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 746742e14d080..08ae79e600ce5 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -119,6 +119,7 @@ class VectorCombine {
   bool foldConcatOfBoolMasks(Instruction &I);
   bool foldPermuteOfBinops(Instruction &I);
   bool foldShuffleOfBinops(Instruction &I);
+  bool foldShuffleOfSelects(Instruction &I);
   bool foldShuffleOfCastops(Instruction &I);
   bool foldShuffleOfShuffles(Instruction &I);
   bool foldShuffleOfIntrinsics(Instruction &I);
@@ -1899,6 +1900,87 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   return true;
 }
 
+/// Try to convert
+///   shuffle (select c1, t1, f1), (select c2, t2, f2), m
+/// into
+///   select (shuffle c1, c2, m), (shuffle t1, t2, m), (shuffle f1, f2, m)
+///
+/// Both selects must be single-use, take fixed-vector conditions of the same
+/// type, and agree on fast-math flags. The rewrite is only performed when the
+/// TTI cost of the three shuffles plus one select does not exceed the cost of
+/// the two selects plus the original shuffle.
+///
+/// \returns true if \p I was replaced.
+bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
+  ArrayRef<int> Mask;
+  Value *C1, *T1, *F1, *C2, *T2, *F2;
+  if (!match(&I, m_Shuffle(
+                     m_OneUse(m_Select(m_Value(C1), m_Value(T1), m_Value(F1))),
+                     m_OneUse(m_Select(m_Value(C2), m_Value(T2), m_Value(F2))),
+                     m_Mask(Mask))))
+    return false;
+
+  // Anything that is not a fixed-width vector (e.g. a scalar i1 condition)
+  auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
+  auto *C1VecTy = dyn_cast<FixedVectorType>(C1->getType());
+  auto *C2VecTy = dyn_cast<FixedVectorType>(C2->getType());
+  if (!DstVecTy || !C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
+    return false;
+
+  auto *SI0FOp = dyn_cast<FPMathOperator>(I.getOperand(0));
+  auto *SI1FOp = dyn_cast<FPMathOperator>(I.getOperand(1));
+  // SelectInsts must have the same FMF.
+  if (((SI0FOp == nullptr) != (SI1FOp == nullptr)) ||
+      ((SI0FOp != nullptr) &&
+       (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
+    return false;
+
+  auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
+  auto SelOp = Instruction::Select;
+  InstructionCost OldCost = TTI.getCmpSelInstrCost(
+      SelOp, T1->getType(), C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
+  OldCost += TTI.getCmpSelInstrCost(SelOp, T2->getType(), C2VecTy,
+                                    CmpInst::BAD_ICMP_PREDICATE, CostKind);
+  OldCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr,
+                                {I.getOperand(0), I.getOperand(1)}, &I);
+
+  // The merged select is controlled by the shuffled i1 condition vector, so
+  // cost it with that condition type rather than with the value type.
+  auto *C1C2VecTy = cast<FixedVectorType>(
+      toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
+  InstructionCost NewCost =
+      TTI.getShuffleCost(SK, C1C2VecTy, Mask, CostKind, 0, nullptr, {C1, C2});
+  NewCost +=
+      TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {T1, T2});
+  NewCost +=
+      TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {F1, F2});
+  NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, C1C2VecTy,
+                                    CmpInst::BAD_ICMP_PREDICATE, CostKind);
+
+  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two selects: " << I
+                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
+                    << "\n");
+  if (NewCost > OldCost)
+    return false;
+
+  Value *ShuffleCmp = Builder.CreateShuffleVector(C1, C2, Mask);
+  Value *ShuffleTrue = Builder.CreateShuffleVector(T1, T2, Mask);
+  Value *ShuffleFalse = Builder.CreateShuffleVector(F1, F2, Mask);
+  Value *NewSel;
+  // We presuppose that the SelectInsts have the same FMF.
+  if (SI0FOp)
+    NewSel = Builder.CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
+                                     SI0FOp->getFastMathFlags());
+  else
+    NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
+
+  Worklist.pushValue(ShuffleCmp);
+  Worklist.pushValue(ShuffleTrue);
+  Worklist.pushValue(ShuffleFalse);
+  replaceValue(I, *NewSel);
+  return true;
+}
+
 /// Try to convert "shuffle (castop), (castop)" with a shared castop operand
 /// into "castop (shuffle)".
 bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
@@ -3352,6 +3434,7 @@ bool VectorCombine::run() {
       case Instruction::ShuffleVector:
         MadeChange |= foldPermuteOfBinops(I);
         MadeChange |= foldShuffleOfBinops(I);
+        MadeChange |= foldShuffleOfSelects(I);
         MadeChange |= foldShuffleOfCastops(I);
         MadeChange |= foldShuffleOfShuffles(I);
         MadeChange |= foldShuffleOfIntrinsics(I);
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
new file mode 100644
index 0000000000000..6653bf3375423
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
@@ -0,0 +1,581 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
+
+define <4 x i16> @src_v2tov4_i16(<2 x i1> %a, <2 x i1> %b, <2 x i16> %x, <2 x i16> %y, <2 x i16> %z) {
+; SSE-LABEL: define <4 x i16> @src_v2tov4_i16(
+; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> 
+; SSE-NEXT: [[TMP2:%.*]] = 
shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> +; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]] +; SSE-NEXT: ret <4 x i16> [[RES]] +; +; AVX2-LABEL: define <4 x i16> @src_v2tov4_i16( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[X]], <2 x i16> [[Y]], <4 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[Z]], <2 x i16> [[X]], <4 x i32> +; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP2]], <4 x i16> [[TMP3]] +; AVX2-NEXT: ret <4 x i16> [[RES]] +; +; AVX512-LABEL: define <4 x i16> @src_v2tov4_i16( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i16> [[X]], <2 x i16> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i16> [[Y]], <2 x i16> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i16> [[SELECT_XZ]], <2 x i16> [[SELECT_YX]], <4 x i32> +; AVX512-NEXT: ret <4 x i16> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x i16> %x, <2 x i16> %z + %select.yx = select <2 x i1> %b, <2 x i16> %y, <2 x i16> %x + %res = shufflevector <2 x i16> %select.xz, <2 x i16> %select.yx, <4 x i32> + ret <4 x i16> %res +} + +define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i16> %y, <4 x i16> %z) { +; SSE-LABEL: define <8 x i16> @src_v4tov8_i16( +; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> +; SSE-NEXT: [[TMP2:%.*]] = 
shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> +; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]] +; SSE-NEXT: ret <8 x i16> [[RES]] +; +; AVX2-LABEL: define <8 x i16> @src_v4tov8_i16( +; AVX2-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> [[Y]], <8 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[Z]], <4 x i16> [[X]], <8 x i32> +; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]] +; AVX2-NEXT: ret <8 x i16> [[RES]] +; +; AVX512-LABEL: define <8 x i16> @src_v4tov8_i16( +; AVX512-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i16> [[X:%.*]], <4 x i16> [[Y:%.*]], <4 x i16> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i16> [[X]], <4 x i16> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i16> [[Y]], <4 x i16> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[SELECT_XZ]], <4 x i16> [[SELECT_YX]], <8 x i32> +; AVX512-NEXT: ret <8 x i16> [[RES]] +; + %select.xz = select <4 x i1> %a, <4 x i16> %x, <4 x i16> %z + %select.yx = select <4 x i1> %b, <4 x i16> %y, <4 x i16> %x + %res = shufflevector <4 x i16> %select.xz, <4 x i16> %select.yx, <8 x i32> + ret <8 x i16> %res +} + +define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) { +; SSE-LABEL: define <8 x i16> @src_v8tov8_i16( +; SSE-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <8 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> 
[[X]], <8 x i16> [[Y]], <8 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <8 x i32> +; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]] +; SSE-NEXT: ret <8 x i16> [[RES]] +; +; AVX2-LABEL: define <8 x i16> @src_v8tov8_i16( +; AVX2-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <8 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <8 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <8 x i32> +; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]] +; AVX2-NEXT: ret <8 x i16> [[RES]] +; +; AVX512-LABEL: define <8 x i16> @src_v8tov8_i16( +; AVX512-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <8 x i32> +; AVX512-NEXT: ret <8 x i16> [[RES]] +; + %select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z + %select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x + %res = shufflevector <8 x i16> %select.xz, <8 x i16> %select.yx, <8 x i32> + ret <8 x i16> %res +} + +define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) { +; CHECK-LABEL: define <16 x i16> @src_v8tov16_i16( +; CHECK-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> 
[[B]], <8 x i16> [[Y]], <8 x i16> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %select.xz = select <8 x i1> %a, <8 x i16> %x, <8 x i16> %z + %select.yx = select <8 x i1> %b, <8 x i16> %y, <8 x i16> %x + %res = shufflevector <8 x i16> %select.xz, <8 x i16> %select.yx, <16 x i32> + ret <16 x i16> %res +} + +define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { +; SSE-LABEL: define <4 x i32> @src_v2tov4_i32( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> +; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] +; SSE-NEXT: ret <4 x i32> [[RES]] +; +; AVX2-LABEL: define <4 x i32> @src_v2tov4_i32( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> [[Y]], <4 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[Z]], <2 x i32> [[X]], <4 x i32> +; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] +; AVX2-NEXT: ret <4 x i32> [[RES]] +; +; AVX512-LABEL: define <4 x i32> @src_v2tov4_i32( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> 
[[Y]], <2 x i32> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> +; AVX512-NEXT: ret <4 x i32> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z + %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x + %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> + ret <4 x i32> %res +} + +define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: define <8 x i32> @src_v4tov8_i32( +; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %select.xz = select <4 x i1> %a, <4 x i32> %x, <4 x i32> %z + %select.yx = select <4 x i1> %b, <4 x i32> %y, <4 x i32> %x + %res = shufflevector <4 x i32> %select.xz, <4 x i32> %select.yx, <8 x i32> + ret <8 x i32> %res +} + +define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { +; SSE-LABEL: define <2 x i64> @src_v2tov2_i64( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <2 x i32> +; SSE-NEXT: [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]] +; SSE-NEXT: ret <2 x i64> [[RES]] +; +; AVX2-LABEL: define <2 x i64> @src_v2tov2_i64( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> 
[[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <2 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <2 x i32> +; AVX2-NEXT: [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]] +; AVX2-NEXT: ret <2 x i64> [[RES]] +; +; AVX512-LABEL: define <2 x i64> @src_v2tov2_i64( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <2 x i32> +; AVX512-NEXT: ret <2 x i64> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z + %select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x + %res = shufflevector <2 x i64> %select.xz, <2 x i64> %select.yx, <2 x i32> + ret <2 x i64> %res +} + +define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { +; CHECK-LABEL: define <4 x i64> @src_v2tov4_i64( +; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x i64> %x, <2 x i64> %z + %select.yx = select <2 x i1> %b, <2 x i64> %y, <2 x i64> %x + %res = shufflevector <2 x i64> %select.xz, <2 
x i64> %select.yx, <4 x i32> + ret <4 x i64> %res +} + +define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) { +; SSE-LABEL: define <4 x float> @src_v2tov4_float( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; SSE-NEXT: ret <4 x float> [[RES]] +; +; AVX2-LABEL: define <4 x float> @src_v2tov4_float( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; AVX2-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; AVX2-NEXT: ret <4 x float> [[RES]] +; +; AVX512-LABEL: define <4 x float> @src_v2tov4_float( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> +; AVX512-NEXT: ret <4 x float> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x float> %x, <2 x float> %z + %select.yx = select <2 x i1> %b, <2 x float> 
%y, <2 x float> %x + %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> + ret <4 x float> %res +} + +define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) { +; CHECK-LABEL: define <8 x float> @src_v4tov8_float( +; CHECK-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %select.xz = select <4 x i1> %a, <4 x float> %x, <4 x float> %z + %select.yx = select <4 x i1> %b, <4 x float> %y, <4 x float> %x + %res = shufflevector <4 x float> %select.xz, <4 x float> %select.yx, <8 x i32> + ret <8 x float> %res +} + +define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) { +; SSE-LABEL: define <2 x double> @src_v2tov2_double( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <2 x i32> +; SSE-NEXT: [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]] +; SSE-NEXT: ret <2 x double> [[RES]] +; +; AVX2-LABEL: define <2 x double> @src_v2tov2_double( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], 
<2 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <2 x i32> +; AVX2-NEXT: [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]] +; AVX2-NEXT: ret <2 x double> [[RES]] +; +; AVX512-LABEL: define <2 x double> @src_v2tov2_double( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <2 x i32> +; AVX512-NEXT: ret <2 x double> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z + %select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x + %res = shufflevector <2 x double> %select.xz, <2 x double> %select.yx, <2 x i32> + ret <2 x double> %res +} + +define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) { +; CHECK-LABEL: define <4 x double> @src_v2tov4_double( +; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x double> %x, <2 x double> %z + %select.yx = select <2 x i1> %b, <2 x double> %y, <2 x double> %x + %res = shufflevector <2 x double> %select.xz, <2 x double> 
%select.yx, <4 x i32> + ret <4 x double> %res +} + +; FMF Flags +define <4 x float> @src_v2tov4_float_nnan(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) { +; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; SSE-NEXT: [[RES:%.*]] = select nnan <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; SSE-NEXT: ret <4 x float> [[RES]] +; +; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; AVX2-NEXT: [[RES:%.*]] = select nnan <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; AVX2-NEXT: ret <4 x float> [[RES]] +; +; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select nnan <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select nnan <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> +; AVX512-NEXT: ret <4 x float> [[RES]] +; + %select.xz = select nnan <2 x i1> %a, <2 x float> %x, <2 x 
float> %z + %select.yx = select nnan <2 x i1> %b, <2 x float> %y, <2 x float> %x + %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> + ret <4 x float> %res +} + +define <4 x float> @src_v2tov4_float_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) { +; SSE-LABEL: define <4 x float> @src_v2tov4_float_ninf( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; SSE-NEXT: [[RES:%.*]] = select ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; SSE-NEXT: ret <4 x float> [[RES]] +; +; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; AVX2-NEXT: [[RES:%.*]] = select ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; AVX2-NEXT: ret <4 x float> [[RES]] +; +; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> 
[[SELECT_YX]], <4 x i32> +; AVX512-NEXT: ret <4 x float> [[RES]] +; + %select.xz = select ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z + %select.yx = select ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x + %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> + ret <4 x float> %res +} + +define <4 x float> @src_v2tov4_float_nnan_ninf(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) { +; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; SSE-NEXT: [[RES:%.*]] = select nnan ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; SSE-NEXT: ret <4 x float> [[RES]] +; +; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; AVX2-NEXT: [[RES:%.*]] = select nnan ninf <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; AVX2-NEXT: ret <4 x float> [[RES]] +; +; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select nnan ninf <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]] +; AVX512-NEXT: 
[[SELECT_YX:%.*]] = select nnan ninf <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> +; AVX512-NEXT: ret <4 x float> [[RES]] +; + %select.xz = select nnan ninf <2 x i1> %a, <2 x float> %x, <2 x float> %z + %select.yx = select nnan ninf <2 x i1> %b, <2 x float> %y, <2 x float> %x + %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> + ret <4 x float> %res +} + +define <4 x float> @src_v2tov4_float_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) { +; SSE-LABEL: define <4 x float> @src_v2tov4_float_nsz( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; SSE-NEXT: [[RES:%.*]] = select nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; SSE-NEXT: ret <4 x float> [[RES]] +; +; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nsz( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; AVX2-NEXT: [[RES:%.*]] = select nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; AVX2-NEXT: ret <4 x float> [[RES]] +; +; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nsz( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> 
[[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> +; AVX512-NEXT: ret <4 x float> [[RES]] +; + %select.xz = select nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z + %select.yx = select nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x + %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> + ret <4 x float> %res +} + +define <4 x float> @src_v2tov4_float_nnan_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) { +; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; SSE-NEXT: [[RES:%.*]] = select nnan nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; SSE-NEXT: ret <4 x float> [[RES]] +; +; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_nsz( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; AVX2-NEXT: [[RES:%.*]] = select nnan nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; AVX2-NEXT: ret <4 x float> [[RES]] +; +; AVX512-LABEL: define <4 x float> 
@src_v2tov4_float_nnan_nsz( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select nnan nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select nnan nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> +; AVX512-NEXT: ret <4 x float> [[RES]] +; + %select.xz = select nnan nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z + %select.yx = select nnan nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x + %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> + ret <4 x float> %res +} + +define <4 x float> @src_v2tov4_float_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) { +; SSE-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; SSE-NEXT: [[RES:%.*]] = select ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; SSE-NEXT: ret <4 x float> [[RES]] +; +; AVX2-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; AVX2-NEXT: 
[[RES:%.*]] = select ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; AVX2-NEXT: ret <4 x float> [[RES]] +; +; AVX512-LABEL: define <4 x float> @src_v2tov4_float_ninf_nsz( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> +; AVX512-NEXT: ret <4 x float> [[RES]] +; + %select.xz = select ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z + %select.yx = select ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x + %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> + ret <4 x float> %res +} + +define <4 x float> @src_v2tov4_float_nnan_ninf_nsz(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) { +; SSE-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz( +; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; SSE-NEXT: [[RES:%.*]] = select nnan ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; SSE-NEXT: ret <4 x float> [[RES]] +; +; AVX2-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz( +; AVX2-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> +; AVX2-NEXT: 
[[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y]], <4 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[Z]], <2 x float> [[X]], <4 x i32> +; AVX2-NEXT: [[RES:%.*]] = select nnan ninf nsz <4 x i1> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]] +; AVX2-NEXT: ret <4 x float> [[RES]] +; +; AVX512-LABEL: define <4 x float> @src_v2tov4_float_nnan_ninf_nsz( +; AVX512-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select nnan ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]] +; AVX512-NEXT: [[SELECT_YX:%.*]] = select nnan ninf nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]] +; AVX512-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> +; AVX512-NEXT: ret <4 x float> [[RES]] +; + %select.xz = select nnan ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z + %select.yx = select nnan ninf nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x + %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> + ret <4 x float> %res +} + +; Negative - the two selects carry different fast-math flags (FMF) +define <4 x float> @src_v2tov4_float_nonfpm_with_fpm1(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) { +; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nonfpm_with_fpm1( +; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select nnan nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x float> %x, <2 x float> %z + %select.yx = select nnan nsz <2 x i1> %b, <2 x 
float> %y, <2 x float> %x + %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> + ret <4 x float> %res +} + +define <4 x float> @src_v2tov4_float_nonfpm_with_fpm2(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) { +; CHECK-LABEL: define <4 x float> @src_v2tov4_float_nonfpm_with_fpm2( +; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select nnan nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %select.xz = select nnan nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z + %select.yx = select <2 x i1> %b, <2 x float> %y, <2 x float> %x + %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> + ret <4 x float> %res +} + +define <4 x float> @src_v2tov4_float_diff_fpm(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <2 x float> %y, <2 x float> %z) { +; CHECK-LABEL: define <4 x float> @src_v2tov4_float_diff_fpm( +; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select ninf nsz <2 x i1> [[A]], <2 x float> [[X]], <2 x float> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select nnan nsz <2 x i1> [[B]], <2 x float> [[Y]], <2 x float> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x float> [[SELECT_XZ]], <2 x float> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %select.xz = select ninf nsz <2 x i1> %a, <2 x float> %x, <2 x float> %z + %select.yx = select nnan nsz <2 x i1> %b, <2 x float> %y, <2 x float> %x + %res = shufflevector <2 x float> %select.xz, <2 x float> %select.yx, <4 x i32> + ret <4 x 
float> %res +} + +; Negative - Vector order +define <4 x i32> @src_v2tov4_i32_backward(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_backward( +; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z + %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x + %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> + ret <4 x i32> %res +} + +define <4 x i32> @src_v2tov4_i32_change_in_same_vector(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_change_in_same_vector( +; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z + %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x + %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> + ret <4 x i32> %res +} + +define <4 x i32> @src_v2tov4_i32_change_to_other_vector(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_change_to_other_vector( +; CHECK-SAME: <2 x 
i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z + %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x + %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> + ret <4 x i32> %res +}