diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index eda71dde620b6..b0e23c297e204 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -391,8 +391,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   // Set DAG combine for 'LSX' feature.
-  if (Subtarget.hasExtLSX())
+  if (Subtarget.hasExtLSX()) {
     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+    setTargetDAGCombine(ISD::BITCAST);
+  }
 
   // Compute derived properties from the register classes.
   computeRegisterProperties(Subtarget.getRegisterInfo());
@@ -4329,6 +4331,85 @@ static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     const LoongArchSubtarget &Subtarget) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue Src = N->getOperand(0);
+  EVT SrcVT = Src.getValueType();
+
+  if (!DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
+    return SDValue();
+
+  unsigned Opc = ISD::DELETED_NODE;
+  // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
+  if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
+    bool UseLASX;
+    EVT CmpVT = Src.getOperand(0).getValueType();
+    EVT EltVT = CmpVT.getVectorElementType();
+
+    if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() <= 128)
+      UseLASX = false;
+    else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
+             CmpVT.getSizeInBits() <= 256)
+      UseLASX = true;
+    else
+      return SDValue();
+
+    SDValue SrcN1 = Src.getOperand(1);
+    switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
+    default:
+      break;
+    case ISD::SETEQ:
+      // x == 0 => not (vmsknez.b x)
+      if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
+        Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
+      break;
+    case ISD::SETGT:
+      // x > -1 => vmskgez.b x
+      if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
+        Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
+      break;
+    case ISD::SETGE:
+      // x >= 0 => vmskgez.b x
+      if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
+        Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
+      break;
+    case ISD::SETLT:
+      // x < 0 => vmskltz.{b,h,w,d} x
+      if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
+          (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
+           EltVT == MVT::i64))
+        Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
+      break;
+    case ISD::SETLE:
+      // x <= -1 => vmskltz.{b,h,w,d} x
+      if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
+          (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
+           EltVT == MVT::i64))
+        Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
+      break;
+    case ISD::SETNE:
+      // x != 0 => vmsknez.b x
+      if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
+        Opc = UseLASX ? 
LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ; + break; + } + } + + if (Opc == ISD::DELETED_NODE) + return SDValue(); + + SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0)); + EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements()); + V = DAG.getZExtOrTrunc(V, DL, T); + return DAG.getBitcast(VT, V); +} + static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget) { @@ -5373,6 +5454,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, return performSETCCCombine(N, DAG, DCI, Subtarget); case ISD::SRL: return performSRLCombine(N, DAG, DCI, Subtarget); + case ISD::BITCAST: + return performBITCASTCombine(N, DAG, DCI, Subtarget); case LoongArchISD::BITREV_W: return performBITREV_WCombine(N, DAG, DCI, Subtarget); case ISD::INTRINSIC_WO_CHAIN: @@ -5663,6 +5746,120 @@ static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI, return BB; } +static MachineBasicBlock * +emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, + const LoongArchSubtarget &Subtarget) { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const TargetRegisterClass *RC = &LoongArch::LSX128RegClass; + const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + DebugLoc DL = MI.getDebugLoc(); + unsigned EleBits = 8; + unsigned NotOpc = 0; + unsigned MskOpc; + + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case LoongArch::PseudoVMSKLTZ_B: + MskOpc = LoongArch::VMSKLTZ_B; + break; + case LoongArch::PseudoVMSKLTZ_H: + MskOpc = LoongArch::VMSKLTZ_H; + EleBits = 16; + break; + case LoongArch::PseudoVMSKLTZ_W: + MskOpc = LoongArch::VMSKLTZ_W; + EleBits = 32; + break; + case LoongArch::PseudoVMSKLTZ_D: + MskOpc = LoongArch::VMSKLTZ_D; + EleBits = 64; + break; + case LoongArch::PseudoVMSKGEZ_B: + MskOpc = LoongArch::VMSKGEZ_B; + break; + case LoongArch::PseudoVMSKEQZ_B: + MskOpc = LoongArch::VMSKNZ_B; + NotOpc = LoongArch::VNOR_V; + break; + case LoongArch::PseudoVMSKNEZ_B: + MskOpc = LoongArch::VMSKNZ_B; + break; + case LoongArch::PseudoXVMSKLTZ_B: + MskOpc = LoongArch::XVMSKLTZ_B; + RC = &LoongArch::LASX256RegClass; + break; + case LoongArch::PseudoXVMSKLTZ_H: + MskOpc = LoongArch::XVMSKLTZ_H; + RC = &LoongArch::LASX256RegClass; + EleBits = 16; + break; + case LoongArch::PseudoXVMSKLTZ_W: + MskOpc = LoongArch::XVMSKLTZ_W; + RC = &LoongArch::LASX256RegClass; + EleBits = 32; + break; + case LoongArch::PseudoXVMSKLTZ_D: + MskOpc = LoongArch::XVMSKLTZ_D; + RC = &LoongArch::LASX256RegClass; + EleBits = 64; + break; + case LoongArch::PseudoXVMSKGEZ_B: + MskOpc = LoongArch::XVMSKGEZ_B; + RC = &LoongArch::LASX256RegClass; + break; + case LoongArch::PseudoXVMSKEQZ_B: + MskOpc = LoongArch::XVMSKNZ_B; + NotOpc = LoongArch::XVNOR_V; + RC = &LoongArch::LASX256RegClass; + break; + case LoongArch::PseudoXVMSKNEZ_B: + MskOpc = LoongArch::XVMSKNZ_B; + RC = &LoongArch::LASX256RegClass; + break; + } + + Register Msk = MRI.createVirtualRegister(RC); + if (NotOpc) { + Register Tmp = MRI.createVirtualRegister(RC); + BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src); + BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk) + .addReg(Tmp, RegState::Kill) + .addReg(Tmp, RegState::Kill); + } else { + BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src); + } + + if (TRI->getRegSizeInBits(*RC) > 128) { + Register Lo = 
MRI.createVirtualRegister(&LoongArch::GPRRegClass); + Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo) + .addReg(Msk) + .addImm(0); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi) + .addReg(Msk, RegState::Kill) + .addImm(4); + BuildMI(*BB, MI, DL, + TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D + : LoongArch::BSTRINS_W), + Dst) + .addReg(Lo, RegState::Kill) + .addReg(Hi, RegState::Kill) + .addImm(256 / EleBits - 1) + .addImm(128 / EleBits); + } else { + BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst) + .addReg(Msk, RegState::Kill) + .addImm(0); + } + + MI.eraseFromParent(); + return BB; +} + static bool isSelectPseudo(MachineInstr &MI) { switch (MI.getOpcode()) { default: @@ -5869,6 +6066,21 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( return emitPseudoXVINSGR2VR(MI, BB, Subtarget); case LoongArch::PseudoCTPOP: return emitPseudoCTPOP(MI, BB, Subtarget); + case LoongArch::PseudoVMSKLTZ_B: + case LoongArch::PseudoVMSKLTZ_H: + case LoongArch::PseudoVMSKLTZ_W: + case LoongArch::PseudoVMSKLTZ_D: + case LoongArch::PseudoVMSKGEZ_B: + case LoongArch::PseudoVMSKEQZ_B: + case LoongArch::PseudoVMSKNEZ_B: + case LoongArch::PseudoXVMSKLTZ_B: + case LoongArch::PseudoXVMSKLTZ_H: + case LoongArch::PseudoXVMSKLTZ_W: + case LoongArch::PseudoXVMSKLTZ_D: + case LoongArch::PseudoXVMSKGEZ_B: + case LoongArch::PseudoXVMSKEQZ_B: + case LoongArch::PseudoXVMSKNEZ_B: + return emitPseudoVMSKCOND(MI, BB, Subtarget); case TargetOpcode::STATEPOINT: // STATEPOINT is a pseudo instruction which has no implicit defs/uses // while bl call instruction (where statepoint will be lowered at the @@ -5990,6 +6202,14 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VBSLL) NODE_NAME_CASE(VBSRL) NODE_NAME_CASE(VLDREPL) + NODE_NAME_CASE(VMSKLTZ) + NODE_NAME_CASE(VMSKGEZ) + NODE_NAME_CASE(VMSKEQZ) + NODE_NAME_CASE(VMSKNEZ) + NODE_NAME_CASE(XVMSKLTZ) + NODE_NAME_CASE(XVMSKGEZ) + NODE_NAME_CASE(XVMSKEQZ) + NODE_NAME_CASE(XVMSKNEZ) } #undef NODE_NAME_CASE return nullptr; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 8c00ec75db94b..4b6d3272db2c9 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -161,7 +161,17 @@ enum NodeType : unsigned { VBSRL, // Scalar load broadcast to vector - VLDREPL + VLDREPL, + + // Vector mask set by condition + VMSKLTZ, + VMSKGEZ, + VMSKEQZ, + VMSKNEZ, + XVMSKLTZ, + XVMSKGEZ, + XVMSKEQZ, + XVMSKNEZ, // Intrinsic operations end ============================================= }; diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 59dec76ef1c2d..ff7b0f2ae3f25 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -12,6 +12,10 @@ // Target nodes. 
def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>; +def loongarch_xvmskltz: SDNode<"LoongArchISD::XVMSKLTZ", SDT_LoongArchVMSKCOND>; +def loongarch_xvmskgez: SDNode<"LoongArchISD::XVMSKGEZ", SDT_LoongArchVMSKCOND>; +def loongarch_xvmskeqz: SDNode<"LoongArchISD::XVMSKEQZ", SDT_LoongArchVMSKCOND>; +def loongarch_xvmsknez: SDNode<"LoongArchISD::XVMSKNEZ", SDT_LoongArchVMSKCOND>; def lasxsplati8 : PatFrag<(ops node:$e0), @@ -1086,6 +1090,16 @@ def PseudoXVINSGR2VR_H : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm4:$imm)>; } // usesCustomInserter = 1, Constraints = "$xd = $dst" +let usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +def PseudoXVMSKLTZ_B : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>; +def PseudoXVMSKLTZ_H : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>; +def PseudoXVMSKLTZ_W : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>; +def PseudoXVMSKLTZ_D : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>; +def PseudoXVMSKGEZ_B : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>; +def PseudoXVMSKEQZ_B : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>; +def PseudoXVMSKNEZ_B : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>; +} // usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0 + } // Predicates = [HasExtLASX] multiclass PatXr { @@ -1856,6 +1870,15 @@ def : Pat<(vt (concat_vectors LSX128:$vd, LSX128:$vj)), defm : PatXrXr; defm : PatXrXrU; +// Vector mask set by condition +def : Pat<(loongarch_xvmskltz (v32i8 LASX256:$vj)), (PseudoXVMSKLTZ_B LASX256:$vj)>; +def : Pat<(loongarch_xvmskltz (v16i16 LASX256:$vj)), (PseudoXVMSKLTZ_H LASX256:$vj)>; +def : Pat<(loongarch_xvmskltz (v8i32 LASX256:$vj)), (PseudoXVMSKLTZ_W LASX256:$vj)>; +def : Pat<(loongarch_xvmskltz (v4i64 LASX256:$vj)), (PseudoXVMSKLTZ_D LASX256:$vj)>; +def : Pat<(loongarch_xvmskgez (v32i8 LASX256:$vj)), (PseudoXVMSKGEZ_B LASX256:$vj)>; +def : Pat<(loongarch_xvmskeqz (v32i8 LASX256:$vj)), (PseudoXVMSKEQZ_B LASX256:$vj)>; +def : Pat<(loongarch_xvmsknez (v32i8 LASX256:$vj)), (PseudoXVMSKNEZ_B LASX256:$vj)>; + } // Predicates = [HasExtLASX] /// Intrinsic pattern diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index b2b3b65155265..d73d78083ddcd 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -31,6 +31,7 @@ def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, S def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>; def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>; def SDT_LoongArchVLDREPL : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisPtrTy<1>]>; +def SDT_LoongArchVMSKCOND : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; // Target nodes. 
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>; @@ -74,6 +75,11 @@ def loongarch_vldrepl : SDNode<"LoongArchISD::VLDREPL", SDT_LoongArchVLDREPL, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def loongarch_vmskltz: SDNode<"LoongArchISD::VMSKLTZ", SDT_LoongArchVMSKCOND>; +def loongarch_vmskgez: SDNode<"LoongArchISD::VMSKGEZ", SDT_LoongArchVMSKCOND>; +def loongarch_vmskeqz: SDNode<"LoongArchISD::VMSKEQZ", SDT_LoongArchVMSKCOND>; +def loongarch_vmsknez: SDNode<"LoongArchISD::VMSKNEZ", SDT_LoongArchVMSKCOND>; + def immZExt1 : ImmLeaf(Imm);}]>; def immZExt2 : ImmLeaf(Imm);}]>; def immZExt3 : ImmLeaf(Imm);}]>; @@ -1266,6 +1272,16 @@ let usesCustomInserter = 1 in def PseudoCTPOP : Pseudo<(outs GPR:$rd), (ins GPR:$rj), [(set GPR:$rd, (ctpop GPR:$rj))]>; +let usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +def PseudoVMSKLTZ_B : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>; +def PseudoVMSKLTZ_H : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>; +def PseudoVMSKLTZ_W : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>; +def PseudoVMSKLTZ_D : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>; +def PseudoVMSKGEZ_B : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>; +def PseudoVMSKEQZ_B : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>; +def PseudoVMSKNEZ_B : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>; +} // usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0 + } // Predicates = [HasExtLSX] multiclass PatVr { @@ -2050,6 +2066,15 @@ def : Pat<(f64 f64imm_vldi:$in), defm : PatVrVr; defm : PatVrVrU; +// Vector mask set by condition +def : Pat<(loongarch_vmskltz (v16i8 LSX128:$vj)), (PseudoVMSKLTZ_B LSX128:$vj)>; +def : Pat<(loongarch_vmskltz (v8i16 LSX128:$vj)), (PseudoVMSKLTZ_H LSX128:$vj)>; +def : Pat<(loongarch_vmskltz (v4i32 LSX128:$vj)), (PseudoVMSKLTZ_W LSX128:$vj)>; +def : Pat<(loongarch_vmskltz (v2i64 LSX128:$vj)), (PseudoVMSKLTZ_D LSX128:$vj)>; +def : Pat<(loongarch_vmskgez (v16i8 LSX128:$vj)), (PseudoVMSKGEZ_B LSX128:$vj)>; +def : Pat<(loongarch_vmskeqz (v16i8 LSX128:$vj)), (PseudoVMSKEQZ_B LSX128:$vj)>; +def : Pat<(loongarch_vmsknez (v16i8 LSX128:$vj)), (PseudoVMSKNEZ_B LSX128:$vj)>; + } // Predicates = [HasExtLSX] /// Intrinsic pattern diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll index 82e2daee60f82..fb3937c476b03 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll @@ -1,140 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs < %s | FileCheck %s define i32 @xmsk_eq_allzeros_i8(<32 x i8 > %a) { ; CHECK-LABEL: xmsk_eq_allzeros_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -8 -; CHECK-NEXT: .cfi_offset 22, -16 -; CHECK-NEXT: addi.d $fp, $sp, 64 -; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: xvseqi.b $xr0, $xr0, 0 -; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 -; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 -; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 -; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 6 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 7 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 8 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 9 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 10 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 11 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 12 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 13 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 14 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 16 -; CHECK-NEXT: slli.d $a1, $a1, 15 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 17 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 16 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 18 -; CHECK-NEXT: slli.d $a1, $a1, 17 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 19 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 18 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 20 -; CHECK-NEXT: slli.d $a1, $a1, 19 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 21 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 20 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 22 -; CHECK-NEXT: slli.d $a1, $a1, 21 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 23 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 22 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 24 -; CHECK-NEXT: slli.d $a1, $a1, 23 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 25 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 24 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 26 -; CHECK-NEXT: slli.d $a1, $a1, 25 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 27 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 26 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 28 -; CHECK-NEXT: slli.d $a1, $a1, 27 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 29 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 28 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 30 -; CHECK-NEXT: slli.d $a1, $a1, 29 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.b $a1, $sp, 31 -; CHECK-NEXT: andi 
$a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 30 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: slli.d $a1, $a1, 31 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 -; CHECK-NEXT: addi.d $sp, $fp, -64 -; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: xvmsknz.b $xr0, $xr0 +; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr0 +; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16 ; CHECK-NEXT: ret entry: %1 = icmp eq <32 x i8> %a, splat (i8 0) @@ -145,138 +19,10 @@ entry: define i32 @xmsk_sgt_allones_i8(<32 x i8 > %a) { ; CHECK-LABEL: xmsk_sgt_allones_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -8 -; CHECK-NEXT: .cfi_offset 22, -16 -; CHECK-NEXT: addi.d $fp, $sp, 64 -; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: xvrepli.b $xr1, -1 -; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0 -; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 -; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 -; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 -; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 -; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 6 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 7 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 8 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 9 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 10 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 11 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 12 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 13 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 14 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 16 -; CHECK-NEXT: slli.d $a1, $a1, 15 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 17 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 16 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 18 -; CHECK-NEXT: slli.d $a1, $a1, 17 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 19 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 18 -; CHECK-NEXT: or 
$a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 20 -; CHECK-NEXT: slli.d $a1, $a1, 19 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 21 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 20 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 22 -; CHECK-NEXT: slli.d $a1, $a1, 21 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 23 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 22 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 24 -; CHECK-NEXT: slli.d $a1, $a1, 23 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 25 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 24 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 26 -; CHECK-NEXT: slli.d $a1, $a1, 25 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 27 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 26 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 28 -; CHECK-NEXT: slli.d $a1, $a1, 27 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 29 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 28 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 30 -; CHECK-NEXT: slli.d $a1, $a1, 29 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.b $a1, $sp, 31 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 30 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: slli.d $a1, $a1, 31 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 -; CHECK-NEXT: addi.d $sp, $fp, -64 -; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: xvmskgez.b $xr0, $xr0 +; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16 ; CHECK-NEXT: ret entry: %1 = icmp sgt <32 x i8> %a, splat (i8 -1) @@ -287,138 +33,10 @@ entry: define i32 @xmsk_sge_allzeros_i8(<32 x i8 > %a) { ; CHECK-LABEL: xmsk_sge_allzeros_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -8 -; CHECK-NEXT: .cfi_offset 22, -16 -; CHECK-NEXT: addi.d $fp, $sp, 64 -; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: xvrepli.b $xr1, 0 -; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0 -; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 -; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 -; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 -; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 -; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 6 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 7 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 -; 
CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 8 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 9 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 10 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 11 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 12 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 13 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 14 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 16 -; CHECK-NEXT: slli.d $a1, $a1, 15 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 17 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 16 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 18 -; CHECK-NEXT: slli.d $a1, $a1, 17 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 19 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 18 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 20 -; CHECK-NEXT: slli.d $a1, $a1, 19 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 21 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 20 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 22 -; CHECK-NEXT: slli.d $a1, $a1, 21 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 23 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 22 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 24 -; CHECK-NEXT: slli.d $a1, $a1, 23 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 25 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 24 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 26 -; CHECK-NEXT: slli.d $a1, $a1, 25 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 27 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 26 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 28 -; CHECK-NEXT: slli.d $a1, $a1, 27 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 29 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 28 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 30 -; CHECK-NEXT: slli.d $a1, $a1, 29 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.b $a1, $sp, 31 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 30 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: slli.d $a1, $a1, 31 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 -; CHECK-NEXT: addi.d $sp, $fp, -64 -; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: xvmskgez.b $xr0, $xr0 +; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, 
$a1, 31, 16 ; CHECK-NEXT: ret entry: %1 = icmp sge <32 x i8> %a, splat (i8 0) @@ -429,137 +47,10 @@ entry: define i32 @xmsk_slt_allzeros_i8(<32 x i8 > %a) { ; CHECK-LABEL: xmsk_slt_allzeros_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -8 -; CHECK-NEXT: .cfi_offset 22, -16 -; CHECK-NEXT: addi.d $fp, $sp, 64 -; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: xvslti.b $xr0, $xr0, 0 -; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 -; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 -; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 -; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 -; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 6 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 7 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 8 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 9 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 10 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 11 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 12 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 13 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 14 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 16 -; CHECK-NEXT: slli.d $a1, $a1, 15 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 17 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 16 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 18 -; CHECK-NEXT: slli.d $a1, $a1, 17 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 19 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 18 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 20 -; CHECK-NEXT: slli.d $a1, $a1, 19 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 21 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 20 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 22 -; CHECK-NEXT: slli.d $a1, $a1, 21 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 23 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 22 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu 
$a2, $sp, 24 -; CHECK-NEXT: slli.d $a1, $a1, 23 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 25 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 24 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 26 -; CHECK-NEXT: slli.d $a1, $a1, 25 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 27 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 26 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 28 -; CHECK-NEXT: slli.d $a1, $a1, 27 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 29 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 28 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 30 -; CHECK-NEXT: slli.d $a1, $a1, 29 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.b $a1, $sp, 31 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 30 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: slli.d $a1, $a1, 31 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 -; CHECK-NEXT: addi.d $sp, $fp, -64 -; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: xvmskltz.b $xr0, $xr0 +; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16 ; CHECK-NEXT: ret entry: %1 = icmp slt <32 x i8> %a, splat (i8 0) @@ -570,73 +61,10 @@ entry: define i16 @xmsk_slt_allzeros_i16(<16 x i16 > %a) { ; CHECK-LABEL: xmsk_slt_allzeros_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -8 -; CHECK-NEXT: .cfi_offset 22, -16 -; CHECK-NEXT: addi.d $fp, $sp, 64 -; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: xvslti.h $xr0, $xr0, 0 -; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 1 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 2 -; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 3 -; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 4 -; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 5 -; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 6 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 6 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.hu $a2, $sp, 16 -; CHECK-NEXT: slli.d $a1, $a1, 7 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.hu $a1, $sp, 18 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 8 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.hu $a2, $sp, 20 -; CHECK-NEXT: slli.d $a1, $a1, 9 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.hu $a1, $sp, 22 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 10 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.hu $a2, $sp, 24 -; CHECK-NEXT: slli.d $a1, $a1, 11 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.hu $a1, $sp, 26 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 12 -; CHECK-NEXT: or 
$a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.hu $a2, $sp, 28 -; CHECK-NEXT: slli.d $a1, $a1, 13 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.h $a1, $sp, 30 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 14 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: slli.d $a1, $a1, 15 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0 -; CHECK-NEXT: addi.d $sp, $fp, -64 -; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: xvmskltz.h $xr0, $xr0 +; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 15, 8 ; CHECK-NEXT: ret entry: %1 = icmp slt <16 x i16> %a, splat (i16 0) @@ -647,30 +75,10 @@ entry: define i8 @xmsk_slt_allzeros_i32(<8 x i32 > %a) { ; CHECK-LABEL: xmsk_slt_allzeros_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: xvslti.w $xr0, $xr0, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 1 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 2 -; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 3 -; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 4 -; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 5 -; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 6 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 6 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7 -; CHECK-NEXT: slli.d $a1, $a1, 7 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: xvmskltz.w $xr0, $xr0 +; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4 ; CHECK-NEXT: ret entry: %1 = icmp slt <8 x i32> %a, splat (i32 0) @@ -681,21 +89,10 @@ entry: define i4 @xmsk_slt_allzeros_i64(<4 x i64 > %a) { ; CHECK-LABEL: xmsk_slt_allzeros_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: xvslti.d $xr0, $xr0, 0 -; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 -; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 1 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 1 -; CHECK-NEXT: sub.d $a0, $a1, $a0 -; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 2 -; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 -; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3 -; CHECK-NEXT: slli.d $a1, $a1, 3 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: andi $a0, $a0, 15 -; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: xvmskltz.d $xr0, $xr0 +; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2 ; CHECK-NEXT: ret entry: %1 = icmp slt <4 x i64> %a, splat (i64 0) @@ -706,137 +103,10 @@ entry: define i32 @xmsk_sle_allones_i8(<32 x i8 > %a) { ; CHECK-LABEL: xmsk_sle_allones_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -8 -; CHECK-NEXT: .cfi_offset 22, -16 -; CHECK-NEXT: addi.d $fp, $sp, 64 -; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: xvslei.b $xr0, 
$xr0, -1 -; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 -; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 -; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 -; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 -; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 6 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 7 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 8 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 9 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 10 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 11 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 12 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 13 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 14 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 16 -; CHECK-NEXT: slli.d $a1, $a1, 15 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 17 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 16 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 18 -; CHECK-NEXT: slli.d $a1, $a1, 17 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 19 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 18 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 20 -; CHECK-NEXT: slli.d $a1, $a1, 19 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 21 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 20 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 22 -; CHECK-NEXT: slli.d $a1, $a1, 21 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 23 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 22 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 24 -; CHECK-NEXT: slli.d $a1, $a1, 23 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 25 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 24 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 26 -; CHECK-NEXT: slli.d $a1, $a1, 25 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.bu $a1, $sp, 27 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 26 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 28 -; CHECK-NEXT: slli.d $a1, $a1, 27 -; CHECK-NEXT: or $a0, $a0, $a1 
-; CHECK-NEXT: ld.bu $a1, $sp, 29 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 28 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.bu $a2, $sp, 30 -; CHECK-NEXT: slli.d $a1, $a1, 29 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.b $a1, $sp, 31 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 30 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: slli.d $a1, $a1, 31 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 -; CHECK-NEXT: addi.d $sp, $fp, -64 -; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: xvmskltz.b $xr0, $xr0 +; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16 ; CHECK-NEXT: ret entry: %1 = icmp sle <32 x i8> %a, splat (i8 -1) @@ -847,73 +117,10 @@ entry: define i16 @xmsk_sle_allones_i32(<16 x i16 > %a) { ; CHECK-LABEL: xmsk_sle_allones_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -8 -; CHECK-NEXT: .cfi_offset 22, -16 -; CHECK-NEXT: addi.d $fp, $sp, 64 -; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: xvslei.h $xr0, $xr0, -1 -; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 1 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 2 -; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 3 -; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 4 -; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 5 -; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 6 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 6 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.hu $a2, $sp, 16 -; CHECK-NEXT: slli.d $a1, $a1, 7 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.hu $a1, $sp, 18 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 8 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.hu $a2, $sp, 20 -; CHECK-NEXT: slli.d $a1, $a1, 9 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.hu $a1, $sp, 22 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 10 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.hu $a2, $sp, 24 -; CHECK-NEXT: slli.d $a1, $a1, 11 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.hu $a1, $sp, 26 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 12 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: ld.hu $a2, $sp, 28 -; CHECK-NEXT: slli.d $a1, $a1, 13 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: ld.h $a1, $sp, 30 -; CHECK-NEXT: andi $a2, $a2, 1 -; CHECK-NEXT: slli.d $a2, $a2, 14 -; CHECK-NEXT: or $a0, $a0, $a2 -; CHECK-NEXT: slli.d $a1, $a1, 15 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0 -; CHECK-NEXT: addi.d $sp, $fp, -64 -; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: xvmskltz.h $xr0, $xr0 +; 
CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 15, 8 ; CHECK-NEXT: ret entry: %1 = icmp sle <16 x i16> %a, splat (i16 -1) @@ -924,30 +131,10 @@ entry: define i8 @xmsk_sle_allones_i16(<8 x i32 > %a) { ; CHECK-LABEL: xmsk_sle_allones_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: xvslei.w $xr0, $xr0, -1 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 1 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 2 -; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 3 -; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 4 -; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 5 -; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 6 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 6 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7 -; CHECK-NEXT: slli.d $a1, $a1, 7 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: andi $a0, $a0, 255 -; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: xvmskltz.w $xr0, $xr0 +; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4 ; CHECK-NEXT: ret entry: %1 = icmp sle <8 x i32> %a, splat (i32 -1) @@ -958,21 +145,10 @@ entry: define i4 @xmsk_sle_allones_i64(<4 x i64 > %a) { ; CHECK-LABEL: xmsk_sle_allones_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: xvslei.d $xr0, $xr0, -1 -; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 -; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 1 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: slli.d $a1, $a1, 1 -; CHECK-NEXT: sub.d $a0, $a1, $a0 -; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 2 -; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 -; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3 -; CHECK-NEXT: slli.d $a1, $a1, 3 -; CHECK-NEXT: or $a0, $a0, $a1 -; CHECK-NEXT: andi $a0, $a0, 15 -; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: xvmskltz.d $xr0, $xr0 +; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0 +; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4 +; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2 ; CHECK-NEXT: ret entry: %1 = icmp sle <4 x i64> %a, splat (i64 -1) @@ -983,138 +159,10 @@ entry: define i32 @xmsk_ne_allzeros_i8(<32 x i8 > %a) { ; CHECK-LABEL: xmsk_ne_allzeros_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -8 -; CHECK-NEXT: .cfi_offset 22, -16 -; CHECK-NEXT: addi.d $fp, $sp, 64 -; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: xvseqi.b $xr0, $xr0, 0 -; CHECK-NEXT: xvxori.b $xr0, $xr0, 255 -; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1 -; CHECK-NEXT: andi $a1, $a1, 1 -; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 -; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3 -; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 -; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5 -; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 -; 
CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 6
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 7
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 8
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 9
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 10
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 11
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 12
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 13
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 14
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: ld.bu $a2, $sp, 16
-; CHECK-NEXT: slli.d $a1, $a1, 15
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ld.bu $a1, $sp, 17
-; CHECK-NEXT: andi $a2, $a2, 1
-; CHECK-NEXT: slli.d $a2, $a2, 16
-; CHECK-NEXT: or $a0, $a0, $a2
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: ld.bu $a2, $sp, 18
-; CHECK-NEXT: slli.d $a1, $a1, 17
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ld.bu $a1, $sp, 19
-; CHECK-NEXT: andi $a2, $a2, 1
-; CHECK-NEXT: slli.d $a2, $a2, 18
-; CHECK-NEXT: or $a0, $a0, $a2
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: ld.bu $a2, $sp, 20
-; CHECK-NEXT: slli.d $a1, $a1, 19
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ld.bu $a1, $sp, 21
-; CHECK-NEXT: andi $a2, $a2, 1
-; CHECK-NEXT: slli.d $a2, $a2, 20
-; CHECK-NEXT: or $a0, $a0, $a2
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: ld.bu $a2, $sp, 22
-; CHECK-NEXT: slli.d $a1, $a1, 21
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ld.bu $a1, $sp, 23
-; CHECK-NEXT: andi $a2, $a2, 1
-; CHECK-NEXT: slli.d $a2, $a2, 22
-; CHECK-NEXT: or $a0, $a0, $a2
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: ld.bu $a2, $sp, 24
-; CHECK-NEXT: slli.d $a1, $a1, 23
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ld.bu $a1, $sp, 25
-; CHECK-NEXT: andi $a2, $a2, 1
-; CHECK-NEXT: slli.d $a2, $a2, 24
-; CHECK-NEXT: or $a0, $a0, $a2
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: ld.bu $a2, $sp, 26
-; CHECK-NEXT: slli.d $a1, $a1, 25
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ld.bu $a1, $sp, 27
-; CHECK-NEXT: andi $a2, $a2, 1
-; CHECK-NEXT: slli.d $a2, $a2, 26
-; CHECK-NEXT: or $a0, $a0, $a2
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: ld.bu $a2, $sp, 28
-; CHECK-NEXT: slli.d $a1, $a1, 27
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ld.bu $a1, $sp, 29
-; CHECK-NEXT: andi $a2, $a2, 1
-; CHECK-NEXT: slli.d $a2, $a2, 28
-; CHECK-NEXT: or $a0, $a0, $a2
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: ld.bu $a2, $sp, 30
-; CHECK-NEXT: slli.d $a1, $a1, 29
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ld.b $a1, $sp, 31
-; CHECK-NEXT: andi $a2, $a2, 1
-; CHECK-NEXT: slli.d $a2, $a2, 30
-; CHECK-NEXT: or $a0, $a0, $a2
-; CHECK-NEXT: slli.d $a1, $a1, 31
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0
-; CHECK-NEXT: addi.d $sp, $fp, -64
-; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: xvmsknz.b $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
; CHECK-NEXT: ret
entry:
%1 = icmp ne <32 x i8> %a, splat (i8 0)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
index eff3302145c95..5cb8f4e40b27a 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
@@ -1,65 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --verify-machineinstrs < %s | FileCheck %s
define i16 @vmsk_eq_allzeros_i8(<16 x i8 > %a) {
; CHECK-LABEL: vmsk_eq_allzeros_i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vseqi.b $vr0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 6
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 7
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 8
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 9
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 10
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 11
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 12
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 13
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 14
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: slli.d $a1, $a1, 15
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vmsknz.b $vr0, $vr0
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
entry:
%1 = icmp eq <16 x i8> %a, splat (i8 0)
@@ -70,63 +17,8 @@ entry:
define i16 @vmsk_sgt_allones_i8(<16 x i8 > %a) {
; CHECK-LABEL: vmsk_sgt_allones_i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vrepli.b $vr1, -1
-; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 6
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 7
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 8
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 9
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 10
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 11
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 12
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 13
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 14
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: slli.d $a1, $a1, 15
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vmskgez.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
entry:
%1 = icmp sgt <16 x i8> %a, splat (i8 -1)
@@ -137,63 +29,8 @@ entry:
define i16 @vmsk_sge_allzeros_i8(<16 x i8 > %a) {
; CHECK-LABEL: vmsk_sge_allzeros_i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 6
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 7
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 8
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 9
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 10
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 11
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 12
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 13
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 14
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: slli.d $a1, $a1, 15
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vmskgez.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
entry:
%1 = icmp sge <16 x i8> %a, splat (i8 0)
@@ -204,62 +41,8 @@ entry:
define i16 @vmsk_slt_allzeros_i8(<16 x i8 > %a) {
; CHECK-LABEL: vmsk_slt_allzeros_i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vslti.b $vr0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 6
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 7
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 8
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 9
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 10
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 11
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 12
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 13
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 14
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: slli.d $a1, $a1, 15
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
entry:
%1 = icmp slt <16 x i8> %a, splat (i8 0)
@@ -270,30 +53,8 @@ entry:
define i8 @vmsk_slt_allzeros_i16(<8 x i16 > %a) {
; CHECK-LABEL: vmsk_slt_allzeros_i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vslti.h $vr0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 2
-; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 3
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 5
-; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 6
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 6
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7
-; CHECK-NEXT: slli.d $a1, $a1, 7
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vmskltz.h $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
entry:
%1 = icmp slt <8 x i16> %a, splat (i16 0)
@@ -304,20 +65,8 @@ entry:
define i4 @vmsk_slt_allzeros_i32(<4 x i32 > %a) {
; CHECK-LABEL: vmsk_slt_allzeros_i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vslti.w $vr0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.w $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1
-; CHECK-NEXT: vpickve2gr.w $a1, $vr0, 2
-; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2
-; CHECK-NEXT: vpickve2gr.w $a1, $vr0, 3
-; CHECK-NEXT: slli.d $a1, $a1, 3
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: andi $a0, $a0, 15
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vmskltz.w $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
entry:
%1 = icmp slt <4 x i32> %a, splat (i32 0)
@@ -328,15 +77,8 @@ entry:
define i2 @vmsk_slt_allzeros_i64(<2 x i64 > %a) {
; CHECK-LABEL: vmsk_slt_allzeros_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vslti.d $vr0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.d $a1, $vr0, 1
-; CHECK-NEXT: slli.d $a1, $a1, 1
-; CHECK-NEXT: sub.d $a0, $a1, $a0
-; CHECK-NEXT: andi $a0, $a0, 3
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vmskltz.d $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
entry:
%1 = icmp slt <2 x i64> %a, splat (i64 0)
@@ -347,62 +89,8 @@ entry:
define i16 @vmsk_sle_allones_i8(<16 x i8 > %a) {
; CHECK-LABEL: vmsk_sle_allones_i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vslei.b $vr0, $vr0, -1
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 6
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 7
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 8
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 9
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 10
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 11
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 12
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 13
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 14
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: slli.d $a1, $a1, 15
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
entry:
%1 = icmp sle <16 x i8> %a, splat (i8 -1)
@@ -413,30 +101,8 @@ entry:
define i8 @vmsk_sle_allones_i16(<8 x i16 > %a) {
; CHECK-LABEL: vmsk_sle_allones_i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vslei.h $vr0, $vr0, -1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 2
-; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 3
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 5
-; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 6
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 6
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7
-; CHECK-NEXT: slli.d $a1, $a1, 7
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: andi $a0, $a0, 255
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vmskltz.h $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
entry:
%1 = icmp sle <8 x i16> %a, splat (i16 -1)
@@ -447,20 +113,8 @@ entry:
define i4 @vmsk_sle_allones_i32(<4 x i32 > %a) {
; CHECK-LABEL: vmsk_sle_allones_i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vslei.w $vr0, $vr0, -1
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.w $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1
-; CHECK-NEXT: vpickve2gr.w $a1, $vr0, 2
-; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2
-; CHECK-NEXT: vpickve2gr.w $a1, $vr0, 3
-; CHECK-NEXT: slli.d $a1, $a1, 3
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: andi $a0, $a0, 15
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vmskltz.w $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
entry:
%1 = icmp sle <4 x i32> %a, splat (i32 -1)
@@ -471,15 +125,8 @@ entry:
define i2 @vmsk_sle_allones_i64(<2 x i64 > %a) {
; CHECK-LABEL: vmsk_sle_allones_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vslei.d $vr0, $vr0, -1
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.d $a1, $vr0, 1
-; CHECK-NEXT: slli.d $a1, $a1, 1
-; CHECK-NEXT: sub.d $a0, $a1, $a0
-; CHECK-NEXT: andi $a0, $a0, 3
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vmskltz.d $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
entry:
%1 = icmp sle <2 x i64> %a, splat (i64 -1)
@@ -490,63 +137,8 @@ entry:
define i16 @vmsk_ne_allzeros_i8(<16 x i8 > %a) {
; CHECK-LABEL: vmsk_ne_allzeros_i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vseqi.b $vr0, $vr0, 0
-; CHECK-NEXT: vxori.b $vr0, $vr0, 255
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
-; CHECK-NEXT: bstrins.d $a0, $a1, 2, 2
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 3
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 4, 4
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
-; CHECK-NEXT: bstrins.d $a0, $a1, 5, 5
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 6
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 7
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 8
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 9
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 10
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 11
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 12
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 13
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
-; CHECK-NEXT: andi $a1, $a1, 1
-; CHECK-NEXT: slli.d $a1, $a1, 14
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
-; CHECK-NEXT: slli.d $a1, $a1, 15
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: bstrpick.d $a0, $a0, 15, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vmsknz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
entry:
%1 = icmp ne <16 x i8> %a, splat (i8 0)