From 8d58556a5ae181cd09088848696e8566e99cab34 Mon Sep 17 00:00:00 2001 From: wizardengineer Date: Wed, 5 Nov 2025 17:10:05 -0500 Subject: [PATCH] [LLVM][ARM] Add native ct.select support for ARM32 and Thumb This patch implements architecture-specific lowering for ct.select on ARM (both ARM32 and Thumb modes) using conditional move instructions and bitwise operations for constant-time selection. Implementation details: - Uses pseudo-instructions that are expanded Post-RA to bitwise operations - Post-RA expansion in ARMBaseInstrInfo of the CTSELECT pseudo-instructions into instruction bundles - Handles scalar integer types, floating-point, and half-precision types - Handles vector types with NEON when available - Support for both ARM and Thumb instruction sets (Thumb1 and Thumb2) - Special handling for Thumb1 which lacks conditional execution - Comprehensive test coverage including half-precision and vectors The implementation includes: - ISelLowering: Custom lowering to ARMISD::CTSELECT nodes - ISelDAGToDAG: Selection of appropriate pseudo-instructions - BaseInstrInfo: Post-RA expansion of CTSELECT pseudos to bundled bitwise instruction sequences - InstrInfo.td: Pseudo-instruction definitions for different types - TargetMachine: Registration of Post-RA expansion pass - Proper handling of condition codes and register allocation constraints --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 337 +++- llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 6 + llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 86 + llvm/lib/Target/ARM/ARMISelLowering.cpp | 184 +- llvm/lib/Target/ARM/ARMISelLowering.h | 13 +- llvm/lib/Target/ARM/ARMInstrInfo.td | 185 ++ llvm/lib/Target/ARM/ARMTargetMachine.cpp | 8 +- llvm/test/CodeGen/ARM/ctselect-half.ll | 975 ++++++++++ llvm/test/CodeGen/ARM/ctselect-vector.ll | 2179 ++++++++++++++++++++++ llvm/test/CodeGen/ARM/ctselect.ll | 555 ++++++ 10 files changed, 4499 insertions(+), 29 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/ctselect-half.ll create mode 100644 llvm/test/CodeGen/ARM/ctselect-vector.ll 
create mode 100644 llvm/test/CodeGen/ARM/ctselect.ll diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 22769dbf38719..6d8a3b72244fe 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1526,18 +1526,351 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { BB->erase(MI); } +// Expands the ctselect pseudo for vector operands, post-RA. +bool ARMBaseInstrInfo::expandCtSelectVector(MachineInstr &MI) const { + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + Register DestReg = MI.getOperand(0).getReg(); + Register MaskReg = MI.getOperand(1).getReg(); + + // These operations will differ by operand register size. + unsigned AndOp = ARM::VANDd; + unsigned BicOp = ARM::VBICd; + unsigned OrrOp = ARM::VORRd; + unsigned BroadcastOp = ARM::VDUP32d; + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(DestReg); + + if (ARM::QPRRegClass.hasSubClassEq(RC)) { + AndOp = ARM::VANDq; + BicOp = ARM::VBICq; + OrrOp = ARM::VORRq; + BroadcastOp = ARM::VDUP32q; + } + + unsigned RsbOp = Subtarget.isThumb2() ? ARM::t2RSBri : ARM::RSBri; + + // Any vector pseudo has: ((outs $dst, $tmp_mask, $bcast_mask), (ins $src1, + // $src2, $cond)) + Register VectorMaskReg = MI.getOperand(2).getReg(); + Register Src1Reg = MI.getOperand(3).getReg(); + Register Src2Reg = MI.getOperand(4).getReg(); + Register CondReg = MI.getOperand(5).getReg(); + + // The following sequence of steps yields: (src1 & mask) | (src2 & ~mask) + + // 1. mask = 0 - cond + // When cond = 0: mask = 0x00000000. + // When cond = 1: mask = 0xFFFFFFFF. + + MachineInstr *FirstNewMI = BuildMI(*MBB, MI, DL, get(RsbOp), MaskReg) + .addReg(CondReg) + .addImm(0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // 2. 
A = src1 & mask + // For vectors, broadcast the scalar mask so it matches operand size. + BuildMI(*MBB, MI, DL, get(BroadcastOp), VectorMaskReg) + .addReg(MaskReg) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + BuildMI(*MBB, MI, DL, get(AndOp), DestReg) + .addReg(Src1Reg) + .addReg(VectorMaskReg) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // 3. B = src2 & ~mask + BuildMI(*MBB, MI, DL, get(BicOp), VectorMaskReg) + .addReg(Src2Reg) + .addReg(VectorMaskReg) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // 4. result = A | B + auto LastNewMI = BuildMI(*MBB, MI, DL, get(OrrOp), DestReg) + .addReg(DestReg) + .addReg(VectorMaskReg) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + auto BundleStart = FirstNewMI->getIterator(); + auto BundleEnd = LastNewMI->getIterator(); + + // Add instruction bundling + finalizeBundle(*MBB, BundleStart, std::next(BundleEnd)); + + MI.eraseFromParent(); + return true; +} + +// Expands the ctselect pseudo for thumb1, post-RA. +bool ARMBaseInstrInfo::expandCtSelectThumb(MachineInstr &MI) const { + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + // pseudos in thumb1 mode have: (outs $dst, $tmp_mask), (ins $src1, $src2, + // $cond)) register class here is always tGPR. 
+ Register DestReg = MI.getOperand(0).getReg(); + Register MaskReg = MI.getOperand(1).getReg(); + Register Src1Reg = MI.getOperand(2).getReg(); + Register Src2Reg = MI.getOperand(3).getReg(); + Register CondReg = MI.getOperand(4).getReg(); + + // Access register info + MachineFunction *MF = MBB->getParent(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + unsigned RegSize = TRI->getRegSizeInBits(MaskReg, MRI); + unsigned ShiftAmount = RegSize - 1; + + // Option 1: Shift-based mask (preferred - no flag modification) + MachineInstr *FirstNewMI = BuildMI(*MBB, MI, DL, get(ARM::tMOVr), MaskReg) + .addReg(CondReg) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // Instead of using RSB, we can use LSL and ASR to get the mask. This is to + // avoid the flag modification caused by RSB. tLSLri: (outs tGPR:$Rd, + // s_cc_out:$s), (ins tGPR:$Rm, imm0_31:$imm5, pred:$p) + BuildMI(*MBB, MI, DL, get(ARM::tLSLri), MaskReg) + .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s + .addReg(MaskReg) // $Rm + .addImm(ShiftAmount) // imm0_31:$imm5 + .add(predOps(ARMCC::AL)) // pred:$p + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // tASRri: (outs tGPR:$Rd, s_cc_out:$s), (ins tGPR:$Rm, imm_sr:$imm5, pred:$p) + BuildMI(*MBB, MI, DL, get(ARM::tASRri), MaskReg) + .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s + .addReg(MaskReg) // $Rm + .addImm(ShiftAmount) // imm_sr:$imm5 + .add(predOps(ARMCC::AL)) // pred:$p + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // 2. 
xor_diff = src1 ^ src2 + BuildMI(*MBB, MI, DL, get(ARM::tMOVr), DestReg) + .addReg(Src1Reg) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // tEOR has tied operands: (outs tGPR:$Rdn, s_cc_out:$s), (ins tGPR:$Rn, + // pred:$p) with constraint "$Rn = $Rdn" + BuildMI(*MBB, MI, DL, get(ARM::tEOR), DestReg) + .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s + .addReg(DestReg) // tied input $Rn + .addReg(Src2Reg) // $Rm + .add(predOps(ARMCC::AL)) // pred:$p + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // 3. masked_xor = xor_diff & mask + // tAND has tied operands: (outs tGPR:$Rdn, s_cc_out:$s), (ins tGPR:$Rn, + // pred:$p) with constraint "$Rn = $Rdn" + BuildMI(*MBB, MI, DL, get(ARM::tAND), DestReg) + .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s + .addReg(DestReg) // tied input $Rn + .addReg(MaskReg, RegState::Kill) // $Rm + .add(predOps(ARMCC::AL)) // pred:$p + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // 4. result = src2 ^ masked_xor + // tEOR has tied operands: (outs tGPR:$Rdn, s_cc_out:$s), (ins tGPR:$Rn, + // pred:$p) with constraint "$Rn = $Rdn" + auto LastMI = + BuildMI(*MBB, MI, DL, get(ARM::tEOR), DestReg) + .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s + .addReg(DestReg) // tied input $Rn + .addReg(Src2Reg) // $Rm + .add(predOps(ARMCC::AL)) // pred:$p + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // Add instruction bundling + auto BundleStart = FirstNewMI->getIterator(); + finalizeBundle(*MBB, BundleStart, std::next(LastMI->getIterator())); + + MI.eraseFromParent(); + return true; +} + +// Expands the ctselect pseudo, post-RA. 
+bool ARMBaseInstrInfo::expandCtSelect(MachineInstr &MI) const { + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + Register DestReg = MI.getOperand(0).getReg(); + Register MaskReg = MI.getOperand(1).getReg(); + Register DestRegSavedRef = DestReg; + Register Src1Reg, Src2Reg, CondReg; + + // These operations will differ by operand register size. + unsigned RsbOp = ARM::RSBri; + unsigned AndOp = ARM::ANDrr; + unsigned BicOp = ARM::BICrr; + unsigned OrrOp = ARM::ORRrr; + + if (Subtarget.isThumb2()) { + RsbOp = ARM::t2RSBri; + AndOp = ARM::t2ANDrr; + BicOp = ARM::t2BICrr; + OrrOp = ARM::t2ORRrr; + } + + unsigned Opcode = MI.getOpcode(); + bool IsFloat = Opcode == ARM::CTSELECTf32 || Opcode == ARM::CTSELECTf16 || + Opcode == ARM::CTSELECTbf16; + MachineInstr *FirstNewMI = nullptr; + if (IsFloat) { + // Each float pseudo has: (outs $dst, $tmp_mask, $scratch1, $scratch2), (ins + // $src1, $src2, $cond)) We use two scratch registers in tablegen for + // bitwise ops on float types,. + Register GPRScratch1 = MI.getOperand(2).getReg(); + Register GPRScratch2 = MI.getOperand(3).getReg(); + + // choice a from __builtin_ct_select(cond, a, b) + Src1Reg = MI.getOperand(4).getReg(); + // choice b from __builtin_ct_select(cond, a, b) + Src2Reg = MI.getOperand(5).getReg(); + // cond from __builtin_ct_select(cond, a, b) + CondReg = MI.getOperand(6).getReg(); + + // Move fp src1 to GPR scratch1 so we can do our bitwise ops + FirstNewMI = BuildMI(*MBB, MI, DL, get(ARM::VMOVRS), GPRScratch1) + .addReg(Src1Reg) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // Move src2 to scratch2 + BuildMI(*MBB, MI, DL, get(ARM::VMOVRS), GPRScratch2) + .addReg(Src2Reg) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + Src1Reg = GPRScratch1; + Src2Reg = GPRScratch2; + // Reuse GPRScratch1 for dest after we are done working with src1. 
+ DestReg = GPRScratch1; + } else { + // Any non-float, non-vector pseudo has: (outs $dst, $tmp_mask), (ins $src1, + // $src2, $cond)) + Src1Reg = MI.getOperand(2).getReg(); + Src2Reg = MI.getOperand(3).getReg(); + CondReg = MI.getOperand(4).getReg(); + } + + // The following sequence of steps yields: (src1 & mask) | (src2 & ~mask) + + // 1. mask = 0 - cond + // When cond = 0: mask = 0x00000000. + // When cond = 1: mask = 0xFFFFFFFF. + auto TmpNewMI = BuildMI(*MBB, MI, DL, get(RsbOp), MaskReg) + .addReg(CondReg) + .addImm(0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // We use the first instruction in the bundle as the first instruction. + if (!FirstNewMI) + FirstNewMI = TmpNewMI; + + // 2. A = src1 & mask + BuildMI(*MBB, MI, DL, get(AndOp), DestReg) + .addReg(Src1Reg) + .addReg(MaskReg) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // 3. B = src2 & ~mask + BuildMI(*MBB, MI, DL, get(BicOp), MaskReg) + .addReg(Src2Reg) + .addReg(MaskReg) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + // 4. result = A | B + auto LastNewMI = BuildMI(*MBB, MI, DL, get(OrrOp), DestReg) + .addReg(DestReg) + .addReg(MaskReg) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + + if (IsFloat) { + // Return our result from GPR to the correct register type. 
+ LastNewMI = BuildMI(*MBB, MI, DL, get(ARM::VMOVSR), DestRegSavedRef) + .addReg(DestReg) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::MIFlag::NoMerge); + } + + auto BundleStart = FirstNewMI->getIterator(); + auto BundleEnd = LastNewMI->getIterator(); + + // Add instruction bundling + finalizeBundle(*MBB, BundleStart, std::next(BundleEnd)); + + MI.eraseFromParent(); + return true; +} + bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { - if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { + auto opcode = MI.getOpcode(); + + if (opcode == TargetOpcode::LOAD_STACK_GUARD) { expandLoadStackGuard(MI); MI.getParent()->erase(MI); return true; } - if (MI.getOpcode() == ARM::MEMCPY) { + if (opcode == ARM::MEMCPY) { expandMEMCPY(MI); return true; } + if (opcode == ARM::CTSELECTf64) { + if (Subtarget.isThumb1Only()) { + LLVM_DEBUG(dbgs() << "Opcode (thumb1 subtarget) " << opcode + << "replaced by: " << MI); + return expandCtSelectThumb(MI); + } else { + LLVM_DEBUG(dbgs() << "Opcode (vector) " << opcode + << "replaced by: " << MI); + return expandCtSelectVector(MI); + } + } + + if (opcode == ARM::CTSELECTv8i8 || opcode == ARM::CTSELECTv4i16 || + opcode == ARM::CTSELECTv2i32 || opcode == ARM::CTSELECTv1i64 || + opcode == ARM::CTSELECTv2f32 || opcode == ARM::CTSELECTv4f16 || + opcode == ARM::CTSELECTv4bf16 || opcode == ARM::CTSELECTv16i8 || + opcode == ARM::CTSELECTv8i16 || opcode == ARM::CTSELECTv4i32 || + opcode == ARM::CTSELECTv2i64 || opcode == ARM::CTSELECTv4f32 || + opcode == ARM::CTSELECTv2f64 || opcode == ARM::CTSELECTv8f16 || + opcode == ARM::CTSELECTv8bf16) { + LLVM_DEBUG(dbgs() << "Opcode (vector) " << opcode << "replaced by: " << MI); + return expandCtSelectVector(MI); + } + + if (opcode == ARM::CTSELECTint || opcode == ARM::CTSELECTf16 || + opcode == ARM::CTSELECTbf16 || opcode == ARM::CTSELECTf32) { + if (Subtarget.isThumb1Only()) { + LLVM_DEBUG(dbgs() << "Opcode (thumb1 subtarget) " << opcode + << "replaced by: " << MI); + return 
expandCtSelectThumb(MI); + } else { + LLVM_DEBUG(dbgs() << "Opcode " << opcode << "replaced by: " << MI); + return expandCtSelect(MI); + } + } + // This hook gets to expand COPY instructions before they become // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 2869e7f708046..f0e090f09f5dc 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -221,6 +221,12 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; + bool expandCtSelectVector(MachineInstr &MI) const; + + bool expandCtSelectThumb(MachineInstr &MI) const; + + bool expandCtSelect(MachineInstr &MI) const; + bool expandPostRAPseudo(MachineInstr &MI) const override; bool shouldSink(const MachineInstr &MI) const override; diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 847b7af5a9b11..3fdc5734baaa5 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -4200,6 +4200,92 @@ void ARMDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. 
break; } + case ARMISD::CTSELECT: { + EVT VT = N->getValueType(0); + unsigned PseudoOpcode; + bool IsFloat = false; + bool IsVector = false; + + if (VT == MVT::f16) { + PseudoOpcode = ARM::CTSELECTf16; + IsFloat = true; + } else if (VT == MVT::bf16) { + PseudoOpcode = ARM::CTSELECTbf16; + IsFloat = true; + } else if (VT == MVT::f32) { + PseudoOpcode = ARM::CTSELECTf32; + IsFloat = true; + } else if (VT == MVT::f64) { + PseudoOpcode = ARM::CTSELECTf64; + IsVector = true; + } else if (VT == MVT::v8i8) { + PseudoOpcode = ARM::CTSELECTv8i8; + IsVector = true; + } else if (VT == MVT::v4i16) { + PseudoOpcode = ARM::CTSELECTv4i16; + IsVector = true; + } else if (VT == MVT::v2i32) { + PseudoOpcode = ARM::CTSELECTv2i32; + IsVector = true; + } else if (VT == MVT::v1i64) { + PseudoOpcode = ARM::CTSELECTv1i64; + IsVector = true; + } else if (VT == MVT::v2f32) { + PseudoOpcode = ARM::CTSELECTv2f32; + IsVector = true; + } else if (VT == MVT::v4f16) { + PseudoOpcode = ARM::CTSELECTv4f16; + IsVector = true; + } else if (VT == MVT::v4bf16) { + PseudoOpcode = ARM::CTSELECTv4bf16; + IsVector = true; + } else if (VT == MVT::v16i8) { + PseudoOpcode = ARM::CTSELECTv16i8; + IsVector = true; + } else if (VT == MVT::v8i16) { + PseudoOpcode = ARM::CTSELECTv8i16; + IsVector = true; + } else if (VT == MVT::v4i32) { + PseudoOpcode = ARM::CTSELECTv4i32; + IsVector = true; + } else if (VT == MVT::v2i64) { + PseudoOpcode = ARM::CTSELECTv2i64; + IsVector = true; + } else if (VT == MVT::v4f32) { + PseudoOpcode = ARM::CTSELECTv4f32; + IsVector = true; + } else if (VT == MVT::v2f64) { + PseudoOpcode = ARM::CTSELECTv2f64; + IsVector = true; + } else if (VT == MVT::v8f16) { + PseudoOpcode = ARM::CTSELECTv8f16; + IsVector = true; + } else if (VT == MVT::v8bf16) { + PseudoOpcode = ARM::CTSELECTv8bf16; + IsVector = true; + } else { + // i1, i8, i16, i32, i64 + PseudoOpcode = ARM::CTSELECTint; + } + + SmallVector VTs; + VTs.push_back(VT); // $dst + VTs.push_back(MVT::i32); // $tmp_mask (always GPR) + + if 
(IsVector) { + VTs.push_back(VT); // $bcast_mask (same type as dst for vectors) + } else if (IsFloat) { + VTs.push_back(MVT::i32); // $scratch1 (GPR) + VTs.push_back(MVT::i32); // $scratch2 (GPR) + } + + // src1, src2, cond + SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2)}; + + SDNode *ResNode = CurDAG->getMachineNode(PseudoOpcode, SDLoc(N), VTs, Ops); + ReplaceNode(N, ResNode); + return; + } case ARMISD::VZIP: { EVT VT = N->getValueType(0); // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 6b0653457cbaf..63005f1c9f989 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -203,6 +203,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) { setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::CTSELECT, VT, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT, Custom); @@ -304,6 +305,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::CTSELECT, VT, Custom); // Vector reductions setOperationAction(ISD::VECREDUCE_ADD, VT, Legal); @@ -355,6 +357,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::MSTORE, VT, Legal); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::CTSELECT, VT, Custom); // Pre and Post inc are supported on loads and stores for (unsigned im = (unsigned)ISD::PRE_INC; @@ -408,6 +411,28 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, 
Custom); setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom); + if (Subtarget->hasFullFP16()) { + setOperationAction(ISD::CTSELECT, MVT::v4f16, Custom); + setOperationAction(ISD::CTSELECT, MVT::v8f16, Custom); + } + + if (Subtarget->hasBF16()) { + setOperationAction(ISD::CTSELECT, MVT::v4bf16, Custom); + setOperationAction(ISD::CTSELECT, MVT::v8bf16, Custom); + } + + // small exotic vectors get scalarised for ctselect + setOperationAction(ISD::CTSELECT, MVT::v1i8, Expand); + setOperationAction(ISD::CTSELECT, MVT::v1i16, Expand); + setOperationAction(ISD::CTSELECT, MVT::v1i32, Expand); + setOperationAction(ISD::CTSELECT, MVT::v1f32, Expand); + setOperationAction(ISD::CTSELECT, MVT::v2i8, Expand); + + setOperationAction(ISD::CTSELECT, MVT::v2i16, Promote); + setOperationPromotedToType(ISD::CTSELECT, MVT::v2i16, MVT::v4i16); + setOperationAction(ISD::CTSELECT, MVT::v4i8, Promote); + setOperationPromotedToType(ISD::CTSELECT, MVT::v4i8, MVT::v8i8); + // We 'support' these types up to bitcast/load/store level, regardless of // MVE integer-only / float support. Only doing FP data processing on the FP // vector types is inhibited at integer-only level. 
@@ -419,6 +444,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::CTSELECT, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); @@ -474,6 +500,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::CTSELECT, VT, Custom); if (!HasMVEFP) { setOperationAction(ISD::SINT_TO_FP, VT, Expand); @@ -1247,10 +1274,27 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + setOperationAction(ISD::CTSELECT, MVT::i8, Promote); + setOperationAction(ISD::CTSELECT, MVT::i16, Promote); + setOperationPromotedToType(ISD::CTSELECT, MVT::i16, MVT::i32); + + setOperationAction(ISD::CTSELECT, MVT::i32, Custom); + setOperationAction(ISD::CTSELECT, MVT::i64, Expand); + setOperationAction(ISD::CTSELECT, MVT::f32, Custom); + setOperationAction(ISD::CTSELECT, MVT::f64, Custom); + + // Handle f16 and bf16 without falling back to select from ctselect. 
+ setTargetDAGCombine({ISD::CTSELECT}); + if (Subtarget->hasFullFP16()) { setOperationAction(ISD::SETCC, MVT::f16, Expand); setOperationAction(ISD::SELECT, MVT::f16, Custom); setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); + setOperationAction(ISD::CTSELECT, MVT::f16, Custom); + } + + if (Subtarget->hasBF16()) { + setOperationAction(ISD::CTSELECT, MVT::bf16, Custom); } setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom); @@ -1589,6 +1633,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(ARMISD::BCC_i64) MAKE_CASE(ARMISD::FMSTAT) MAKE_CASE(ARMISD::CMOV) + MAKE_CASE(ARMISD::CTSELECT) MAKE_CASE(ARMISD::SSAT) MAKE_CASE(ARMISD::USAT) MAKE_CASE(ARMISD::ASRL) @@ -5129,6 +5174,20 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SelectTrue, SelectFalse, ISD::SETNE); } +SDValue ARMTargetLowering::LowerCTSELECT(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + + SDValue Cond = Op.getOperand(0); + SDValue TrueVal = Op.getOperand(1); + SDValue FalseVal = Op.getOperand(2); + EVT VT = Op.getValueType(); + + // Normalise the condition to 0 or 1. 
+ SDValue One = DAG.getConstant(1, DL, MVT::i32); + SDValue CondNode = DAG.getNode(ISD::AND, DL, MVT::i32, Cond, One); + return DAG.getNode(ARMISD::CTSELECT, DL, VT, TrueVal, FalseVal, CondNode); +} + static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps) { // Start by selecting the GE condition code for opcodes that return true for @@ -10628,6 +10687,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::CTSELECT: + return LowerCTSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); @@ -10857,6 +10918,36 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::FP_TO_UINT_SAT: Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget); break; + case ISD::CTSELECT: { + EVT VT = N->getValueType(0); + + // Handle f16/bf16 type promotion while preserving ctselect + if (VT == MVT::f16 || VT == MVT::bf16) { + SDLoc DL(N); + SDValue Cond = N->getOperand(0); + SDValue TrueVal = N->getOperand(1); + SDValue FalseVal = N->getOperand(2); + + // Bitcast to i16, then promote to i32 + SDValue TrueInt = DAG.getBitcast(MVT::i16, TrueVal); + SDValue FalseInt = DAG.getBitcast(MVT::i16, FalseVal); + + TrueInt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, TrueInt); + FalseInt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, FalseInt); + + // Normalize condition + SDValue One = DAG.getConstant(1, DL, MVT::i32); + SDValue CondNorm = DAG.getNode(ISD::AND, DL, MVT::i32, Cond, One); + + // Create i32 ctselect that will go through normal lowering + Res = + DAG.getNode(ISD::CTSELECT, DL, MVT::i32, CondNorm, TrueInt, FalseInt); + } else { + // For other types, use existing lowering + Res = LowerCTSELECT(SDValue(N, 0), DAG); + } + 
break; + } } if (Res.getNode()) Results.push_back(Res); @@ -13478,6 +13569,64 @@ static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) { DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts)); } +static SDValue PerformCTSELECTCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + if (!DCI.isBeforeLegalize()) { + return SDValue(); + } + + SelectionDAG &DAG = DCI.DAG; + SDLoc DL(N); + + EVT VT = N->getValueType(0); + if (VT == MVT::f16 || VT == MVT::bf16) { + SDValue Cond = N->getOperand(0); + SDValue TrueVal = N->getOperand(1); + SDValue FalseVal = N->getOperand(2); + + SDValue TrueInt = DAG.getBitcast(MVT::i16, TrueVal); + SDValue FalseInt = DAG.getBitcast(MVT::i16, FalseVal); + + // Create i16 ctselect - this will be promoted to i32 ctselect naturally + SDValue Result = + DAG.getNode(ISD::CTSELECT, DL, MVT::i16, Cond, TrueInt, FalseInt); + + return DAG.getBitcast(VT, Result); + } else if (VT.isVector()) { + EVT EltVT = VT.getVectorElementType(); + if (EltVT == MVT::f16 || EltVT == MVT::bf16) { + SDValue Cond = N->getOperand(0); + SDValue TrueVal = N->getOperand(1); + SDValue FalseVal = N->getOperand(2); + + EVT IntVT; + switch (VT.getSimpleVT().SimpleTy) { + case MVT::v4f16: + case MVT::v4bf16: + IntVT = MVT::v4i16; + break; + case MVT::v8f16: + case MVT::v8bf16: + IntVT = MVT::v8i16; + break; + default: + return SDValue(); // Unsupported vector type + } + + SDValue TrueInt = DAG.getBitcast(IntVT, TrueVal); + SDValue FalseInt = DAG.getBitcast(IntVT, FalseVal); + + SDValue Result = + DAG.getNode(ISD::CTSELECT, DL, IntVT, Cond, TrueInt, FalseInt); + + return DAG.getBitcast(VT, Result); + } + } + + return SDValue(); +} + static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -18981,6 +19130,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SELECT_CC: case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget); case 
ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget); + case ISD::CTSELECT: + return PerformCTSELECTCombine(N, DCI, Subtarget); case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget); case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget); @@ -21394,28 +21545,21 @@ bool ARMTargetLowering::useLoadStackGuardNode(const Module &M) const { } void ARMTargetLowering::insertSSPDeclarations(Module &M) const { - // MSVC CRT provides functionalities for stack protection. RTLIB::LibcallImpl SecurityCheckCookieLibcall = getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); - - RTLIB::LibcallImpl SecurityCookieVar = - getLibcallImpl(RTLIB::STACK_CHECK_GUARD); - if (SecurityCheckCookieLibcall != RTLIB::Unsupported && - SecurityCookieVar != RTLIB::Unsupported) { - // MSVC CRT has a global variable holding security cookie. - M.getOrInsertGlobal(getLibcallImplName(SecurityCookieVar), - PointerType::getUnqual(M.getContext())); - - // MSVC CRT has a function to validate security cookie. - FunctionCallee SecurityCheckCookie = - M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall), - Type::getVoidTy(M.getContext()), - PointerType::getUnqual(M.getContext())); - if (Function *F = dyn_cast(SecurityCheckCookie.getCallee())) - F->addParamAttr(0, Attribute::AttrKind::InReg); - } - - TargetLowering::insertSSPDeclarations(M); + if (SecurityCheckCookieLibcall == RTLIB::Unsupported) + return TargetLowering::insertSSPDeclarations(M); + + // MSVC CRT has a global variable holding security cookie. + M.getOrInsertGlobal("__security_cookie", + PointerType::getUnqual(M.getContext())); + + // MSVC CRT has a function to validate security cookie. 
+ FunctionCallee SecurityCheckCookie = M.getOrInsertFunction( + getLibcallImplName(SecurityCheckCookieLibcall), + Type::getVoidTy(M.getContext()), PointerType::getUnqual(M.getContext())); + if (Function *F = dyn_cast(SecurityCheckCookie.getCallee())) + F->addParamAttr(0, Attribute::AttrKind::InReg); } bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index bf3438b0d8803..90aa1bf162694 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -97,6 +97,9 @@ class VectorType; CMOV, // ARM conditional move instructions. + CTSELECT, // ARM constant-time select, implemented with constant-time + // bitwise arithmetic instructions. + SSAT, // Signed saturation USAT, // Unsigned saturation @@ -430,8 +433,12 @@ class VectorType; const char *getTargetNodeName(unsigned Opcode) const override; bool isSelectSupported(SelectSupportKind Kind) const override { - // ARM does not support scalar condition selects on vectors. - return (Kind != ScalarCondVectorVal); + if (Kind == SelectSupportKind::CtSelect) { + return true; + } else { + // ARM does not support scalar condition selects on vectors. 
+ return (Kind != SelectSupportKind::ScalarCondVectorVal); + } } bool isReadOnly(const GlobalValue *GV) const; @@ -885,6 +892,7 @@ class VectorType; SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; @@ -1032,6 +1040,7 @@ class VectorType; MachineBasicBlock *MBB) const; MachineBasicBlock *EmitLowered__dbzchk(MachineInstr &MI, MachineBasicBlock *MBB) const; + void addMVEVectorTypes(bool HasMVEFP); void addAllExtLoads(const MVT From, const MVT To, LegalizeAction Action); void setAllExpand(MVT VT); diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index f7176a65d8163..b63d041081098 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -32,6 +32,13 @@ def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>; def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_ARMCtSelect : SDTypeProfile<1, 3, [ + /* any */ // result + SDTCisSameAs<1, 0>, // value on false + SDTCisSameAs<2, 0>, // value on true + SDTCisVT<3, i32> // cond +]>; + def SDT_ARMCMov : SDTypeProfile<1, 4, [ /* any */ // result SDTCisSameAs<1, 0>, // value on false @@ -188,6 +195,7 @@ def ARMseretglue : SDNode<"ARMISD::SERET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMintretglue : SDNode<"ARMISD::INTRET_GLUE", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def ARMctselect : SDNode<"ARMISD::CTSELECT", SDT_ARMCtSelect>; def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov>; def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; @@ -5108,6 +5116,183 @@ def : ARMPat<(ARMcmov i32:$false, mod_imm_not:$imm, imm:$cc, CPSR), def : 
ARMV6T2Pat<(ARMcmov i32:$false, imm:$src, imm:$cc, CPSR), (MOVCCi32imm $false, imm:$src, imm:$cc, CPSR)>; +//===----------------------------------------------------------------------===// +// Constant-time selection pseudoinstructions. +// We use a machine pass to lower these pseudos as applicable by subtarget, +// in order to avoid backend optimizations that could invalidate constant-time +// guarantees to the source programmer by node merging or other operations that +// would result in machine code that does not run in constant time. +let isNotDuplicable = 1, isPseudo = 1, hasNoSchedulingInfo = 1 in { + + // i1, i8, i16, i32, i64 + def CTSELECTint : ARMPseudoInst<(outs GPR:$dst, GPR:$tmp_mask), + (ins GPR:$src1, GPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber $tmp_mask"; + } + + def CTSELECTf16 + : ARMPseudoInst< + (outs HPR:$dst, GPR:$tmp_mask, GPR:$scratch1, GPR:$scratch2), + (ins HPR:$src1, HPR:$src2, GPR:$cond), 4, NoItinerary, []> { + let Constraints = + "@earlyclobber $dst,@earlyclobber $tmp_mask,@earlyclobber " + "$scratch1,@earlyclobber $scratch2"; + } + + def CTSELECTbf16 + : ARMPseudoInst< + (outs HPR:$dst, GPR:$tmp_mask, GPR:$scratch1, GPR:$scratch2), + (ins HPR:$src1, HPR:$src2, GPR:$cond), 4, NoItinerary, []> { + let Constraints = + "@earlyclobber $dst,@earlyclobber $tmp_mask,@earlyclobber " + "$scratch1,@earlyclobber $scratch2"; + } + + def CTSELECTf32 + : ARMPseudoInst< + (outs SPR:$dst, GPR:$tmp_mask, GPR:$scratch1, GPR:$scratch2), + (ins SPR:$src1, SPR:$src2, GPR:$cond), 4, NoItinerary, []> { + let Constraints = + "@earlyclobber $dst,@earlyclobber $tmp_mask,@earlyclobber " + "$scratch1,@earlyclobber $scratch2"; + } + + let Predicates = [HasDPVFP] in { + def CTSELECTf64 + : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask), + (ins DPR:$src1, DPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; 
+ } + } + + let Predicates = [HasNEON] in { + // DPR + def CTSELECTv8i8 + : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask), + (ins DPR:$src1, DPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv4i16 + : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask), + (ins DPR:$src1, DPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv2i32 + : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask), + (ins DPR:$src1, DPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv1i64 + : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask), + (ins DPR:$src1, DPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv2f32 + : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask), + (ins DPR:$src1, DPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv4f16 + : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask), + (ins DPR:$src1, DPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv4bf16 + : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask), + (ins DPR:$src1, DPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + // QPR + def CTSELECTv16i8 + : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask), + (ins QPR:$src1, QPR:$src2, GPR:$cond), 4, + NoItinerary, []> { 
+ let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv8i16 + : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask), + (ins QPR:$src1, QPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv4i32 + : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask), + (ins QPR:$src1, QPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv2i64 + : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask), + (ins QPR:$src1, QPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv4f32 + : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask), + (ins QPR:$src1, QPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv2f64 + : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask), + (ins QPR:$src1, QPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv8f16 + : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask), + (ins QPR:$src1, QPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + + def CTSELECTv8bf16 + : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask), + (ins QPR:$src1, QPR:$src2, GPR:$cond), 4, + NoItinerary, []> { + let Constraints = "@earlyclobber $dst,@earlyclobber " + "$tmp_mask,@earlyclobber $bcast_mask"; + } + } +} + //===----------------------------------------------------------------------===// // Atomic operations 
intrinsics // diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 590d4c70592f8..abde3ae28a751 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -521,13 +521,11 @@ void ARMPassConfig::addPreSched2() { void ARMPassConfig::addPreEmitPass() { addPass(createThumb2SizeReductionPass()); - // Unpack bundles for: + // Always unpack bundles for: // - Thumb2: Constant island pass requires unbundled instructions // - KCFI: KCFI_CHECK pseudo instructions need to be unbundled for AsmPrinter - addPass(createUnpackMachineBundles([](const MachineFunction &MF) { - return MF.getSubtarget().isThumb2() || - MF.getFunction().getParent()->getModuleFlag("kcfi"); - })); + // - CTSELECT: Post-RA expansion creates bundles that must be unpacked + addPass(createUnpackMachineBundles(nullptr)); // Don't optimize barriers or block placement at -O0. if (getOptLevel() != CodeGenOptLevel::None) { diff --git a/llvm/test/CodeGen/ARM/ctselect-half.ll b/llvm/test/CodeGen/ARM/ctselect-half.ll new file mode 100644 index 0000000000000..fed3387ce8f53 --- /dev/null +++ b/llvm/test/CodeGen/ARM/ctselect-half.ll @@ -0,0 +1,975 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=armv7-none-eabi -verify-machineinstrs | FileCheck --check-prefixes=CT %s +; RUN: llc < %s -mtriple=armv8.6a-none-eabi -verify-machineinstrs | FileCheck --check-prefixes=BFLOAT-F16-NATIVE %s +; RUN: llc < %s -mtriple=armv8.2a-none-eabi -verify-machineinstrs | FileCheck --check-prefixes=F16-NATIVE %s +; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs | FileCheck --check-prefix=THUMB1 %s +; RUN: llc < %s -mtriple=thumbv7m-none-eabi -verify-machineinstrs | FileCheck --check-prefix=THUMB2 %s + +define half @ct_half(i1 %cond, half %a, half %b) { +; CT-LABEL: ct_half: +; CT: @ %bb.0: @ %entry +; CT-NEXT: and r3, r0, #1 +; CT-NEXT: rsb r12, r3, #0 
+; CT-NEXT: and r0, r1, r12 +; CT-NEXT: bic r12, r2, r12 +; CT-NEXT: orr r0, r0, r12 +; CT-NEXT: bx lr +; +; BFLOAT-F16-NATIVE-LABEL: ct_half: +; BFLOAT-F16-NATIVE: @ %bb.0: @ %entry +; BFLOAT-F16-NATIVE-NEXT: and r3, r0, #1 +; BFLOAT-F16-NATIVE-NEXT: rsb r12, r3, #0 +; BFLOAT-F16-NATIVE-NEXT: and r0, r1, r12 +; BFLOAT-F16-NATIVE-NEXT: bic r12, r2, r12 +; BFLOAT-F16-NATIVE-NEXT: orr r0, r0, r12 +; BFLOAT-F16-NATIVE-NEXT: bx lr +; +; F16-NATIVE-LABEL: ct_half: +; F16-NATIVE: @ %bb.0: @ %entry +; F16-NATIVE-NEXT: and r3, r0, #1 +; F16-NATIVE-NEXT: rsb r12, r3, #0 +; F16-NATIVE-NEXT: and r0, r1, r12 +; F16-NATIVE-NEXT: bic r12, r2, r12 +; F16-NATIVE-NEXT: orr r0, r0, r12 +; F16-NATIVE-NEXT: bx lr +; +; THUMB1-LABEL: ct_half: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, lr} +; THUMB1-NEXT: push {r4, lr} +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: ands r3, r0 +; THUMB1-NEXT: mov r4, r3 +; THUMB1-NEXT: lsls r4, r4, #31 +; THUMB1-NEXT: asrs r4, r4, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: ands r0, r4 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: pop {r4, pc} +; +; THUMB2-LABEL: ct_half: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: and r3, r0, #1 +; THUMB2-NEXT: rsb.w r12, r3, #0 +; THUMB2-NEXT: and.w r0, r1, r12 +; THUMB2-NEXT: bic.w r12, r2, r12 +; THUMB2-NEXT: orr.w r0, r0, r12 +; THUMB2-NEXT: bx lr +entry: + %sel = call half @llvm.ct.select.f16(i1 %cond, half %a, half %b) + ret half %sel +} + +define bfloat @ct_bf16(i1 %cond, bfloat %a, bfloat %b) { +; CT-LABEL: ct_bf16: +; CT: @ %bb.0: @ %entry +; CT-NEXT: and r3, r0, #1 +; CT-NEXT: rsb r12, r3, #0 +; CT-NEXT: and r0, r1, r12 +; CT-NEXT: bic r12, r2, r12 +; CT-NEXT: orr r0, r0, r12 +; CT-NEXT: bx lr +; +; BFLOAT-F16-NATIVE-LABEL: ct_bf16: +; BFLOAT-F16-NATIVE: @ %bb.0: @ %entry +; BFLOAT-F16-NATIVE-NEXT: .pad #4 +; BFLOAT-F16-NATIVE-NEXT: sub sp, sp, #4 +; BFLOAT-F16-NATIVE-NEXT: and r0, r0, #1 +; BFLOAT-F16-NATIVE-NEXT: rsb r12, r0, #0 +; BFLOAT-F16-NATIVE-NEXT: and r3, r1, 
r12 +; BFLOAT-F16-NATIVE-NEXT: bic r12, r2, r12 +; BFLOAT-F16-NATIVE-NEXT: orr r3, r3, r12 +; BFLOAT-F16-NATIVE-NEXT: strh r3, [sp, #2] +; BFLOAT-F16-NATIVE-NEXT: ldrh r0, [sp, #2] +; BFLOAT-F16-NATIVE-NEXT: add sp, sp, #4 +; BFLOAT-F16-NATIVE-NEXT: bx lr +; +; F16-NATIVE-LABEL: ct_bf16: +; F16-NATIVE: @ %bb.0: @ %entry +; F16-NATIVE-NEXT: and r3, r0, #1 +; F16-NATIVE-NEXT: rsb r12, r3, #0 +; F16-NATIVE-NEXT: and r0, r1, r12 +; F16-NATIVE-NEXT: bic r12, r2, r12 +; F16-NATIVE-NEXT: orr r0, r0, r12 +; F16-NATIVE-NEXT: bx lr +; +; THUMB1-LABEL: ct_bf16: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, lr} +; THUMB1-NEXT: push {r4, lr} +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: ands r3, r0 +; THUMB1-NEXT: mov r4, r3 +; THUMB1-NEXT: lsls r4, r4, #31 +; THUMB1-NEXT: asrs r4, r4, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: ands r0, r4 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: pop {r4, pc} +; +; THUMB2-LABEL: ct_bf16: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: and r3, r0, #1 +; THUMB2-NEXT: rsb.w r12, r3, #0 +; THUMB2-NEXT: and.w r0, r1, r12 +; THUMB2-NEXT: bic.w r12, r2, r12 +; THUMB2-NEXT: orr.w r0, r0, r12 +; THUMB2-NEXT: bx lr +entry: + %sel = call bfloat @llvm.ct.select.bf16(i1 %cond, bfloat %a, bfloat %b) + ret bfloat %sel +} + +define <4 x half> @ct_v4f16(i1 %cond, <4 x half> %a, <4 x half> %b) { +; CT-LABEL: ct_v4f16: +; CT: @ %bb.0: @ %entry +; CT-NEXT: .save {r4, r5, r6, lr} +; CT-NEXT: push {r4, r5, r6, lr} +; CT-NEXT: ldrh r1, [sp, #20] +; CT-NEXT: pkhbt r2, r2, r3, lsl #16 +; CT-NEXT: ldrh r12, [sp, #36] +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: ldrh lr, [sp, #28] +; CT-NEXT: ldrh r6, [sp, #24] +; CT-NEXT: ldrh r4, [sp, #16] +; CT-NEXT: ldrh r5, [sp, #32] +; CT-NEXT: orr r6, r6, lr, lsl #16 +; CT-NEXT: orr r1, r4, r1, lsl #16 +; CT-NEXT: orr r3, r5, r12, lsl #16 +; CT-NEXT: vmov d17, r2, r1 +; CT-NEXT: vmov d16, r6, r3 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 d19, r1 +; CT-NEXT: vand d18, d17, d19 +; CT-NEXT: vbic 
d19, d16, d19 +; CT-NEXT: vorr d18, d18, d19 +; CT-NEXT: vmov.u16 r0, d18[0] +; CT-NEXT: vmov.u16 r1, d18[1] +; CT-NEXT: vmov.u16 r2, d18[2] +; CT-NEXT: vmov.u16 r3, d18[3] +; CT-NEXT: pop {r4, r5, r6, pc} +; +; BFLOAT-F16-NATIVE-LABEL: ct_v4f16: +; BFLOAT-F16-NATIVE: @ %bb.0: @ %entry +; BFLOAT-F16-NATIVE-NEXT: .save {r4, r5, r6, lr} +; BFLOAT-F16-NATIVE-NEXT: push {r4, r5, r6, lr} +; BFLOAT-F16-NATIVE-NEXT: ldrh r1, [sp, #20] +; BFLOAT-F16-NATIVE-NEXT: pkhbt r2, r2, r3, lsl #16 +; BFLOAT-F16-NATIVE-NEXT: ldrh r12, [sp, #36] +; BFLOAT-F16-NATIVE-NEXT: and r0, r0, #1 +; BFLOAT-F16-NATIVE-NEXT: ldrh lr, [sp, #28] +; BFLOAT-F16-NATIVE-NEXT: ldrh r6, [sp, #24] +; BFLOAT-F16-NATIVE-NEXT: ldrh r4, [sp, #16] +; BFLOAT-F16-NATIVE-NEXT: ldrh r5, [sp, #32] +; BFLOAT-F16-NATIVE-NEXT: orr r6, r6, lr, lsl #16 +; BFLOAT-F16-NATIVE-NEXT: orr r1, r4, r1, lsl #16 +; BFLOAT-F16-NATIVE-NEXT: orr r3, r5, r12, lsl #16 +; BFLOAT-F16-NATIVE-NEXT: vmov d17, r2, r1 +; BFLOAT-F16-NATIVE-NEXT: vmov d16, r6, r3 +; BFLOAT-F16-NATIVE-NEXT: rsb r1, r0, #0 +; BFLOAT-F16-NATIVE-NEXT: vdup.32 d19, r1 +; BFLOAT-F16-NATIVE-NEXT: vand d18, d17, d19 +; BFLOAT-F16-NATIVE-NEXT: vbic d19, d16, d19 +; BFLOAT-F16-NATIVE-NEXT: vorr d18, d18, d19 +; BFLOAT-F16-NATIVE-NEXT: vmov.u16 r0, d18[0] +; BFLOAT-F16-NATIVE-NEXT: vmov.u16 r1, d18[1] +; BFLOAT-F16-NATIVE-NEXT: vmov.u16 r2, d18[2] +; BFLOAT-F16-NATIVE-NEXT: vmov.u16 r3, d18[3] +; BFLOAT-F16-NATIVE-NEXT: pop {r4, r5, r6, pc} +; +; F16-NATIVE-LABEL: ct_v4f16: +; F16-NATIVE: @ %bb.0: @ %entry +; F16-NATIVE-NEXT: .save {r4, r5, r6, lr} +; F16-NATIVE-NEXT: push {r4, r5, r6, lr} +; F16-NATIVE-NEXT: ldrh r1, [sp, #20] +; F16-NATIVE-NEXT: pkhbt r2, r2, r3, lsl #16 +; F16-NATIVE-NEXT: ldrh r12, [sp, #36] +; F16-NATIVE-NEXT: and r0, r0, #1 +; F16-NATIVE-NEXT: ldrh lr, [sp, #28] +; F16-NATIVE-NEXT: ldrh r6, [sp, #24] +; F16-NATIVE-NEXT: ldrh r4, [sp, #16] +; F16-NATIVE-NEXT: ldrh r5, [sp, #32] +; F16-NATIVE-NEXT: orr r6, r6, lr, lsl #16 +; F16-NATIVE-NEXT: orr r1, 
r4, r1, lsl #16 +; F16-NATIVE-NEXT: orr r3, r5, r12, lsl #16 +; F16-NATIVE-NEXT: vmov d17, r2, r1 +; F16-NATIVE-NEXT: vmov d16, r6, r3 +; F16-NATIVE-NEXT: rsb r1, r0, #0 +; F16-NATIVE-NEXT: vdup.32 d19, r1 +; F16-NATIVE-NEXT: vand d18, d17, d19 +; F16-NATIVE-NEXT: vbic d19, d16, d19 +; F16-NATIVE-NEXT: vorr d18, d18, d19 +; F16-NATIVE-NEXT: vmov.u16 r0, d18[0] +; F16-NATIVE-NEXT: vmov.u16 r1, d18[1] +; F16-NATIVE-NEXT: vmov.u16 r2, d18[2] +; F16-NATIVE-NEXT: vmov.u16 r3, d18[3] +; F16-NATIVE-NEXT: pop {r4, r5, r6, pc} +; +; THUMB1-LABEL: ct_v4f16: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #4 +; THUMB1-NEXT: sub sp, #4 +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r1, [sp, #32] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ldr r2, [sp, #36] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r3 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ldr r3, [sp, #40] +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r2, r5 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ands r2, r6 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ldr r5, [sp, #44] +; THUMB1-NEXT: ldr r6, [sp, #28] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r3, r6 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: ands r3, r7 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: add sp, #4 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v4f16: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; THUMB2-NEXT: push {r4, r5, r7, lr} 
+; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldrh.w r1, [sp, #24] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r2, lr +; THUMB2-NEXT: bic.w lr, r1, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldrh.w r2, [sp, #28] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r3, lr +; THUMB2-NEXT: bic.w lr, r2, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: ldrh.w r3, [sp, #16] +; THUMB2-NEXT: ldrh.w lr, [sp, #32] +; THUMB2-NEXT: rsb.w r4, r12, #0 +; THUMB2-NEXT: and.w r2, r3, r4 +; THUMB2-NEXT: bic.w r4, lr, r4 +; THUMB2-NEXT: orrs r2, r4 +; THUMB2-NEXT: ldrh.w lr, [sp, #36] +; THUMB2-NEXT: ldrh.w r4, [sp, #20] +; THUMB2-NEXT: rsb.w r5, r12, #0 +; THUMB2-NEXT: and.w r3, r4, r5 +; THUMB2-NEXT: bic.w r5, lr, r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <4 x half> @llvm.ct.select.v4f16(i1 %cond, <4 x half> %a, <4 x half> %b) + ret <4 x half> %sel +} + +define <4 x bfloat> @ct_v4bf16(i1 %cond, <4 x bfloat> %a, <4 x bfloat> %b) { +; CT-LABEL: ct_v4bf16: +; CT: @ %bb.0: @ %entry +; CT-NEXT: .save {r4, r5, r6, lr} +; CT-NEXT: push {r4, r5, r6, lr} +; CT-NEXT: ldrh r1, [sp, #20] +; CT-NEXT: pkhbt r2, r2, r3, lsl #16 +; CT-NEXT: ldrh r12, [sp, #36] +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: ldrh lr, [sp, #28] +; CT-NEXT: ldrh r6, [sp, #24] +; CT-NEXT: ldrh r4, [sp, #16] +; CT-NEXT: ldrh r5, [sp, #32] +; CT-NEXT: orr r6, r6, lr, lsl #16 +; CT-NEXT: orr r1, r4, r1, lsl #16 +; CT-NEXT: orr r3, r5, r12, lsl #16 +; CT-NEXT: vmov d17, r2, r1 +; CT-NEXT: vmov d16, r6, r3 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 d19, r1 +; CT-NEXT: vand d18, d17, d19 +; CT-NEXT: vbic d19, d16, d19 +; CT-NEXT: vorr d18, d18, d19 +; CT-NEXT: vmov.u16 r0, d18[0] +; CT-NEXT: vmov.u16 r1, d18[1] +; CT-NEXT: vmov.u16 r2, d18[2] +; CT-NEXT: vmov.u16 r3, d18[3] +; CT-NEXT: pop {r4, r5, r6, pc} +; +; BFLOAT-F16-NATIVE-LABEL: ct_v4bf16: +; BFLOAT-F16-NATIVE: @ %bb.0: @ %entry +; BFLOAT-F16-NATIVE-NEXT: vldr d16, [sp] +; 
BFLOAT-F16-NATIVE-NEXT: vmov d17, r2, r3 +; BFLOAT-F16-NATIVE-NEXT: and r0, r0, #1 +; BFLOAT-F16-NATIVE-NEXT: rsb r1, r0, #0 +; BFLOAT-F16-NATIVE-NEXT: vdup.32 d19, r1 +; BFLOAT-F16-NATIVE-NEXT: vand d18, d17, d19 +; BFLOAT-F16-NATIVE-NEXT: vbic d19, d16, d19 +; BFLOAT-F16-NATIVE-NEXT: vorr d18, d18, d19 +; BFLOAT-F16-NATIVE-NEXT: vmov r0, r1, d18 +; BFLOAT-F16-NATIVE-NEXT: bx lr +; +; F16-NATIVE-LABEL: ct_v4bf16: +; F16-NATIVE: @ %bb.0: @ %entry +; F16-NATIVE-NEXT: .save {r4, r5, r6, lr} +; F16-NATIVE-NEXT: push {r4, r5, r6, lr} +; F16-NATIVE-NEXT: ldrh r1, [sp, #20] +; F16-NATIVE-NEXT: pkhbt r2, r2, r3, lsl #16 +; F16-NATIVE-NEXT: ldrh r12, [sp, #36] +; F16-NATIVE-NEXT: and r0, r0, #1 +; F16-NATIVE-NEXT: ldrh lr, [sp, #28] +; F16-NATIVE-NEXT: ldrh r6, [sp, #24] +; F16-NATIVE-NEXT: ldrh r4, [sp, #16] +; F16-NATIVE-NEXT: ldrh r5, [sp, #32] +; F16-NATIVE-NEXT: orr r6, r6, lr, lsl #16 +; F16-NATIVE-NEXT: orr r1, r4, r1, lsl #16 +; F16-NATIVE-NEXT: orr r3, r5, r12, lsl #16 +; F16-NATIVE-NEXT: vmov d17, r2, r1 +; F16-NATIVE-NEXT: vmov d16, r6, r3 +; F16-NATIVE-NEXT: rsb r1, r0, #0 +; F16-NATIVE-NEXT: vdup.32 d19, r1 +; F16-NATIVE-NEXT: vand d18, d17, d19 +; F16-NATIVE-NEXT: vbic d19, d16, d19 +; F16-NATIVE-NEXT: vorr d18, d18, d19 +; F16-NATIVE-NEXT: vmov.u16 r0, d18[0] +; F16-NATIVE-NEXT: vmov.u16 r1, d18[1] +; F16-NATIVE-NEXT: vmov.u16 r2, d18[2] +; F16-NATIVE-NEXT: vmov.u16 r3, d18[3] +; F16-NATIVE-NEXT: pop {r4, r5, r6, pc} +; +; THUMB1-LABEL: ct_v4bf16: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #4 +; THUMB1-NEXT: sub sp, #4 +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r1, [sp, #32] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ldr r2, [sp, #36] +; THUMB1-NEXT: mov r5, r4 
+; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r3 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ldr r3, [sp, #40] +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r2, r5 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ands r2, r6 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ldr r5, [sp, #44] +; THUMB1-NEXT: ldr r6, [sp, #28] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r3, r6 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: ands r3, r7 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: add sp, #4 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v4bf16: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; THUMB2-NEXT: push {r4, r5, r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldrh.w r1, [sp, #24] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r2, lr +; THUMB2-NEXT: bic.w lr, r1, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldrh.w r2, [sp, #28] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r3, lr +; THUMB2-NEXT: bic.w lr, r2, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: ldrh.w r3, [sp, #16] +; THUMB2-NEXT: ldrh.w lr, [sp, #32] +; THUMB2-NEXT: rsb.w r4, r12, #0 +; THUMB2-NEXT: and.w r2, r3, r4 +; THUMB2-NEXT: bic.w r4, lr, r4 +; THUMB2-NEXT: orrs r2, r4 +; THUMB2-NEXT: ldrh.w lr, [sp, #36] +; THUMB2-NEXT: ldrh.w r4, [sp, #20] +; THUMB2-NEXT: rsb.w r5, r12, #0 +; THUMB2-NEXT: and.w r3, r4, r5 +; THUMB2-NEXT: bic.w r5, lr, r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <4 x bfloat> @llvm.ct.select.v4bf16(i1 %cond, <4 x bfloat> %a, <4 x bfloat> %b) + ret <4 x bfloat> %sel +} + +define <8 x half> @ct_v8f16(i1 %cond, <8 x half> %a, <8 x half> %b) { +; CT-LABEL: ct_v8f16: +; CT: @ %bb.0: @ %entry +; 
CT-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CT-NEXT: push {r4, r5, r6, r7, r8, lr} +; CT-NEXT: ldrh r12, [sp, #36] +; CT-NEXT: pkhbt r2, r2, r3, lsl #16 +; CT-NEXT: ldrh r7, [sp, #32] +; CT-NEXT: and r1, r1, #1 +; CT-NEXT: ldrh r3, [sp, #52] +; CT-NEXT: vmov.32 d16[0], r2 +; CT-NEXT: ldrh r2, [sp, #48] +; CT-NEXT: orr r7, r7, r12, lsl #16 +; CT-NEXT: ldrh r5, [sp, #68] +; CT-NEXT: orr r2, r2, r3, lsl #16 +; CT-NEXT: vmov.32 d17[0], r7 +; CT-NEXT: ldrh r7, [sp, #64] +; CT-NEXT: ldrh r3, [sp, #28] +; CT-NEXT: vmov.32 d18[0], r2 +; CT-NEXT: ldrh r2, [sp, #24] +; CT-NEXT: orr r7, r7, r5, lsl #16 +; CT-NEXT: ldrh r5, [sp, #76] +; CT-NEXT: vmov.32 d19[0], r7 +; CT-NEXT: orr r2, r2, r3, lsl #16 +; CT-NEXT: ldrh r7, [sp, #72] +; CT-NEXT: ldrh lr, [sp, #60] +; CT-NEXT: vmov.32 d16[1], r2 +; CT-NEXT: orr r2, r7, r5, lsl #16 +; CT-NEXT: ldrh r4, [sp, #56] +; CT-NEXT: ldrh r8, [sp, #44] +; CT-NEXT: vmov.32 d19[1], r2 +; CT-NEXT: orr r2, r4, lr, lsl #16 +; CT-NEXT: ldrh r6, [sp, #40] +; CT-NEXT: vmov.32 d18[1], r2 +; CT-NEXT: orr r2, r6, r8, lsl #16 +; CT-NEXT: vmov.32 d17[1], r2 +; CT-NEXT: rsb r2, r1, #0 +; CT-NEXT: vdup.32 q11, r2 +; CT-NEXT: vand q10, q8, q11 +; CT-NEXT: vbic q11, q9, q11 +; CT-NEXT: vorr q10, q10, q11 +; CT-NEXT: vst1.64 {d20, d21}, [r0:128] +; CT-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; BFLOAT-F16-NATIVE-LABEL: ct_v8f16: +; BFLOAT-F16-NATIVE: @ %bb.0: @ %entry +; BFLOAT-F16-NATIVE-NEXT: .save {r4, r5, r6, r7, r8, lr} +; BFLOAT-F16-NATIVE-NEXT: push {r4, r5, r6, r7, r8, lr} +; BFLOAT-F16-NATIVE-NEXT: ldrh r12, [sp, #36] +; BFLOAT-F16-NATIVE-NEXT: pkhbt r2, r2, r3, lsl #16 +; BFLOAT-F16-NATIVE-NEXT: ldrh r7, [sp, #32] +; BFLOAT-F16-NATIVE-NEXT: and r1, r1, #1 +; BFLOAT-F16-NATIVE-NEXT: ldrh r3, [sp, #52] +; BFLOAT-F16-NATIVE-NEXT: vmov.32 d16[0], r2 +; BFLOAT-F16-NATIVE-NEXT: ldrh r2, [sp, #48] +; BFLOAT-F16-NATIVE-NEXT: orr r7, r7, r12, lsl #16 +; BFLOAT-F16-NATIVE-NEXT: ldrh r5, [sp, #68] +; BFLOAT-F16-NATIVE-NEXT: orr r2, r2, r3, lsl #16 +; 
BFLOAT-F16-NATIVE-NEXT: vmov.32 d17[0], r7 +; BFLOAT-F16-NATIVE-NEXT: ldrh r7, [sp, #64] +; BFLOAT-F16-NATIVE-NEXT: ldrh r3, [sp, #28] +; BFLOAT-F16-NATIVE-NEXT: vmov.32 d18[0], r2 +; BFLOAT-F16-NATIVE-NEXT: ldrh r2, [sp, #24] +; BFLOAT-F16-NATIVE-NEXT: orr r7, r7, r5, lsl #16 +; BFLOAT-F16-NATIVE-NEXT: ldrh r5, [sp, #76] +; BFLOAT-F16-NATIVE-NEXT: vmov.32 d19[0], r7 +; BFLOAT-F16-NATIVE-NEXT: orr r2, r2, r3, lsl #16 +; BFLOAT-F16-NATIVE-NEXT: ldrh r7, [sp, #72] +; BFLOAT-F16-NATIVE-NEXT: ldrh lr, [sp, #60] +; BFLOAT-F16-NATIVE-NEXT: vmov.32 d16[1], r2 +; BFLOAT-F16-NATIVE-NEXT: orr r2, r7, r5, lsl #16 +; BFLOAT-F16-NATIVE-NEXT: ldrh r4, [sp, #56] +; BFLOAT-F16-NATIVE-NEXT: ldrh r8, [sp, #44] +; BFLOAT-F16-NATIVE-NEXT: vmov.32 d19[1], r2 +; BFLOAT-F16-NATIVE-NEXT: orr r2, r4, lr, lsl #16 +; BFLOAT-F16-NATIVE-NEXT: ldrh r6, [sp, #40] +; BFLOAT-F16-NATIVE-NEXT: vmov.32 d18[1], r2 +; BFLOAT-F16-NATIVE-NEXT: orr r2, r6, r8, lsl #16 +; BFLOAT-F16-NATIVE-NEXT: vmov.32 d17[1], r2 +; BFLOAT-F16-NATIVE-NEXT: rsb r2, r1, #0 +; BFLOAT-F16-NATIVE-NEXT: vdup.32 q11, r2 +; BFLOAT-F16-NATIVE-NEXT: vand q10, q8, q11 +; BFLOAT-F16-NATIVE-NEXT: vbic q11, q9, q11 +; BFLOAT-F16-NATIVE-NEXT: vorr q10, q10, q11 +; BFLOAT-F16-NATIVE-NEXT: vst1.64 {d20, d21}, [r0:128] +; BFLOAT-F16-NATIVE-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; F16-NATIVE-LABEL: ct_v8f16: +; F16-NATIVE: @ %bb.0: @ %entry +; F16-NATIVE-NEXT: .save {r4, r5, r6, r7, r8, lr} +; F16-NATIVE-NEXT: push {r4, r5, r6, r7, r8, lr} +; F16-NATIVE-NEXT: ldrh r12, [sp, #36] +; F16-NATIVE-NEXT: pkhbt r2, r2, r3, lsl #16 +; F16-NATIVE-NEXT: ldrh r7, [sp, #32] +; F16-NATIVE-NEXT: and r1, r1, #1 +; F16-NATIVE-NEXT: ldrh r3, [sp, #52] +; F16-NATIVE-NEXT: vmov.32 d16[0], r2 +; F16-NATIVE-NEXT: ldrh r2, [sp, #48] +; F16-NATIVE-NEXT: orr r7, r7, r12, lsl #16 +; F16-NATIVE-NEXT: ldrh r5, [sp, #68] +; F16-NATIVE-NEXT: orr r2, r2, r3, lsl #16 +; F16-NATIVE-NEXT: vmov.32 d17[0], r7 +; F16-NATIVE-NEXT: ldrh r7, [sp, #64] +; F16-NATIVE-NEXT: ldrh 
r3, [sp, #28] +; F16-NATIVE-NEXT: vmov.32 d18[0], r2 +; F16-NATIVE-NEXT: ldrh r2, [sp, #24] +; F16-NATIVE-NEXT: orr r7, r7, r5, lsl #16 +; F16-NATIVE-NEXT: ldrh r5, [sp, #76] +; F16-NATIVE-NEXT: vmov.32 d19[0], r7 +; F16-NATIVE-NEXT: orr r2, r2, r3, lsl #16 +; F16-NATIVE-NEXT: ldrh r7, [sp, #72] +; F16-NATIVE-NEXT: ldrh lr, [sp, #60] +; F16-NATIVE-NEXT: vmov.32 d16[1], r2 +; F16-NATIVE-NEXT: orr r2, r7, r5, lsl #16 +; F16-NATIVE-NEXT: ldrh r4, [sp, #56] +; F16-NATIVE-NEXT: ldrh r8, [sp, #44] +; F16-NATIVE-NEXT: vmov.32 d19[1], r2 +; F16-NATIVE-NEXT: orr r2, r4, lr, lsl #16 +; F16-NATIVE-NEXT: ldrh r6, [sp, #40] +; F16-NATIVE-NEXT: vmov.32 d18[1], r2 +; F16-NATIVE-NEXT: orr r2, r6, r8, lsl #16 +; F16-NATIVE-NEXT: vmov.32 d17[1], r2 +; F16-NATIVE-NEXT: rsb r2, r1, #0 +; F16-NATIVE-NEXT: vdup.32 q11, r2 +; F16-NATIVE-NEXT: vand q10, q8, q11 +; F16-NATIVE-NEXT: vbic q11, q9, q11 +; F16-NATIVE-NEXT: vorr q10, q10, q11 +; F16-NATIVE-NEXT: vst1.64 {d20, d21}, [r0:128] +; F16-NATIVE-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; THUMB1-LABEL: ct_v8f16: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #4 +; THUMB1-NEXT: sub sp, #4 +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r1 +; THUMB1-NEXT: ldr r1, [sp, #76] +; THUMB1-NEXT: ldr r5, [sp, #44] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #14] +; THUMB1-NEXT: ldr r1, [sp, #72] +; THUMB1-NEXT: ldr r5, [sp, #40] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #12] +; THUMB1-NEXT: ldr r1, [sp, #68] +; THUMB1-NEXT: ldr r5, [sp, #36] +; THUMB1-NEXT: mov r7, r4 +; 
THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #10] +; THUMB1-NEXT: ldr r1, [sp, #64] +; THUMB1-NEXT: ldr r5, [sp, #32] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #8] +; THUMB1-NEXT: ldr r1, [sp, #60] +; THUMB1-NEXT: ldr r5, [sp, #28] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #6] +; THUMB1-NEXT: ldr r1, [sp, #56] +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #4] +; THUMB1-NEXT: ldr r1, [sp, #52] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r5, r3 +; THUMB1-NEXT: eors r5, r1 +; THUMB1-NEXT: ands r5, r6 +; THUMB1-NEXT: eors r5, r1 +; THUMB1-NEXT: strh r5, [r0, #2] +; THUMB1-NEXT: ldr r1, [sp, #48] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r3, r2 +; THUMB1-NEXT: eors r3, r1 +; THUMB1-NEXT: ands r3, r5 +; THUMB1-NEXT: eors r3, r1 +; THUMB1-NEXT: strh r3, [r0] +; THUMB1-NEXT: add sp, #4 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v8f16: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; THUMB2-NEXT: push {r4, r5, r7, lr} +; THUMB2-NEXT: and lr, r1, #1 +; THUMB2-NEXT: ldrh.w r12, [sp, #68] +; THUMB2-NEXT: ldrh.w r1, [sp, #36] +; THUMB2-NEXT: rsb.w 
r5, lr, #0 +; THUMB2-NEXT: and.w r4, r1, r5 +; THUMB2-NEXT: bic.w r5, r12, r5 +; THUMB2-NEXT: orrs r4, r5 +; THUMB2-NEXT: strh r4, [r0, #14] +; THUMB2-NEXT: ldrh.w r12, [sp, #64] +; THUMB2-NEXT: ldrh.w r5, [sp, #32] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #12] +; THUMB2-NEXT: ldrh.w r12, [sp, #60] +; THUMB2-NEXT: ldrh.w r5, [sp, #28] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #10] +; THUMB2-NEXT: ldrh.w r12, [sp, #56] +; THUMB2-NEXT: ldrh.w r5, [sp, #24] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #8] +; THUMB2-NEXT: ldrh.w r12, [sp, #52] +; THUMB2-NEXT: ldrh.w r5, [sp, #20] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #6] +; THUMB2-NEXT: ldrh.w r12, [sp, #48] +; THUMB2-NEXT: ldrh.w r5, [sp, #16] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #4] +; THUMB2-NEXT: ldrh.w r1, [sp, #44] +; THUMB2-NEXT: rsb.w r4, lr, #0 +; THUMB2-NEXT: and.w r5, r3, r4 +; THUMB2-NEXT: bic.w r4, r1, r4 +; THUMB2-NEXT: orrs r5, r4 +; THUMB2-NEXT: strh r5, [r0, #2] +; THUMB2-NEXT: ldrh.w r1, [sp, #40] +; THUMB2-NEXT: rsb.w r5, lr, #0 +; THUMB2-NEXT: and.w r3, r2, r5 +; THUMB2-NEXT: bic.w r5, r1, r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: strh r3, [r0] +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <8 x half> @llvm.ct.select.v8f16(i1 %cond, <8 x half> %a, <8 x half> %b) + ret <8 x half> %sel +} + +define <8 x bfloat> @ct_v8bf16(i1 %cond, <8 x bfloat> %a, <8 x bfloat> %b) { +; CT-LABEL: ct_v8bf16: +; CT: @ %bb.0: @ 
%entry +; CT-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CT-NEXT: push {r4, r5, r6, r7, r8, lr} +; CT-NEXT: ldrh r12, [sp, #36] +; CT-NEXT: pkhbt r2, r2, r3, lsl #16 +; CT-NEXT: ldrh r7, [sp, #32] +; CT-NEXT: and r1, r1, #1 +; CT-NEXT: ldrh r3, [sp, #52] +; CT-NEXT: vmov.32 d16[0], r2 +; CT-NEXT: ldrh r2, [sp, #48] +; CT-NEXT: orr r7, r7, r12, lsl #16 +; CT-NEXT: ldrh r5, [sp, #68] +; CT-NEXT: orr r2, r2, r3, lsl #16 +; CT-NEXT: vmov.32 d17[0], r7 +; CT-NEXT: ldrh r7, [sp, #64] +; CT-NEXT: ldrh r3, [sp, #28] +; CT-NEXT: vmov.32 d18[0], r2 +; CT-NEXT: ldrh r2, [sp, #24] +; CT-NEXT: orr r7, r7, r5, lsl #16 +; CT-NEXT: ldrh r5, [sp, #76] +; CT-NEXT: vmov.32 d19[0], r7 +; CT-NEXT: orr r2, r2, r3, lsl #16 +; CT-NEXT: ldrh r7, [sp, #72] +; CT-NEXT: ldrh lr, [sp, #60] +; CT-NEXT: vmov.32 d16[1], r2 +; CT-NEXT: orr r2, r7, r5, lsl #16 +; CT-NEXT: ldrh r4, [sp, #56] +; CT-NEXT: ldrh r8, [sp, #44] +; CT-NEXT: vmov.32 d19[1], r2 +; CT-NEXT: orr r2, r4, lr, lsl #16 +; CT-NEXT: ldrh r6, [sp, #40] +; CT-NEXT: vmov.32 d18[1], r2 +; CT-NEXT: orr r2, r6, r8, lsl #16 +; CT-NEXT: vmov.32 d17[1], r2 +; CT-NEXT: rsb r2, r1, #0 +; CT-NEXT: vdup.32 q11, r2 +; CT-NEXT: vand q10, q8, q11 +; CT-NEXT: vbic q11, q9, q11 +; CT-NEXT: vorr q10, q10, q11 +; CT-NEXT: vst1.64 {d20, d21}, [r0:128] +; CT-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; BFLOAT-F16-NATIVE-LABEL: ct_v8bf16: +; BFLOAT-F16-NATIVE: @ %bb.0: @ %entry +; BFLOAT-F16-NATIVE-NEXT: vldr d17, [sp] +; BFLOAT-F16-NATIVE-NEXT: add r1, sp, #8 +; BFLOAT-F16-NATIVE-NEXT: vmov d16, r2, r3 +; BFLOAT-F16-NATIVE-NEXT: vld1.64 {d18, d19}, [r1] +; BFLOAT-F16-NATIVE-NEXT: and r0, r0, #1 +; BFLOAT-F16-NATIVE-NEXT: rsb r1, r0, #0 +; BFLOAT-F16-NATIVE-NEXT: vdup.32 q11, r1 +; BFLOAT-F16-NATIVE-NEXT: vand q10, q8, q11 +; BFLOAT-F16-NATIVE-NEXT: vbic q11, q9, q11 +; BFLOAT-F16-NATIVE-NEXT: vorr q10, q10, q11 +; BFLOAT-F16-NATIVE-NEXT: vmov r0, r1, d20 +; BFLOAT-F16-NATIVE-NEXT: vmov r2, r3, d21 +; BFLOAT-F16-NATIVE-NEXT: bx lr +; +; F16-NATIVE-LABEL: 
ct_v8bf16: +; F16-NATIVE: @ %bb.0: @ %entry +; F16-NATIVE-NEXT: .save {r4, r5, r6, r7, r8, lr} +; F16-NATIVE-NEXT: push {r4, r5, r6, r7, r8, lr} +; F16-NATIVE-NEXT: ldrh r12, [sp, #36] +; F16-NATIVE-NEXT: pkhbt r2, r2, r3, lsl #16 +; F16-NATIVE-NEXT: ldrh r7, [sp, #32] +; F16-NATIVE-NEXT: and r1, r1, #1 +; F16-NATIVE-NEXT: ldrh r3, [sp, #52] +; F16-NATIVE-NEXT: vmov.32 d16[0], r2 +; F16-NATIVE-NEXT: ldrh r2, [sp, #48] +; F16-NATIVE-NEXT: orr r7, r7, r12, lsl #16 +; F16-NATIVE-NEXT: ldrh r5, [sp, #68] +; F16-NATIVE-NEXT: orr r2, r2, r3, lsl #16 +; F16-NATIVE-NEXT: vmov.32 d17[0], r7 +; F16-NATIVE-NEXT: ldrh r7, [sp, #64] +; F16-NATIVE-NEXT: ldrh r3, [sp, #28] +; F16-NATIVE-NEXT: vmov.32 d18[0], r2 +; F16-NATIVE-NEXT: ldrh r2, [sp, #24] +; F16-NATIVE-NEXT: orr r7, r7, r5, lsl #16 +; F16-NATIVE-NEXT: ldrh r5, [sp, #76] +; F16-NATIVE-NEXT: vmov.32 d19[0], r7 +; F16-NATIVE-NEXT: orr r2, r2, r3, lsl #16 +; F16-NATIVE-NEXT: ldrh r7, [sp, #72] +; F16-NATIVE-NEXT: ldrh lr, [sp, #60] +; F16-NATIVE-NEXT: vmov.32 d16[1], r2 +; F16-NATIVE-NEXT: orr r2, r7, r5, lsl #16 +; F16-NATIVE-NEXT: ldrh r4, [sp, #56] +; F16-NATIVE-NEXT: ldrh r8, [sp, #44] +; F16-NATIVE-NEXT: vmov.32 d19[1], r2 +; F16-NATIVE-NEXT: orr r2, r4, lr, lsl #16 +; F16-NATIVE-NEXT: ldrh r6, [sp, #40] +; F16-NATIVE-NEXT: vmov.32 d18[1], r2 +; F16-NATIVE-NEXT: orr r2, r6, r8, lsl #16 +; F16-NATIVE-NEXT: vmov.32 d17[1], r2 +; F16-NATIVE-NEXT: rsb r2, r1, #0 +; F16-NATIVE-NEXT: vdup.32 q11, r2 +; F16-NATIVE-NEXT: vand q10, q8, q11 +; F16-NATIVE-NEXT: vbic q11, q9, q11 +; F16-NATIVE-NEXT: vorr q10, q10, q11 +; F16-NATIVE-NEXT: vst1.64 {d20, d21}, [r0:128] +; F16-NATIVE-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; THUMB1-LABEL: ct_v8bf16: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #4 +; THUMB1-NEXT: sub sp, #4 +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r1 +; THUMB1-NEXT: ldr r1, [sp, #76] +; THUMB1-NEXT: ldr r5, [sp, #44] 
+; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #14] +; THUMB1-NEXT: ldr r1, [sp, #72] +; THUMB1-NEXT: ldr r5, [sp, #40] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #12] +; THUMB1-NEXT: ldr r1, [sp, #68] +; THUMB1-NEXT: ldr r5, [sp, #36] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #10] +; THUMB1-NEXT: ldr r1, [sp, #64] +; THUMB1-NEXT: ldr r5, [sp, #32] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #8] +; THUMB1-NEXT: ldr r1, [sp, #60] +; THUMB1-NEXT: ldr r5, [sp, #28] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #6] +; THUMB1-NEXT: ldr r1, [sp, #56] +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #4] +; THUMB1-NEXT: ldr r1, [sp, #52] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r5, r3 +; THUMB1-NEXT: eors r5, r1 +; THUMB1-NEXT: ands r5, r6 +; THUMB1-NEXT: 
eors r5, r1 +; THUMB1-NEXT: strh r5, [r0, #2] +; THUMB1-NEXT: ldr r1, [sp, #48] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r3, r2 +; THUMB1-NEXT: eors r3, r1 +; THUMB1-NEXT: ands r3, r5 +; THUMB1-NEXT: eors r3, r1 +; THUMB1-NEXT: strh r3, [r0] +; THUMB1-NEXT: add sp, #4 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v8bf16: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; THUMB2-NEXT: push {r4, r5, r7, lr} +; THUMB2-NEXT: and lr, r1, #1 +; THUMB2-NEXT: ldrh.w r12, [sp, #68] +; THUMB2-NEXT: ldrh.w r1, [sp, #36] +; THUMB2-NEXT: rsb.w r5, lr, #0 +; THUMB2-NEXT: and.w r4, r1, r5 +; THUMB2-NEXT: bic.w r5, r12, r5 +; THUMB2-NEXT: orrs r4, r5 +; THUMB2-NEXT: strh r4, [r0, #14] +; THUMB2-NEXT: ldrh.w r12, [sp, #64] +; THUMB2-NEXT: ldrh.w r5, [sp, #32] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #12] +; THUMB2-NEXT: ldrh.w r12, [sp, #60] +; THUMB2-NEXT: ldrh.w r5, [sp, #28] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #10] +; THUMB2-NEXT: ldrh.w r12, [sp, #56] +; THUMB2-NEXT: ldrh.w r5, [sp, #24] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #8] +; THUMB2-NEXT: ldrh.w r12, [sp, #52] +; THUMB2-NEXT: ldrh.w r5, [sp, #20] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #6] +; THUMB2-NEXT: ldrh.w r12, [sp, #48] +; THUMB2-NEXT: ldrh.w r5, [sp, #16] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #4] +; THUMB2-NEXT: ldrh.w 
r1, [sp, #44] +; THUMB2-NEXT: rsb.w r4, lr, #0 +; THUMB2-NEXT: and.w r5, r3, r4 +; THUMB2-NEXT: bic.w r4, r1, r4 +; THUMB2-NEXT: orrs r5, r4 +; THUMB2-NEXT: strh r5, [r0, #2] +; THUMB2-NEXT: ldrh.w r1, [sp, #40] +; THUMB2-NEXT: rsb.w r5, lr, #0 +; THUMB2-NEXT: and.w r3, r2, r5 +; THUMB2-NEXT: bic.w r5, r1, r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: strh r3, [r0] +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <8 x bfloat> @llvm.ct.select.v8bf16(i1 %cond, <8 x bfloat> %a, <8 x bfloat> %b) + ret <8 x bfloat> %sel +} diff --git a/llvm/test/CodeGen/ARM/ctselect-vector.ll b/llvm/test/CodeGen/ARM/ctselect-vector.ll new file mode 100644 index 0000000000000..22619735c4535 --- /dev/null +++ b/llvm/test/CodeGen/ARM/ctselect-vector.ll @@ -0,0 +1,2179 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=armv7-none-eabi -verify-machineinstrs | FileCheck --check-prefixes=CT %s +; RUN: llc < %s -mtriple=armv6 -verify-machineinstrs | FileCheck --check-prefix=DEFAULT %s +; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs | FileCheck --check-prefix=THUMB1 %s +; RUN: llc < %s -mtriple=thumbv7m-none-eabi -verify-machineinstrs | FileCheck --check-prefix=THUMB2 %s + +define <8 x i8> @ct_v8i8(i1 %cond, <8 x i8> %a, <8 x i8> %b) { +; CT-LABEL: ct_v8i8: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d16, [sp] +; CT-NEXT: vmov d17, r2, r3 +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 d19, r1 +; CT-NEXT: vand d18, d17, d19 +; CT-NEXT: vbic d19, d16, d19 +; CT-NEXT: vorr d18, d18, d19 +; CT-NEXT: vmov r0, r1, d18 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v8i8: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r4, r5, r11, lr} +; DEFAULT-NEXT: and lr, r1, #1 +; DEFAULT-NEXT: ldrb r12, [sp, #68] +; DEFAULT-NEXT: ldrb r1, [sp, #36] +; DEFAULT-NEXT: rsb r5, lr, #0 +; DEFAULT-NEXT: and r4, r1, r5 +; DEFAULT-NEXT: bic r5, r12, r5 +; DEFAULT-NEXT: orr r4, r4, r5 +; 
DEFAULT-NEXT: strb r4, [r0, #7] +; DEFAULT-NEXT: ldrb r12, [sp, #64] +; DEFAULT-NEXT: ldrb r5, [sp, #32] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #6] +; DEFAULT-NEXT: ldrb r12, [sp, #60] +; DEFAULT-NEXT: ldrb r5, [sp, #28] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #5] +; DEFAULT-NEXT: ldrb r12, [sp, #56] +; DEFAULT-NEXT: ldrb r5, [sp, #24] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #4] +; DEFAULT-NEXT: ldrb r12, [sp, #52] +; DEFAULT-NEXT: ldrb r5, [sp, #20] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #3] +; DEFAULT-NEXT: ldrb r12, [sp, #48] +; DEFAULT-NEXT: ldrb r5, [sp, #16] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #2] +; DEFAULT-NEXT: ldrb r1, [sp, #44] +; DEFAULT-NEXT: rsb r4, lr, #0 +; DEFAULT-NEXT: and r5, r3, r4 +; DEFAULT-NEXT: bic r4, r1, r4 +; DEFAULT-NEXT: orr r5, r5, r4 +; DEFAULT-NEXT: strb r5, [r0, #1] +; DEFAULT-NEXT: ldrb r1, [sp, #40] +; DEFAULT-NEXT: rsb r5, lr, #0 +; DEFAULT-NEXT: and r3, r2, r5 +; DEFAULT-NEXT: bic r5, r1, r5 +; DEFAULT-NEXT: orr r3, r3, r5 +; DEFAULT-NEXT: strb r3, [r0] +; DEFAULT-NEXT: pop {r4, r5, r11, pc} +; +; THUMB1-LABEL: ct_v8i8: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #4 +; THUMB1-NEXT: sub sp, #4 +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r1 +; THUMB1-NEXT: ldr r1, [sp, #76] +; THUMB1-NEXT: ldr r5, [sp, #44] +; THUMB1-NEXT: mov r7, r4 +; 
THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #7] +; THUMB1-NEXT: ldr r1, [sp, #72] +; THUMB1-NEXT: ldr r5, [sp, #40] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #6] +; THUMB1-NEXT: ldr r1, [sp, #68] +; THUMB1-NEXT: ldr r5, [sp, #36] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #5] +; THUMB1-NEXT: ldr r1, [sp, #64] +; THUMB1-NEXT: ldr r5, [sp, #32] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #4] +; THUMB1-NEXT: ldr r1, [sp, #60] +; THUMB1-NEXT: ldr r5, [sp, #28] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #3] +; THUMB1-NEXT: ldr r1, [sp, #56] +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #2] +; THUMB1-NEXT: ldr r1, [sp, #52] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r5, r3 +; THUMB1-NEXT: eors r5, r1 +; THUMB1-NEXT: ands r5, r6 +; THUMB1-NEXT: eors r5, r1 +; THUMB1-NEXT: strb 
r5, [r0, #1] +; THUMB1-NEXT: ldr r1, [sp, #48] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r3, r2 +; THUMB1-NEXT: eors r3, r1 +; THUMB1-NEXT: ands r3, r5 +; THUMB1-NEXT: eors r3, r1 +; THUMB1-NEXT: strb r3, [r0] +; THUMB1-NEXT: add sp, #4 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v8i8: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; THUMB2-NEXT: push {r4, r5, r7, lr} +; THUMB2-NEXT: and lr, r1, #1 +; THUMB2-NEXT: ldrb.w r12, [sp, #68] +; THUMB2-NEXT: ldrb.w r1, [sp, #36] +; THUMB2-NEXT: rsb.w r5, lr, #0 +; THUMB2-NEXT: and.w r4, r1, r5 +; THUMB2-NEXT: bic.w r5, r12, r5 +; THUMB2-NEXT: orrs r4, r5 +; THUMB2-NEXT: strb r4, [r0, #7] +; THUMB2-NEXT: ldrb.w r12, [sp, #64] +; THUMB2-NEXT: ldrb.w r5, [sp, #32] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #6] +; THUMB2-NEXT: ldrb.w r12, [sp, #60] +; THUMB2-NEXT: ldrb.w r5, [sp, #28] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #5] +; THUMB2-NEXT: ldrb.w r12, [sp, #56] +; THUMB2-NEXT: ldrb.w r5, [sp, #24] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #4] +; THUMB2-NEXT: ldrb.w r12, [sp, #52] +; THUMB2-NEXT: ldrb.w r5, [sp, #20] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #3] +; THUMB2-NEXT: ldrb.w r12, [sp, #48] +; THUMB2-NEXT: ldrb.w r5, [sp, #16] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #2] +; THUMB2-NEXT: ldrb.w r1, [sp, #44] +; THUMB2-NEXT: rsb.w r4, 
lr, #0 +; THUMB2-NEXT: and.w r5, r3, r4 +; THUMB2-NEXT: bic.w r4, r1, r4 +; THUMB2-NEXT: orrs r5, r4 +; THUMB2-NEXT: strb r5, [r0, #1] +; THUMB2-NEXT: ldrb.w r1, [sp, #40] +; THUMB2-NEXT: rsb.w r5, lr, #0 +; THUMB2-NEXT: and.w r3, r2, r5 +; THUMB2-NEXT: bic.w r5, r1, r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: strb r3, [r0] +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <8 x i8> @llvm.ct.select.v8i8(i1 %cond, <8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %sel +} + +define <4 x i16> @ct_v4i16(i1 %cond, <4 x i16> %a, <4 x i16> %b) { +; CT-LABEL: ct_v4i16: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d16, [sp] +; CT-NEXT: vmov d17, r2, r3 +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 d19, r1 +; CT-NEXT: vand d18, d17, d19 +; CT-NEXT: vbic d19, d16, d19 +; CT-NEXT: vorr d18, d18, d19 +; CT-NEXT: vmov r0, r1, d18 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v4i16: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r4, r5, r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: ldrh r1, [sp, #24] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r0, r2, lr +; DEFAULT-NEXT: bic lr, r1, lr +; DEFAULT-NEXT: orr r0, r0, lr +; DEFAULT-NEXT: ldrh r2, [sp, #28] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r1, r3, lr +; DEFAULT-NEXT: bic lr, r2, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: ldrh r3, [sp, #16] +; DEFAULT-NEXT: ldrh lr, [sp, #32] +; DEFAULT-NEXT: rsb r4, r12, #0 +; DEFAULT-NEXT: and r2, r3, r4 +; DEFAULT-NEXT: bic r4, lr, r4 +; DEFAULT-NEXT: orr r2, r2, r4 +; DEFAULT-NEXT: ldrh lr, [sp, #36] +; DEFAULT-NEXT: ldrh r4, [sp, #20] +; DEFAULT-NEXT: rsb r5, r12, #0 +; DEFAULT-NEXT: and r3, r4, r5 +; DEFAULT-NEXT: bic r5, lr, r5 +; DEFAULT-NEXT: orr r3, r3, r5 +; DEFAULT-NEXT: pop {r4, r5, r11, pc} +; +; THUMB1-LABEL: ct_v4i16: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #4 +; THUMB1-NEXT: sub sp, #4 +; THUMB1-NEXT: movs r4, 
#1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r1, [sp, #32] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ldr r2, [sp, #36] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r3 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ldr r3, [sp, #40] +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r2, r5 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ands r2, r6 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ldr r5, [sp, #44] +; THUMB1-NEXT: ldr r6, [sp, #28] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r3, r6 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: ands r3, r7 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: add sp, #4 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v4i16: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; THUMB2-NEXT: push {r4, r5, r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldrh.w r1, [sp, #24] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r2, lr +; THUMB2-NEXT: bic.w lr, r1, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldrh.w r2, [sp, #28] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r3, lr +; THUMB2-NEXT: bic.w lr, r2, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: ldrh.w r3, [sp, #16] +; THUMB2-NEXT: ldrh.w lr, [sp, #32] +; THUMB2-NEXT: rsb.w r4, r12, #0 +; THUMB2-NEXT: and.w r2, r3, r4 +; THUMB2-NEXT: bic.w r4, lr, r4 +; THUMB2-NEXT: orrs r2, r4 +; THUMB2-NEXT: ldrh.w lr, [sp, #36] +; THUMB2-NEXT: ldrh.w r4, [sp, #20] +; THUMB2-NEXT: rsb.w r5, r12, #0 +; THUMB2-NEXT: and.w r3, r4, r5 +; THUMB2-NEXT: bic.w r5, lr, 
r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <4 x i16> @llvm.ct.select.v4i16(i1 %cond, <4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %sel +} + +define <2 x i32> @ct_v2i32(i1 %cond, <2 x i32> %a, <2 x i32> %b) { +; CT-LABEL: ct_v2i32: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d16, [sp] +; CT-NEXT: vmov d17, r2, r3 +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 d19, r1 +; CT-NEXT: vand d18, d17, d19 +; CT-NEXT: vbic d19, d16, d19 +; CT-NEXT: vorr d18, d18, d19 +; CT-NEXT: vmov r0, r1, d18 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v2i32: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: ldr r1, [sp, #8] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r0, r2, lr +; DEFAULT-NEXT: bic lr, r1, lr +; DEFAULT-NEXT: orr r0, r0, lr +; DEFAULT-NEXT: ldr r2, [sp, #12] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r1, r3, lr +; DEFAULT-NEXT: bic lr, r2, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: pop {r11, pc} +; +; THUMB1-LABEL: ct_v2i32: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r7, lr} +; THUMB1-NEXT: push {r4, r5, r7, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r1, [sp, #16] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ldr r2, [sp, #20] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r3 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: pop {r4, r5, r7, pc} +; +; THUMB2-LABEL: ct_v2i32: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r7, lr} +; THUMB2-NEXT: push {r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldr r1, [sp, #8] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; 
THUMB2-NEXT: and.w r0, r2, lr +; THUMB2-NEXT: bic.w lr, r1, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldr r2, [sp, #12] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r3, lr +; THUMB2-NEXT: bic.w lr, r2, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: pop {r7, pc} +entry: + %sel = call <2 x i32> @llvm.ct.select.v2i32(i1 %cond, <2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %sel +} + +define <1 x i64> @ct_v1i64(i1 %cond, <1 x i64> %a, <1 x i64> %b) { +; CT-LABEL: ct_v1i64: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d16, [sp] +; CT-NEXT: vmov d17, r2, r3 +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 d19, r1 +; CT-NEXT: vand d18, d17, d19 +; CT-NEXT: vbic d19, d16, d19 +; CT-NEXT: vorr d18, d18, d19 +; CT-NEXT: vmov r0, r1, d18 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v1i64: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: ldr r1, [sp, #8] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r0, r2, lr +; DEFAULT-NEXT: bic lr, r1, lr +; DEFAULT-NEXT: orr r0, r0, lr +; DEFAULT-NEXT: ldr r2, [sp, #12] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r1, r3, lr +; DEFAULT-NEXT: bic lr, r2, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: pop {r11, pc} +; +; THUMB1-LABEL: ct_v1i64: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r7, lr} +; THUMB1-NEXT: push {r4, r5, r7, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r1, [sp, #16] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ldr r2, [sp, #20] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r3 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: pop {r4, r5, r7, pc} +; +; 
THUMB2-LABEL: ct_v1i64: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r7, lr} +; THUMB2-NEXT: push {r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldr r1, [sp, #8] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r2, lr +; THUMB2-NEXT: bic.w lr, r1, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldr r2, [sp, #12] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r3, lr +; THUMB2-NEXT: bic.w lr, r2, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: pop {r7, pc} +entry: + %sel = call <1 x i64> @llvm.ct.select.v1i64(i1 %cond, <1 x i64> %a, <1 x i64> %b) + ret <1 x i64> %sel +} + +define <2 x float> @ct_v2f32(i1 %cond, <2 x float> %a, <2 x float> %b) { +; CT-LABEL: ct_v2f32: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d16, [sp] +; CT-NEXT: vmov d17, r2, r3 +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 d19, r1 +; CT-NEXT: vand d18, d17, d19 +; CT-NEXT: vbic d19, d16, d19 +; CT-NEXT: vorr d18, d18, d19 +; CT-NEXT: vmov r0, r1, d18 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v2f32: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: ldr r1, [sp, #8] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r0, r2, lr +; DEFAULT-NEXT: bic lr, r1, lr +; DEFAULT-NEXT: orr r0, r0, lr +; DEFAULT-NEXT: ldr r2, [sp, #12] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r1, r3, lr +; DEFAULT-NEXT: bic lr, r2, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: pop {r11, pc} +; +; THUMB1-LABEL: ct_v2f32: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r7, lr} +; THUMB1-NEXT: push {r4, r5, r7, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r1, [sp, #16] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ldr r2, [sp, #20] +; THUMB1-NEXT: mov r5, 
r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r3 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: pop {r4, r5, r7, pc} +; +; THUMB2-LABEL: ct_v2f32: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r7, lr} +; THUMB2-NEXT: push {r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldr r1, [sp, #8] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r2, lr +; THUMB2-NEXT: bic.w lr, r1, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldr r2, [sp, #12] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r3, lr +; THUMB2-NEXT: bic.w lr, r2, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: pop {r7, pc} +entry: + %sel = call <2 x float> @llvm.ct.select.v2f32(i1 %cond, <2 x float> %a, <2 x float> %b) + ret <2 x float> %sel +} + +define <16 x i8> @ct_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) { +; CT-LABEL: ct_v16i8: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d17, [sp] +; CT-NEXT: add r1, sp, #8 +; CT-NEXT: vmov d16, r2, r3 +; CT-NEXT: vld1.64 {d18, d19}, [r1] +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 q11, r1 +; CT-NEXT: vand q10, q8, q11 +; CT-NEXT: vbic q11, q9, q11 +; CT-NEXT: vorr q10, q10, q11 +; CT-NEXT: vmov r0, r1, d20 +; CT-NEXT: vmov r2, r3, d21 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v16i8: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r4, r5, r11, lr} +; DEFAULT-NEXT: and lr, r1, #1 +; DEFAULT-NEXT: ldrb r12, [sp, #132] +; DEFAULT-NEXT: ldrb r1, [sp, #68] +; DEFAULT-NEXT: rsb r5, lr, #0 +; DEFAULT-NEXT: and r4, r1, r5 +; DEFAULT-NEXT: bic r5, r12, r5 +; DEFAULT-NEXT: orr r4, r4, r5 +; DEFAULT-NEXT: strb r4, [r0, #15] +; DEFAULT-NEXT: ldrb r12, [sp, #128] +; DEFAULT-NEXT: ldrb r5, [sp, #64] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #14] +; DEFAULT-NEXT: ldrb r12, [sp, #124] +; 
DEFAULT-NEXT: ldrb r5, [sp, #60] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #13] +; DEFAULT-NEXT: ldrb r12, [sp, #120] +; DEFAULT-NEXT: ldrb r5, [sp, #56] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #12] +; DEFAULT-NEXT: ldrb r12, [sp, #116] +; DEFAULT-NEXT: ldrb r5, [sp, #52] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #11] +; DEFAULT-NEXT: ldrb r12, [sp, #112] +; DEFAULT-NEXT: ldrb r5, [sp, #48] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #10] +; DEFAULT-NEXT: ldrb r12, [sp, #108] +; DEFAULT-NEXT: ldrb r5, [sp, #44] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #9] +; DEFAULT-NEXT: ldrb r12, [sp, #104] +; DEFAULT-NEXT: ldrb r5, [sp, #40] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #8] +; DEFAULT-NEXT: ldrb r12, [sp, #100] +; DEFAULT-NEXT: ldrb r5, [sp, #36] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #7] +; DEFAULT-NEXT: ldrb r12, [sp, #96] +; DEFAULT-NEXT: ldrb r5, [sp, #32] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #6] +; DEFAULT-NEXT: ldrb r12, [sp, #92] +; DEFAULT-NEXT: ldrb r5, [sp, #28] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, 
r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #5] +; DEFAULT-NEXT: ldrb r12, [sp, #88] +; DEFAULT-NEXT: ldrb r5, [sp, #24] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #4] +; DEFAULT-NEXT: ldrb r12, [sp, #84] +; DEFAULT-NEXT: ldrb r5, [sp, #20] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #3] +; DEFAULT-NEXT: ldrb r12, [sp, #80] +; DEFAULT-NEXT: ldrb r5, [sp, #16] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strb r4, [r0, #2] +; DEFAULT-NEXT: ldrb r1, [sp, #76] +; DEFAULT-NEXT: rsb r4, lr, #0 +; DEFAULT-NEXT: and r5, r3, r4 +; DEFAULT-NEXT: bic r4, r1, r4 +; DEFAULT-NEXT: orr r5, r5, r4 +; DEFAULT-NEXT: strb r5, [r0, #1] +; DEFAULT-NEXT: ldrb r1, [sp, #72] +; DEFAULT-NEXT: rsb r5, lr, #0 +; DEFAULT-NEXT: and r3, r2, r5 +; DEFAULT-NEXT: bic r5, r1, r5 +; DEFAULT-NEXT: orr r3, r3, r5 +; DEFAULT-NEXT: strb r3, [r0] +; DEFAULT-NEXT: pop {r4, r5, r11, pc} +; +; THUMB1-LABEL: ct_v16i8: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #4 +; THUMB1-NEXT: sub sp, #4 +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r1 +; THUMB1-NEXT: ldr r1, [sp, #140] +; THUMB1-NEXT: ldr r5, [sp, #76] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #15] +; THUMB1-NEXT: ldr r1, [sp, #136] +; THUMB1-NEXT: ldr r5, [sp, #72] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; 
THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #14] +; THUMB1-NEXT: ldr r1, [sp, #132] +; THUMB1-NEXT: ldr r5, [sp, #68] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #13] +; THUMB1-NEXT: ldr r1, [sp, #128] +; THUMB1-NEXT: ldr r5, [sp, #64] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #12] +; THUMB1-NEXT: ldr r1, [sp, #124] +; THUMB1-NEXT: ldr r5, [sp, #60] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #11] +; THUMB1-NEXT: ldr r1, [sp, #120] +; THUMB1-NEXT: ldr r5, [sp, #56] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #10] +; THUMB1-NEXT: ldr r1, [sp, #116] +; THUMB1-NEXT: ldr r5, [sp, #52] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #9] +; THUMB1-NEXT: ldr r1, [sp, #112] +; THUMB1-NEXT: ldr r5, [sp, #48] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #8] +; THUMB1-NEXT: ldr r1, [sp, #108] +; 
THUMB1-NEXT: ldr r5, [sp, #44] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #7] +; THUMB1-NEXT: ldr r1, [sp, #104] +; THUMB1-NEXT: ldr r5, [sp, #40] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #6] +; THUMB1-NEXT: ldr r1, [sp, #100] +; THUMB1-NEXT: ldr r5, [sp, #36] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #5] +; THUMB1-NEXT: ldr r1, [sp, #96] +; THUMB1-NEXT: ldr r5, [sp, #32] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #4] +; THUMB1-NEXT: ldr r1, [sp, #92] +; THUMB1-NEXT: ldr r5, [sp, #28] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #3] +; THUMB1-NEXT: ldr r1, [sp, #88] +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strb r6, [r0, #2] +; THUMB1-NEXT: ldr r1, [sp, #84] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r5, r3 +; THUMB1-NEXT: eors r5, r1 +; THUMB1-NEXT: 
ands r5, r6 +; THUMB1-NEXT: eors r5, r1 +; THUMB1-NEXT: strb r5, [r0, #1] +; THUMB1-NEXT: ldr r1, [sp, #80] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r3, r2 +; THUMB1-NEXT: eors r3, r1 +; THUMB1-NEXT: ands r3, r5 +; THUMB1-NEXT: eors r3, r1 +; THUMB1-NEXT: strb r3, [r0] +; THUMB1-NEXT: add sp, #4 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v16i8: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; THUMB2-NEXT: push {r4, r5, r7, lr} +; THUMB2-NEXT: and lr, r1, #1 +; THUMB2-NEXT: ldrb.w r12, [sp, #132] +; THUMB2-NEXT: ldrb.w r1, [sp, #68] +; THUMB2-NEXT: rsb.w r5, lr, #0 +; THUMB2-NEXT: and.w r4, r1, r5 +; THUMB2-NEXT: bic.w r5, r12, r5 +; THUMB2-NEXT: orrs r4, r5 +; THUMB2-NEXT: strb r4, [r0, #15] +; THUMB2-NEXT: ldrb.w r12, [sp, #128] +; THUMB2-NEXT: ldrb.w r5, [sp, #64] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #14] +; THUMB2-NEXT: ldrb.w r12, [sp, #124] +; THUMB2-NEXT: ldrb.w r5, [sp, #60] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #13] +; THUMB2-NEXT: ldrb.w r12, [sp, #120] +; THUMB2-NEXT: ldrb.w r5, [sp, #56] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #12] +; THUMB2-NEXT: ldrb.w r12, [sp, #116] +; THUMB2-NEXT: ldrb.w r5, [sp, #52] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #11] +; THUMB2-NEXT: ldrb.w r12, [sp, #112] +; THUMB2-NEXT: ldrb.w r5, [sp, #48] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb 
r4, [r0, #10] +; THUMB2-NEXT: ldrb.w r12, [sp, #108] +; THUMB2-NEXT: ldrb.w r5, [sp, #44] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #9] +; THUMB2-NEXT: ldrb.w r12, [sp, #104] +; THUMB2-NEXT: ldrb.w r5, [sp, #40] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #8] +; THUMB2-NEXT: ldrb.w r12, [sp, #100] +; THUMB2-NEXT: ldrb.w r5, [sp, #36] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #7] +; THUMB2-NEXT: ldrb.w r12, [sp, #96] +; THUMB2-NEXT: ldrb.w r5, [sp, #32] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #6] +; THUMB2-NEXT: ldrb.w r12, [sp, #92] +; THUMB2-NEXT: ldrb.w r5, [sp, #28] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #5] +; THUMB2-NEXT: ldrb.w r12, [sp, #88] +; THUMB2-NEXT: ldrb.w r5, [sp, #24] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #4] +; THUMB2-NEXT: ldrb.w r12, [sp, #84] +; THUMB2-NEXT: ldrb.w r5, [sp, #20] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #3] +; THUMB2-NEXT: ldrb.w r12, [sp, #80] +; THUMB2-NEXT: ldrb.w r5, [sp, #16] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strb r4, [r0, #2] +; THUMB2-NEXT: ldrb.w r1, [sp, #76] +; THUMB2-NEXT: rsb.w r4, lr, #0 +; THUMB2-NEXT: 
and.w r5, r3, r4 +; THUMB2-NEXT: bic.w r4, r1, r4 +; THUMB2-NEXT: orrs r5, r4 +; THUMB2-NEXT: strb r5, [r0, #1] +; THUMB2-NEXT: ldrb.w r1, [sp, #72] +; THUMB2-NEXT: rsb.w r5, lr, #0 +; THUMB2-NEXT: and.w r3, r2, r5 +; THUMB2-NEXT: bic.w r5, r1, r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: strb r3, [r0] +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %sel +} + +define <8 x i16> @ct_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) { +; CT-LABEL: ct_v8i16: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d17, [sp] +; CT-NEXT: add r1, sp, #8 +; CT-NEXT: vmov d16, r2, r3 +; CT-NEXT: vld1.64 {d18, d19}, [r1] +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 q11, r1 +; CT-NEXT: vand q10, q8, q11 +; CT-NEXT: vbic q11, q9, q11 +; CT-NEXT: vorr q10, q10, q11 +; CT-NEXT: vmov r0, r1, d20 +; CT-NEXT: vmov r2, r3, d21 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v8i16: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r4, r5, r11, lr} +; DEFAULT-NEXT: and lr, r1, #1 +; DEFAULT-NEXT: ldrh r12, [sp, #68] +; DEFAULT-NEXT: ldrh r1, [sp, #36] +; DEFAULT-NEXT: rsb r5, lr, #0 +; DEFAULT-NEXT: and r4, r1, r5 +; DEFAULT-NEXT: bic r5, r12, r5 +; DEFAULT-NEXT: orr r4, r4, r5 +; DEFAULT-NEXT: strh r4, [r0, #14] +; DEFAULT-NEXT: ldrh r12, [sp, #64] +; DEFAULT-NEXT: ldrh r5, [sp, #32] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strh r4, [r0, #12] +; DEFAULT-NEXT: ldrh r12, [sp, #60] +; DEFAULT-NEXT: ldrh r5, [sp, #28] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strh r4, [r0, #10] +; DEFAULT-NEXT: ldrh r12, [sp, #56] +; DEFAULT-NEXT: ldrh r5, [sp, #24] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; 
DEFAULT-NEXT: strh r4, [r0, #8] +; DEFAULT-NEXT: ldrh r12, [sp, #52] +; DEFAULT-NEXT: ldrh r5, [sp, #20] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strh r4, [r0, #6] +; DEFAULT-NEXT: ldrh r12, [sp, #48] +; DEFAULT-NEXT: ldrh r5, [sp, #16] +; DEFAULT-NEXT: rsb r1, lr, #0 +; DEFAULT-NEXT: and r4, r5, r1 +; DEFAULT-NEXT: bic r1, r12, r1 +; DEFAULT-NEXT: orr r4, r4, r1 +; DEFAULT-NEXT: strh r4, [r0, #4] +; DEFAULT-NEXT: ldrh r1, [sp, #44] +; DEFAULT-NEXT: rsb r4, lr, #0 +; DEFAULT-NEXT: and r5, r3, r4 +; DEFAULT-NEXT: bic r4, r1, r4 +; DEFAULT-NEXT: orr r5, r5, r4 +; DEFAULT-NEXT: strh r5, [r0, #2] +; DEFAULT-NEXT: ldrh r1, [sp, #40] +; DEFAULT-NEXT: rsb r5, lr, #0 +; DEFAULT-NEXT: and r3, r2, r5 +; DEFAULT-NEXT: bic r5, r1, r5 +; DEFAULT-NEXT: orr r3, r3, r5 +; DEFAULT-NEXT: strh r3, [r0] +; DEFAULT-NEXT: pop {r4, r5, r11, pc} +; +; THUMB1-LABEL: ct_v8i16: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #4 +; THUMB1-NEXT: sub sp, #4 +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r1 +; THUMB1-NEXT: ldr r1, [sp, #76] +; THUMB1-NEXT: ldr r5, [sp, #44] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #14] +; THUMB1-NEXT: ldr r1, [sp, #72] +; THUMB1-NEXT: ldr r5, [sp, #40] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #12] +; THUMB1-NEXT: ldr r1, [sp, #68] +; THUMB1-NEXT: ldr r5, [sp, #36] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; 
THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #10] +; THUMB1-NEXT: ldr r1, [sp, #64] +; THUMB1-NEXT: ldr r5, [sp, #32] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #8] +; THUMB1-NEXT: ldr r1, [sp, #60] +; THUMB1-NEXT: ldr r5, [sp, #28] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #6] +; THUMB1-NEXT: ldr r1, [sp, #56] +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: ands r6, r7 +; THUMB1-NEXT: eors r6, r1 +; THUMB1-NEXT: strh r6, [r0, #4] +; THUMB1-NEXT: ldr r1, [sp, #52] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r5, r3 +; THUMB1-NEXT: eors r5, r1 +; THUMB1-NEXT: ands r5, r6 +; THUMB1-NEXT: eors r5, r1 +; THUMB1-NEXT: strh r5, [r0, #2] +; THUMB1-NEXT: ldr r1, [sp, #48] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r3, r2 +; THUMB1-NEXT: eors r3, r1 +; THUMB1-NEXT: ands r3, r5 +; THUMB1-NEXT: eors r3, r1 +; THUMB1-NEXT: strh r3, [r0] +; THUMB1-NEXT: add sp, #4 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v8i16: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; THUMB2-NEXT: push {r4, r5, r7, lr} +; THUMB2-NEXT: and lr, r1, #1 +; THUMB2-NEXT: ldrh.w r12, [sp, #68] +; THUMB2-NEXT: ldrh.w r1, [sp, #36] +; THUMB2-NEXT: rsb.w r5, lr, #0 +; THUMB2-NEXT: and.w r4, r1, r5 +; THUMB2-NEXT: bic.w r5, r12, r5 +; THUMB2-NEXT: 
orrs r4, r5 +; THUMB2-NEXT: strh r4, [r0, #14] +; THUMB2-NEXT: ldrh.w r12, [sp, #64] +; THUMB2-NEXT: ldrh.w r5, [sp, #32] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #12] +; THUMB2-NEXT: ldrh.w r12, [sp, #60] +; THUMB2-NEXT: ldrh.w r5, [sp, #28] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #10] +; THUMB2-NEXT: ldrh.w r12, [sp, #56] +; THUMB2-NEXT: ldrh.w r5, [sp, #24] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #8] +; THUMB2-NEXT: ldrh.w r12, [sp, #52] +; THUMB2-NEXT: ldrh.w r5, [sp, #20] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #6] +; THUMB2-NEXT: ldrh.w r12, [sp, #48] +; THUMB2-NEXT: ldrh.w r5, [sp, #16] +; THUMB2-NEXT: rsb.w r1, lr, #0 +; THUMB2-NEXT: and.w r4, r5, r1 +; THUMB2-NEXT: bic.w r1, r12, r1 +; THUMB2-NEXT: orrs r4, r1 +; THUMB2-NEXT: strh r4, [r0, #4] +; THUMB2-NEXT: ldrh.w r1, [sp, #44] +; THUMB2-NEXT: rsb.w r4, lr, #0 +; THUMB2-NEXT: and.w r5, r3, r4 +; THUMB2-NEXT: bic.w r4, r1, r4 +; THUMB2-NEXT: orrs r5, r4 +; THUMB2-NEXT: strh r5, [r0, #2] +; THUMB2-NEXT: ldrh.w r1, [sp, #40] +; THUMB2-NEXT: rsb.w r5, lr, #0 +; THUMB2-NEXT: and.w r3, r2, r5 +; THUMB2-NEXT: bic.w r5, r1, r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: strh r3, [r0] +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <8 x i16> @llvm.ct.select.v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %sel +} + +define <4 x i32> @ct_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) { +; CT-LABEL: ct_v4i32: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d17, [sp] +; CT-NEXT: add r1, sp, #8 +; CT-NEXT: vmov d16, r2, r3 +; CT-NEXT: vld1.64 
{d18, d19}, [r1] +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 q11, r1 +; CT-NEXT: vand q10, q8, q11 +; CT-NEXT: vbic q11, q9, q11 +; CT-NEXT: vorr q10, q10, q11 +; CT-NEXT: vmov r0, r1, d20 +; CT-NEXT: vmov r2, r3, d21 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v4i32: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r4, r5, r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: ldr r1, [sp, #24] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r0, r2, lr +; DEFAULT-NEXT: bic lr, r1, lr +; DEFAULT-NEXT: orr r0, r0, lr +; DEFAULT-NEXT: ldr r2, [sp, #28] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r1, r3, lr +; DEFAULT-NEXT: bic lr, r2, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: ldr r3, [sp, #16] +; DEFAULT-NEXT: ldr lr, [sp, #32] +; DEFAULT-NEXT: rsb r4, r12, #0 +; DEFAULT-NEXT: and r2, r3, r4 +; DEFAULT-NEXT: bic r4, lr, r4 +; DEFAULT-NEXT: orr r2, r2, r4 +; DEFAULT-NEXT: ldr lr, [sp, #36] +; DEFAULT-NEXT: ldr r4, [sp, #20] +; DEFAULT-NEXT: rsb r5, r12, #0 +; DEFAULT-NEXT: and r3, r4, r5 +; DEFAULT-NEXT: bic r5, lr, r5 +; DEFAULT-NEXT: orr r3, r3, r5 +; DEFAULT-NEXT: pop {r4, r5, r11, pc} +; +; THUMB1-LABEL: ct_v4i32: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #4 +; THUMB1-NEXT: sub sp, #4 +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r1, [sp, #32] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ldr r2, [sp, #36] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r3 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ldr r3, [sp, #40] +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r6, r4 +; 
THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r2, r5 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ands r2, r6 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ldr r5, [sp, #44] +; THUMB1-NEXT: ldr r6, [sp, #28] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r3, r6 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: ands r3, r7 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: add sp, #4 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v4i32: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; THUMB2-NEXT: push {r4, r5, r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldr r1, [sp, #24] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r2, lr +; THUMB2-NEXT: bic.w lr, r1, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldr r2, [sp, #28] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r3, lr +; THUMB2-NEXT: bic.w lr, r2, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: ldr r3, [sp, #16] +; THUMB2-NEXT: ldr.w lr, [sp, #32] +; THUMB2-NEXT: rsb.w r4, r12, #0 +; THUMB2-NEXT: and.w r2, r3, r4 +; THUMB2-NEXT: bic.w r4, lr, r4 +; THUMB2-NEXT: orrs r2, r4 +; THUMB2-NEXT: ldr.w lr, [sp, #36] +; THUMB2-NEXT: ldr r4, [sp, #20] +; THUMB2-NEXT: rsb.w r5, r12, #0 +; THUMB2-NEXT: and.w r3, r4, r5 +; THUMB2-NEXT: bic.w r5, lr, r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %sel +} + +define <2 x i64> @ct_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) { +; CT-LABEL: ct_v2i64: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d17, [sp] +; CT-NEXT: add r1, sp, #8 +; CT-NEXT: vmov d16, r2, r3 +; CT-NEXT: vld1.64 {d18, d19}, [r1] +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 q11, r1 +; CT-NEXT: vand q10, q8, q11 +; CT-NEXT: vbic q11, q9, q11 +; CT-NEXT: vorr q10, q10, q11 +; CT-NEXT: vmov r0, 
r1, d20 +; CT-NEXT: vmov r2, r3, d21 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v2i64: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r4, r5, r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: ldr r1, [sp, #24] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r0, r2, lr +; DEFAULT-NEXT: bic lr, r1, lr +; DEFAULT-NEXT: orr r0, r0, lr +; DEFAULT-NEXT: ldr r2, [sp, #28] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r1, r3, lr +; DEFAULT-NEXT: bic lr, r2, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: ldr r3, [sp, #16] +; DEFAULT-NEXT: ldr lr, [sp, #32] +; DEFAULT-NEXT: rsb r4, r12, #0 +; DEFAULT-NEXT: and r2, r3, r4 +; DEFAULT-NEXT: bic r4, lr, r4 +; DEFAULT-NEXT: orr r2, r2, r4 +; DEFAULT-NEXT: ldr lr, [sp, #36] +; DEFAULT-NEXT: ldr r4, [sp, #20] +; DEFAULT-NEXT: rsb r5, r12, #0 +; DEFAULT-NEXT: and r3, r4, r5 +; DEFAULT-NEXT: bic r5, lr, r5 +; DEFAULT-NEXT: orr r3, r3, r5 +; DEFAULT-NEXT: pop {r4, r5, r11, pc} +; +; THUMB1-LABEL: ct_v2i64: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #4 +; THUMB1-NEXT: sub sp, #4 +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r1, [sp, #32] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ldr r2, [sp, #36] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r3 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ldr r3, [sp, #40] +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r2, r5 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ands r2, r6 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ldr r5, [sp, #44] +; 
THUMB1-NEXT: ldr r6, [sp, #28] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r3, r6 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: ands r3, r7 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: add sp, #4 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v2i64: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; THUMB2-NEXT: push {r4, r5, r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldr r1, [sp, #24] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r2, lr +; THUMB2-NEXT: bic.w lr, r1, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldr r2, [sp, #28] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r3, lr +; THUMB2-NEXT: bic.w lr, r2, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: ldr r3, [sp, #16] +; THUMB2-NEXT: ldr.w lr, [sp, #32] +; THUMB2-NEXT: rsb.w r4, r12, #0 +; THUMB2-NEXT: and.w r2, r3, r4 +; THUMB2-NEXT: bic.w r4, lr, r4 +; THUMB2-NEXT: orrs r2, r4 +; THUMB2-NEXT: ldr.w lr, [sp, #36] +; THUMB2-NEXT: ldr r4, [sp, #20] +; THUMB2-NEXT: rsb.w r5, r12, #0 +; THUMB2-NEXT: and.w r3, r4, r5 +; THUMB2-NEXT: bic.w r5, lr, r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <2 x i64> @llvm.ct.select.v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %sel +} + +define <4 x float> @ct_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) { +; CT-LABEL: ct_v4f32: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d17, [sp] +; CT-NEXT: add r1, sp, #8 +; CT-NEXT: vmov d16, r2, r3 +; CT-NEXT: vld1.64 {d18, d19}, [r1] +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 q11, r1 +; CT-NEXT: vand q10, q8, q11 +; CT-NEXT: vbic q11, q9, q11 +; CT-NEXT: vorr q10, q10, q11 +; CT-NEXT: vmov r0, r1, d20 +; CT-NEXT: vmov r2, r3, d21 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v4f32: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r4, r5, r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: 
ldr r1, [sp, #24] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r0, r2, lr +; DEFAULT-NEXT: bic lr, r1, lr +; DEFAULT-NEXT: orr r0, r0, lr +; DEFAULT-NEXT: ldr r2, [sp, #28] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r1, r3, lr +; DEFAULT-NEXT: bic lr, r2, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: ldr r3, [sp, #16] +; DEFAULT-NEXT: ldr lr, [sp, #32] +; DEFAULT-NEXT: rsb r4, r12, #0 +; DEFAULT-NEXT: and r2, r3, r4 +; DEFAULT-NEXT: bic r4, lr, r4 +; DEFAULT-NEXT: orr r2, r2, r4 +; DEFAULT-NEXT: ldr lr, [sp, #36] +; DEFAULT-NEXT: ldr r4, [sp, #20] +; DEFAULT-NEXT: rsb r5, r12, #0 +; DEFAULT-NEXT: and r3, r4, r5 +; DEFAULT-NEXT: bic r5, lr, r5 +; DEFAULT-NEXT: orr r3, r3, r5 +; DEFAULT-NEXT: pop {r4, r5, r11, pc} +; +; THUMB1-LABEL: ct_v4f32: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #4 +; THUMB1-NEXT: sub sp, #4 +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r1, [sp, #32] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ldr r2, [sp, #36] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r3 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ldr r3, [sp, #40] +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r2, r5 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ands r2, r6 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ldr r5, [sp, #44] +; THUMB1-NEXT: ldr r6, [sp, #28] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r3, r6 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: ands r3, r7 +; 
THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: add sp, #4 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v4f32: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; THUMB2-NEXT: push {r4, r5, r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldr r1, [sp, #24] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r2, lr +; THUMB2-NEXT: bic.w lr, r1, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldr r2, [sp, #28] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r3, lr +; THUMB2-NEXT: bic.w lr, r2, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: ldr r3, [sp, #16] +; THUMB2-NEXT: ldr.w lr, [sp, #32] +; THUMB2-NEXT: rsb.w r4, r12, #0 +; THUMB2-NEXT: and.w r2, r3, r4 +; THUMB2-NEXT: bic.w r4, lr, r4 +; THUMB2-NEXT: orrs r2, r4 +; THUMB2-NEXT: ldr.w lr, [sp, #36] +; THUMB2-NEXT: ldr r4, [sp, #20] +; THUMB2-NEXT: rsb.w r5, r12, #0 +; THUMB2-NEXT: and.w r3, r4, r5 +; THUMB2-NEXT: bic.w r5, lr, r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) + ret <4 x float> %sel +} + +define <2 x double> @ct_v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) { +; CT-LABEL: ct_v2f64: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d17, [sp] +; CT-NEXT: add r1, sp, #8 +; CT-NEXT: vmov d16, r2, r3 +; CT-NEXT: vld1.64 {d18, d19}, [r1] +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 q11, r1 +; CT-NEXT: vand q10, q8, q11 +; CT-NEXT: vbic q11, q9, q11 +; CT-NEXT: vorr q10, q10, q11 +; CT-NEXT: vmov r0, r1, d20 +; CT-NEXT: vmov r2, r3, d21 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v2f64: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r4, r5, r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: ldr r1, [sp, #24] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r0, r2, lr +; DEFAULT-NEXT: bic lr, r1, lr +; DEFAULT-NEXT: orr r0, r0, lr +; DEFAULT-NEXT: ldr r2, [sp, #28] +; DEFAULT-NEXT: 
rsb lr, r12, #0 +; DEFAULT-NEXT: and r1, r3, lr +; DEFAULT-NEXT: bic lr, r2, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: ldr r3, [sp, #16] +; DEFAULT-NEXT: ldr lr, [sp, #32] +; DEFAULT-NEXT: rsb r4, r12, #0 +; DEFAULT-NEXT: and r2, r3, r4 +; DEFAULT-NEXT: bic r4, lr, r4 +; DEFAULT-NEXT: orr r2, r2, r4 +; DEFAULT-NEXT: ldr lr, [sp, #36] +; DEFAULT-NEXT: ldr r4, [sp, #20] +; DEFAULT-NEXT: rsb r5, r12, #0 +; DEFAULT-NEXT: and r3, r4, r5 +; DEFAULT-NEXT: bic r5, lr, r5 +; DEFAULT-NEXT: orr r3, r3, r5 +; DEFAULT-NEXT: pop {r4, r5, r11, pc} +; +; THUMB1-LABEL: ct_v2f64: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #4 +; THUMB1-NEXT: sub sp, #4 +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r1, [sp, #32] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ldr r2, [sp, #36] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r3 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ldr r3, [sp, #40] +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r2, r5 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ands r2, r6 +; THUMB1-NEXT: eors r2, r3 +; THUMB1-NEXT: ldr r5, [sp, #44] +; THUMB1-NEXT: ldr r6, [sp, #28] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r3, r6 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: ands r3, r7 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: add sp, #4 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v2f64: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; 
THUMB2-NEXT: push {r4, r5, r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldr r1, [sp, #24] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r2, lr +; THUMB2-NEXT: bic.w lr, r1, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldr r2, [sp, #28] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r3, lr +; THUMB2-NEXT: bic.w lr, r2, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: ldr r3, [sp, #16] +; THUMB2-NEXT: ldr.w lr, [sp, #32] +; THUMB2-NEXT: rsb.w r4, r12, #0 +; THUMB2-NEXT: and.w r2, r3, r4 +; THUMB2-NEXT: bic.w r4, lr, r4 +; THUMB2-NEXT: orrs r2, r4 +; THUMB2-NEXT: ldr.w lr, [sp, #36] +; THUMB2-NEXT: ldr r4, [sp, #20] +; THUMB2-NEXT: rsb.w r5, r12, #0 +; THUMB2-NEXT: and.w r3, r4, r5 +; THUMB2-NEXT: bic.w r5, lr, r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <2 x double> @llvm.ct.select.v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) + ret <2 x double> %sel +} + +; +; itty bitty vector type edge cases follow. these should be scalarised. 
+; +define <1 x i8> @ct_v1i8(i1 %cond, <1 x i8> %a, <1 x i8> %b) { +; CT-LABEL: ct_v1i8: +; CT: @ %bb.0: @ %entry +; CT-NEXT: and r3, r0, #1 +; CT-NEXT: rsb r12, r3, #0 +; CT-NEXT: and r0, r1, r12 +; CT-NEXT: bic r12, r2, r12 +; CT-NEXT: orr r0, r0, r12 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v1i8: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: and r3, r0, #1 +; DEFAULT-NEXT: rsb r12, r3, #0 +; DEFAULT-NEXT: and r0, r1, r12 +; DEFAULT-NEXT: bic r12, r2, r12 +; DEFAULT-NEXT: orr r0, r0, r12 +; DEFAULT-NEXT: bx lr +; +; THUMB1-LABEL: ct_v1i8: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, lr} +; THUMB1-NEXT: push {r4, lr} +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: ands r3, r0 +; THUMB1-NEXT: mov r4, r3 +; THUMB1-NEXT: lsls r4, r4, #31 +; THUMB1-NEXT: asrs r4, r4, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: ands r0, r4 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: pop {r4, pc} +; +; THUMB2-LABEL: ct_v1i8: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: and r3, r0, #1 +; THUMB2-NEXT: rsb.w r12, r3, #0 +; THUMB2-NEXT: and.w r0, r1, r12 +; THUMB2-NEXT: bic.w r12, r2, r12 +; THUMB2-NEXT: orr.w r0, r0, r12 +; THUMB2-NEXT: bx lr +entry: + %sel = call <1 x i8> @llvm.ct.select.v1i8(i1 %cond, <1 x i8> %a, <1 x i8> %b) + ret <1 x i8> %sel +} + +define <2 x i8> @ct_v2i8(i1 %cond, <2 x i8> %a, <2 x i8> %b) { +; CT-LABEL: ct_v2i8: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d16, [sp] +; CT-NEXT: vmov d17, r2, r3 +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 d19, r1 +; CT-NEXT: vand d18, d17, d19 +; CT-NEXT: vbic d19, d16, d19 +; CT-NEXT: vorr d18, d18, d19 +; CT-NEXT: vmov r0, r1, d18 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v2i8: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r0, r1, lr +; DEFAULT-NEXT: bic lr, r3, lr +; DEFAULT-NEXT: orr r0, r0, lr +; DEFAULT-NEXT: ldrb r3, [sp, #8] +; DEFAULT-NEXT: rsb lr, r12, #0 
+; DEFAULT-NEXT: and r1, r2, lr +; DEFAULT-NEXT: bic lr, r3, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: pop {r11, pc} +; +; THUMB1-LABEL: ct_v2i8: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r7, lr} +; THUMB1-NEXT: push {r4, r5, r7, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r3 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r3 +; THUMB1-NEXT: ldr r3, [sp, #16] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r2 +; THUMB1-NEXT: eors r1, r3 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r3 +; THUMB1-NEXT: pop {r4, r5, r7, pc} +; +; THUMB2-LABEL: ct_v2i8: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r7, lr} +; THUMB2-NEXT: push {r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r1, lr +; THUMB2-NEXT: bic.w lr, r3, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldrb.w r3, [sp, #8] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r2, lr +; THUMB2-NEXT: bic.w lr, r3, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: pop {r7, pc} +entry: + %sel = call <2 x i8> @llvm.ct.select.v2i8(i1 %cond, <2 x i8> %a, <2 x i8> %b) + ret <2 x i8> %sel +} + +define <4 x i8> @ct_v4i8(i1 %cond, <4 x i8> %a, <4 x i8> %b) { +; CT-LABEL: ct_v4i8: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d16, [sp] +; CT-NEXT: vmov d17, r2, r3 +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 d19, r1 +; CT-NEXT: vand d18, d17, d19 +; CT-NEXT: vbic d19, d16, d19 +; CT-NEXT: vorr d18, d18, d19 +; CT-NEXT: vmov r0, r1, d18 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v4i8: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r4, r5, r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: ldrb lr, [sp, #20] +; DEFAULT-NEXT: rsb r4, r12, #0 +; DEFAULT-NEXT: 
and r0, r1, r4 +; DEFAULT-NEXT: bic r4, lr, r4 +; DEFAULT-NEXT: orr r0, r0, r4 +; DEFAULT-NEXT: ldrb r4, [sp, #24] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r1, r2, lr +; DEFAULT-NEXT: bic lr, r4, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: ldrb r4, [sp, #28] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r2, r3, lr +; DEFAULT-NEXT: bic lr, r4, lr +; DEFAULT-NEXT: orr r2, r2, lr +; DEFAULT-NEXT: ldrb lr, [sp, #32] +; DEFAULT-NEXT: ldrb r4, [sp, #16] +; DEFAULT-NEXT: rsb r5, r12, #0 +; DEFAULT-NEXT: and r3, r4, r5 +; DEFAULT-NEXT: bic r5, lr, r5 +; DEFAULT-NEXT: orr r3, r3, r5 +; DEFAULT-NEXT: pop {r4, r5, r11, pc} +; +; THUMB1-LABEL: ct_v4i8: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r5, [sp, #24] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r5 +; THUMB1-NEXT: ands r0, r6 +; THUMB1-NEXT: eors r0, r5 +; THUMB1-NEXT: ldr r5, [sp, #28] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r1, r2 +; THUMB1-NEXT: eors r1, r5 +; THUMB1-NEXT: ands r1, r6 +; THUMB1-NEXT: eors r1, r5 +; THUMB1-NEXT: ldr r5, [sp, #32] +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: lsls r6, r6, #31 +; THUMB1-NEXT: asrs r6, r6, #31 +; THUMB1-NEXT: mov r2, r3 +; THUMB1-NEXT: eors r2, r5 +; THUMB1-NEXT: ands r2, r6 +; THUMB1-NEXT: eors r2, r5 +; THUMB1-NEXT: ldr r5, [sp, #36] +; THUMB1-NEXT: ldr r6, [sp, #20] +; THUMB1-NEXT: mov r7, r4 +; THUMB1-NEXT: lsls r7, r7, #31 +; THUMB1-NEXT: asrs r7, r7, #31 +; THUMB1-NEXT: mov r3, r6 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: ands r3, r7 +; THUMB1-NEXT: eors r3, r5 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ct_v4i8: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r4, r5, r7, lr} +; THUMB2-NEXT: push {r4, 
r5, r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldrb.w lr, [sp, #20] +; THUMB2-NEXT: rsb.w r4, r12, #0 +; THUMB2-NEXT: and.w r0, r1, r4 +; THUMB2-NEXT: bic.w r4, lr, r4 +; THUMB2-NEXT: orrs r0, r4 +; THUMB2-NEXT: ldrb.w r4, [sp, #24] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r2, lr +; THUMB2-NEXT: bic.w lr, r4, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: ldrb.w r4, [sp, #28] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r2, r3, lr +; THUMB2-NEXT: bic.w lr, r4, lr +; THUMB2-NEXT: orr.w r2, r2, lr +; THUMB2-NEXT: ldrb.w lr, [sp, #32] +; THUMB2-NEXT: ldrb.w r4, [sp, #16] +; THUMB2-NEXT: rsb.w r5, r12, #0 +; THUMB2-NEXT: and.w r3, r4, r5 +; THUMB2-NEXT: bic.w r5, lr, r5 +; THUMB2-NEXT: orrs r3, r5 +; THUMB2-NEXT: pop {r4, r5, r7, pc} +entry: + %sel = call <4 x i8> @llvm.ct.select.v4i8(i1 %cond, <4 x i8> %a, <4 x i8> %b) + ret <4 x i8> %sel +} + +define <1 x i16> @ct_v1i16(i1 %cond, <1 x i16> %a, <1 x i16> %b) { +; CT-LABEL: ct_v1i16: +; CT: @ %bb.0: @ %entry +; CT-NEXT: and r3, r0, #1 +; CT-NEXT: rsb r12, r3, #0 +; CT-NEXT: and r0, r1, r12 +; CT-NEXT: bic r12, r2, r12 +; CT-NEXT: orr r0, r0, r12 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v1i16: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: and r3, r0, #1 +; DEFAULT-NEXT: rsb r12, r3, #0 +; DEFAULT-NEXT: and r0, r1, r12 +; DEFAULT-NEXT: bic r12, r2, r12 +; DEFAULT-NEXT: orr r0, r0, r12 +; DEFAULT-NEXT: bx lr +; +; THUMB1-LABEL: ct_v1i16: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, lr} +; THUMB1-NEXT: push {r4, lr} +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: ands r3, r0 +; THUMB1-NEXT: mov r4, r3 +; THUMB1-NEXT: lsls r4, r4, #31 +; THUMB1-NEXT: asrs r4, r4, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: ands r0, r4 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: pop {r4, pc} +; +; THUMB2-LABEL: ct_v1i16: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: and r3, r0, #1 +; THUMB2-NEXT: rsb.w r12, r3, #0 +; THUMB2-NEXT: and.w r0, r1, r12 +;
THUMB2-NEXT: bic.w r12, r2, r12 +; THUMB2-NEXT: orr.w r0, r0, r12 +; THUMB2-NEXT: bx lr +entry: + %sel = call <1 x i16> @llvm.ct.select.v1i16(i1 %cond, <1 x i16> %a, <1 x i16> %b) + ret <1 x i16> %sel +} + +define <2 x i16> @ct_v2i16(i1 %cond, <2 x i16> %a, <2 x i16> %b) { +; CT-LABEL: ct_v2i16: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d16, [sp] +; CT-NEXT: vmov d17, r2, r3 +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 d19, r1 +; CT-NEXT: vand d18, d17, d19 +; CT-NEXT: vbic d19, d16, d19 +; CT-NEXT: vorr d18, d18, d19 +; CT-NEXT: vmov r0, r1, d18 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v2i16: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r0, r1, lr +; DEFAULT-NEXT: bic lr, r3, lr +; DEFAULT-NEXT: orr r0, r0, lr +; DEFAULT-NEXT: ldrh r3, [sp, #8] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r1, r2, lr +; DEFAULT-NEXT: bic lr, r3, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: pop {r11, pc} +; +; THUMB1-LABEL: ct_v2i16: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r7, lr} +; THUMB1-NEXT: push {r4, r5, r7, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r3 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r3 +; THUMB1-NEXT: ldr r3, [sp, #16] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r2 +; THUMB1-NEXT: eors r1, r3 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r3 +; THUMB1-NEXT: pop {r4, r5, r7, pc} +; +; THUMB2-LABEL: ct_v2i16: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r7, lr} +; THUMB2-NEXT: push {r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r1, lr +; THUMB2-NEXT: bic.w lr, r3, lr +; THUMB2-NEXT: orr.w r0,
r0, lr +; THUMB2-NEXT: ldrh.w r3, [sp, #8] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r2, lr +; THUMB2-NEXT: bic.w lr, r3, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: pop {r7, pc} +entry: + %sel = call <2 x i16> @llvm.ct.select.v2i16(i1 %cond, <2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %sel +} + +define <1 x i32> @ct_v1i32(i1 %cond, <1 x i32> %a, <1 x i32> %b) { +; CT-LABEL: ct_v1i32: +; CT: @ %bb.0: @ %entry +; CT-NEXT: and r3, r0, #1 +; CT-NEXT: rsb r12, r3, #0 +; CT-NEXT: and r0, r1, r12 +; CT-NEXT: bic r12, r2, r12 +; CT-NEXT: orr r0, r0, r12 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v1i32: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: and r3, r0, #1 +; DEFAULT-NEXT: rsb r12, r3, #0 +; DEFAULT-NEXT: and r0, r1, r12 +; DEFAULT-NEXT: bic r12, r2, r12 +; DEFAULT-NEXT: orr r0, r0, r12 +; DEFAULT-NEXT: bx lr +; +; THUMB1-LABEL: ct_v1i32: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, lr} +; THUMB1-NEXT: push {r4, lr} +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: ands r3, r0 +; THUMB1-NEXT: mov r4, r3 +; THUMB1-NEXT: lsls r4, r4, #31 +; THUMB1-NEXT: asrs r4, r4, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: ands r0, r4 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: pop {r4, pc} +; +; THUMB2-LABEL: ct_v1i32: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: and r3, r0, #1 +; THUMB2-NEXT: rsb.w r12, r3, #0 +; THUMB2-NEXT: and.w r0, r1, r12 +; THUMB2-NEXT: bic.w r12, r2, r12 +; THUMB2-NEXT: orr.w r0, r0, r12 +; THUMB2-NEXT: bx lr +entry: + %sel = call <1 x i32> @llvm.ct.select.v1i32(i1 %cond, <1 x i32> %a, <1 x i32> %b) + ret <1 x i32> %sel +} + +define <1 x float> @ct_v1f32(i1 %cond, <1 x float> %a, <1 x float> %b) { +; CT-LABEL: ct_v1f32: +; CT: @ %bb.0: @ %entry +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: vmov s0, r2 +; CT-NEXT: vmov s2, r1 +; CT-NEXT: vmov r2, s2 +; CT-NEXT: vmov r3, s0 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: and r2, r2, r1 +; CT-NEXT: bic r1, r3, r1 +; CT-NEXT: orr r2, r2, r1 +; CT-NEXT: vmov s4, r2 +;
CT-NEXT: vmov r0, s4 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_v1f32: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: and r3, r0, #1 +; DEFAULT-NEXT: rsb r12, r3, #0 +; DEFAULT-NEXT: and r0, r1, r12 +; DEFAULT-NEXT: bic r12, r2, r12 +; DEFAULT-NEXT: orr r0, r0, r12 +; DEFAULT-NEXT: bx lr +; +; THUMB1-LABEL: ct_v1f32: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, lr} +; THUMB1-NEXT: push {r4, lr} +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: ands r3, r0 +; THUMB1-NEXT: mov r4, r3 +; THUMB1-NEXT: lsls r4, r4, #31 +; THUMB1-NEXT: asrs r4, r4, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: ands r0, r4 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: pop {r4, pc} +; +; THUMB2-LABEL: ct_v1f32: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: and r3, r0, #1 +; THUMB2-NEXT: rsb.w r12, r3, #0 +; THUMB2-NEXT: and.w r0, r1, r12 +; THUMB2-NEXT: bic.w r12, r2, r12 +; THUMB2-NEXT: orr.w r0, r0, r12 +; THUMB2-NEXT: bx lr +entry: + %sel = call <1 x float> @llvm.ct.select.v1f32(i1 %cond, <1 x float> %a, <1 x float> %b) + ret <1 x float> %sel +} diff --git a/llvm/test/CodeGen/ARM/ctselect.ll b/llvm/test/CodeGen/ARM/ctselect.ll new file mode 100644 index 0000000000000..7e64c90a2a9b1 --- /dev/null +++ b/llvm/test/CodeGen/ARM/ctselect.ll @@ -0,0 +1,555 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=armv7-none-eabi -verify-machineinstrs | FileCheck --check-prefixes=CT %s +; RUN: llc < %s -mtriple=armv6 -verify-machineinstrs | FileCheck --check-prefix=DEFAULT %s +; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs | FileCheck --check-prefix=THUMB1 %s +; RUN: llc < %s -mtriple=thumbv7m-none-eabi -verify-machineinstrs | FileCheck --check-prefix=THUMB2 %s +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabihf -mcpu=cortex-a9 -verify-machineinstrs | FileCheck --check-prefix=CORTEXA9 %s +; RUN: llc < %s -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 -verify-machineinstrs |
FileCheck --check-prefix=CORTEX-NOTHUMB %s + +define i1 @ct_i1(i1 %cond, i1 %a, i1 %b) { +; CT-LABEL: ct_i1: +; CT: @ %bb.0: @ %entry +; CT-NEXT: and r3, r0, #1 +; CT-NEXT: rsb r12, r3, #0 +; CT-NEXT: and r0, r1, r12 +; CT-NEXT: bic r12, r2, r12 +; CT-NEXT: orr r0, r0, r12 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_i1: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: and r3, r0, #1 +; DEFAULT-NEXT: rsb r12, r3, #0 +; DEFAULT-NEXT: and r0, r1, r12 +; DEFAULT-NEXT: bic r12, r2, r12 +; DEFAULT-NEXT: orr r0, r0, r12 +; DEFAULT-NEXT: bx lr +; +; THUMB1-LABEL: ct_i1: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, lr} +; THUMB1-NEXT: push {r4, lr} +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: ands r3, r0 +; THUMB1-NEXT: mov r4, r3 +; THUMB1-NEXT: lsls r4, r4, #31 +; THUMB1-NEXT: asrs r4, r4, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: ands r0, r4 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: pop {r4, pc} +; +; THUMB2-LABEL: ct_i1: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: and r3, r0, #1 +; THUMB2-NEXT: rsb.w r12, r3, #0 +; THUMB2-NEXT: and.w r0, r1, r12 +; THUMB2-NEXT: bic.w r12, r2, r12 +; THUMB2-NEXT: orr.w r0, r0, r12 +; THUMB2-NEXT: bx lr +; +; CORTEXA9-LABEL: ct_i1: +; CORTEXA9: @ %bb.0: @ %entry +; CORTEXA9-NEXT: and r3, r0, #1 +; CORTEXA9-NEXT: rsb.w r12, r3, #0 +; CORTEXA9-NEXT: and.w r0, r1, r12 +; CORTEXA9-NEXT: bic.w r12, r2, r12 +; CORTEXA9-NEXT: orr.w r0, r0, r12 +; CORTEXA9-NEXT: bx lr +; +; CORTEX-NOTHUMB-LABEL: ct_i1: +; CORTEX-NOTHUMB: @ %bb.0: @ %entry +; CORTEX-NOTHUMB-NEXT: and r3, r0, #1 +; CORTEX-NOTHUMB-NEXT: rsb r12, r3, #0 +; CORTEX-NOTHUMB-NEXT: and r0, r1, r12 +; CORTEX-NOTHUMB-NEXT: bic r12, r2, r12 +; CORTEX-NOTHUMB-NEXT: orr r0, r0, r12 +; CORTEX-NOTHUMB-NEXT: bx lr +entry: + %sel = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b) + ret i1 %sel +} + +define i8 @ct_int8(i1 %cond, i8 %a, i8 %b) { +; CT-LABEL: ct_int8: +; CT: @ %bb.0: @ %entry +; CT-NEXT: and r3, r0, #1 +; CT-NEXT: rsb r12, r3, #0 +; 
CT-NEXT: and r0, r1, r12 +; CT-NEXT: bic r12, r2, r12 +; CT-NEXT: orr r0, r0, r12 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_int8: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: and r3, r0, #1 +; DEFAULT-NEXT: rsb r12, r3, #0 +; DEFAULT-NEXT: and r0, r1, r12 +; DEFAULT-NEXT: bic r12, r2, r12 +; DEFAULT-NEXT: orr r0, r0, r12 +; DEFAULT-NEXT: bx lr +; +; THUMB1-LABEL: ct_int8: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, lr} +; THUMB1-NEXT: push {r4, lr} +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: ands r3, r0 +; THUMB1-NEXT: mov r4, r3 +; THUMB1-NEXT: lsls r4, r4, #31 +; THUMB1-NEXT: asrs r4, r4, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: ands r0, r4 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: pop {r4, pc} +; +; THUMB2-LABEL: ct_int8: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: and r3, r0, #1 +; THUMB2-NEXT: rsb.w r12, r3, #0 +; THUMB2-NEXT: and.w r0, r1, r12 +; THUMB2-NEXT: bic.w r12, r2, r12 +; THUMB2-NEXT: orr.w r0, r0, r12 +; THUMB2-NEXT: bx lr +; +; CORTEXA9-LABEL: ct_int8: +; CORTEXA9: @ %bb.0: @ %entry +; CORTEXA9-NEXT: and r3, r0, #1 +; CORTEXA9-NEXT: rsb.w r12, r3, #0 +; CORTEXA9-NEXT: and.w r0, r1, r12 +; CORTEXA9-NEXT: bic.w r12, r2, r12 +; CORTEXA9-NEXT: orr.w r0, r0, r12 +; CORTEXA9-NEXT: bx lr +; +; CORTEX-NOTHUMB-LABEL: ct_int8: +; CORTEX-NOTHUMB: @ %bb.0: @ %entry +; CORTEX-NOTHUMB-NEXT: and r3, r0, #1 +; CORTEX-NOTHUMB-NEXT: rsb r12, r3, #0 +; CORTEX-NOTHUMB-NEXT: and r0, r1, r12 +; CORTEX-NOTHUMB-NEXT: bic r12, r2, r12 +; CORTEX-NOTHUMB-NEXT: orr r0, r0, r12 +; CORTEX-NOTHUMB-NEXT: bx lr +entry: + %sel = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b) + ret i8 %sel +} + +define i16 @ct_int16(i1 %cond, i16 %a, i16 %b) { +; CT-LABEL: ct_int16: +; CT: @ %bb.0: @ %entry +; CT-NEXT: and r3, r0, #1 +; CT-NEXT: rsb r12, r3, #0 +; CT-NEXT: and r0, r1, r12 +; CT-NEXT: bic r12, r2, r12 +; CT-NEXT: orr r0, r0, r12 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_int16: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: and 
r3, r0, #1 +; DEFAULT-NEXT: rsb r12, r3, #0 +; DEFAULT-NEXT: and r0, r1, r12 +; DEFAULT-NEXT: bic r12, r2, r12 +; DEFAULT-NEXT: orr r0, r0, r12 +; DEFAULT-NEXT: bx lr +; +; THUMB1-LABEL: ct_int16: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, lr} +; THUMB1-NEXT: push {r4, lr} +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: ands r3, r0 +; THUMB1-NEXT: mov r4, r3 +; THUMB1-NEXT: lsls r4, r4, #31 +; THUMB1-NEXT: asrs r4, r4, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: ands r0, r4 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: pop {r4, pc} +; +; THUMB2-LABEL: ct_int16: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: and r3, r0, #1 +; THUMB2-NEXT: rsb.w r12, r3, #0 +; THUMB2-NEXT: and.w r0, r1, r12 +; THUMB2-NEXT: bic.w r12, r2, r12 +; THUMB2-NEXT: orr.w r0, r0, r12 +; THUMB2-NEXT: bx lr +; +; CORTEXA9-LABEL: ct_int16: +; CORTEXA9: @ %bb.0: @ %entry +; CORTEXA9-NEXT: and r3, r0, #1 +; CORTEXA9-NEXT: rsb.w r12, r3, #0 +; CORTEXA9-NEXT: and.w r0, r1, r12 +; CORTEXA9-NEXT: bic.w r12, r2, r12 +; CORTEXA9-NEXT: orr.w r0, r0, r12 +; CORTEXA9-NEXT: bx lr +; +; CORTEX-NOTHUMB-LABEL: ct_int16: +; CORTEX-NOTHUMB: @ %bb.0: @ %entry +; CORTEX-NOTHUMB-NEXT: and r3, r0, #1 +; CORTEX-NOTHUMB-NEXT: rsb r12, r3, #0 +; CORTEX-NOTHUMB-NEXT: and r0, r1, r12 +; CORTEX-NOTHUMB-NEXT: bic r12, r2, r12 +; CORTEX-NOTHUMB-NEXT: orr r0, r0, r12 +; CORTEX-NOTHUMB-NEXT: bx lr +entry: + %sel = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b) + ret i16 %sel +} + +define i32 @ct_int32(i1 %cond, i32 %a, i32 %b) { +; CT-LABEL: ct_int32: +; CT: @ %bb.0: @ %entry +; CT-NEXT: and r3, r0, #1 +; CT-NEXT: rsb r12, r3, #0 +; CT-NEXT: and r0, r1, r12 +; CT-NEXT: bic r12, r2, r12 +; CT-NEXT: orr r0, r0, r12 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_int32: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: and r3, r0, #1 +; DEFAULT-NEXT: rsb r12, r3, #0 +; DEFAULT-NEXT: and r0, r1, r12 +; DEFAULT-NEXT: bic r12, r2, r12 +; DEFAULT-NEXT: orr r0, r0, r12 +; DEFAULT-NEXT: bx lr +; +; 
THUMB1-LABEL: ct_int32: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, lr} +; THUMB1-NEXT: push {r4, lr} +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: ands r3, r0 +; THUMB1-NEXT: mov r4, r3 +; THUMB1-NEXT: lsls r4, r4, #31 +; THUMB1-NEXT: asrs r4, r4, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: ands r0, r4 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: pop {r4, pc} +; +; THUMB2-LABEL: ct_int32: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: and r3, r0, #1 +; THUMB2-NEXT: rsb.w r12, r3, #0 +; THUMB2-NEXT: and.w r0, r1, r12 +; THUMB2-NEXT: bic.w r12, r2, r12 +; THUMB2-NEXT: orr.w r0, r0, r12 +; THUMB2-NEXT: bx lr +; +; CORTEXA9-LABEL: ct_int32: +; CORTEXA9: @ %bb.0: @ %entry +; CORTEXA9-NEXT: and r3, r0, #1 +; CORTEXA9-NEXT: rsb.w r12, r3, #0 +; CORTEXA9-NEXT: and.w r0, r1, r12 +; CORTEXA9-NEXT: bic.w r12, r2, r12 +; CORTEXA9-NEXT: orr.w r0, r0, r12 +; CORTEXA9-NEXT: bx lr +; +; CORTEX-NOTHUMB-LABEL: ct_int32: +; CORTEX-NOTHUMB: @ %bb.0: @ %entry +; CORTEX-NOTHUMB-NEXT: and r3, r0, #1 +; CORTEX-NOTHUMB-NEXT: rsb r12, r3, #0 +; CORTEX-NOTHUMB-NEXT: and r0, r1, r12 +; CORTEX-NOTHUMB-NEXT: bic r12, r2, r12 +; CORTEX-NOTHUMB-NEXT: orr r0, r0, r12 +; CORTEX-NOTHUMB-NEXT: bx lr +entry: + %sel = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %sel +} + +define i64 @ct_int64(i1 %cond, i64 %a, i64 %b) { +; CT-LABEL: ct_int64: +; CT: @ %bb.0: @ %entry +; CT-NEXT: .save {r4, lr} +; CT-NEXT: push {r4, lr} +; CT-NEXT: ldr r1, [sp, #8] +; CT-NEXT: and lr, r0, #1 +; CT-NEXT: ldr r12, [sp, #12] +; CT-NEXT: rsb r4, lr, #0 +; CT-NEXT: and r0, r2, r4 +; CT-NEXT: bic r4, r1, r4 +; CT-NEXT: orr r0, r0, r4 +; CT-NEXT: rsb r2, lr, #0 +; CT-NEXT: and r1, r3, r2 +; CT-NEXT: bic r2, r12, r2 +; CT-NEXT: orr r1, r1, r2 +; CT-NEXT: pop {r4, pc} +; +; DEFAULT-LABEL: ct_int64: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: ldr r1, [sp, #8] +; DEFAULT-NEXT: rsb lr, r12, #0 +; 
DEFAULT-NEXT: and r0, r2, lr +; DEFAULT-NEXT: bic lr, r1, lr +; DEFAULT-NEXT: orr r0, r0, lr +; DEFAULT-NEXT: ldr r2, [sp, #12] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r1, r3, lr +; DEFAULT-NEXT: bic lr, r2, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: pop {r11, pc} +; +; THUMB1-LABEL: ct_int64: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r7, lr} +; THUMB1-NEXT: push {r4, r5, r7, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r1, [sp, #16] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ldr r2, [sp, #20] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r3 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: pop {r4, r5, r7, pc} +; +; THUMB2-LABEL: ct_int64: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r7, lr} +; THUMB2-NEXT: push {r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldr r1, [sp, #8] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r2, lr +; THUMB2-NEXT: bic.w lr, r1, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldr r2, [sp, #12] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r3, lr +; THUMB2-NEXT: bic.w lr, r2, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: pop {r7, pc} +; +; CORTEXA9-LABEL: ct_int64: +; CORTEXA9: @ %bb.0: @ %entry +; CORTEXA9-NEXT: .save {r4, lr} +; CORTEXA9-NEXT: push {r4, lr} +; CORTEXA9-NEXT: ldrd r1, r12, [sp, #8] +; CORTEXA9-NEXT: and lr, r0, #1 +; CORTEXA9-NEXT: rsb.w r4, lr, #0 +; CORTEXA9-NEXT: and.w r0, r2, r4 +; CORTEXA9-NEXT: bic.w r4, r1, r4 +; CORTEXA9-NEXT: orrs r0, r4 +; CORTEXA9-NEXT: rsb.w r2, lr, #0 +; CORTEXA9-NEXT: and.w r1, r3, r2 +; CORTEXA9-NEXT: bic.w r2, r12, r2 +; CORTEXA9-NEXT: orr.w r1, r1, r2 +; 
CORTEXA9-NEXT: pop {r4, pc} +; +; CORTEX-NOTHUMB-LABEL: ct_int64: +; CORTEX-NOTHUMB: @ %bb.0: @ %entry +; CORTEX-NOTHUMB-NEXT: .save {r4, lr} +; CORTEX-NOTHUMB-NEXT: push {r4, lr} +; CORTEX-NOTHUMB-NEXT: ldr r12, [sp, #12] +; CORTEX-NOTHUMB-NEXT: and lr, r0, #1 +; CORTEX-NOTHUMB-NEXT: ldr r1, [sp, #8] +; CORTEX-NOTHUMB-NEXT: rsb r4, lr, #0 +; CORTEX-NOTHUMB-NEXT: and r0, r2, r4 +; CORTEX-NOTHUMB-NEXT: bic r4, r1, r4 +; CORTEX-NOTHUMB-NEXT: orr r0, r0, r4 +; CORTEX-NOTHUMB-NEXT: rsb r2, lr, #0 +; CORTEX-NOTHUMB-NEXT: and r1, r3, r2 +; CORTEX-NOTHUMB-NEXT: bic r2, r12, r2 +; CORTEX-NOTHUMB-NEXT: orr r1, r1, r2 +; CORTEX-NOTHUMB-NEXT: pop {r4, pc} +entry: + %sel = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b) + ret i64 %sel +} + +define float @ct_float(i1 %cond, float %a, float %b) { +; CT-LABEL: ct_float: +; CT: @ %bb.0: @ %entry +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: vmov s0, r2 +; CT-NEXT: vmov s2, r1 +; CT-NEXT: vmov r2, s2 +; CT-NEXT: vmov r3, s0 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: and r2, r2, r1 +; CT-NEXT: bic r1, r3, r1 +; CT-NEXT: orr r2, r2, r1 +; CT-NEXT: vmov s4, r2 +; CT-NEXT: vmov r0, s4 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_float: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: and r3, r0, #1 +; DEFAULT-NEXT: rsb r12, r3, #0 +; DEFAULT-NEXT: and r0, r1, r12 +; DEFAULT-NEXT: bic r12, r2, r12 +; DEFAULT-NEXT: orr r0, r0, r12 +; DEFAULT-NEXT: bx lr +; +; THUMB1-LABEL: ct_float: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, lr} +; THUMB1-NEXT: push {r4, lr} +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: ands r3, r0 +; THUMB1-NEXT: mov r4, r3 +; THUMB1-NEXT: lsls r4, r4, #31 +; THUMB1-NEXT: asrs r4, r4, #31 +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: ands r0, r4 +; THUMB1-NEXT: eors r0, r2 +; THUMB1-NEXT: pop {r4, pc} +; +; THUMB2-LABEL: ct_float: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: and r3, r0, #1 +; THUMB2-NEXT: rsb.w r12, r3, #0 +; THUMB2-NEXT: and.w r0, r1, r12 +; THUMB2-NEXT: bic.w r12, r2, r12 +; 
THUMB2-NEXT: orr.w r0, r0, r12 +; THUMB2-NEXT: bx lr +; +; CORTEXA9-LABEL: ct_float: +; CORTEXA9: @ %bb.0: @ %entry +; CORTEXA9-NEXT: and r0, r0, #1 +; CORTEXA9-NEXT: vmov r2, s0 +; CORTEXA9-NEXT: vmov r3, s1 +; CORTEXA9-NEXT: rsbs r1, r0, #0 +; CORTEXA9-NEXT: ands r2, r1 +; CORTEXA9-NEXT: bic.w r1, r3, r1 +; CORTEXA9-NEXT: orrs r2, r1 +; CORTEXA9-NEXT: vmov s2, r2 +; CORTEXA9-NEXT: vmov.f32 s0, s2 +; CORTEXA9-NEXT: bx lr +; +; CORTEX-NOTHUMB-LABEL: ct_float: +; CORTEX-NOTHUMB: @ %bb.0: @ %entry +; CORTEX-NOTHUMB-NEXT: and r0, r0, #1 +; CORTEX-NOTHUMB-NEXT: vmov r2, s0 +; CORTEX-NOTHUMB-NEXT: vmov r3, s1 +; CORTEX-NOTHUMB-NEXT: rsb r1, r0, #0 +; CORTEX-NOTHUMB-NEXT: and r2, r2, r1 +; CORTEX-NOTHUMB-NEXT: bic r1, r3, r1 +; CORTEX-NOTHUMB-NEXT: orr r2, r2, r1 +; CORTEX-NOTHUMB-NEXT: vmov s2, r2 +; CORTEX-NOTHUMB-NEXT: vmov.f32 s0, s2 +; CORTEX-NOTHUMB-NEXT: bx lr +entry: + %sel = call float @llvm.ct.select.f32(i1 %cond, float %a, float %b) + ret float %sel +} + +define double @ct_f64(i1 %cond, double %a, double %b) { +; CT-LABEL: ct_f64: +; CT: @ %bb.0: @ %entry +; CT-NEXT: vldr d16, [sp] +; CT-NEXT: vmov d17, r2, r3 +; CT-NEXT: and r0, r0, #1 +; CT-NEXT: rsb r1, r0, #0 +; CT-NEXT: vdup.32 d19, r1 +; CT-NEXT: vand d18, d17, d19 +; CT-NEXT: vbic d19, d16, d19 +; CT-NEXT: vorr d18, d18, d19 +; CT-NEXT: vmov r0, r1, d18 +; CT-NEXT: bx lr +; +; DEFAULT-LABEL: ct_f64: +; DEFAULT: @ %bb.0: @ %entry +; DEFAULT-NEXT: push {r11, lr} +; DEFAULT-NEXT: and r12, r0, #1 +; DEFAULT-NEXT: ldr r1, [sp, #8] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r0, r2, lr +; DEFAULT-NEXT: bic lr, r1, lr +; DEFAULT-NEXT: orr r0, r0, lr +; DEFAULT-NEXT: ldr r2, [sp, #12] +; DEFAULT-NEXT: rsb lr, r12, #0 +; DEFAULT-NEXT: and r1, r3, lr +; DEFAULT-NEXT: bic lr, r2, lr +; DEFAULT-NEXT: orr r1, r1, lr +; DEFAULT-NEXT: pop {r11, pc} +; +; THUMB1-LABEL: ct_f64: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: .save {r4, r5, r7, lr} +; THUMB1-NEXT: push {r4, r5, r7, lr} +; THUMB1-NEXT: movs r4, #1 
+; THUMB1-NEXT: ands r4, r0 +; THUMB1-NEXT: ldr r1, [sp, #16] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ands r0, r5 +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: ldr r2, [sp, #20] +; THUMB1-NEXT: mov r5, r4 +; THUMB1-NEXT: lsls r5, r5, #31 +; THUMB1-NEXT: asrs r5, r5, #31 +; THUMB1-NEXT: mov r1, r3 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: ands r1, r5 +; THUMB1-NEXT: eors r1, r2 +; THUMB1-NEXT: pop {r4, r5, r7, pc} +; +; THUMB2-LABEL: ct_f64: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: .save {r7, lr} +; THUMB2-NEXT: push {r7, lr} +; THUMB2-NEXT: and r12, r0, #1 +; THUMB2-NEXT: ldr r1, [sp, #8] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r0, r2, lr +; THUMB2-NEXT: bic.w lr, r1, lr +; THUMB2-NEXT: orr.w r0, r0, lr +; THUMB2-NEXT: ldr r2, [sp, #12] +; THUMB2-NEXT: rsb.w lr, r12, #0 +; THUMB2-NEXT: and.w r1, r3, lr +; THUMB2-NEXT: bic.w lr, r2, lr +; THUMB2-NEXT: orr.w r1, r1, lr +; THUMB2-NEXT: pop {r7, pc} +; +; CORTEXA9-LABEL: ct_f64: +; CORTEXA9: @ %bb.0: @ %entry +; CORTEXA9-NEXT: and r0, r0, #1 +; CORTEXA9-NEXT: rsbs r1, r0, #0 +; CORTEXA9-NEXT: vdup.32 d17, r1 +; CORTEXA9-NEXT: vand d16, d0, d17 +; CORTEXA9-NEXT: vbic d17, d1, d17 +; CORTEXA9-NEXT: vorr d16, d16, d17 +; CORTEXA9-NEXT: vmov.f64 d0, d16 +; CORTEXA9-NEXT: bx lr +; +; CORTEX-NOTHUMB-LABEL: ct_f64: +; CORTEX-NOTHUMB: @ %bb.0: @ %entry +; CORTEX-NOTHUMB-NEXT: and r0, r0, #1 +; CORTEX-NOTHUMB-NEXT: rsb r1, r0, #0 +; CORTEX-NOTHUMB-NEXT: vdup.32 d17, r1 +; CORTEX-NOTHUMB-NEXT: vand d16, d0, d17 +; CORTEX-NOTHUMB-NEXT: vbic d17, d1, d17 +; CORTEX-NOTHUMB-NEXT: vorr d16, d16, d17 +; CORTEX-NOTHUMB-NEXT: vmov.f64 d0, d16 +; CORTEX-NOTHUMB-NEXT: bx lr +entry: + %sel = call double @llvm.ct.select.f64(i1 %cond, double %a, double %b) + ret double %sel +}