From 80a83ad4739916cbdd1fe43cebe8339c5d7b1f61 Mon Sep 17 00:00:00 2001 From: wizardengineer Date: Wed, 5 Nov 2025 10:51:08 -0500 Subject: [PATCH] [ConstantTime][LLVM] Add llvm.ct.select intrinsic with generic SelectionDAG lowering --- llvm/include/llvm/CodeGen/ISDOpcodes.h | 4 + llvm/include/llvm/CodeGen/SelectionDAG.h | 7 + llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 4 +- llvm/include/llvm/CodeGen/TargetLowering.h | 18 +- llvm/include/llvm/IR/Intrinsics.td | 9 + .../include/llvm/Target/TargetSelectionDAG.td | 6 + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 112 ++- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 46 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 17 +- .../SelectionDAG/LegalizeIntegerTypes.cpp | 20 + llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 6 +- .../SelectionDAG/LegalizeTypesGeneric.cpp | 14 + .../SelectionDAG/LegalizeVectorTypes.cpp | 13 + .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 + .../SelectionDAG/SelectionDAGBuilder.cpp | 131 +++ .../SelectionDAG/SelectionDAGBuilder.h | 3 + .../SelectionDAG/SelectionDAGDumper.cpp | 1 + llvm/test/CodeGen/RISCV/ctselect-fallback.ll | 330 ++++++++ llvm/test/CodeGen/X86/ctselect.ll | 779 ++++++++++++++++++ 19 files changed, 1507 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/ctselect-fallback.ll create mode 100644 llvm/test/CodeGen/X86/ctselect.ll diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index ff3dd0d4c3c51..656f6e718f029 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -783,6 +783,10 @@ enum NodeType { /// i1 then the high bits must conform to getBooleanContents. SELECT, + /// Constant-time select of op #1 or op #2 by the condition in op #0. Targets + /// may lower it to a CMOV-style instruction; used for llvm.ct.select. + CTSELECT, + /// Select with a vector condition (op #0) and two vector operands (ops #1 /// and #2), returning a vector result. All vectors have the same length. 
/// Much like the scalar select and setcc, each bit in the condition selects diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 1a5ffb38f2568..b5debd490d9cb 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1352,6 +1352,13 @@ class SelectionDAG { return getNode(Opcode, DL, VT, Cond, LHS, RHS, Flags); } + SDValue getCTSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, + SDValue RHS, SDNodeFlags Flags = SDNodeFlags()) { + assert(LHS.getValueType() == VT && RHS.getValueType() == VT && + "Cannot use ctselect on differing types"); + return getNode(ISD::CTSELECT, DL, VT, Cond, LHS, RHS, Flags); + } + /// Helper function to make it easier to build SelectCC's if you just have an /// ISD::CondCode instead of an SDValue. SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 1759463ea7965..8e18eb2f7db0e 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -435,6 +435,9 @@ struct SDNodeFlags { NonNeg | NoNaNs | NoInfs | SameSign | InBounds, FastMathFlags = NoNaNs | NoInfs | NoSignedZeros | AllowReciprocal | AllowContract | ApproximateFuncs | AllowReassociation, + + // Flag for disabling optimization. NOTE(review): confirm bit 15 is unused. + NoMerge = 1 << 15, }; /// Default constructor turns off all optimization flags. 
@@ -486,7 +489,6 @@ struct SDNodeFlags { bool hasNoFPExcept() const { return Flags & NoFPExcept; } bool hasUnpredictable() const { return Flags & Unpredictable; } bool hasInBounds() const { return Flags & InBounds; } - bool operator==(const SDNodeFlags &Other) const { return Flags == Other.Flags; } diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 78f63b4406eb0..8198485803d8b 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -242,11 +242,15 @@ class LLVM_ABI TargetLoweringBase { /// Enum that describes what type of support for selects the target has. enum SelectSupportKind { - ScalarValSelect, // The target supports scalar selects (ex: cmov). - ScalarCondVectorVal, // The target supports selects with a scalar condition - // and vector values (ex: cmov). - VectorMaskSelect // The target supports vector selects with a vector - // mask (ex: x86 blends). + ScalarValSelect, // The target supports scalar selects (ex: cmov). + ScalarCondVectorVal, // The target supports selects with a scalar condition + // and vector values (ex: cmov). + VectorMaskSelect, // The target supports vector selects with a vector + // mask (ex: x86 blends). + CtSelect, // The target implements a custom constant-time select. + ScalarCondVectorValCtSelect, // The target supports selects with a scalar + // condition and vector values. 
+ VectorMaskValCtSelect, // Constant-time vector select with a vector mask. }; /// Enum that specifies what an atomic load/AtomicRMWInst is expanded @@ -476,8 +480,8 @@ class LLVM_ABI TargetLoweringBase { MachineMemOperand::Flags getVPIntrinsicMemOperandFlags(const VPIntrinsic &VPIntrin) const; - virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { - return true; + virtual bool isSelectSupported(SelectSupportKind kind) const { + return kind != CtSelect; } /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 6a079f62dd9cf..d41c61777089d 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1825,6 +1825,15 @@ def int_coro_subfn_addr : DefaultAttrsIntrinsic< [IntrReadMem, IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>; +///===---------------------- Constant Time Intrinsics ----------------------===// +// +// llvm.ct.select(cond, a, b): constant-time select between two values of the +// same type, controlled by an i1 condition. +def int_ct_select + : DefaultAttrsIntrinsic<[llvm_any_ty], + [llvm_i1_ty, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrWriteMem, IntrWillReturn, NoUndef<RetIndex>]>; + ///===-------------------------- Other Intrinsics --------------------------===// // // TODO: We should introduce a new memory kind fo traps (and other side effects diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 07a858fd682fc..de4abd713d3cf 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -214,6 +214,11 @@ def SDTSelect : SDTypeProfile<1, 3, [ // select SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3> ]>; +def SDTCtSelect + : SDTypeProfile<1, 3, + [ // ctselect + SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; + def SDTVSelect : SDTypeProfile<1, 3, [ // vselect SDTCisVec<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisSameNumEltsAs<0, 
1> ]>; @@ -717,6 +722,7 @@ def reset_fpmode : SDNode<"ISD::RESET_FPMODE", SDTNone, [SDNPHasChain]>; def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; def select : SDNode<"ISD::SELECT" , SDTSelect>; +def ctselect : SDNode<"ISD::CTSELECT", SDTCtSelect>; def vselect : SDNode<"ISD::VSELECT" , SDTVSelect>; def selectcc : SDNode<"ISD::SELECT_CC" , SDTSelectCC>; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 46c4bb85a7420..28fcebbb4a92a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -484,6 +484,7 @@ namespace { SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); + SDValue visitCTSELECT(SDNode *N); SDValue visitVSELECT(SDNode *N); SDValue visitVP_SELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); @@ -1898,6 +1899,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { } SDValue DAGCombiner::visit(SDNode *N) { + // clang-format off switch (N->getOpcode()) { default: break; @@ -1968,6 +1970,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); + case ISD::CTSELECT: return visitCTSELECT(N); case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); @@ -6032,6 +6035,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get(); break; case ISD::SELECT: + case ISD::CTSELECT: case ISD::VSELECT: if (N0.getOperand(0).getOpcode() != ISD::SETCC) return SDValue(); @@ -12184,8 +12188,9 @@ template <class MatchContextClass> static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) { assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT || - N->getOpcode() == ISD::VP_SELECT) && - "Expected a (v)(vp.)select"); + N->getOpcode() == ISD::VP_SELECT || + 
N->getOpcode() == ISD::CTSELECT) && + "Expected a (v)(vp.)(ct) select"); SDValue Cond = N->getOperand(0); SDValue T = N->getOperand(1), F = N->getOperand(2); EVT VT = N->getValueType(0); @@ -12547,6 +12552,109 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitCTSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + EVT VT0 = N0.getValueType(); + SDLoc DL(N); + SDNodeFlags Flags = N->getFlags(); + + if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG)) + return V; + + // ctselect (not Cond), N1, N2 -> ctselect Cond, N2, N1 + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { + SDValue SelectOp = DAG.getNode(ISD::CTSELECT, DL, VT, F, N2, N1); + SelectOp->setFlags(Flags); + return SelectOp; + } + + if (VT0 == MVT::i1) { + // The code in this block deals with the following 2 equivalences: + // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) + // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) + // The target can specify its preferred form with the + // shouldNormalizeToSelectSequence() callback. However we always transform + // to the right anyway if we find the inner select exists in the DAG anyway + // and we always transform to the left side if we know that we can further + // optimize the combination of the conditions. + bool normalizeToSequence = + TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); + // ctselect (and Cond0, Cond1), X, Y + // -> ctselect Cond0, (ctselect Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), + Cond1, N1, N2, Flags); + if (normalizeToSequence || !InnerSelect.use_empty()) + return DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), Cond0, + InnerSelect, N2, Flags); + // Cleanup on failure. 
+ if (InnerSelect.use_empty()) + recursivelyDeleteUnusedNodes(InnerSelect.getNode()); + } + // ctselect (or Cond0, Cond1), X, Y -> ctselect Cond0, X, (ctselect Cond1, + // X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), + Cond1, N1, N2, Flags); + if (normalizeToSequence || !InnerSelect.use_empty()) + return DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), Cond0, N1, + InnerSelect, Flags); + // Cleanup on failure. + if (InnerSelect.use_empty()) + recursivelyDeleteUnusedNodes(InnerSelect.getNode()); + } + + // ctselect Cond0, (ctselect Cond1, X, Y), Y -> ctselect (and Cond0, Cond1), + // X, Y + if (N1->getOpcode() == ISD::CTSELECT && N1->hasOneUse()) { + SDValue N1_0 = N1->getOperand(0); + SDValue N1_1 = N1->getOperand(1); + SDValue N1_2 = N1->getOperand(2); + if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { + // Create the actual and node if we can generate good code for it. + if (!normalizeToSequence) { + SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0); + return DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), And, N1_1, + N2, Flags); + } + // Otherwise see if we can optimize the "and" to a better pattern. + if (SDValue Combined = visitANDLike(N0, N1_0, N)) { + return DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), Combined, + N1_1, N2, Flags); + } + } + } + // ctselect Cond0, X, (ctselect Cond1, X, Y) -> ctselect (or Cond0, Cond1), + // X, Y + if (N2->getOpcode() == ISD::CTSELECT && N2->hasOneUse()) { + SDValue N2_0 = N2->getOperand(0); + SDValue N2_1 = N2->getOperand(1); + SDValue N2_2 = N2->getOperand(2); + if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { + // Create the actual or node if we can generate good code for it. 
+ if (!normalizeToSequence) { + SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0); + return DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), Or, N1, N2_2, + Flags); + } + // Otherwise see if we can optimize to a better pattern. + if (SDValue Combined = visitORLike(N0, N2_0, DL)) + return DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), Combined, N1, + N2_2, Flags); + } + } + } + + return SDValue(); +} + // This function assumes all the vselect's arguments are CONCAT_VECTOR // nodes and that the condition is a BV of ConstantSDNodes (or undefs). static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 431a81002074f..8178fd8981519 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4136,6 +4136,46 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } Results.push_back(Tmp1); break; + case ISD::CTSELECT: { + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + EVT VT = Tmp2.getValueType(); + if (VT.isVector()) { + SmallVector<SDValue> Elements; + unsigned NumElements = VT.getVectorNumElements(); + EVT ScalarVT = VT.getScalarType(); + for (unsigned Idx = 0; Idx < NumElements; ++Idx) { + SDValue IdxVal = DAG.getVectorIdxConstant(Idx, dl); + SDValue TVal = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Tmp2, IdxVal); + SDValue FVal = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Tmp3, IdxVal); + Elements.push_back( + DAG.getCTSelect(dl, ScalarVT, Tmp1, TVal, FVal, Node->getFlags())); + } + Tmp1 = DAG.getBuildVector(VT, dl, Elements); + } else if (VT.isFloatingPoint()) { + EVT IntegerVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + Tmp2 = DAG.getBitcast(IntegerVT, Tmp2); + Tmp3 = DAG.getBitcast(IntegerVT, Tmp3); + Tmp1 = DAG.getBitcast(VT, DAG.getCTSelect(dl, IntegerVT, Tmp1, Tmp2, Tmp3, + 
Node->getFlags())); + } else { + assert(VT.isInteger()); + EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); + auto [Tmp2Lo, Tmp2Hi] = DAG.SplitScalar(Tmp2, dl, HalfVT, HalfVT); + auto [Tmp3Lo, Tmp3Hi] = DAG.SplitScalar(Tmp3, dl, HalfVT, HalfVT); + SDValue ResLo = + DAG.getCTSelect(dl, HalfVT, Tmp1, Tmp2Lo, Tmp3Lo, Node->getFlags()); + SDValue ResHi = + DAG.getCTSelect(dl, HalfVT, Tmp1, Tmp2Hi, Tmp3Hi, Node->getFlags()); + Tmp1 = DAG.getNode(ISD::BUILD_PAIR, dl, VT, ResLo, ResHi); + Tmp1->setFlags(Node->getFlags()); + } + Results.push_back(Tmp1); + break; + } case ISD::BR_JT: { SDValue Chain = Node->getOperand(0); SDValue Table = Node->getOperand(1); @@ -5474,7 +5514,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2)); break; } - case ISD::SELECT: { + case ISD::SELECT: + case ISD::CTSELECT: { unsigned ExtOp, TruncOp; if (Node->getValueType(0).isVector() || Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) { @@ -5492,7 +5533,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); // Perform the larger operation, then round down. 
- Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3); + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3); + Tmp1->setFlags(Node->getFlags()); if (TruncOp != ISD::FP_ROUND) Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); else diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 58983cb57d7f6..855a15a744cfe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -159,6 +159,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_LOAD: R = SoftenFloatRes_ATOMIC_LOAD(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::CTSELECT: R = SoftenFloatRes_CTSELECT(N); break; case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; case ISD::FREEZE: R = SoftenFloatRes_FREEZE(N); break; case ISD::STRICT_SINT_TO_FP: @@ -1041,6 +1042,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { LHS.getValueType(), N->getOperand(0), LHS, RHS); } +SDValue DAGTypeLegalizer::SoftenFloatRes_CTSELECT(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(1)); + SDValue RHS = GetSoftenedFloat(N->getOperand(2)); + return DAG.getCTSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, + RHS); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); @@ -1561,6 +1569,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::POISON: case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break; + case ISD::CTSELECT: SplitRes_Select(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; @@ -2917,6 +2926,9 @@ void 
DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { R = PromoteFloatRes_ATOMIC_LOAD(N); break; case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break; + case ISD::CTSELECT: + R = PromoteFloatRes_SELECT(N); + break; case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break; case ISD::SINT_TO_FP: @@ -3219,7 +3231,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) { SDValue TrueVal = GetPromotedFloat(N->getOperand(1)); SDValue FalseVal = GetPromotedFloat(N->getOperand(2)); - return DAG.getNode(ISD::SELECT, SDLoc(N), TrueVal->getValueType(0), + return DAG.getNode(N->getOpcode(), SDLoc(N), TrueVal->getValueType(0), N->getOperand(0), TrueVal, FalseVal); } @@ -3403,6 +3415,9 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { R = SoftPromoteHalfRes_ATOMIC_LOAD(N); break; case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break; + case ISD::CTSELECT: + R = SoftPromoteHalfRes_SELECT(N); + break; case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break; case ISD::STRICT_SINT_TO_FP: case ISD::STRICT_UINT_TO_FP: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 44e5a187c4281..0135b3195438b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -95,6 +95,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_VECTOR_COMPRESS(N); break; case ISD::SELECT: + case ISD::CTSELECT: case ISD::VSELECT: case ISD::VP_SELECT: case ISD::VP_MERGE: @@ -2013,6 +2014,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { break; case ISD::VSELECT: case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; + case ISD::CTSELECT: + Res = PromoteIntOp_CTSELECT(N, OpNo); + break; case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; case ISD::VP_SETCC: case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); 
break; @@ -2390,6 +2394,19 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { N->getOperand(2)), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_CTSELECT(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Only know how to promote the condition!"); + SDValue Cond = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); + + // Promote all the way up to the canonical SetCC type. + EVT OpVT = OpTy.getScalarType(); + Cond = PromoteTargetBoolean(Cond, OpVT); + + return SDValue( + DAG.UpdateNodeOperands(N, Cond, N->getOperand(1), N->getOperand(2)), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { assert(OpNo == 0 && "Don't know how to promote this operand!"); @@ -2987,6 +3004,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break; case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break; + case ISD::CTSELECT: + SplitRes_Select(N, Lo, Hi); + break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::POISON: case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index ede522eff6df3..62069b4fb03a3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -401,6 +401,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_ScalarOp(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_CTSELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_Shift(SDNode *N); @@ -633,6 +634,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue 
SoftenFloatRes_LOAD(SDNode *N); SDValue SoftenFloatRes_ATOMIC_LOAD(SDNode *N); SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_CTSELECT(SDNode *N); SDValue SoftenFloatRes_SELECT_CC(SDNode *N); SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); @@ -896,6 +898,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); + SDValue ScalarizeVecRes_CTSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); SDValue ScalarizeVecRes_SETCC(SDNode *N); SDValue ScalarizeVecRes_UNDEF(SDNode *N); @@ -1224,7 +1227,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue &Lo, SDValue &Hi); void SplitVecRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitRes_Select (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitRes_Select(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitRes_CTSELECT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 88c1af20a321e..098368ef2f6b3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -570,6 +570,20 @@ void DAGTypeLegalizer::SplitRes_Select(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH, EVLHi); } +void DAGTypeLegalizer::SplitRes_CTSELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue LL, LH, RL, RH, CL, CH; + SDLoc dl(N); + GetSplitOp(N->getOperand(1), LL, LH); + GetSplitOp(N->getOperand(2), RL, RH); + + SDValue Cond = N->getOperand(0); + CL = CH = 
Cond; + assert(!Cond.getValueType().isVector() && "Unsupported vector type"); + + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH); +} + void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LL, LH, RL, RH; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index bb4a8d9967f94..ed1a199cf7576 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -77,6 +77,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; + case ISD::CTSELECT: + R = ScalarizeVecRes_CTSELECT(N); + break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; case ISD::POISON: @@ -670,6 +673,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { GetScalarizedVector(N->getOperand(2))); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_CTSELECT(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getCTSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(2)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), @@ -1204,6 +1213,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: case ISD::VP_MERGE: case ISD::VP_SELECT: SplitRes_Select(N, Lo, Hi); break; + case ISD::CTSELECT: + SplitRes_CTSELECT(N, Lo, Hi); + break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::POISON: case ISD::UNDEF: 
SplitRes_UNDEF(N, Lo, Hi); break; @@ -4869,6 +4881,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::VSELECT: case ISD::SELECT: + case ISD::CTSELECT: case ISD::VP_SELECT: case ISD::VP_MERGE: Res = WidenVecRes_Select(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 379242ec5a157..f960e9961c851 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8249,6 +8249,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return V; break; } + case ISD::SELECT: case ISD::VSELECT: if (SDValue V = simplifySelect(N1, N2, N3)) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fa0c899dfcc27..1f7c0fe0d6059 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6496,6 +6496,105 @@ void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I, setValue(&I, Result); } +/// Fallback implementation for constant-time select using DAG chaining. +/// This implementation uses data dependencies through virtual registers to +/// prevent optimizations from breaking the constant-time property. +/// It handles scalars, vectors (fixed and scalable), and floating-point types. 
+SDValue SelectionDAGBuilder::createProtectedCtSelectFallback(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Cond, SDValue T, SDValue F,
+    EVT VT) {
+
+  SDValue WorkingT = T;
+  SDValue WorkingF = F;
+  EVT WorkingVT = VT;
+
+  SDValue Chain = DAG.getEntryNode();
+  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+
+  // Handle vector condition: splat scalar condition to vector
+  if (VT.isVector() && !Cond.getValueType().isVector()) {
+    ElementCount NumElems = VT.getVectorElementCount();
+    EVT CondVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElems);
+
+    if (VT.isScalableVector()) {
+      Cond = DAG.getSplatVector(CondVT, DL, Cond);
+    } else {
+      Cond = DAG.getSplatBuildVector(CondVT, DL, Cond);
+    }
+  }
+
+  // Handle floating-point types: bitcast to integer for bitwise operations
+  if (VT.isFloatingPoint()) {
+    if (VT.isVector()) {
+      // float vector -> int vector
+      EVT ElemVT = VT.getVectorElementType();
+      unsigned int ElemBitWidth = ElemVT.getScalarSizeInBits();
+      EVT IntElemVT = EVT::getIntegerVT(*DAG.getContext(), ElemBitWidth);
+
+      WorkingVT = EVT::getVectorVT(*DAG.getContext(), IntElemVT,
+                                   VT.getVectorElementCount());
+    } else {
+      WorkingVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+    }
+
+    WorkingT = DAG.getBitcast(WorkingVT, T);
+    WorkingF = DAG.getBitcast(WorkingVT, F);
+  }
+
+  // Create mask: sign-extend condition to all bits
+  SDValue Mask = DAG.getSExtOrTrunc(Cond, DL, WorkingVT);
+
+  // Create all-ones constant for inversion
+  SDValue AllOnes;
+  if (WorkingVT.isScalableVector()) {
+    unsigned BitWidth = WorkingVT.getScalarSizeInBits();
+    APInt AllOnesVal = APInt::getAllOnes(BitWidth);
+    SDValue ScalarAllOnes =
+        DAG.getConstant(AllOnesVal, DL, WorkingVT.getScalarType());
+    AllOnes = DAG.getSplatVector(WorkingVT, DL, ScalarAllOnes);
+  } else {
+    AllOnes = DAG.getAllOnesConstant(DL, WorkingVT);
+  }
+
+  // Invert mask for false value
+  SDValue Invert = DAG.getNode(ISD::XOR, DL, WorkingVT, Mask, AllOnes);
+
+  // Compute: (T & Mask) | (F & ~Mask)
+  // This is constant-time because both branches are always computed
+  SDValue TM = DAG.getNode(ISD::AND, DL, WorkingVT, Mask, WorkingT);
+
+  // DAG chaining: create data dependency through virtual register
+  // This prevents optimizations from reordering or eliminating operations
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  bool CanUseChaining = false;
+
+  if (!WorkingVT.isScalableVector()) {
+    // Extended (non-simple) EVTs have no MVT, so test isSimple() before use
+    CanUseChaining = WorkingVT.isSimple() && TLI.isTypeLegal(WorkingVT);
+  } else {
+    // For scalable vectors, disable chaining (conservative approach)
+    CanUseChaining = false;
+  }
+
+  if (CanUseChaining) {
+    // Apply chaining through registers for additional protection
+    const TargetRegisterClass *RC = TLI.getRegClassFor(WorkingVT.getSimpleVT());
+    Register TMReg = MRI.createVirtualRegister(RC);
+    Chain = DAG.getCopyToReg(Chain, DL, TMReg, TM);
+    TM = DAG.getCopyFromReg(Chain, DL, TMReg, WorkingVT);
+  }
+
+  SDValue FM = DAG.getNode(ISD::AND, DL, WorkingVT, Invert, WorkingF);
+  SDValue Result = DAG.getNode(ISD::OR, DL, WorkingVT, TM, FM);
+
+  // Convert back to original type if needed
+  if (WorkingVT != VT) {
+    Result = DAG.getBitcast(VT, Result);
+  }
+
+  return Result;
+}
+
 /// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { @@ -6674,6 +6773,38 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, updateDAGForMaybeTailCall(MC); return; } + case Intrinsic::ct_select: { + // Set function attribute to indicate ct.select usage + Function &F = DAG.getMachineFunction().getFunction(); + F.addFnAttr("ct-select"); + + SDLoc DL = getCurSDLoc(); + + SDValue Cond = getValue(I.getArgOperand(0)); // i1 + SDValue A = getValue(I.getArgOperand(1)); // T + SDValue B = getValue(I.getArgOperand(2)); // T + + assert((A.getValueType() == B.getValueType()) && + "Operands are of different types"); + + EVT VT = A.getValueType(); + EVT CondVT = Cond.getValueType(); + + // assert if Cond type is Vector + assert(!CondVT.isVector() && "Vector type cond not supported yet"); + + // Handle scalar types + if (TLI.isSelectSupported( + TargetLoweringBase::SelectSupportKind::CtSelect) && + !CondVT.isVector()) { + SDValue Result = DAG.getNode(ISD::CTSELECT, DL, VT, Cond, A, B); + setValue(&I, Result); + return; + } + + setValue(&I, createProtectedCtSelectFallback(DAG, DL, Cond, A, B, VT)); + return; + } case Intrinsic::call_preallocated_setup: { const CallBase *PreallocatedCall = FindPreallocatedCall(&I); SDValue SrcValue = DAG.getSrcValue(PreallocatedCall); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 47e19f77a15e7..5c02bd6b8a4ff 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -219,6 +219,9 @@ class SelectionDAGBuilder { peelDominantCaseCluster(const SwitchInst &SI, SwitchCG::CaseClusterVector &Clusters, BranchProbability &PeeledCaseProb); + SDValue createProtectedCtSelectFallback(SelectionDAG &DAG, const SDLoc &DL, + SDValue Cond, SDValue T, SDValue F, + EVT VT); private: const TargetMachine &TM; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 77377d348b836..aafbc5e4401c8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -332,6 +332,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FPOWI: return "fpowi"; case ISD::STRICT_FPOWI: return "strict_fpowi"; case ISD::SETCC: return "setcc"; + case ISD::CTSELECT: return "ctselect"; case ISD::SETCCCARRY: return "setcccarry"; case ISD::STRICT_FSETCC: return "strict_fsetcc"; case ISD::STRICT_FSETCCS: return "strict_fsetccs"; diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback.ll new file mode 100644 index 0000000000000..f46bde0a05b8b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/ctselect-fallback.ll @@ -0,0 +1,330 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=riscv64 -O3 | FileCheck %s --check-prefix=RV64 +; RUN: llc < %s -mtriple=riscv32 -O3 | FileCheck %s --check-prefix=RV32 + +; Test basic ct.select functionality for scalar types +define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) { +; RV64-LABEL: test_ctselect_i8: +; RV64: # %bb.0: +; RV64-NEXT: xor a1, a1, a2 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: xor a0, a0, a2 +; RV64-NEXT: ret +; +; RV32-LABEL: test_ctselect_i8: +; RV32: # %bb.0: +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: ret + %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b) + ret i8 %result +} + +define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) { +; RV64-LABEL: test_ctselect_i16: +; RV64: # %bb.0: +; RV64-NEXT: xor a1, a1, a2 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: xor a0, a0, a2 +; RV64-NEXT: ret 
+; +; RV32-LABEL: test_ctselect_i16: +; RV32: # %bb.0: +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: ret + %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b) + ret i16 %result +} + +define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) { +; RV64-LABEL: test_ctselect_i32: +; RV64: # %bb.0: +; RV64-NEXT: xor a1, a1, a2 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: xor a0, a0, a2 +; RV64-NEXT: ret +; +; RV32-LABEL: test_ctselect_i32: +; RV32: # %bb.0: +; RV32-NEXT: andi a0, a0, 1 +; RV32-NEXT: neg a3, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a1, a3, a1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) { +; RV64-LABEL: test_ctselect_i64: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: neg a3, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a1, a3, a1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret +; +; RV32-LABEL: test_ctselect_i64: +; RV32: # %bb.0: +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: xor a2, a2, a4 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: and a1, a1, a0 +; RV32-NEXT: and a2, a2, a0 +; RV32-NEXT: xor a0, a1, a3 +; RV32-NEXT: xor a1, a2, a4 +; RV32-NEXT: ret + %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b) + ret i64 %result +} + +define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) { +; RV64-LABEL: test_ctselect_ptr: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: neg a3, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a1, a3, a1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret +; +; RV32-LABEL: test_ctselect_ptr: +; RV32: # %bb.0: +; RV32-NEXT: andi a0, a0, 1 
+; RV32-NEXT: neg a3, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a1, a3, a1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret + %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b) + ret ptr %result +} + +; Test with constant conditions +define i32 @test_ctselect_const_true(i32 %a, i32 %b) { +; RV64-LABEL: test_ctselect_const_true: +; RV64: # %bb.0: +; RV64-NEXT: ret +; +; RV32-LABEL: test_ctselect_const_true: +; RV32: # %bb.0: +; RV32-NEXT: ret + %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_const_false(i32 %a, i32 %b) { +; RV64-LABEL: test_ctselect_const_false: +; RV64: # %bb.0: +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: ret +; +; RV32-LABEL: test_ctselect_const_false: +; RV32: # %bb.0: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: ret + %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b) + ret i32 %result +} + +; Test with comparison conditions +define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) { +; RV64-LABEL: test_ctselect_icmp_eq: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: xor a0, a0, a3 +; RV64-NEXT: ret +; +; RV32-LABEL: test_ctselect_icmp_eq: +; RV32: # %bb.0: +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a2, a0, a2 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret + %cond = icmp eq i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) { +; RV64-LABEL: test_ctselect_icmp_ne: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: xor a2, a2, a3 
+; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: xor a0, a0, a3 +; RV64-NEXT: ret +; +; RV32-LABEL: test_ctselect_icmp_ne: +; RV32: # %bb.0: +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a2, a0, a2 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret + %cond = icmp ne i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) { +; RV64-LABEL: test_ctselect_icmp_slt: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: slt a0, a0, a1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: neg a0, a0 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: xor a0, a0, a3 +; RV64-NEXT: ret +; +; RV32-LABEL: test_ctselect_icmp_slt: +; RV32: # %bb.0: +; RV32-NEXT: slt a0, a0, a1 +; RV32-NEXT: neg a1, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret + %cond = icmp slt i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) { +; RV64-LABEL: test_ctselect_icmp_ult: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: neg a0, a0 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: xor a0, a0, a3 +; RV64-NEXT: ret +; +; RV32-LABEL: test_ctselect_icmp_ult: +; RV32: # %bb.0: +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: neg a1, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret + %cond = icmp ult i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +; Test with memory operands +define i32 @test_ctselect_load(i1 %cond, ptr %p1, 
ptr %p2) { +; RV64-LABEL: test_ctselect_load: +; RV64: # %bb.0: +; RV64-NEXT: lw a1, 0(a1) +; RV64-NEXT: lw a2, 0(a2) +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: xor a1, a1, a2 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: xor a0, a0, a2 +; RV64-NEXT: ret +; +; RV32-LABEL: test_ctselect_load: +; RV32: # %bb.0: +; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lw a2, 0(a2) +; RV32-NEXT: andi a0, a0, 1 +; RV32-NEXT: neg a3, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a1, a3, a1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret + %a = load i32, ptr %p1 + %b = load i32, ptr %p2 + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +; Test nested ctselect calls +define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) { +; RV64-LABEL: test_ctselect_nested: +; RV64: # %bb.0: +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: slli a1, a1, 63 +; RV64-NEXT: xor a3, a3, a4 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a1, a1, 63 +; RV64-NEXT: and a1, a2, a1 +; RV64-NEXT: xor a1, a1, a3 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: xor a0, a0, a4 +; RV64-NEXT: ret +; +; RV32-LABEL: test_ctselect_nested: +; RV32: # %bb.0: +; RV32-NEXT: andi a1, a1, 1 +; RV32-NEXT: andi a0, a0, 1 +; RV32-NEXT: neg a5, a1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a2, a5, a2 +; RV32-NEXT: neg a5, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a1, a1, a3 +; RV32-NEXT: or a1, a2, a1 +; RV32-NEXT: and a1, a5, a1 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret + %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b) + %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c) + ret i32 %result +} + +; Declare the intrinsics +declare i8 @llvm.ct.select.i8(i1, i8, i8) +declare i16 @llvm.ct.select.i16(i1, i16, i16) +declare i32 @llvm.ct.select.i32(i1, i32, i32) +declare i64 @llvm.ct.select.i64(i1, i64, i64) +declare 
ptr @llvm.ct.select.p0(i1, ptr, ptr) diff --git a/llvm/test/CodeGen/X86/ctselect.ll b/llvm/test/CodeGen/X86/ctselect.ll new file mode 100644 index 0000000000000..095787a5e2a4b --- /dev/null +++ b/llvm/test/CodeGen/X86/ctselect.ll @@ -0,0 +1,779 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+cmov | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=-cmov | FileCheck %s --check-prefix=X32-NOCMOV + +; Test basic ct.select functionality for scalar types + +define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) { +; X64-LABEL: test_ctselect_i8: +; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: andb $1, %dil +; X64-NEXT: leal -1(%rdi), %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: negb %cl +; X64-NEXT: andb %sil, %cl +; X64-NEXT: andb %dl, %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_i8: +; X32: # %bb.0: +; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: andb $1, %al +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: negb %cl +; X32-NEXT: andb {{[0-9]+}}(%esp), %cl +; X32-NEXT: decb %al +; X32-NEXT: andb {{[0-9]+}}(%esp), %al +; X32-NEXT: orb %cl, %al +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_i8: +; X32-NOCMOV: # %bb.0: +; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: andb $1, %al +; X32-NOCMOV-NEXT: movl %eax, %ecx +; X32-NOCMOV-NEXT: negb %cl +; X32-NOCMOV-NEXT: andb {{[0-9]+}}(%esp), %cl +; X32-NOCMOV-NEXT: decb %al +; X32-NOCMOV-NEXT: andb {{[0-9]+}}(%esp), %al +; X32-NOCMOV-NEXT: orb %cl, %al +; X32-NOCMOV-NEXT: retl + %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b) + ret i8 %result +} + +define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) { +; X64-LABEL: test_ctselect_i16: +; X64: # 
%bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: andl $1, %edi +; X64-NEXT: leal -1(%rdi), %ecx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: andl %edx, %ecx +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_i16: +; X32: # %bb.0: +; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: andl $1, %eax +; X32-NEXT: leal -1(%eax), %ecx +; X32-NEXT: andw {{[0-9]+}}(%esp), %cx +; X32-NEXT: negl %eax +; X32-NEXT: andw {{[0-9]+}}(%esp), %ax +; X32-NEXT: orl %ecx, %eax +; X32-NEXT: # kill: def $ax killed $ax killed $eax +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_i16: +; X32-NOCMOV: # %bb.0: +; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: andl $1, %eax +; X32-NOCMOV-NEXT: leal -1(%eax), %ecx +; X32-NOCMOV-NEXT: andw {{[0-9]+}}(%esp), %cx +; X32-NOCMOV-NEXT: negl %eax +; X32-NOCMOV-NEXT: andw {{[0-9]+}}(%esp), %ax +; X32-NOCMOV-NEXT: orl %ecx, %eax +; X32-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax +; X32-NOCMOV-NEXT: retl + %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b) + ret i16 %result +} + +define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) { +; X64-LABEL: test_ctselect_i32: +; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: andl $1, %edi +; X64-NEXT: leal -1(%rdi), %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: andl %edx, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_i32: +; X32: # %bb.0: +; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: andl $1, %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: negl %ecx +; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: decl %eax +; X32-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl %ecx, %eax +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_i32: +; X32-NOCMOV: # %bb.0: +; X32-NOCMOV-NEXT: 
movzbl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: andl $1, %eax +; X32-NOCMOV-NEXT: movl %eax, %ecx +; X32-NOCMOV-NEXT: negl %ecx +; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: decl %eax +; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: orl %ecx, %eax +; X32-NOCMOV-NEXT: retl + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) { +; X64-LABEL: test_ctselect_i64: +; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: andl $1, %edi +; X64-NEXT: leaq -1(%rdi), %rax +; X64-NEXT: negq %rdi +; X64-NEXT: andq %rsi, %rdi +; X64-NEXT: andq %rdx, %rax +; X64-NEXT: orq %rdi, %rax +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_i64: +; X32: # %bb.0: +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %esi, -8 +; X32-NEXT: movzbl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: xorl %edx, %eax +; X32-NEXT: andl $1, %esi +; X32-NEXT: negl %esi +; X32-NEXT: andl %esi, %eax +; X32-NEXT: xorl %edx, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: xorl %ecx, %edx +; X32-NEXT: andl %esi, %edx +; X32-NEXT: xorl %ecx, %edx +; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_i64: +; X32-NOCMOV: # %bb.0: +; X32-NOCMOV-NEXT: pushl %esi +; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8 +; X32-NOCMOV-NEXT: .cfi_offset %esi, -8 +; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %esi +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: xorl %edx, %eax +; X32-NOCMOV-NEXT: andl $1, %esi +; X32-NOCMOV-NEXT: negl %esi +; X32-NOCMOV-NEXT: andl %esi, %eax +; X32-NOCMOV-NEXT: xorl %edx, %eax +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), 
%edx +; X32-NOCMOV-NEXT: xorl %ecx, %edx +; X32-NOCMOV-NEXT: andl %esi, %edx +; X32-NOCMOV-NEXT: xorl %ecx, %edx +; X32-NOCMOV-NEXT: popl %esi +; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4 +; X32-NOCMOV-NEXT: retl + %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b) + ret i64 %result +} + +define float @test_ctselect_f32(i1 %cond, float %a, float %b) { +; X64-LABEL: test_ctselect_f32: +; X64: # %bb.0: +; X64-NEXT: movd %xmm1, %eax +; X64-NEXT: movd %xmm0, %ecx +; X64-NEXT: andl $1, %edi +; X64-NEXT: movl %edi, %edx +; X64-NEXT: negl %edx +; X64-NEXT: andl %ecx, %edx +; X64-NEXT: decl %edi +; X64-NEXT: andl %eax, %edi +; X64-NEXT: orl %edx, %edi +; X64-NEXT: movd %edi, %xmm0 +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_f32: +; X32: # %bb.0: +; X32-NEXT: pushl %eax +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: andl $1, %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: negl %ecx +; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: decl %eax +; X32-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl %ecx, %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: flds (%esp) +; X32-NEXT: popl %eax +; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_f32: +; X32-NOCMOV: # %bb.0: +; X32-NOCMOV-NEXT: pushl %eax +; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8 +; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: andl $1, %eax +; X32-NOCMOV-NEXT: movl %eax, %ecx +; X32-NOCMOV-NEXT: negl %ecx +; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: decl %eax +; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: orl %ecx, %eax +; X32-NOCMOV-NEXT: movl %eax, (%esp) +; X32-NOCMOV-NEXT: flds (%esp) +; X32-NOCMOV-NEXT: popl %eax +; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4 +; X32-NOCMOV-NEXT: retl + %result = call float @llvm.ct.select.f32(i1 %cond, float %a, float %b) + ret float %result +} + +define double @test_ctselect_f64(i1 %cond, double %a, double %b) { +; 
X64-LABEL: test_ctselect_f64: +; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: movq %xmm1, %rax +; X64-NEXT: movq %xmm0, %rcx +; X64-NEXT: andl $1, %edi +; X64-NEXT: movq %rdi, %rdx +; X64-NEXT: negq %rdx +; X64-NEXT: andq %rcx, %rdx +; X64-NEXT: decq %rdi +; X64-NEXT: andq %rax, %rdi +; X64-NEXT: orq %rdx, %rdi +; X64-NEXT: movq %rdi, %xmm0 +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_f64: +; X32: # %bb.0: +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 16 +; X32-NEXT: .cfi_offset %esi, -8 +; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %edx, %esi +; X32-NEXT: andl $1, %ecx +; X32-NEXT: negl %ecx +; X32-NEXT: andl %ecx, %esi +; X32-NEXT: xorl %edx, %esi +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: xorl %eax, %edx +; X32-NEXT: andl %ecx, %edx +; X32-NEXT: xorl %eax, %edx +; X32-NEXT: movl %edx, (%esp) +; X32-NEXT: fldl (%esp) +; X32-NEXT: addl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_f64: +; X32-NOCMOV: # %bb.0: +; X32-NOCMOV-NEXT: pushl %esi +; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8 +; X32-NOCMOV-NEXT: subl $8, %esp +; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 16 +; X32-NOCMOV-NEXT: .cfi_offset %esi, -8 +; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NOCMOV-NEXT: xorl %edx, %esi +; X32-NOCMOV-NEXT: andl $1, %ecx +; X32-NOCMOV-NEXT: negl %ecx +; X32-NOCMOV-NEXT: andl %ecx, %esi +; X32-NOCMOV-NEXT: xorl %edx, %esi +; X32-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx +; 
X32-NOCMOV-NEXT: xorl %eax, %edx +; X32-NOCMOV-NEXT: andl %ecx, %edx +; X32-NOCMOV-NEXT: xorl %eax, %edx +; X32-NOCMOV-NEXT: movl %edx, (%esp) +; X32-NOCMOV-NEXT: fldl (%esp) +; X32-NOCMOV-NEXT: addl $8, %esp +; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8 +; X32-NOCMOV-NEXT: popl %esi +; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4 +; X32-NOCMOV-NEXT: retl + %result = call double @llvm.ct.select.f64(i1 %cond, double %a, double %b) + ret double %result +} + +define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) { +; X64-LABEL: test_ctselect_ptr: +; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: andl $1, %edi +; X64-NEXT: leaq -1(%rdi), %rax +; X64-NEXT: negq %rdi +; X64-NEXT: andq %rsi, %rdi +; X64-NEXT: andq %rdx, %rax +; X64-NEXT: orq %rdi, %rax +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_ptr: +; X32: # %bb.0: +; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: andl $1, %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: negl %ecx +; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: decl %eax +; X32-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl %ecx, %eax +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_ptr: +; X32-NOCMOV: # %bb.0: +; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: andl $1, %eax +; X32-NOCMOV-NEXT: movl %eax, %ecx +; X32-NOCMOV-NEXT: negl %ecx +; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: decl %eax +; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: orl %ecx, %eax +; X32-NOCMOV-NEXT: retl + %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b) + ret ptr %result +} + +; Test with constant conditions +define i32 @test_ctselect_const_true(i32 %a, i32 %b) { +; X64-LABEL: test_ctselect_const_true: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_const_true: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_const_true: +; X32-NOCMOV: # 
%bb.0: +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: retl + %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_const_false(i32 %a, i32 %b) { +; X64-LABEL: test_ctselect_const_false: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_const_false: +; X32: # %bb.0: +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_const_false: +; X32-NOCMOV: # %bb.0: +; X32-NOCMOV-NEXT: xorl %eax, %eax +; X32-NOCMOV-NEXT: orl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: retl + %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b) + ret i32 %result +} + +; Test with comparison conditions +define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) { +; X64-LABEL: test_ctselect_icmp_eq: +; X64: # %bb.0: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %esi, %edi +; X64-NEXT: sete %al +; X64-NEXT: movl %eax, %esi +; X64-NEXT: negl %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: decl %eax +; X64-NEXT: andl %ecx, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_icmp_eq: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: sete %al +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: negl %ecx +; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: decl %eax +; X32-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl %ecx, %eax +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_icmp_eq: +; X32-NOCMOV: # %bb.0: +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: xorl %eax, %eax +; X32-NOCMOV-NEXT: cmpl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: sete %al +; X32-NOCMOV-NEXT: movl %eax, %ecx +; X32-NOCMOV-NEXT: negl %ecx +; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: decl %eax +; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax +; 
X32-NOCMOV-NEXT: orl %ecx, %eax +; X32-NOCMOV-NEXT: retl + %cond = icmp eq i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) { +; X64-LABEL: test_ctselect_icmp_ne: +; X64: # %bb.0: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %esi, %edi +; X64-NEXT: setne %al +; X64-NEXT: movl %eax, %esi +; X64-NEXT: negl %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: decl %eax +; X64-NEXT: andl %ecx, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_icmp_ne: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: setne %al +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: negl %ecx +; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: decl %eax +; X32-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl %ecx, %eax +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_icmp_ne: +; X32-NOCMOV: # %bb.0: +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: xorl %eax, %eax +; X32-NOCMOV-NEXT: cmpl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: setne %al +; X32-NOCMOV-NEXT: movl %eax, %ecx +; X32-NOCMOV-NEXT: negl %ecx +; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: decl %eax +; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: orl %ecx, %eax +; X32-NOCMOV-NEXT: retl + %cond = icmp ne i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) { +; X64-LABEL: test_ctselect_icmp_slt: +; X64: # %bb.0: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %esi, %edi +; X64-NEXT: setl %al +; X64-NEXT: movl %eax, %esi +; X64-NEXT: negl %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: decl %eax +; X64-NEXT: andl %ecx, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_icmp_slt: +; X32: # %bb.0: 
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: setl %al +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: negl %ecx +; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: decl %eax +; X32-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl %ecx, %eax +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_icmp_slt: +; X32-NOCMOV: # %bb.0: +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: xorl %eax, %eax +; X32-NOCMOV-NEXT: cmpl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: setl %al +; X32-NOCMOV-NEXT: movl %eax, %ecx +; X32-NOCMOV-NEXT: negl %ecx +; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: decl %eax +; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NOCMOV-NEXT: orl %ecx, %eax +; X32-NOCMOV-NEXT: retl + %cond = icmp slt i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) { +; X64-LABEL: test_ctselect_icmp_ult: +; X64: # %bb.0: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl %esi, %edi +; X64-NEXT: sbbl %eax, %eax +; X64-NEXT: andl %eax, %edx +; X64-NEXT: notl %eax +; X64-NEXT: andl %ecx, %eax +; X64-NEXT: orl %edx, %eax +; X64-NEXT: retq +; +; X32-LABEL: test_ctselect_icmp_ult: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: sbbl %eax, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: andl %eax, %ecx +; X32-NEXT: notl %eax +; X32-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl %ecx, %eax +; X32-NEXT: retl +; +; X32-NOCMOV-LABEL: test_ctselect_icmp_ult: +; X32-NOCMOV: # %bb.0: +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: xorl %eax, %eax +; X32-NOCMOV-NEXT: cmpl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: sbbl %eax, %eax +; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NOCMOV-NEXT: andl %eax, %ecx +; 
X32-NOCMOV-NEXT: notl %eax
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: orl %ecx, %eax
+; X32-NOCMOV-NEXT: retl
+  %cond = icmp ult i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define float @test_ctselect_fcmp_oeq(float %x, float %y, float %a, float %b) {
+; X64-LABEL: test_ctselect_fcmp_oeq:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm3, %eax
+; X64-NEXT: cmpeqss %xmm1, %xmm0
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: pand %xmm2, %xmm0
+; X64-NEXT: movd %xmm0, %edx
+; X64-NEXT: notl %ecx
+; X64-NEXT: andl %eax, %ecx
+; X64-NEXT: orl %edx, %ecx
+; X64-NEXT: movd %ecx, %xmm0
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_fcmp_oeq:
+; X32: # %bb.0:
+; X32-NEXT: pushl %eax
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: fucompi %st(1), %st
+; X32-NEXT: fstp %st(0)
+; X32-NEXT: setnp %al
+; X32-NEXT: sete %cl
+; X32-NEXT: andb %al, %cl
+; X32-NEXT: movzbl %cl, %eax
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: negl %ecx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: decl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: flds (%esp)
+; X32-NEXT: popl %eax
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_fcmp_oeq:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: pushl %eax
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
+; X32-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: fucompp
+; X32-NOCMOV-NEXT: fnstsw %ax
+; X32-NOCMOV-NEXT: # kill: def $ah killed $ah killed $ax
+; X32-NOCMOV-NEXT: sahf
+; X32-NOCMOV-NEXT: setnp %al
+; X32-NOCMOV-NEXT: sete %cl
+; X32-NOCMOV-NEXT: andb %al, %cl
+; X32-NOCMOV-NEXT: movzbl %cl, %eax
+; X32-NOCMOV-NEXT: movl %eax, %ecx
+; X32-NOCMOV-NEXT: negl %ecx
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: decl %eax
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: orl %ecx, %eax
+; X32-NOCMOV-NEXT: movl %eax, (%esp)
+; X32-NOCMOV-NEXT: flds (%esp)
+; X32-NOCMOV-NEXT: popl %eax
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4
+; X32-NOCMOV-NEXT: retl
+  %cond = fcmp oeq float %x, %y ; i1 condition comes from an ordered-equal FP compare
+  %result = call float @llvm.ct.select.f32(i1 %cond, float %a, float %b) ; f32 overload: pick between %a and %b on %cond
+  ret float %result
+}
+
+; Test with memory operands: both value operands are loaded (from %p1 and %p2) before the select.
+define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
+; X64-LABEL: test_ctselect_load:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: andl $1, %edi
+; X64-NEXT: leal -1(%rdi), %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: andl (%rsi), %ecx
+; X64-NEXT: andl (%rdx), %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_load:
+; X32: # %bb.0:
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %esi, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andl $1, %eax
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: negl %esi
+; X32-NEXT: andl (%edx), %esi
+; X32-NEXT: decl %eax
+; X32-NEXT: andl (%ecx), %eax
+; X32-NEXT: orl %esi, %eax
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_load:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: pushl %esi
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
+; X32-NOCMOV-NEXT: .cfi_offset %esi, -8
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movl %eax, %esi
+; X32-NOCMOV-NEXT: negl %esi
+; X32-NOCMOV-NEXT: andl (%edx), %esi
+; X32-NOCMOV-NEXT: decl %eax
+; X32-NOCMOV-NEXT: andl (%ecx), %eax
+; X32-NOCMOV-NEXT: orl %esi, %eax
+; X32-NOCMOV-NEXT: popl %esi
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4
+; X32-NOCMOV-NEXT: retl
+  %a = load i32, ptr %p1 ; first value operand, read from memory
+  %b = load i32, ptr %p2 ; second value operand, read from memory
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) ; select between the two loaded values
+  ret i32 %result
+}
+
+; Test nested ctselect calls: the inner select's result is the first value operand of the outer select.
+define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
+; X64-LABEL: test_ctselect_nested:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: andl $1, %esi
+; X64-NEXT: leal -1(%rsi), %r9d
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: andl %edx, %eax
+; X64-NEXT: andl %ecx, %r9d
+; X64-NEXT: orl %eax, %r9d
+; X64-NEXT: andl $1, %edi
+; X64-NEXT: leal -1(%rdi), %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: andl %r9d, %ecx
+; X64-NEXT: andl %r8d, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_nested:
+; X32: # %bb.0:
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: andl $1, %ecx
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: negl %edx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: decl %ecx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: orl %edx, %ecx
+; X32-NEXT: andl $1, %eax
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: negl %edx
+; X32-NEXT: andl %ecx, %edx
+; X32-NEXT: decl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %edx, %eax
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_nested:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: andl $1, %ecx
+; X32-NOCMOV-NEXT: movl %ecx, %edx
+; X32-NOCMOV-NEXT: negl %edx
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X32-NOCMOV-NEXT: decl %ecx
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: orl %edx, %ecx
+; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movl %eax, %edx
+; X32-NOCMOV-NEXT: negl %edx
+; X32-NOCMOV-NEXT: andl %ecx, %edx
+; X32-NOCMOV-NEXT: decl %eax
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: orl %edx, %eax
+; X32-NOCMOV-NEXT: retl
+  %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b) ; inner select, keyed on %cond2
+  %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c) ; outer select consumes %inner, keyed on %cond1
+  ret i32 %result
+}
+
+; Declare the llvm.ct.select overloads: i8/i16/i32/i64, float, double and ptr.
+declare i8 @llvm.ct.select.i8(i1, i8, i8)
+declare i16 @llvm.ct.select.i16(i1, i16, i16)
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare i64 @llvm.ct.select.i64(i1, i64, i64)
+declare float @llvm.ct.select.f32(i1, float, float)
+declare double @llvm.ct.select.f64(i1, double, double)
+declare ptr @llvm.ct.select.p0(i1, ptr, ptr)