diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index c76c83d84b3c7..e05a67ae0655f 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -448,7 +448,6 @@ enum NodeType { STRICT_FLOG10, STRICT_FLOG2, STRICT_FRINT, - STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM, STRICT_FCEIL, diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 73f2c55a71125..0d140ac745f67 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1341,6 +1341,8 @@ class LLVM_ABI TargetLoweringBase { unsigned EqOpc; switch (Op) { default: llvm_unreachable("Unexpected FP pseudo-opcode"); +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::DAGN: EqOpc = ISD::DAGN; break; #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break; #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def index 30a82bf633d57..76bed326a3e50 100644 --- a/llvm/include/llvm/IR/ConstrainedOps.def +++ b/llvm/include/llvm/IR/ConstrainedOps.def @@ -39,6 +39,12 @@ #define CMP_INSTRUCTION(N,A,R,I,D) DAG_INSTRUCTION(N,A,R,I,D) #endif +// FP_OPERATION is same as DAG_FUNCTION, but in DAG it is represented by the +// same node, as non-constrained function. +#ifndef FP_OPERATION +#define FP_OPERATION(N,A,R,I,D) DAG_FUNCTION(N,A,R,I,D) +#endif + // Arguments of the entries are: // - instruction or intrinsic function name. // - Number of original instruction/intrinsic arguments. 
@@ -91,7 +97,7 @@ DAG_FUNCTION(maxnum, 2, 0, experimental_constrained_maxnum, FMAXNUM DAG_FUNCTION(minnum, 2, 0, experimental_constrained_minnum, FMINNUM) DAG_FUNCTION(maximum, 2, 0, experimental_constrained_maximum, FMAXIMUM) DAG_FUNCTION(minimum, 2, 0, experimental_constrained_minimum, FMINIMUM) -DAG_FUNCTION(nearbyint, 1, 1, experimental_constrained_nearbyint, FNEARBYINT) +FP_OPERATION(nearbyint, 1, 1, experimental_constrained_nearbyint, FNEARBYINT) DAG_FUNCTION(pow, 2, 1, experimental_constrained_pow, FPOW) DAG_FUNCTION(powi, 2, 1, experimental_constrained_powi, FPOWI) DAG_FUNCTION(ldexp, 2, 1, experimental_constrained_ldexp, FLDEXP) @@ -114,3 +120,4 @@ FUNCTION(fmuladd, 3, 1, experimental_constrained_fmuladd) #undef CMP_INSTRUCTION #undef DAG_INSTRUCTION #undef DAG_FUNCTION +#undef FP_OPERATION diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 632be7ad9e350..8edfb59455402 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -575,7 +575,7 @@ def frint : SDNode<"ISD::FRINT" , SDTFPUnaryOp>; def ftrunc : SDNode<"ISD::FTRUNC" , SDTFPUnaryOp>; def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>; def ffloor : SDNode<"ISD::FFLOOR" , SDTFPUnaryOp>; -def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>; +def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp, [SDNPMayHaveChain]>; def fround : SDNode<"ISD::FROUND" , SDTFPUnaryOp>; def froundeven : SDNode<"ISD::FROUNDEVEN" , SDTFPUnaryOp>; @@ -653,8 +653,6 @@ def strict_lrint : SDNode<"ISD::STRICT_LRINT", SDTFPToIntOp, [SDNPHasChain]>; def strict_llrint : SDNode<"ISD::STRICT_LLRINT", SDTFPToIntOp, [SDNPHasChain]>; -def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT", - SDTFPUnaryOp, [SDNPHasChain]>; def strict_fceil : SDNode<"ISD::STRICT_FCEIL", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR", @@ -1704,9 +1702,6 @@ def any_lrint : PatFrags<(ops node:$src), 
def any_llrint : PatFrags<(ops node:$src), [(strict_llrint node:$src), (llrint node:$src)]>; -def any_fnearbyint : PatFrags<(ops node:$src), - [(strict_fnearbyint node:$src), - (fnearbyint node:$src)]>; def any_fceil : PatFrags<(ops node:$src), [(strict_fceil node:$src), (fceil node:$src)]>; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5fb7e63cfb605..72fc5a7570ce7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2196,7 +2196,7 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, if (LC == RTLIB::UNKNOWN_LIBCALL) llvm_unreachable("Can't create an unknown libcall!"); - if (Node->isStrictFPOpcode()) { + if (Node->isStrictFPOpcode() || (Node->hasChain() && Node->isFPOperation())) { EVT RetVT = Node->getValueType(0); SmallVector Ops(drop_begin(Node->ops())); TargetLowering::MakeLibCallOptions CallOptions; @@ -4791,7 +4791,6 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::RINT_PPCF128, Results); break; case ISD::FNEARBYINT: - case ISD::STRICT_FNEARBYINT: ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, @@ -5760,7 +5759,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FFLOOR: case ISD::FCEIL: case ISD::FRINT: - case ISD::FNEARBYINT: case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FTRUNC: @@ -5792,7 +5790,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FCEIL: case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FROUND: case ISD::STRICT_FROUNDEVEN: case ISD::STRICT_FTRUNC: @@ -5821,6 +5818,25 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(Tmp3); Results.push_back(Tmp3.getValue(1)); break; + case ISD::FNEARBYINT: + if (Node->hasChain()) { + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); 
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp1.getValue(1), Tmp1}); + Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp2.getValue(1), Tmp2, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}); + Results.push_back(Tmp3); + Results.push_back(Tmp3.getValue(1)); + } else { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); + } + break; case ISD::BUILD_VECTOR: { MVT EltVT = OVT.getVectorElementType(); MVT NewEltVT = NVT.getVectorElementType(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 437d0f4654096..33d133ff9cba5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -115,7 +115,6 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::STRICT_FMUL: case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; - case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; case ISD::STRICT_FP_EXTEND: @@ -227,6 +226,32 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) { return Tmp.first; } +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOperation(SDNode *N, + RTLIB::Libcall LC) { + bool HasChain = N->hasChain(); + assert(N->getNumValues() == 1 + HasChain && + "multiple result is not supported yet"); + SDValue Chain = HasChain ? 
N->getOperand(0) : SDValue(); + SmallVector<SDValue> Ops; + SmallVector<EVT> OpsVT; + + // Start past the chain operand (if any). Each operand's pre-softening type is + // recorded for the libcall options before the operand itself is softened. + for (unsigned i = HasChain, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + OpsVT.push_back(Op.getValueType()); + Op = GetSoftenedFloat(Op); + Ops.push_back(Op); + } + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0)); + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, SDLoc(N), Chain); + // For a chained node, users of the old chain result are moved to Tmp.second. + if (HasChain) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -582,7 +607,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { - return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + return SoftenFloatRes_FPOperation(N, GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, @@ -1596,7 +1621,6 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; case ISD::STRICT_FMUL: case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; - case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; case ISD::STRICT_FP_EXTEND: @@ -1688,6 +1712,21 @@ void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, GetPairElements(Tmp.first, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FPOperation(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi) { + bool HasChain = N->hasChain(); + SDValue Chain = HasChain ? N->getOperand(0) : SDValue(); + assert(N->getNumValues() == 1 + HasChain && + "multiple result is not supported yet"); + SmallVector<SDValue> Ops(HasChain ? 
llvm::drop_begin(N->ops()) : N->ops()); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair Tmp = TLI.makeLibCall( + DAG, LC, N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain); + if (HasChain) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + GetPairElements(Tmp.first, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FMODF(SDNode *N) { ExpandFloatRes_UnaryWithTwoFPResults(N, RTLIB::getMODF(N->getValueType(0)), /*CallRetResNo=*/0); @@ -1951,7 +1990,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + ExpandFloatRes_FPOperation(N, GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, @@ -2827,6 +2866,11 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break; case ISD::FCOPYSIGN: R = PromoteFloatRes_FCOPYSIGN(N); break; + // Floating-point operations with optional chain. + case ISD::FNEARBYINT: + R = PromoteFloatRes_FPOperation(N); + break; + // Unary FP Operations case ISD::FABS: case ISD::FACOS: @@ -2843,7 +2887,6 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: case ISD::FLOG2: case ISD::FLOG10: - case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FRINT: case ISD::FROUND: @@ -3071,6 +3114,29 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) { return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags()); } +SDValue DAGTypeLegalizer::PromoteFloatRes_FPOperation(SDNode *N) { + bool HasChain = N->hasChain(); + SDValue Chain = HasChain ? 
N->getOperand(0) : SDValue(); + assert(N->getNumValues() == 1 + HasChain && + "multiple result is not supported yet"); + SmallVector<SDValue> Ops; + + // Rebuild the operand list: the chain (if any) is forwarded unchanged, every + // other operand is passed through GetPromotedFloat. + if (HasChain) + Ops.push_back(Chain); + for (unsigned i = HasChain, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + // FIXME Use strict conversions for strict operations. + Op = GetPromotedFloat(Op); + Ops.push_back(Op); + } + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + if (!HasChain) + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Ops); + + // A chained node must be created with a {value, chain} result list; with a + // single result type there is no result #1 to hand to the chain users. + SDValue Res = + DAG.getNode(N->getOpcode(), SDLoc(N), {NVT, MVT::Other}, Ops); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -3312,6 +3378,11 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: R = SoftPromoteHalfRes_FP_ROUND(N); break; + // Floating-point operations with optional chain. + case ISD::FNEARBYINT: + R = SoftPromoteHalfRes_FPOperation(N); + break; + // Unary FP Operations case ISD::FACOS: case ISD::FASIN: @@ -3327,7 +3398,6 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: case ISD::FLOG2: case ISD::FLOG10: - case ISD::FNEARBYINT: case ISD::FREEZE: case ISD::FRINT: case ISD::FROUND: @@ -3714,6 +3784,38 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) { return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOperation(SDNode *N) { + SDLoc dl(N); + bool HasChain = N->hasChain(); + assert(N->getNumValues() == 1 + HasChain && + "multiple result is not supported yet"); + SDValue Chain = HasChain ? 
N->getOperand(0) : SDValue(); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + auto PromotionOpcode = GetPromotionOpcode(OVT, NVT); + + // Forward the chain (if any) unchanged and convert every other operand from + // its soft-promoted form to NVT. + SmallVector<SDValue> Ops; + if (HasChain) + Ops.push_back(Chain); + for (unsigned i = HasChain, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = GetSoftPromotedHalf(N->getOperand(i)); + // FIXME Use strict conversions for strict operations. + Op = DAG.getNode(PromotionOpcode, dl, NVT, Op); + Ops.push_back(Op); + } + + SDValue Res; + if (HasChain) { + // A chained node must be created with a {value, chain} result list; with a + // single result type there is no result #1 to take the new chain from. + Res = DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other}, Ops); + Chain = Res.getValue(1); + } else { + Res = DAG.getNode(N->getOpcode(), dl, NVT, Ops); + } + + // Convert back to FP16 as an integer. + Res = DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); + + if (HasChain) + ReplaceValueWith(SDValue(N, 1), Chain); + return Res; +} + SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE(SDNode *N) { // Expand and soften recursively. ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 603dc34ce72a7..541977c7dad03 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -573,6 +573,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { bool SoftenFloatRes_UnaryWithTwoFPResults( SDNode *N, RTLIB::Libcall LC, std::optional CallRetResNo = {}); SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC); + SDValue SoftenFloatRes_FPOperation(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N); SDValue SoftenFloatRes_BITCAST(SDNode *N); @@ -681,6 +682,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue &Lo, SDValue &Hi); void ExpandFloatRes_UnaryWithTwoFPResults( SDNode *N, RTLIB::Libcall LC, std::optional CallRetResNo = {}); + void ExpandFloatRes_FPOperation(SDNode *N, RTLIB::Libcall LC, SDValue &Lo, + SDValue &Hi); // clang-format off void 
ExpandFloatRes_AssertNoFPClass(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -788,6 +791,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); SDValue PromoteFloatRes_VECREDUCE(SDNode *N); SDValue PromoteFloatRes_VECREDUCE_SEQ(SDNode *N); + SDValue PromoteFloatRes_FPOperation(SDNode *N); bool PromoteFloatOperand(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); @@ -839,6 +843,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftPromoteHalfRes_UNDEF(SDNode *N); SDValue SoftPromoteHalfRes_VECREDUCE(SDNode *N); SDValue SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N); + SDValue SoftPromoteHalfRes_FPOperation(SDNode *N); bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_BITCAST(SDNode *N); @@ -881,6 +886,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_OverflowOp(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_InregOp(SDNode *N); SDValue ScalarizeVecRes_VecInregOp(SDNode *N); + SDValue ScalarizeVecRes_FPOperation(SDNode *N); SDValue ScalarizeVecRes_ADDRSPACECAST(SDNode *N); SDValue ScalarizeVecRes_BITCAST(SDNode *N); @@ -965,6 +971,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FPOperation(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 8e423c4f83b38..3abc8594f3d17 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -319,9 +319,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Expand; break; +#define FP_OPERATION(NAME, NARG, ROUND_MODE, 
INTRINSIC, DAGN) case ISD::DAGN: #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" + if (!Node->hasChain()) { + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + break; + } ValVT = Node->getValueType(0); if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP || Op.getOpcode() == ISD::STRICT_UINT_TO_FP) @@ -435,7 +440,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FCEIL: case ISD::FTRUNC: case ISD::FRINT: - case ISD::FNEARBYINT: case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FFLOOR: @@ -1218,11 +1222,20 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { case ISD::SDIVFIXSAT: case ISD::UDIVFIXSAT: break; +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::DAGN: #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" - ExpandStrictFPOp(Node, Results); - return; + if (Node->hasChain()) { + ExpandStrictFPOp(Node, Results); + return; + } + if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: @@ -1305,7 +1318,6 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { case ISD::FDIV: case ISD::FCEIL: case ISD::FFLOOR: - case ISD::FNEARBYINT: case ISD::FRINT: case ISD::FROUND: case ISD::FROUNDEVEN: @@ -1318,6 +1330,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { break; } + if (Node->hasChain()) + return UnrollStrictFPOp(Node, Results); + SDValue Unrolled = DAG.UnrollVectorOp(Node); if (Node->getNumValues() == 1) { Results.push_back(Unrolled); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 3b5f83f7c089a..b312396a83359 100644 --- 
a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -85,6 +85,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND_VECTOR_INREG: R = ScalarizeVecRes_VecInregOp(N); break; +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) case ISD::DAGN: +#include "llvm/IR/ConstrainedOps.def" + R = ScalarizeVecRes_FPOperation(N); + break; case ISD::ABS: case ISD::ANY_EXTEND: case ISD::BITREVERSE: @@ -108,7 +112,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: case ISD::FLOG10: case ISD::FLOG2: - case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FREEZE: case ISD::ARITH_FENCE: @@ -211,6 +214,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_TernaryOp(N); break; +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" @@ -513,6 +517,41 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags()); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOperation(SDNode *N) { + SDLoc DL(N); + bool HasChain = N->hasChain(); + assert(N->getNumValues() == 1 + HasChain && + "multiple result is not supported yet"); + SDValue Chain = HasChain ? 
N->getOperand(0) : SDValue(); + + SmallVector<SDValue> Ops; + if (HasChain) + Ops.push_back(Chain); + for (unsigned i = HasChain, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + EVT OpVT = Op.getValueType(); + // An operand whose own type is not being scalarized is reduced to its + // element 0 with an explicit extract instead. + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + Op = GetScalarizedVector(Op); + } else { + EVT VT = OpVT.getVectorElementType(); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getVectorIdxConstant(0, DL)); + } + Ops.push_back(Op); + } + + EVT DestVT = N->getValueType(0).getVectorElementType(); + SDValue Result; + if (HasChain) { + // The chained form also yields a chain result, which takes over the users + // of the original node's chain. + Result = DAG.getNode(N->getOpcode(), DL, {DestVT, MVT::Other}, Ops, + N->getFlags()); + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + } else { + Result = DAG.getNode(N->getOpcode(), DL, DestVT, Ops, N->getFlags()); + } + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { EVT EltVT = N->getValueType(0).getVectorElementType(); EVT ExtVT = cast(N->getOperand(1))->getVT().getVectorElementType(); @@ -1262,6 +1301,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_ExtVecInRegOp(N, Lo, Hi); break; +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) case ISD::DAGN: +#include "llvm/IR/ConstrainedOps.def" + SplitVecRes_FPOperation(N, Lo, Hi); + break; + case ISD::ABS: case ISD::VP_ABS: case ISD::BITREVERSE: @@ -1294,7 +1338,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: case ISD::FLOG10: case ISD::FLOG2: - case ISD::FNEARBYINT: case ISD::VP_FNEARBYINT: case ISD::FNEG: case ISD::VP_FNEG: case ISD::FREEZE: @@ -1421,6 +1464,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_CMP(N, Lo, Hi); break; +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" @@ -2065,6 +2109,45 @@ void 
DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, } } +/// Split an FP operation that may or may not carry a chain into two nodes of +/// the split result types, returned through \p Lo and \p Hi. When \p N is +/// chained, both halves start from N's incoming chain and their output chains +/// are joined by a TokenFactor that replaces N's chain result. +void DAGTypeLegalizer::SplitVecRes_FPOperation(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + bool HasChain = N->hasChain(); + assert(N->getNumValues() == 1 + HasChain && + "multiple result is not supported yet"); + SDValue Chain = HasChain ? N->getOperand(0) : SDValue(); + + SmallVector<SDValue> OperandsLo; + SmallVector<SDValue> OperandsHi; + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + if (HasChain) { + OperandsLo.push_back(Chain); + OperandsHi.push_back(Chain); + } + for (unsigned i = HasChain, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + SDValue LHSLo, LHSHi; + GetSplitVector(Op, LHSLo, LHSHi); + OperandsLo.push_back(LHSLo); + OperandsHi.push_back(LHSHi); + } + SDNodeFlags Flags = N->getFlags(); + unsigned Opcode = N->getOpcode(); + if (HasChain) { + Lo = DAG.getNode(Opcode, dl, {LoVT, MVT::Other}, OperandsLo, Flags); + Hi = DAG.getNode(Opcode, dl, {HiVT, MVT::Other}, OperandsHi, Flags); + // Named NewChain so it does not shadow the incoming Chain above. + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewChain); + } else { + Lo = DAG.getNode(Opcode, dl, LoVT, OperandsLo, Flags); + Hi = DAG.getNode(Opcode, dl, HiVT, OperandsHi, Flags); + } +} + void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); @@ -4988,6 +5071,21 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryWithExtraScalarOp(N); break; + +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) case ISD::DAGN: +#include "llvm/IR/ConstrainedOps.def" + if (N->hasChain()) + Res = WidenVecRes_StrictFP(N); + else if (N->getNumOperands() == 1) { + if (unrollExpandedOp()) + break; + Res = WidenVecRes_Unary(N); + } else { + llvm_unreachable("not supported yet"); + } + break; + +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) 
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" @@ -5073,7 +5171,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: case ISD::FLOG10: case ISD::FLOG2: - case ISD::FNEARBYINT: case ISD::FRINT: case ISD::FROUND: case ISD::FROUNDEVEN: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 08af74c258899..3b37ce1902596 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -11663,6 +11663,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { switch (OrigOpc) { default: llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break; #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c21890a0d856f..4dc4f3fd16863 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8362,6 +8362,10 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( unsigned Opcode; switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case Intrinsic::INTRINSIC: \ + Opcode = ISD::DAGN; \ + break; #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case Intrinsic::INTRINSIC: \ Opcode = ISD::STRICT_##DAGN; \ diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index fcfbfe6c461d3..bdb84c4417949 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -251,7 +251,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FRINT: return "frint"; case ISD::STRICT_FRINT: return "strict_frint"; case ISD::FNEARBYINT: return "fnearbyint"; - case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint"; case ISD::FROUND: return "fround"; case ISD::STRICT_FROUND: return "strict_fround"; case ISD::FROUNDEVEN: return "froundeven"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cc503d324e74b..e6f996f7b549e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -12312,8 +12312,16 @@ SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node, if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT)) return SDValue(); + bool HasChain = Node->hasChain(); + SDValue Chain = HasChain ? 
Node->getOperand(0) : SDValue(); + SmallVector LoOps, HiOps; - for (const SDValue &V : Node->op_values()) { + if (HasChain) { + LoOps.push_back(Chain); + HiOps.push_back(Chain); + } + for (unsigned i = HasChain, e = Node->getNumOperands(); i != e; ++i) { + SDValue V = Node->getOperand(i); auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT); LoOps.push_back(Lo); HiOps.push_back(Hi); @@ -12321,7 +12329,13 @@ SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node, SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps); SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi); + SDValue R = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi); + if (HasChain) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + SplitOpLo.getValue(1), SplitOpHi.getValue(1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + } + return R; } SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT, diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index c23281a820b2b..b5d2153d1837e 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -869,6 +869,7 @@ void TargetLoweringBase::initActions() { VT, Expand); // Constrained floating-point operations default to expand. 
+#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ setOperationAction(ISD::STRICT_##DAGN, VT, Expand); #include "llvm/IR/ConstrainedOps.def" diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 69651168f8539..a23c1d5d6bd37 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -821,7 +821,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, ISD::STRICT_FFLOOR, ISD::STRICT_FSQRT, ISD::STRICT_FRINT, - ISD::STRICT_FNEARBYINT, ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN, @@ -909,7 +908,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, ISD::FMINIMUM, ISD::FMAXIMUM, ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, - ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FNEARBYINT, + ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN, ISD::STRICT_FROUND, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_LROUND, @@ -1253,7 +1252,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FSQRT, ISD::STRICT_FRINT, - ISD::STRICT_FNEARBYINT, ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, + ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM}) setOperationAction(Op, MVT::v1f64, Expand); @@ -1408,7 +1407,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, for (auto Op : {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::FROUND, ISD::FROUNDEVEN, ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE, - 
ISD::STRICT_FFLOOR, ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL, + ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN}) { for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index f788c7510f80c..03bd5a9e9b615 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5465,7 +5465,7 @@ defm FABS : SingleOperandFPDataNoException<0b0001, "fabs", fabs>; defm FMOV : SingleOperandFPDataNoException<0b0000, "fmov">; defm FNEG : SingleOperandFPDataNoException<0b0010, "fneg", fneg>; defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>; -defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>; +defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>; defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>; defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>; @@ -5865,7 +5865,7 @@ def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>; defm FNEG : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>; defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>; -defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>; +defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>; defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>; defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>; @@ -11056,7 +11056,7 @@ multiclass PromoteUnaryv8f16Tov4f32 } defm : PromoteUnaryv8f16Tov4f32; defm : PromoteUnaryv8f16Tov4f32; -defm : PromoteUnaryv8f16Tov4f32; +defm : PromoteUnaryv8f16Tov4f32; defm : PromoteUnaryv8f16Tov4f32; defm : 
PromoteUnaryv8f16Tov4f32; defm : PromoteUnaryv8f16Tov4f32; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 944a1e2e6fa17..a41485ab1192e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1271,7 +1271,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal); diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 979ba31b0431b..1d166b487e0c6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3882,7 +3882,7 @@ def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)), (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>; // Use current rounding mode -def : Pat<(f128 (any_fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>; +def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>; // Round to nearest, ties away from zero def : Pat<(f128 (any_fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>; // Round towards Zero diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7123a2d706787..7057aa082fbfd 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -539,7 +539,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have // complete support for all operations in LegalizeDAG. 
setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, - ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT, + ISD::STRICT_FRINT, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN, ISD::STRICT_FTRUNC, ISD::STRICT_FLDEXP}, MVT::f16, Promote); @@ -1129,7 +1129,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, - ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, + ISD::STRICT_FROUNDEVEN}, VT, Custom); setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom); @@ -1539,7 +1539,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, - ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, + ISD::STRICT_FROUNDEVEN}, VT, Custom); } @@ -3482,7 +3482,7 @@ lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL); break; - case ISD::STRICT_FNEARBYINT: + case ISD::FNEARBYINT: Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL, DAG.getVTList(ContainerVT, MVT::Other), Chain, Src, Mask, VL); @@ -3491,7 +3491,7 @@ lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, Chain = Truncated.getValue(1); // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. 
- if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) { + if (Op.getOpcode() != ISD::FNEARBYINT) { Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL, DAG.getVTList(ContainerVT, MVT::Other), Chain, Truncated, Mask, VL); @@ -7902,6 +7902,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FRINT: case ISD::FROUND: case ISD::FROUNDEVEN: + if (Op->hasChain()) + return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitVectorOp(Op, DAG); return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); @@ -8402,7 +8404,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::STRICT_FRINT: case ISD::STRICT_FFLOOR: case ISD::STRICT_FTRUNC: - case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FROUND: case ISD::STRICT_FROUNDEVEN: return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index a2737d247fe31..fc96c5324aaa9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -251,7 +251,7 @@ def: PatFprFpr; def: Pat<(any_frint FPR32:$rs1), (FROUNDNX_S FPR32:$rs1, FRM_DYN)>; // fnearbyint is like frint but does not detect inexact conditions. -def: Pat<(any_fnearbyint FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_DYN)>; +def: Pat<(fnearbyint FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_DYN)>; def: Pat<(any_fround FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_RMM)>; def: Pat<(any_froundeven FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_RNE)>; @@ -276,7 +276,7 @@ def: PatFprFpr; def: Pat<(any_frint FPR64:$rs1), (FROUNDNX_D FPR64:$rs1, FRM_DYN)>; // fnearbyint is like frint but does not detect inexact conditions. 
-def: Pat<(any_fnearbyint FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_DYN)>; +def: Pat<(fnearbyint FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_DYN)>; def: Pat<(any_fround FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_RMM)>; def: Pat<(any_froundeven FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_RNE)>; @@ -306,7 +306,7 @@ def: PatFprFpr; def: Pat<(f16 (any_frint FPR16:$rs1)), (FROUNDNX_H FPR16:$rs1, FRM_DYN)>; // fnearbyint is like frint but does not detect inexact conditions. -def: Pat<(f16 (any_fnearbyint FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_DYN)>; +def: Pat<(f16 (fnearbyint FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_DYN)>; def: Pat<(f16 (any_fround FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_RMM)>; def: Pat<(f16 (any_froundeven FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_RNE)>; diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3b7d11a318dc4..c0e9b419866a7 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -601,7 +601,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FRINT, VT, Legal); setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); if (Subtarget.hasFPExtension()) { - setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); setOperationAction(ISD::STRICT_FCEIL, VT, Legal); setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); @@ -653,7 +652,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); @@ -721,7 +719,6 @@ 
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 33f73bc658b25..3d8a2a424e7a5 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -430,9 +430,9 @@ let Predicates = [FeatureFPExtension] in { } // fnearbyint is like frint but does not detect inexact conditions. - def : Pat<(any_fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; - def : Pat<(any_fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; - def : Pat<(any_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; + def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; + def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; + def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; // floor is no longer allowed to raise an inexact condition, // so restrict it to the cases where the condition can be suppressed. diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 479bab5ce62b8..9a4930d0f77a4 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -1409,7 +1409,7 @@ let Predicates = [FeatureVector] in { // rounding modes. 
multiclass VectorRounding { def : FPConversion; - def : FPConversion; + def : FPConversion; def : FPConversion; def : FPConversion; def : FPConversion; diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 62073ec125e8f..85b12e649624f 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1258,7 +1258,6 @@ void X86DAGToDAGISel::PreprocessISelDAG() { case ISD::FROUNDEVEN: case ISD::STRICT_FROUNDEVEN: case ISD::FNEARBYINT: - case ISD::STRICT_FNEARBYINT: case ISD::FRINT: case ISD::STRICT_FRINT: { // Replace fp rounding with their X86 specific equivalent so we don't @@ -1274,7 +1273,6 @@ void X86DAGToDAGISel::PreprocessISelDAG() { case ISD::FTRUNC: Imm = 0xB; break; case ISD::STRICT_FROUNDEVEN: case ISD::FROUNDEVEN: Imm = 0x8; break; - case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: Imm = 0xC; break; case ISD::STRICT_FRINT: case ISD::FRINT: Imm = 0x4; break; @@ -1282,7 +1280,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { SDLoc dl(N); bool IsStrict = N->isStrictFPOpcode(); SDValue Res; - if (IsStrict) + if (IsStrict || (N->hasChain() && N->isFPOperation())) Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl, {N->getValueType(0), MVT::Other}, {N->getOperand(0), N->getOperand(1), diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index eea84a2841764..4c78891f78f3f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -705,7 +705,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FEXP2, MVT::f16, Promote); setOperationAction(ISD::STRICT_FCEIL, MVT::f16, Promote); setOperationAction(ISD::STRICT_FFLOOR, MVT::f16, Promote); - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f16, Promote); setOperationAction(ISD::STRICT_FRINT, MVT::f16, Promote); setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Promote); 
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Promote); @@ -1359,7 +1358,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FRINT, RoundedTy, Legal); setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal); setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal); setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal); setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal); @@ -1460,7 +1458,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FRINT, VT, Legal); setOperationAction(ISD::STRICT_FRINT, VT, Legal); setOperationAction(ISD::FNEARBYINT, VT, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::FROUNDEVEN, VT, Legal); setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); @@ -1917,7 +1914,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FRINT, VT, Legal); setOperationAction(ISD::STRICT_FRINT, VT, Legal); setOperationAction(ISD::FNEARBYINT, VT, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::FROUNDEVEN, VT, Legal); setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); @@ -2255,7 +2251,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FRINT, VT, Legal); setOperationAction(ISD::STRICT_FRINT, VT, Legal); setOperationAction(ISD::FNEARBYINT, VT, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::FROUNDEVEN, VT, Legal); setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll index eac4fb6f98bf7..56e2379c7e621 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll @@ -205,96 +205,12 @@ define double @nearbyint_f64(double %f1, double %f2) strictfp { define <4 
x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp { ; P8-LABEL: nearbyint_v4f32: ; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -176(r1) -; P8-NEXT: std r0, 192(r1) -; P8-NEXT: .cfi_def_cfa_offset 176 -; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: .cfi_offset v29, -48 -; P8-NEXT: .cfi_offset v30, -32 -; P8-NEXT: .cfi_offset v31, -16 -; P8-NEXT: xxsldwi vs0, v2, v2, 3 -; P8-NEXT: li r3, 128 -; P8-NEXT: xscvspdpn f1, vs0 -; P8-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill -; P8-NEXT: li r3, 144 -; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; P8-NEXT: li r3, 160 -; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; P8-NEXT: vmr v31, v2 -; P8-NEXT: bl nearbyintf -; P8-NEXT: nop -; P8-NEXT: xxsldwi vs0, v31, v31, 1 -; P8-NEXT: xxlor v30, f1, f1 -; P8-NEXT: xscvspdpn f1, vs0 -; P8-NEXT: bl nearbyintf -; P8-NEXT: nop -; P8-NEXT: xxmrghd vs0, vs1, v30 -; P8-NEXT: xscvspdpn f1, v31 -; P8-NEXT: xvcvdpsp v29, vs0 -; P8-NEXT: bl nearbyintf -; P8-NEXT: nop -; P8-NEXT: xxswapd vs0, v31 -; P8-NEXT: xxlor v30, f1, f1 -; P8-NEXT: xscvspdpn f1, vs0 -; P8-NEXT: bl nearbyintf -; P8-NEXT: nop -; P8-NEXT: xxmrghd vs0, v30, vs1 -; P8-NEXT: li r3, 160 -; P8-NEXT: xvcvdpsp v2, vs0 -; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 144 -; P8-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 128 -; P8-NEXT: vmrgew v2, v2, v29 -; P8-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; P8-NEXT: addi r1, r1, 176 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 +; P8-NEXT: xvrspic v2, v2 ; P8-NEXT: blr ; ; P9-LABEL: nearbyint_v4f32: ; P9: # %bb.0: -; P9-NEXT: mflr r0 -; P9-NEXT: stdu r1, -80(r1) -; P9-NEXT: std r0, 96(r1) -; P9-NEXT: .cfi_def_cfa_offset 80 -; P9-NEXT: .cfi_offset lr, 16 -; P9-NEXT: .cfi_offset v29, -48 -; P9-NEXT: .cfi_offset v30, -32 -; P9-NEXT: .cfi_offset v31, -16 -; P9-NEXT: xxsldwi vs0, v2, v2, 3 -; P9-NEXT: stxv v29, 32(r1) # 16-byte Folded Spill -; P9-NEXT: xscvspdpn f1, vs0 -; P9-NEXT: stxv 
v30, 48(r1) # 16-byte Folded Spill -; P9-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill -; P9-NEXT: vmr v31, v2 -; P9-NEXT: bl nearbyintf -; P9-NEXT: nop -; P9-NEXT: xxsldwi vs0, v31, v31, 1 -; P9-NEXT: xscpsgndp v30, f1, f1 -; P9-NEXT: xscvspdpn f1, vs0 -; P9-NEXT: bl nearbyintf -; P9-NEXT: nop -; P9-NEXT: xxmrghd vs0, vs1, v30 -; P9-NEXT: xscvspdpn f1, v31 -; P9-NEXT: xvcvdpsp v29, vs0 -; P9-NEXT: bl nearbyintf -; P9-NEXT: nop -; P9-NEXT: xxswapd vs0, v31 -; P9-NEXT: xscpsgndp v30, f1, f1 -; P9-NEXT: xscvspdpn f1, vs0 -; P9-NEXT: bl nearbyintf -; P9-NEXT: nop -; P9-NEXT: xxmrghd vs0, v30, vs1 -; P9-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload -; P9-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload -; P9-NEXT: xvcvdpsp v2, vs0 -; P9-NEXT: vmrgew v2, v2, v29 -; P9-NEXT: lxv v29, 32(r1) # 16-byte Folded Reload -; P9-NEXT: addi r1, r1, 80 -; P9-NEXT: ld r0, 16(r1) -; P9-NEXT: mtlr r0 +; P9-NEXT: xvrspic v2, v2 ; P9-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32( <4 x float> %vf1, @@ -306,60 +222,12 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) strictfp { ; P8-LABEL: nearbyint_v2f64: ; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -160(r1) -; P8-NEXT: std r0, 176(r1) -; P8-NEXT: .cfi_def_cfa_offset 160 -; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: .cfi_offset v30, -32 -; P8-NEXT: .cfi_offset v31, -16 -; P8-NEXT: li r3, 128 -; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; P8-NEXT: li r3, 144 -; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; P8-NEXT: vmr v31, v2 -; P8-NEXT: xxlor f1, v31, v31 -; P8-NEXT: bl nearbyint -; P8-NEXT: nop -; P8-NEXT: xxlor v30, f1, f1 -; P8-NEXT: xxswapd vs1, v31 -; P8-NEXT: bl nearbyint -; P8-NEXT: nop -; P8-NEXT: li r3, 144 -; P8-NEXT: xxmrghd v2, v30, vs1 -; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 128 -; P8-NEXT: lxvd2x v30, r1, r3 # 16-byte 
Folded Reload -; P8-NEXT: addi r1, r1, 160 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 +; P8-NEXT: xvrdpic v2, v2 ; P8-NEXT: blr ; ; P9-LABEL: nearbyint_v2f64: ; P9: # %bb.0: -; P9-NEXT: mflr r0 -; P9-NEXT: stdu r1, -64(r1) -; P9-NEXT: std r0, 80(r1) -; P9-NEXT: .cfi_def_cfa_offset 64 -; P9-NEXT: .cfi_offset lr, 16 -; P9-NEXT: .cfi_offset v30, -32 -; P9-NEXT: .cfi_offset v31, -16 -; P9-NEXT: stxv v31, 48(r1) # 16-byte Folded Spill -; P9-NEXT: vmr v31, v2 -; P9-NEXT: xscpsgndp f1, v31, v31 -; P9-NEXT: stxv v30, 32(r1) # 16-byte Folded Spill -; P9-NEXT: bl nearbyint -; P9-NEXT: nop -; P9-NEXT: xscpsgndp v30, f1, f1 -; P9-NEXT: xxswapd vs1, v31 -; P9-NEXT: bl nearbyint -; P9-NEXT: nop -; P9-NEXT: xxmrghd v2, v30, vs1 -; P9-NEXT: lxv v31, 48(r1) # 16-byte Folded Reload -; P9-NEXT: lxv v30, 32(r1) # 16-byte Folded Reload -; P9-NEXT: addi r1, r1, 64 -; P9-NEXT: ld r0, 16(r1) -; P9-NEXT: mtlr r0 +; P9-NEXT: xvrdpic v2, v2 ; P9-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( <2 x double> %vf1, diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 71c3069a406fe..cf6342726e569 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -4463,52 +4463,12 @@ entry: define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: std 0, 96(1) -; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 31, 2 -; PC64LE-NEXT: xxlor 1, 63, 63 -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxlor 62, 1, 1 -; PC64LE-NEXT: xxswapd 1, 63 -; PC64LE-NEXT: bl nearbyint -; 
PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: xxmrghd 34, 62, 1 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addi 1, 1, 80 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: xvrdpic 34, 34 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: stdu 1, -64(1) -; PC64LE9-NEXT: std 0, 80(1) -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 31, 2 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscpsgndp 62, 1, 1 -; PC64LE9-NEXT: xxswapd 1, 63 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xxmrghd 34, 62, 1 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: xvrdpic 34, 34 ; PC64LE9-NEXT: blr entry: %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( @@ -4611,32 +4571,21 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_nearby_v3f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: xxmrghd 0, 2, 1 +; PC64LE-NEXT: fmr 1, 3 ; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: std 0, 96(1) -; PC64LE-NEXT: stfd 30, 64(1) # 8-byte Folded Spill -; PC64LE-NEXT: fmr 30, 2 -; PC64LE-NEXT: stfd 31, 72(1) # 8-byte Folded Spill -; PC64LE-NEXT: fmr 31, 3 +; PC64LE-NEXT: std 0, 80(1) ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxlor 63, 1, 1 -; PC64LE-NEXT: fmr 1, 30 -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; 
PC64LE-NEXT: xxmrghd 63, 1, 63 -; PC64LE-NEXT: fmr 1, 31 +; PC64LE-NEXT: xvrdpic 63, 0 ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: xxswapd 1, 63 -; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE-NEXT: xxlor 2, 63, 63 -; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -4644,30 +4593,19 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 { ; PC64LE9-LABEL: constrained_vector_nearby_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: stdu 1, -64(1) -; PC64LE9-NEXT: std 0, 80(1) -; PC64LE9-NEXT: stfd 30, 48(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stdu 1, -48(1) +; PC64LE9-NEXT: xxmrghd 0, 2, 1 +; PC64LE9-NEXT: fmr 1, 3 +; PC64LE9-NEXT: std 0, 64(1) ; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: stfd 31, 56(1) # 8-byte Folded Spill -; PC64LE9-NEXT: fmr 31, 3 -; PC64LE9-NEXT: fmr 30, 2 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscpsgndp 63, 1, 1 -; PC64LE9-NEXT: fmr 1, 30 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xxmrghd 63, 1, 63 -; PC64LE9-NEXT: fmr 1, 31 +; PC64LE9-NEXT: xvrdpic 63, 0 ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -4682,78 +4620,14 @@ entry: define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE: # 
%bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: stdu 1, -96(1) -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: std 0, 112(1) -; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 2 -; PC64LE-NEXT: li 3, 80 -; PC64LE-NEXT: xxlor 1, 62, 62 -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 31, 3 -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxlor 61, 1, 1 -; PC64LE-NEXT: xxswapd 1, 62 -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxmrghd 62, 61, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxlor 61, 1, 1 -; PC64LE-NEXT: xxswapd 1, 63 -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 80 -; PC64LE-NEXT: vmr 2, 30 -; PC64LE-NEXT: xxmrghd 35, 61, 1 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addi 1, 1, 96 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: xvrdpic 35, 35 +; PC64LE-NEXT: xvrdpic 34, 34 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: stdu 1, -80(1) -; PC64LE9-NEXT: std 0, 96(1) -; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 2 -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 -; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 31, 3 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscpsgndp 61, 1, 1 -; PC64LE9-NEXT: xxswapd 1, 62 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xxmrghd 62, 61, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; 
PC64LE9-NEXT: xscpsgndp 61, 1, 1 -; PC64LE9-NEXT: xxswapd 1, 63 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xxmrghd 35, 61, 1 -; PC64LE9-NEXT: vmr 2, 30 -; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lxv 61, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addi 1, 1, 80 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: xvrdpic 35, 35 +; PC64LE9-NEXT: xvrdpic 34, 34 ; PC64LE9-NEXT: blr entry: %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll index 614f7b243c7e2..b8a21d741e81d 100644 --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -4506,10 +4506,10 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(ptr %a) #0 { ; ; SZ13-LABEL: constrained_vector_nearbyint_v4f64: ; SZ13: # %bb.0: # %entry -; SZ13-NEXT: vl %v0, 16(%r2), 4 -; SZ13-NEXT: vl %v1, 0(%r2), 4 -; SZ13-NEXT: vfidb %v24, %v1, 4, 0 -; SZ13-NEXT: vfidb %v26, %v0, 4, 0 +; SZ13-NEXT: vl %v0, 0(%r2), 4 +; SZ13-NEXT: vl %v1, 16(%r2), 4 +; SZ13-NEXT: vfidb %v26, %v1, 4, 0 +; SZ13-NEXT: vfidb %v24, %v0, 4, 0 ; SZ13-NEXT: br %r14 entry: %b = load <4 x double>, ptr %a