diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 73f2c55a71125..0c8febc983917 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1468,6 +1468,11 @@ class LLVM_ABI TargetLoweringBase {
            getOperationAction(Op, VT) == Legal;
   }
 
+  /// Return true if the action for operation \p Op on type \p VT is LibCall.
+  bool isOperationLibCall(unsigned Op, EVT VT) const {
+    return getOperationAction(Op, VT) == LibCall;
+  }
+
   /// Return how this load with extension should be treated: either it is legal,
   /// needs to be promoted to a larger size, needs to be expanded to some other
   /// code sequence, or the target has a custom expander for it.
diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 2b5ced3915a2c..371d08b47aa85 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -74,11 +74,62 @@ class FRemExpander {
   /// Constant 1 of type \p ExTy.
   Value *One;
 
+  /// The frem argument/return types that can be expanded by this class.
+  // TODO The expansion could work for other floating point types
+  // as well, but this would require additional testing.
+  static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
+                                                      MVT::f64};
+
+  /// Libcalls for frem instructions of the type at the corresponding
+  /// positions of ExpandableTypes.
+  static constexpr std::array<RTLIB::Libcall, 3> FremLibcalls{
+      RTLIB::REM_F32, RTLIB::REM_F32, RTLIB::REM_F64};
+
+  /// Return the Libcall for frem instructions of expandable type \p VT or
+  /// std::nullopt if \p VT is not expandable.
+  static std::optional<RTLIB::Libcall> getFremLibcallForType(EVT VT) {
+    // Extended (non-simple) types have no MVT and are never expandable.
+    if (!VT.isSimple())
+      return std::nullopt;
+
+    MVT V = VT.getSimpleVT();
+    for (unsigned I = 0; I < ExpandableTypes.size(); I++)
+      if (ExpandableTypes[I] == V)
+        return FremLibcalls[I];
+
+    return std::nullopt;
+  }
+
 public:
+  /// Return true if \p Ty is one of the types listed in ExpandableTypes.
   static bool canExpandType(Type *Ty) {
-    // TODO The expansion should work for other floating point types
-    // as well, but this would require additional testing.
-    return Ty->isIEEELikeFPTy() && !Ty->isBFloatTy() && !Ty->isFP128Ty();
+    // getSimpleVT() is only valid for simple types, so test isSimple() first
+    // and report any other type as not expandable.
+    EVT VT = EVT::getEVT(Ty);
+    return VT.isSimple() && is_contained(ExpandableTypes, VT.getSimpleVT());
+  }
+
+  /// Return true if \p TLI reports the Expand action for an FREM of the
+  /// scalar type \p VT.
+  static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
+    assert(!VT.isVector() && "Cannot handle vector type; must scalarize first");
+    return (TLI.getOperationAction(ISD::FREM, VT) ==
+            TargetLowering::LegalizeAction::Expand);
+  }
+
+  /// Overload for IR types; a vector type is judged by its element type.
+  static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
+    // Consider scalar type for simplicity. It seems unlikely that a
+    // vector type can be legalized without expansion if the scalar
+    // type cannot.
+    return shouldExpandFremType(TLI, EVT::getEVT(Ty->getScalarType()));
+  }
+
+  /// Return true if the pass should expand "frem" instructions of any
+  /// expandable type for the target represented by \p TLI.
+  static bool shouldExpandAnyFremType(const TargetLowering &TLI) {
+    return any_of(ExpandableTypes,
+                  [&](MVT V) { return shouldExpandFremType(TLI, EVT(V)); });
   }
 
   static FRemExpander create(IRBuilder<> &B, Type *Ty) {
@@ -952,36 +1003,6 @@ static void scalarize(Instruction *I,
   I->eraseFromParent();
 }
 
-// This covers all floating point types; more than we need here.
-// TODO Move somewhere else for general use?
-/// Return the Libcall for a frem instruction of
-/// type \p Ty.
-static RTLIB::Libcall fremToLibcall(Type *Ty) {
-  assert(Ty->isFloatingPointTy());
-  if (Ty->isFloatTy() || Ty->is16bitFPTy())
-    return RTLIB::REM_F32;
-  if (Ty->isDoubleTy())
-    return RTLIB::REM_F64;
-  if (Ty->isFP128Ty())
-    return RTLIB::REM_F128;
-  if (Ty->isX86_FP80Ty())
-    return RTLIB::REM_F80;
-  if (Ty->isPPC_FP128Ty())
-    return RTLIB::REM_PPCF128;
-
-  llvm_unreachable("Unknown floating point type");
-}
-
-/* Return true if, according to \p LibInfo, the target either directly
-   supports the frem instruction for the \p Ty, has a custom lowering,
-   or uses a libcall. */
-static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) {
-  if (!TLI.isOperationExpand(ISD::FREM, EVT::getEVT(Ty)))
-    return true;
-
-  return TLI.getLibcallName(fremToLibcall(Ty->getScalarType()));
-}
-
 static void addToWorklist(Instruction &I,
                           SmallVector &Worklist) {
   if (I.getOperand(0)->getType()->isVectorTy())
@@ -999,7 +1020,12 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
   if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
     MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
 
-  if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
+  bool DisableExpandLargeFp =
+      MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS;
+  bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
+
+  // Bail out early when neither kind of expansion applies to this target.
+  if (DisableExpandLargeFp && DisableFrem)
     return false;
 
   auto ShouldHandleInst = [&](Instruction &I) {
@@ -1010,21 +1036,20 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
 
     switch (I.getOpcode()) {
     case Instruction::FRem:
-      return !targetSupportsFrem(TLI, Ty) &&
-             FRemExpander::canExpandType(Ty->getScalarType());
-
+      // Rewrite only frem types the expander supports; anything else is left
+      // untouched by this pass.
+      return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty) &&
+             FRemExpander::canExpandType(Ty->getScalarType());
     case Instruction::FPToUI:
-    case Instruction::FPToSI: {
-      auto *IntTy = cast<IntegerType>(Ty->getScalarType());
-      return IntTy->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
-    }
-
+    case Instruction::FPToSI:
+      return !DisableExpandLargeFp &&
+             cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >
+                 MaxLegalFpConvertBitWidth;
     case Instruction::UIToFP:
-    case Instruction::SIToFP: {
-      auto *IntTy =
-          cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
-      return IntTy->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
-    }
+    case Instruction::SIToFP:
+      return !DisableExpandLargeFp &&
+             cast<IntegerType>(I.getOperand(0)->getType()->getScalarType())
+                     ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
     }
 
     return false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index
3b5f83f7c089a..0d43d100c5a42 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4809,7 +4809,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     EVT VT = N->getValueType(0);
     EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
     if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) &&
-        TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
+        (TLI.isOperationExpand(N->getOpcode(), VT.getScalarType()) ||
+         TLI.isOperationLibCall(N->getOpcode(), VT.getScalarType()))) {
       Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
       if (N->getNumValues() > 1)
         ReplaceOtherWidenResults(N, Res.getNode(), ResNo);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 662d84b7a60a8..5c390fecb99b6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -534,9 +534,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
   setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
 
-  setOperationAction(ISD::FREM, MVT::f32, Expand);
-  setOperationAction(ISD::FREM, MVT::f64, Expand);
-  setOperationAction(ISD::FREM, MVT::f80, Expand);
+  setOperationAction(ISD::FREM, MVT::f32, LibCall);
+  setOperationAction(ISD::FREM, MVT::f64, LibCall);
+  setOperationAction(ISD::FREM, MVT::f80, LibCall);
 
   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 
@@ -559,7 +559,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FMUL, MVT::f128, LibCall);
   setOperationAction(ISD::FNEG, MVT::f128, Expand);
   setOperationAction(ISD::FPOW, MVT::f128, Expand);
-  setOperationAction(ISD::FREM, MVT::f128, Expand);
+  setOperationAction(ISD::FREM, MVT::f128, LibCall);
   setOperationAction(ISD::FRINT, MVT::f128, Expand);
   setOperationAction(ISD::FSIN, MVT::f128, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index bdaf48652d107..89fb427d64f0a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -51,7 +51,6 @@ class AMDGPUTargetLowering : public TargetLowering {
   /// Split a vector store into multiple scalar stores.
   /// \returns The resulting chain.
 
-  SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 35e1127000b8a..71e6d8314c13e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -895,7 +895,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
     setOperationAction(ISD::FMUL, MVT::f64, Expand);
     setOperationAction(ISD::FMA, MVT::f64, Expand);
     setOperationAction(ISD::FDIV, MVT::f64, Expand);
-    setOperationAction(ISD::FREM, MVT::f64, Expand);
+    setOperationAction(ISD::FREM, MVT::f64, LibCall);
     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
     setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
     setOperationAction(ISD::FNEG, MVT::f64, Expand);
@@ -1260,8 +1260,8 @@ ARMTargetLowering::ARMTargetLowering(const
TargetMachine &TM_,
   setOperationAction(ISD::FCOS, MVT::f64, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
-  setOperationAction(ISD::FREM, MVT::f64, Expand);
-  setOperationAction(ISD::FREM, MVT::f32, Expand);
+  setOperationAction(ISD::FREM, MVT::f64, LibCall);
+  setOperationAction(ISD::FREM, MVT::f32, LibCall);
   if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
       !Subtarget->isThumb1Only()) {
     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index e5b4f6eeb7b73..46b51e43e41d3 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -117,15 +117,15 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
   };
 
   ISD::NodeType FPOpToExpand[] = {
-      ISD::FSIN, ISD::FCOS,      ISD::FSINCOS,    ISD::FPOW,
-      ISD::FREM, ISD::FCOPYSIGN, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
+      ISD::FSIN,      ISD::FCOS,       ISD::FSINCOS,   ISD::FPOW,
+      ISD::FCOPYSIGN, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
 
   if (STI.useHardFloat()) {
 
     MVT AllVTy[] = {MVT::f32, MVT::f64};
 
     for (auto VT : AllVTy) {
-      setOperationAction(ISD::FREM, VT, Expand);
+      setOperationAction(ISD::FREM, VT, LibCall);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
       setOperationAction(ISD::BR_CC, VT, Expand);
 
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 9f7f434b66fa1..8f5d341c77aaa 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1661,13 +1661,16 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     for (MVT VT : MVT::integer_valuetypes())
       setOperationAction(IntExpOp, VT, Expand);
   }
 
-  for (unsigned FPExpOp :
-       {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
-        ISD::FPOW, ISD::FCOPYSIGN}) {
+  for (unsigned FPExpOp : {ISD::FDIV, ISD::FSQRT, ISD::FSIN, ISD::FCOS,
+                           ISD::FSINCOS, ISD::FPOW, ISD::FCOPYSIGN}) {
     for (MVT VT : MVT::fp_valuetypes())
       setOperationAction(FPExpOp, VT, Expand);
   }
+  // FREM is marked LibCall instead of Expand; one pass over the FP types is
+  // enough, so keep it out of the per-operation loop above.
+  for (MVT VT : MVT::fp_valuetypes())
+    setOperationAction(ISD::FREM, VT, LibCall);
 
   // No extending loads from i32.
   for (MVT VT : MVT::integer_valuetypes()) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f7deeafc9ccfc..b454758179fca 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -235,7 +235,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FCOS, MVT::f32, Expand);
     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
     setOperationAction(ISD::FPOW, MVT::f32, Expand);
-    setOperationAction(ISD::FREM, MVT::f32, Expand);
+    setOperationAction(ISD::FREM, MVT::f32, LibCall);
     setOperationAction(ISD::FP16_TO_FP, MVT::f32,
                        Subtarget.isSoftFPABI() ? LibCall : Custom);
     setOperationAction(ISD::FP_TO_FP16, MVT::f32,
@@ -283,7 +283,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FCOS, MVT::f64, Expand);
     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
     setOperationAction(ISD::FPOW, MVT::f64, Expand);
-    setOperationAction(ISD::FREM, MVT::f64, Expand);
+    setOperationAction(ISD::FREM, MVT::f64, LibCall);
     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
     setOperationAction(ISD::FP_TO_FP16, MVT::f64,
                        Subtarget.isSoftFPABI() ?
LibCall : Custom); diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 2fd73275721b1..4c746bc39b24b 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -467,8 +467,8 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::FEXP, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FREM, MVT::f32, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f32, LibCall); + setOperationAction(ISD::FREM, MVT::f64, LibCall); // Lower f16 conversion operations into library calls setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8bf0d118da575..96ce29ed2ea4f 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -328,7 +328,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); - setOperationAction(ISD::FREM, MVT::ppcf128, Expand); + setOperationAction(ISD::FREM, MVT::ppcf128, LibCall); // PowerPC has no SREM/UREM instructions unless we are on P9 // On P9 we may use a hardware instruction to compute the remainder. 
@@ -403,12 +403,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - setOperationAction(ISD::FREM , MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, LibCall); setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - setOperationAction(ISD::FREM , MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f32, LibCall); setOperationAction(ISD::FPOW , MVT::f32, Expand); // MASS transformation for LLVM intrinsics with replicating fast-math flag @@ -1208,7 +1208,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FCOS, MVT::f128, Expand); setOperationAction(ISD::FPOW, MVT::f128, Expand); setOperationAction(ISD::FPOWI, MVT::f128, Expand); - setOperationAction(ISD::FREM, MVT::f128, Expand); + setOperationAction(ISD::FREM, MVT::f128, LibCall); } if (Subtarget.hasP8Altivec()) { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 169465e18f103..ea4c8de932089 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -460,9 +460,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; - static const unsigned FPOpToExpand[] = { - ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, - ISD::FREM}; + static const unsigned FPOpToExpand[] = {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, + ISD::FPOW}; + static const unsigned FPOpToLibCall[] = {ISD::FREM}; static const unsigned FPRndMode[] = { ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND, @@ -558,6 +558,7 @@ 
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(FPOpToExpand, MVT::f32, Expand); + setOperationAction(FPOpToLibCall, MVT::f32, LibCall); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand); @@ -616,6 +617,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); setOperationAction(FPOpToExpand, MVT::f64, Expand); + setOperationAction(FPOpToLibCall, MVT::f64, LibCall); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand); diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index cbb7db68f7e7c..f776ed0756fe2 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -1795,18 +1795,18 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSIN , MVT::f128, Expand); setOperationAction(ISD::FCOS , MVT::f128, Expand); setOperationAction(ISD::FSINCOS, MVT::f128, Expand); - setOperationAction(ISD::FREM , MVT::f128, Expand); + setOperationAction(ISD::FREM, MVT::f128, LibCall); setOperationAction(ISD::FMA , MVT::f128, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - setOperationAction(ISD::FREM , MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, LibCall); setOperationAction(ISD::FMA, MVT::f64, Subtarget->isUA2007() ? 
Legal : Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - setOperationAction(ISD::FREM , MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f32, LibCall); setOperationAction(ISD::FMA, MVT::f32, Subtarget->isUA2007() ? Legal : Expand); setOperationAction(ISD::ROTL , MVT::i32, Expand); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index de28faf4908e9..3806c6d35c6f0 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -587,7 +587,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); - setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FREM, VT, LibCall); setOperationAction(ISD::FPOW, VT, Expand); // Special treatment. diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index a068138791cb4..6bac663177a40 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -229,7 +229,7 @@ void VETargetLowering::initSPUActions() { // VE doesn't have following floating point operations. for (MVT VT : MVT::fp_valuetypes()) { setOperationAction(ISD::FNEG, VT, Expand); - setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FREM, VT, LibCall); } // VE doesn't have fdiv of f128. 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index f9739492e7fe3..caf37c2c19a5c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -137,9 +137,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE}) setCondCodeAction(CC, T, Expand); // Expand floating-point library function operators. - for (auto Op : - {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA}) + for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FMA}) setOperationAction(Op, T, Expand); + setOperationAction(ISD::FREM, T, LibCall); // Note supported floating-point library function operators that otherwise // default to expand. for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b5f8ee50cba3d..e1d0642da260f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -388,10 +388,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); - setOperationAction(ISD::FREM , MVT::f32 , Expand); - setOperationAction(ISD::FREM , MVT::f64 , Expand); - setOperationAction(ISD::FREM , MVT::f80 , Expand); - setOperationAction(ISD::FREM , MVT::f128 , Expand); + setOperationAction(ISD::FREM, MVT::f32, LibCall); + setOperationAction(ISD::FREM, MVT::f64, LibCall); + setOperationAction(ISD::FREM, MVT::f80, LibCall); + setOperationAction(ISD::FREM, MVT::f128, LibCall); if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) { setOperationAction(ISD::GET_ROUNDING , MVT::i32 , Custom); @@ -2620,7 +2620,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, ISD::FTANH, 
ISD::STRICT_FTANH, // TODO: Add ISD:::STRICT_FMODF too once implemented. ISD::FMODF}) - if (isOperationExpand(Op, MVT::f32)) + if (isOperationExpand(Op, MVT::f32) + || isOperationLibCall(Op, MVT::f32)) setOperationAction(Op, MVT::f32, Promote); // clang-format on diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index c211777e69894..e57171fbab16c 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -216,7 +216,7 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); - setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FREM, VT, LibCall); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll index c208d03ff94b7..f4721f1468ddf 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll @@ -478,7 +478,7 @@ define void @frem() { ; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %F128 = frem fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V2F128 = frem <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2F128 = frem <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %F32 = frem float undef, undef diff --git 
a/llvm/test/Analysis/CostModel/ARM/divrem.ll b/llvm/test/Analysis/CostModel/ARM/divrem.ll index 76f80da55af64..461ad9d7d253b 100644 --- a/llvm/test/Analysis/CostModel/ARM/divrem.ll +++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll @@ -278,9 +278,9 @@ define void @i64() { define void @f16() { ; CHECK-NEON-LABEL: 'f16' ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv half undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem half undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %2 = frem half undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv half undef, 0xH4000 -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem half undef, 0xH4000 +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %4 = frem half undef, 0xH4000 ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'f16' @@ -306,9 +306,9 @@ define void @f16() { ; ; CHECK-V8R-LABEL: 'f16' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv half undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem half undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %2 = frem half undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv half undef, 0xH4000 -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem half undef, 0xH4000 +; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %4 = frem half undef, 0xH4000 ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv half undef, undef @@ -321,9 +321,9 @@ define void @f16() { define void @f32() { ; CHECK-NEON-LABEL: 'f32' ; CHECK-NEON-NEXT: Cost 
Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv float undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem float undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %2 = frem float undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv float undef, 2.000000e+00 -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem float undef, 2.000000e+00 +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %4 = frem float undef, 2.000000e+00 ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'f32' @@ -349,9 +349,9 @@ define void @f32() { ; ; CHECK-V8R-LABEL: 'f32' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv float undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem float undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %2 = frem float undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv float undef, 2.000000e+00 -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem float undef, 2.000000e+00 +; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %4 = frem float undef, 2.000000e+00 ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv float undef, undef @@ -364,9 +364,9 @@ define void @f32() { define void @f64() { ; CHECK-NEON-LABEL: 'f64' ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv double undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem double undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %2 = frem double undef, undef ; 
CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv double undef, 2.000000e+00 -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem double undef, 2.000000e+00 +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %4 = frem double undef, 2.000000e+00 ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'f64' @@ -392,9 +392,9 @@ define void @f64() { ; ; CHECK-V8R-LABEL: 'f64' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv double undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem double undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %2 = frem double undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv double undef, 2.000000e+00 -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem double undef, 2.000000e+00 +; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %4 = frem double undef, 2.000000e+00 ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv double undef, undef @@ -867,11 +867,11 @@ define void @vi64() { define void @vf16() { ; CHECK-NEON-LABEL: 'vf16' ; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef +; CHECK-NEON-NEXT: Cost Model: 
Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'vf16' @@ -903,11 +903,11 @@ define void @vf16() { ; ; CHECK-V8R-LABEL: 'vf16' ; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv <2 x half> undef, undef @@ -922,11 +922,11 @@ define void @vf16() { define void @vf32() { ; CHECK-NEON-LABEL: 'vf32' ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 
SizeLat:4 for: %1 = fdiv <2 x float> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'vf32' @@ -958,11 +958,11 @@ define void @vf32() { ; ; CHECK-V8R-LABEL: 'vf32' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs 
of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv <2 x float> undef, undef @@ -977,11 +977,11 @@ define void @vf32() { define void @vf64() { ; CHECK-NEON-LABEL: 'vf64' ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'vf64' @@ -1013,11 +1013,11 @@ define void @vf64() { ; ; CHECK-V8R-LABEL: 'vf64' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, undef -; CHECK-V8R-NEXT: Cost 
Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv <2 x double> undef, undef @@ -1492,11 +1492,11 @@ define void @vi64_2() { define void @vf16_2() { ; CHECK-NEON-LABEL: 'vf16_2' ; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, splat (half 0xH4000) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, splat (half 0xH4000) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000) ; CHECK-NEON-NEXT: Cost 
Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, splat (half 0xH4000) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'vf16_2' @@ -1528,11 +1528,11 @@ define void @vf16_2() { ; ; CHECK-V8R-LABEL: 'vf16_2' ; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, splat (half 0xH4000) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, splat (half 0xH4000) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, splat (half 0xH4000) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv <2 x half> undef, @@ -1547,11 +1547,11 @@ define void @vf16_2() { define void @vf32_2() { ; CHECK-NEON-LABEL: 'vf32_2' 
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'vf32_2' @@ -1583,11 +1583,11 @@ define void @vf32_2() { ; ; CHECK-V8R-LABEL: 'vf32_2' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 
SizeLat:4 for: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv <2 x float> undef, @@ -1602,11 +1602,11 @@ define void @vf32_2() { define void @vf64_2() { ; CHECK-NEON-LABEL: 'vf64_2' ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, 
splat (double 2.000000e+00) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'vf64_2' @@ -1638,11 +1638,11 @@ define void @vf64_2() { ; ; CHECK-V8R-LABEL: 'vf64_2' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv <2 x double> undef, diff --git 
a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll index 673bf38d44876..03bdd439ba2fc 100644 --- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll @@ -1248,35 +1248,35 @@ define void @fdiv_f16() { define void @frem() { ; CHECK-LABEL: 'frem' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = frem <1 x float> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = frem <2 x float> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = frem <4 x float> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F32 = frem <16 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F32 = frem <1 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = frem <2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = frem <4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8F32 = frem <8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16F32 = frem <16 x float> poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32 = frem poison, 
poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F32 = frem poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F32 = frem poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F32 = frem poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F32 = frem poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = frem <1 x double> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F64 = frem <8 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F64 = frem <1 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = frem <2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4F64 = frem <4 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8F64 = frem <8 x double> poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F64 = frem poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F64 = frem poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F64 = frem poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F64 = frem poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, 
i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.frem.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.frem.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost 
of 62 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.frem.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.frem.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32_VP = call @llvm.vp.frem.nxv1f32( poison, poison, poison, i32 poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F32_VP = call @llvm.vp.frem.nxv2f32( poison, poison, poison, i32 poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F32_VP = call @llvm.vp.frem.nxv4f32( poison, poison, poison, i32 poison) @@ -1340,24 +1340,24 @@ define void @frem() { define void @frem_bf16() { ; ZVFH-LABEL: 'frem_bf16' -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison -; ZVFH-NEXT: Cost Model: Found an 
estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %BF16 = frem bfloat poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = frem poison, poison ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem poison, poison ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem poison, poison ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem poison, poison ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem poison, poison ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = frem poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) -; ZVFH-NEXT: Cost Model: Found an 
estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call @llvm.vp.frem.nxv1bf16( poison, poison, poison, i32 poison) ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call @llvm.vp.frem.nxv2bf16( poison, poison, poison, 
i32 poison) ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call @llvm.vp.frem.nxv4bf16( poison, poison, poison, i32 poison) @@ -1366,24 +1366,24 @@ define void @frem_bf16() { ; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; ZVFHMIN-LABEL: 'frem_bf16' -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %BF16 = frem bfloat poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison ; ZVFHMIN-NEXT: Cost Model: Invalid 
cost for instruction: %NXV1BF16 = frem poison, poison ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem poison, poison ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem poison, poison ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem poison, poison ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem poison, poison ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = frem poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4BF16_VP = call <4 
x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call @llvm.vp.frem.nxv1bf16( poison, poison, poison, i32 poison) ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call @llvm.vp.frem.nxv2bf16( poison, poison, poison, i32 poison) ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call @llvm.vp.frem.nxv4bf16( poison, poison, poison, i32 poison) @@ -1392,24 +1392,24 @@ define void @frem_bf16() { ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; NO-ZFHMIN-LABEL: 'frem_bf16' -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %BF16 = frem bfloat poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = frem poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = frem poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call @llvm.vp.frem.nxv1bf16( poison, poison, poison, i32 poison) ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call @llvm.vp.frem.nxv2bf16( poison, poison, poison, i32 poison) ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call @llvm.vp.frem.nxv4bf16( poison, poison, poison, i32 poison) @@ -1502,24 +1502,24 @@ define void @frem_f16() { ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; NO-ZFHMIN-LABEL: 'frem_f16' -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %F16 = frem half poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = frem <1 x half> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = frem <2 x half> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = frem <4 x half> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = frem <8 x half> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = frem <16 x half> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = frem <32 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F16 = frem half poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16 = frem <1 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F16 = frem <2 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F16 = frem <4 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F16 = frem <8 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16F16 = frem <16 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32F16 = frem <32 x half> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = frem poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = frem poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = frem poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = frem poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for 
instruction: %NXV16F16 = frem poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = frem poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16F16_VP = call <16 x half> 
@llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call @llvm.vp.frem.nxv1f16( poison, poison, poison, i32 poison) ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call @llvm.vp.frem.nxv2f16( poison, poison, poison, i32 poison) ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call @llvm.vp.frem.nxv4f16( poison, poison, poison, i32 poison) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll index a7a88b80b6670..8f9a47c7ef930 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -625,80 +625,80 @@ define i32 @fdiv(i32 %arg) { define i32 @frem(i32 %arg) { ; SSE1-LABEL: 'frem' -; SSE1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; SSE1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; SSE1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; SSE1-NEXT: Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; SSE1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; SSE1-NEXT: Cost Model: Found costs of 4 for: %V2F64 = frem <2 x double> undef, undef -; SSE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; SSE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; SSE1-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; SSE1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; SSE1-NEXT: Cost 
Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef +; SSE1-NEXT: Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; SSE1-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; SSE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; SSE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; SSE1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; SSE1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; SSE2-LABEL: 'frem' -; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; SSE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found costs of RThru:38 
CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; SSE42-LABEL: 'frem' -; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; SSE42-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; SSE42-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; SSE42-NEXT: Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 
for: %V8F32 = frem <8 x float> undef, undef +; SSE42-NEXT: Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; SSE42-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; AVX-LABEL: 'frem' -; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:23 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:46 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; AVX-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; AVX-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; AVX-NEXT: Cost Model: Found costs of RThru:39 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef 
+; AVX-NEXT: Cost Model: Found costs of RThru:78 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; AVX-NEXT: Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; AVX-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; AVX-NEXT: Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; AVX512-LABEL: 'frem' -; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:23 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:47 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:23 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; AVX512-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; AVX512-NEXT: Cost Model: Found costs of RThru:39 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef +; AVX512-NEXT: Cost 
Model: Found costs of RThru:79 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; AVX512-NEXT: Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; AVX512-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found costs of RThru:39 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; SLM-LABEL: 'frem' -; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; SLM-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; SLM-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; SLM-NEXT: Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef +; SLM-NEXT: Cost Model: Found costs of RThru:76 CodeSize:4 
Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; SLM-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; SLM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; SLM-NEXT: Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; SLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; GLM-LABEL: 'frem' -; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; GLM-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; GLM-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; GLM-NEXT: Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef +; GLM-NEXT: Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, 
undef +; GLM-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; GLM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; GLM-NEXT: Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; GLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %F32 = frem float undef, undef