From 4845052677d3b2a08ff56302404317397ae19ed7 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Fri, 12 Sep 2025 06:52:05 -0400
Subject: [PATCH 01/17] [AMDGPU] expand-fp: Change frem expansion criterion

The existing condition for checking whether or not to expand an frem
instruction in the pass is not sufficiently precise.  Right now, it is
sufficient to ensure the correct working of the pass. But this is only
true in conjunction with the existing check for the
MaxLegalFpConvertBitWidth value which happens to exit early on targets
on which the frem condition is insufficient.

The correct working of the pass should not rely on this interaction.
It also limits the possibility of using the pass for handling further
expansions (e.g. merging the very similar ExpandLargeDivRem pass into
it).

This patch changes the pass to expand frem for a target iff the
target's legalization action for the instruction with the scalar type
corresponding to the instruction type is LibCall but the libcall does
not exist. The legalization action for frem in the AMDGPU backend is
adjusted accordingly.
---
 llvm/lib/CodeGen/ExpandFp.cpp                 | 37 +++++++++++++------
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |  2 +-
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 9cc6c6a706c58..6f4f049cc7f8e 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -979,14 +979,22 @@ static RTLIB::Libcall fremToLibcall(Type *Ty) {
   llvm_unreachable("Unknown floating point type");
 }
 
-/* Return true if, according to \p LibInfo, the target either directly
-   supports the frem instruction for the \p Ty, has a custom lowering,
-   or uses a libcall. */
-static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) {
-  if (!TLI.isOperationExpand(ISD::FREM, EVT::getEVT(Ty)))
-    return true;
-
-  return TLI.getLibcallName(fremToLibcall(Ty->getScalarType()));
+/// Return true if the pass should expand a "frem" instruction of the
+/// given \p Ty for the target represented by \p TLI. Expansion
+/// should happen if the legalization for the scalar type uses a
+/// non-existing libcall. The scalar type is considered because it is
+/// easier to do so and it is highly unlikely that a vector type can
+/// be legalized without a libcall if the scalar type cannot.
+static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
+  Type *ScalarTy = Ty->getScalarType();
+  EVT VT = EVT::getEVT(ScalarTy);
+
+  TargetLowering::LegalizeAction LA = TLI.getOperationAction(ISD::FREM, VT);
+  if (LA != TargetLowering::LegalizeAction::LibCall)
+    return false;
+
+  bool MissingLibcall = !TLI.getLibcallName(fremToLibcall(ScalarTy));
+  return MissingLibcall && FRemExpander::canExpandType(ScalarTy);
 }
 
 static bool runImpl(Function &F, const TargetLowering &TLI,
@@ -1000,8 +1008,8 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
   if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
     MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
 
-  if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
-    return false;
+  bool TargetSkipExpandLargeFp =
+      MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS;
 
   for (auto &I : instructions(F)) {
     switch (I.getOpcode()) {
@@ -1011,8 +1019,7 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
       if (Ty->isScalableTy())
         continue;
 
-      if (targetSupportsFrem(TLI, Ty) ||
-          !FRemExpander::canExpandType(Ty->getScalarType()))
+      if (!shouldExpandFremType(TLI, Ty))
         continue;
 
       Replace.push_back(&I);
@@ -1022,6 +1029,9 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     }
     case Instruction::FPToUI:
     case Instruction::FPToSI: {
+      if (TargetSkipExpandLargeFp)
+        continue;
+
       // TODO: This pass doesn't handle scalable vectors.
       if (I.getOperand(0)->getType()->isScalableTy())
         continue;
@@ -1039,6 +1049,9 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     }
     case Instruction::UIToFP:
     case Instruction::SIToFP: {
+      if (TargetSkipExpandLargeFp)
+        continue;
+
       // TODO: This pass doesn't handle scalable vectors.
       if (I.getOperand(0)->getType()->isScalableTy())
         continue;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5c9b616e9bc21..3892d7949a0fc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -423,7 +423,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64},
                      Expand);
 
-  setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Expand);
+  setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, LibCall);
 
   if (Subtarget->has16BitInsts()) {
     setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);

From 534b3e2f0787cfa2c13e0d8cf6356db12db27741 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Fri, 12 Sep 2025 09:21:27 -0400
Subject: [PATCH 02/17] Revert Operation Action for frem to Expand

---
 llvm/lib/CodeGen/ExpandFp.cpp                 | 3 ++-
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 6f4f049cc7f8e..fd376d54ad753 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -990,7 +990,8 @@ static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
   EVT VT = EVT::getEVT(ScalarTy);
 
   TargetLowering::LegalizeAction LA = TLI.getOperationAction(ISD::FREM, VT);
-  if (LA != TargetLowering::LegalizeAction::LibCall)
+  if (LA != TargetLowering::LegalizeAction::LibCall &&
+      LA != TargetLowering::LegalizeAction::Expand)
     return false;
 
   bool MissingLibcall = !TLI.getLibcallName(fremToLibcall(ScalarTy));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3892d7949a0fc..5c9b616e9bc21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -423,7 +423,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64},
                      Expand);
 
-  setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, LibCall);
+  setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Expand);
 
   if (Subtarget->has16BitInsts()) {
     setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);

From 61ca19c0ca9b9c42bd9bd5ddbb1be175d0403307 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Tue, 16 Sep 2025 05:41:22 -0400
Subject: [PATCH 03/17] [AMDGPU] expand-fp: Add early exit for targets that
 don't require any expansion

This commit adds a function to check if a target needs expansion
of any type of frem instructions. In conjunction with the trivial
check if the large fp conversion expansions are necessary, this
can be used to perform an early exit from the pass if no
expansions are needed for a target.
---
 llvm/lib/CodeGen/ExpandFp.cpp | 112 ++++++++++++++++++++--------------
 1 file changed, 66 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index fd376d54ad753..0d0925472a6a8 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -74,11 +74,63 @@ class FRemExpander {
   /// Constant 1 of type \p ExTy.
   Value *One;
 
+  /// The frem argument/return types that can be expanded by this class.
+  // TODO The expansion could work for other floating point types
+  // as well, but this would require additional testing.
+  inline static const SmallVector<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
+                                                          MVT::f64};
+
+  /// Libcalls for frem instructions of the type at the corresponding
+  /// positions of ExpandableTypes.
+  inline static const SmallVector<RTLIB::Libcall, 3> FremLibcalls{
+      RTLIB::REM_F32, RTLIB::REM_F32, RTLIB::REM_F64};
+
+  /// Return the Libcall for frem instructions of expandable type \p VT or
+  /// std::nullopt if \p VT is not expandable.
+  static std::optional<RTLIB::Libcall> getFremLibcallForType(EVT VT) {
+    auto *It = find(ExpandableTypes, VT.getSimpleVT());
+    if (It == ExpandableTypes.end())
+      return {};
+
+    return FremLibcalls[It - ExpandableTypes.begin()];
+  };
+
 public:
   static bool canExpandType(Type *Ty) {
-    // TODO The expansion should work for other floating point types
-    // as well, but this would require additional testing.
-    return Ty->isIEEELikeFPTy() && !Ty->isBFloatTy() && !Ty->isFP128Ty();
+    EVT VT = EVT::getEVT(Ty);
+    assert(VT.isSimple() && "Can expand only simple types");
+
+    return find(ExpandableTypes, VT.getSimpleVT());
+  }
+
+  /// Return true if the pass should expand a frem instruction of the
+  /// given \p Ty for the target represented by \p TLI. Expansion
+  /// should happen if the legalization for the scalar type uses a
+  /// non-existing libcall.
+  static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
+    TargetLowering::LegalizeAction LA = TLI.getOperationAction(ISD::FREM, VT);
+    if (LA != TargetLowering::LegalizeAction::LibCall &&
+        LA != TargetLowering::LegalizeAction::Expand)
+      return false;
+
+    auto Libcall = getFremLibcallForType(VT);
+    bool MissingLibcall = Libcall.has_value() && !TLI.getLibcallName(*Libcall);
+    return MissingLibcall;
+  }
+
+  static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
+    // Consider scalar type for simplicity.
+    // It is very unlikely that a vector type can be legalized without a libcall
+    // if the scalar type cannot.
+    return shouldExpandFremType(TLI, EVT::getEVT(Ty->getScalarType()));
+  }
+
+  /// Return true if the pass should expand "frem" instructions of some any for
+  /// the target represented by \p TLI.
+  static bool shouldExpandAnyFremType(const TargetLowering &TLI) {
+    return std::any_of(
+        ExpandableTypes.begin(), ExpandableTypes.end(),
+        [&](MVT V) { return shouldExpandFremType(TLI, EVT(V)); });
   }
 
   static FRemExpander create(IRBuilder<> &B, Type *Ty) {
@@ -959,45 +1011,6 @@ static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
   I->eraseFromParent();
 }
 
-// This covers all floating point types; more than we need here.
-// TODO Move somewhere else for general use?
-/// Return the Libcall for a frem instruction of
-/// type \p Ty.
-static RTLIB::Libcall fremToLibcall(Type *Ty) {
-  assert(Ty->isFloatingPointTy());
-  if (Ty->isFloatTy() || Ty->is16bitFPTy())
-    return RTLIB::REM_F32;
-  if (Ty->isDoubleTy())
-    return RTLIB::REM_F64;
-  if (Ty->isFP128Ty())
-    return RTLIB::REM_F128;
-  if (Ty->isX86_FP80Ty())
-    return RTLIB::REM_F80;
-  if (Ty->isPPC_FP128Ty())
-    return RTLIB::REM_PPCF128;
-
-  llvm_unreachable("Unknown floating point type");
-}
-
-/// Return true if the pass should expand a "frem" instruction of the
-/// given \p Ty for the target represented by \p TLI. Expansion
-/// should happen if the legalization for the scalar type uses a
-/// non-existing libcall. The scalar type is considered because it is
-/// easier to do so and it is highly unlikely that a vector type can
-/// be legalized without a libcall if the scalar type cannot.
-static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
-  Type *ScalarTy = Ty->getScalarType();
-  EVT VT = EVT::getEVT(ScalarTy);
-
-  TargetLowering::LegalizeAction LA = TLI.getOperationAction(ISD::FREM, VT);
-  if (LA != TargetLowering::LegalizeAction::LibCall &&
-      LA != TargetLowering::LegalizeAction::Expand)
-    return false;
-
-  bool MissingLibcall = !TLI.getLibcallName(fremToLibcall(ScalarTy));
-  return MissingLibcall && FRemExpander::canExpandType(ScalarTy);
-}
-
 static bool runImpl(Function &F, const TargetLowering &TLI,
                     AssumptionCache *AC) {
   SmallVector<Instruction *, 4> Replace;
@@ -1009,18 +1022,25 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
   if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
     MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
 
-  bool TargetSkipExpandLargeFp =
+  bool DisableExpandLargeFp =
       MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS;
+  bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
+
+  if (DisableExpandLargeFp && DisableFrem)
+    return false;
 
   for (auto &I : instructions(F)) {
     switch (I.getOpcode()) {
     case Instruction::FRem: {
+      if (DisableFrem)
+        continue;
+
       Type *Ty = I.getType();
       // TODO: This pass doesn't handle scalable vectors.
       if (Ty->isScalableTy())
         continue;
 
-      if (!shouldExpandFremType(TLI, Ty))
+      if (!FRemExpander::shouldExpandFremType(TLI, Ty))
         continue;
 
       Replace.push_back(&I);
@@ -1030,7 +1050,7 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     }
     case Instruction::FPToUI:
     case Instruction::FPToSI: {
-      if (TargetSkipExpandLargeFp)
+      if (DisableExpandLargeFp)
         continue;
 
       // TODO: This pass doesn't handle scalable vectors.
@@ -1050,7 +1070,7 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     }
     case Instruction::UIToFP:
     case Instruction::SIToFP: {
-      if (TargetSkipExpandLargeFp)
+      if (DisableExpandLargeFp)
         continue;
 
       // TODO: This pass doesn't handle scalable vectors.

From c1814f0b0745bf1ef61594320df8df7b8f73b6d4 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Tue, 16 Sep 2025 11:01:07 -0400
Subject: [PATCH 04/17] Review changes

* Use constexpr std::array instead of inline const SmallVector.
* shouldExpandFremType
  - Remove LibCall action handling
  - Add assert to document that vectors are not handled
  - additionally: Inline variable
---
 llvm/lib/CodeGen/ExpandFp.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 0d0925472a6a8..076fd266d3762 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -77,12 +77,12 @@ class FRemExpander {
   /// The frem argument/return types that can be expanded by this class.
   // TODO The expansion could work for other floating point types
   // as well, but this would require additional testing.
-  inline static const SmallVector<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
-                                                          MVT::f64};
+  static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
+                                                      MVT::f64};
 
   /// Libcalls for frem instructions of the type at the corresponding
   /// positions of ExpandableTypes.
-  inline static const SmallVector<RTLIB::Libcall, 3> FremLibcalls{
+  static constexpr std::array<RTLIB::Libcall, 3> FremLibcalls{
       RTLIB::REM_F32, RTLIB::REM_F32, RTLIB::REM_F64};
 
   /// Return the Libcall for frem instructions of expandable type \p VT or
@@ -108,14 +108,14 @@ class FRemExpander {
   /// should happen if the legalization for the scalar type uses a
   /// non-existing libcall.
   static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
+    assert(!VT.isVector() && "Cannot handle vector type; must scalarize first");
+
     TargetLowering::LegalizeAction LA = TLI.getOperationAction(ISD::FREM, VT);
-    if (LA != TargetLowering::LegalizeAction::LibCall &&
-        LA != TargetLowering::LegalizeAction::Expand)
+    if (LA != TargetLowering::LegalizeAction::Expand)
       return false;
 
     auto Libcall = getFremLibcallForType(VT);
-    bool MissingLibcall = Libcall.has_value() && !TLI.getLibcallName(*Libcall);
-    return MissingLibcall;
+    return Libcall.has_value() && !TLI.getLibcallName(*Libcall);;
   }
 
   static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {

From a752a2f3888f1c6a87c796b0f0e899205bfce274 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Tue, 16 Sep 2025 11:09:46 -0400
Subject: [PATCH 05/17] fixup! Review changes

---
 llvm/lib/CodeGen/ExpandFp.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 076fd266d3762..acef7e087a907 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -115,7 +115,7 @@ class FRemExpander {
       return false;
 
     auto Libcall = getFremLibcallForType(VT);
-    return Libcall.has_value() && !TLI.getLibcallName(*Libcall);;
+    return Libcall.has_value() && !TLI.getLibcallName(*Libcall);
   }
 
   static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {

From 307252aa5db6afae0af9baf4fe7f064038088c12 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Tue, 16 Sep 2025 12:02:21 -0400
Subject: [PATCH 06/17] Try fix Windows build problem

The compiler in the Windows build failed to deduce the type for the auto.
---
 llvm/lib/CodeGen/ExpandFp.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index acef7e087a907..5904ee82df831 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -88,11 +88,12 @@ class FRemExpander {
   /// Return the Libcall for frem instructions of expandable type \p VT or
   /// std::nullopt if \p VT is not expandable.
   static std::optional<RTLIB::Libcall> getFremLibcallForType(EVT VT) {
-    auto *It = find(ExpandableTypes, VT.getSimpleVT());
-    if (It == ExpandableTypes.end())
-      return {};
+    MVT V = VT.getSimpleVT();
+    for (unsigned I = 0; I < ExpandableTypes.size(); I++)
+      if (ExpandableTypes[I] == V)
+        return FremLibcalls[I];
 
-    return FremLibcalls[It - ExpandableTypes.begin()];
+    return {};
   };
 
 public:

From 4728696cf569ee95b140bba738a831d35dbaa520 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Tue, 16 Sep 2025 13:04:49 -0400
Subject: [PATCH 07/17] Further fixup for Windows build

---
 llvm/lib/CodeGen/ExpandFp.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 5904ee82df831..291a6447fe36c 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -101,7 +101,7 @@ class FRemExpander {
     EVT VT = EVT::getEVT(Ty);
     assert(VT.isSimple() && "Can expand only simple types");
 
-    return find(ExpandableTypes, VT.getSimpleVT());
+    return find(ExpandableTypes, VT.getSimpleVT()) != ExpandableTypes.end();
   }
 
   /// Return true if the pass should expand a frem instruction of the

From 77d862b3cae814fd87dbe854a2d4a9a7ba0a9f87 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Thu, 2 Oct 2025 09:38:52 -0400
Subject: [PATCH 08/17] Change ISD::FREM legalization actions from Expand to
 LibCall for scalar types

---
 llvm/include/llvm/CodeGen/TargetLowering.h    |   4 +
 .../SelectionDAG/LegalizeVectorOps.cpp        |   1 -
 .../SelectionDAG/LegalizeVectorTypes.cpp      |   3 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |  10 +-
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |   1 -
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |   6 +-
 llvm/lib/Target/CSKY/CSKYISelLowering.cpp     |   6 +-
 .../Target/Hexagon/HexagonISelLowering.cpp    |  11 +-
 .../LoongArch/LoongArchISelLowering.cpp       |   4 +-
 llvm/lib/Target/Mips/MipsISelLowering.cpp     |   4 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |   8 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  16 ++-
 llvm/lib/Target/Sparc/SparcISelLowering.cpp   |   6 +-
 .../Target/SystemZ/SystemZISelLowering.cpp    |   2 +-
 llvm/lib/Target/VE/VEISelLowering.cpp         |   2 +-
 .../WebAssembly/WebAssemblyISelLowering.cpp   |   4 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  11 +-
 llvm/lib/Target/Xtensa/XtensaISelLowering.cpp |   2 +-
 .../Analysis/CostModel/AArch64/arith-fp.ll    |   2 +-
 llvm/test/Analysis/CostModel/ARM/divrem.ll    |  96 ++++++-------
 .../test/Analysis/CostModel/RISCV/arith-fp.ll | 136 +++++++++---------
 llvm/test/Analysis/CostModel/X86/arith-fp.ll  | 112 +++++++--------
 22 files changed, 227 insertions(+), 220 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 2ba8b29e775e0..cb640c723d28a 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1476,6 +1476,10 @@ class LLVM_ABI TargetLoweringBase {
            getOperationAction(Op, VT) == Legal;
   }
 
+  bool isOperationLibCall(unsigned Op, EVT VT) const {
+    return getOperationAction(Op, VT) == LibCall;
+  }
+
   /// Return how this load with extension should be treated: either it is legal,
   /// needs to be promoted to a larger size, needs to be expanded to some other
   /// code sequence, or the target has a custom expander for it.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 8e423c4f83b38..81cf570e38b9a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -17,7 +17,6 @@
 // expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
 // the operation must be unrolled, which introduces nodes with the illegal
 // type i8 which must be promoted.
-//
 // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
 // or operations that happen to take a vector which are custom-lowered;
 // the legalization for such operations never produces nodes
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ff7cd665446cc..e0fe5142f37f2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4809,7 +4809,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     EVT VT = N->getValueType(0);
     EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
     if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) &&
-        TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
+        (TLI.isOperationExpand(N->getOpcode(), VT.getScalarType()) ||
+         TLI.isOperationLibCall(N->getOpcode(), VT.getScalarType()))) {
       Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
       if (N->getNumValues() > 1)
         ReplaceOtherWidenResults(N, Res.getNode(), ResNo);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fa55a58d05e76..f61e5fd51ea5b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -534,9 +534,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
   setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
 
-  setOperationAction(ISD::FREM, MVT::f32, Expand);
-  setOperationAction(ISD::FREM, MVT::f64, Expand);
-  setOperationAction(ISD::FREM, MVT::f80, Expand);
+  setOperationAction(ISD::FREM, MVT::f32, LibCall);
+  setOperationAction(ISD::FREM, MVT::f64, LibCall);
+  setOperationAction(ISD::FREM, MVT::f80, LibCall);
 
   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 
@@ -559,7 +559,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FMUL, MVT::f128, LibCall);
   setOperationAction(ISD::FNEG, MVT::f128, Expand);
   setOperationAction(ISD::FPOW, MVT::f128, Expand);
-  setOperationAction(ISD::FREM, MVT::f128, Expand);
+  setOperationAction(ISD::FREM, MVT::f128, LibCall);
   setOperationAction(ISD::FRINT, MVT::f128, Expand);
   setOperationAction(ISD::FSIN, MVT::f128, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
@@ -734,8 +734,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SSUBO_CARRY, MVT::i32, Custom);
   setOperationAction(ISD::SSUBO_CARRY, MVT::i64, Custom);
 
-  setOperationAction(ISD::FSIN, MVT::f32, Expand);
   setOperationAction(ISD::FSIN, MVT::f64, Expand);
+  setOperationAction(ISD::FSIN, MVT::f32, Expand);
   setOperationAction(ISD::FCOS, MVT::f32, Expand);
   setOperationAction(ISD::FCOS, MVT::f64, Expand);
   setOperationAction(ISD::FPOW, MVT::f32, Expand);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index bdaf48652d107..89fb427d64f0a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -51,7 +51,6 @@ class AMDGPUTargetLowering : public TargetLowering {
   /// Split a vector store into multiple scalar stores.
   /// \returns The resulting chain.
 
-  SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4af2721562d7c..ec5c6df0bb7fb 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -893,7 +893,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
     setOperationAction(ISD::FMUL,       MVT::f64, Expand);
     setOperationAction(ISD::FMA,        MVT::f64, Expand);
     setOperationAction(ISD::FDIV,       MVT::f64, Expand);
-    setOperationAction(ISD::FREM,       MVT::f64, Expand);
+    setOperationAction(ISD::FREM,       MVT::f64, LibCall);
     setOperationAction(ISD::FCOPYSIGN,  MVT::f64, Expand);
     setOperationAction(ISD::FGETSIGN,   MVT::f64, Expand);
     setOperationAction(ISD::FNEG,       MVT::f64, Expand);
@@ -1258,8 +1258,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
   setOperationAction(ISD::FCOS,      MVT::f64, Expand);
   setOperationAction(ISD::FSINCOS,   MVT::f64, Expand);
   setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
-  setOperationAction(ISD::FREM,      MVT::f64, Expand);
-  setOperationAction(ISD::FREM,      MVT::f32, Expand);
+  setOperationAction(ISD::FREM,      MVT::f64, LibCall);
+  setOperationAction(ISD::FREM,      MVT::f32, LibCall);
   if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
       !Subtarget->isThumb1Only()) {
     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index e5b4f6eeb7b73..46b51e43e41d3 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -117,15 +117,15 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
   };
 
   ISD::NodeType FPOpToExpand[] = {
-      ISD::FSIN, ISD::FCOS,      ISD::FSINCOS,    ISD::FPOW,
-      ISD::FREM, ISD::FCOPYSIGN, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
+      ISD::FSIN,      ISD::FCOS,       ISD::FSINCOS,   ISD::FPOW,
+      ISD::FCOPYSIGN, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
 
   if (STI.useHardFloat()) {
 
     MVT AllVTy[] = {MVT::f32, MVT::f64};
 
     for (auto VT : AllVTy) {
-      setOperationAction(ISD::FREM, VT, Expand);
+      setOperationAction(ISD::FREM, VT, LibCall);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
       setOperationAction(ISD::BR_CC, VT, Expand);
 
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index c54b67ccd8843..e1025e8062047 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1661,13 +1661,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     for (MVT VT : MVT::integer_valuetypes())
       setOperationAction(IntExpOp, VT, Expand);
   }
+  for (MVT VT : MVT::fp_valuetypes())
+    for (unsigned FPExpOp : {ISD::FDIV, ISD::FSQRT, ISD::FSIN, ISD::FCOS,
+                             ISD::FSINCOS, ISD::FPOW, ISD::FCOPYSIGN}) {
 
-  for (unsigned FPExpOp :
-       {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
-        ISD::FPOW, ISD::FCOPYSIGN}) {
-    for (MVT VT : MVT::fp_valuetypes())
       setOperationAction(FPExpOp, VT, Expand);
-  }
+      for (MVT VT : MVT::fp_valuetypes())
+        setOperationAction(ISD::FREM, VT, LibCall);
+    }
 
   // No extending loads from i32.
   for (MVT VT : MVT::integer_valuetypes()) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 55238a2819363..35b6b655e9ad9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -205,7 +205,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FCOS, MVT::f32, Expand);
     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
     setOperationAction(ISD::FPOW, MVT::f32, Expand);
-    setOperationAction(ISD::FREM, MVT::f32, Expand);
+    setOperationAction(ISD::FREM, MVT::f32, LibCall);
     setOperationAction(ISD::FP16_TO_FP, MVT::f32,
                        Subtarget.isSoftFPABI() ? LibCall : Custom);
     setOperationAction(ISD::FP_TO_FP16, MVT::f32,
@@ -252,7 +252,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FCOS, MVT::f64, Expand);
     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
     setOperationAction(ISD::FPOW, MVT::f64, Expand);
-    setOperationAction(ISD::FREM, MVT::f64, Expand);
+    setOperationAction(ISD::FREM, MVT::f64, LibCall);
     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
     setOperationAction(ISD::FP_TO_FP16, MVT::f64,
                        Subtarget.isSoftFPABI() ? LibCall : Custom);
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 1491300e37d3e..b3ede9fb4ce5c 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -467,8 +467,8 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
   setOperationAction(ISD::FEXP,              MVT::f32,   Expand);
   setOperationAction(ISD::FMA,               MVT::f32,   Expand);
   setOperationAction(ISD::FMA,               MVT::f64,   Expand);
-  setOperationAction(ISD::FREM,              MVT::f32,   Expand);
-  setOperationAction(ISD::FREM,              MVT::f64,   Expand);
+  setOperationAction(ISD::FREM, MVT::f32, LibCall);
+  setOperationAction(ISD::FREM, MVT::f64, LibCall);
 
   // Lower f16 conversion operations into library calls
   setOperationAction(ISD::FP16_TO_FP,        MVT::f32,   Expand);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index fa104e4f69d7f..7b0efc8ca4729 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -328,7 +328,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
-  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
+  setOperationAction(ISD::FREM, MVT::ppcf128, LibCall);
 
   // PowerPC has no SREM/UREM instructions unless we are on P9
   // On P9 we may use a hardware instruction to compute the remainder.
@@ -403,12 +403,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setOperationAction(ISD::FSIN , MVT::f64, Expand);
   setOperationAction(ISD::FCOS , MVT::f64, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
-  setOperationAction(ISD::FREM , MVT::f64, Expand);
+  setOperationAction(ISD::FREM, MVT::f64, LibCall);
   setOperationAction(ISD::FPOW , MVT::f64, Expand);
   setOperationAction(ISD::FSIN , MVT::f32, Expand);
   setOperationAction(ISD::FCOS , MVT::f32, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
-  setOperationAction(ISD::FREM , MVT::f32, Expand);
+  setOperationAction(ISD::FREM, MVT::f32, LibCall);
   setOperationAction(ISD::FPOW , MVT::f32, Expand);
 
   // MASS transformation for LLVM intrinsics with replicating fast-math flag
@@ -1204,7 +1204,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::FCOS, MVT::f128, Expand);
       setOperationAction(ISD::FPOW, MVT::f128, Expand);
       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
-      setOperationAction(ISD::FREM, MVT::f128, Expand);
+      setOperationAction(ISD::FREM, MVT::f128, LibCall);
     }
 
     if (Subtarget.hasP8Altivec()) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9d90eb0a65218..e4c2c670fa0a9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -466,9 +466,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
 
-  static const unsigned FPOpToExpand[] = {
-      ISD::FSIN, ISD::FCOS,       ISD::FSINCOS,   ISD::FPOW,
-      ISD::FREM};
+  static const unsigned FPOpToExpand[] = {ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
+                                          ISD::FPOW};
+  static const unsigned FPOpToLibCall[] = {ISD::FREM};
 
   static const unsigned FPRndMode[] = {
       ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
@@ -536,10 +536,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(
         ISD::FNEARBYINT, MVT::f16,
         Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
-    setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
-                        ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
-                        ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
-                        ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP},
+    setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI, ISD::FCOS, ISD::FSIN,
+                        ISD::FSINCOS, ISD::FEXP, ISD::FEXP2, ISD::FEXP10,
+                        ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FLDEXP,
+                        ISD::FFREXP},
                        MVT::f16, Promote);
 
     // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
@@ -564,6 +564,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SELECT, MVT::f32, Custom);
     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
     setOperationAction(FPOpToExpand, MVT::f32, Expand);
+    setOperationAction(FPOpToLibCall, MVT::f32, LibCall);
     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
@@ -622,6 +623,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
     setOperationAction(FPOpToExpand, MVT::f64, Expand);
+    setOperationAction(FPOpToLibCall, MVT::f64, LibCall);
     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 2737cca62cd20..bb6b01e53ca0b 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1793,18 +1793,18 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FSIN , MVT::f128, Expand);
   setOperationAction(ISD::FCOS , MVT::f128, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
-  setOperationAction(ISD::FREM , MVT::f128, Expand);
+  setOperationAction(ISD::FREM, MVT::f128, LibCall);
   setOperationAction(ISD::FMA  , MVT::f128, Expand);
   setOperationAction(ISD::FSIN , MVT::f64, Expand);
   setOperationAction(ISD::FCOS , MVT::f64, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
-  setOperationAction(ISD::FREM , MVT::f64, Expand);
+  setOperationAction(ISD::FREM, MVT::f64, LibCall);
   setOperationAction(ISD::FMA, MVT::f64,
                      Subtarget->isUA2007() ? Legal : Expand);
   setOperationAction(ISD::FSIN , MVT::f32, Expand);
   setOperationAction(ISD::FCOS , MVT::f32, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
-  setOperationAction(ISD::FREM , MVT::f32, Expand);
+  setOperationAction(ISD::FREM, MVT::f32, LibCall);
   setOperationAction(ISD::FMA, MVT::f32,
                      Subtarget->isUA2007() ? Legal : Expand);
   setOperationAction(ISD::ROTL , MVT::i32, Expand);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 3b7d11a318dc4..a81c73b5a804a 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -585,7 +585,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::FSIN, VT, Expand);
       setOperationAction(ISD::FCOS, VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
-      setOperationAction(ISD::FREM, VT, Expand);
+      setOperationAction(ISD::FREM, VT, LibCall);
       setOperationAction(ISD::FPOW, VT, Expand);
 
       // Special treatment.
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 2cfdc751a55e0..521bd242d28c1 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -229,7 +229,7 @@ void VETargetLowering::initSPUActions() {
   // VE doesn't have following floating point operations.
   for (MVT VT : MVT::fp_valuetypes()) {
     setOperationAction(ISD::FNEG, VT, Expand);
-    setOperationAction(ISD::FREM, VT, Expand);
+    setOperationAction(ISD::FREM, VT, LibCall);
   }
 
   // VE doesn't have fdiv of f128.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index aea27ba32d37e..66e9e3c207974 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -137,9 +137,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
                     ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
       setCondCodeAction(CC, T, Expand);
     // Expand floating-point library function operators.
-    for (auto Op :
-         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
+    for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FMA})
       setOperationAction(Op, T, Expand);
+    setOperationAction(ISD::FREM, T, LibCall);
     // Note supported floating-point library function operators that otherwise
     // default to expand.
     for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f81efdc6414aa..8c7993c6413ed 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -389,10 +389,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
 
-  setOperationAction(ISD::FREM             , MVT::f32  , Expand);
-  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
-  setOperationAction(ISD::FREM             , MVT::f80  , Expand);
-  setOperationAction(ISD::FREM             , MVT::f128 , Expand);
+  setOperationAction(ISD::FREM, MVT::f32, LibCall);
+  setOperationAction(ISD::FREM, MVT::f64, LibCall);
+  setOperationAction(ISD::FREM, MVT::f80, LibCall);
+  setOperationAction(ISD::FREM, MVT::f128, LibCall);
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
     setOperationAction(ISD::GET_ROUNDING   , MVT::i32  , Custom);
@@ -2621,7 +2621,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
           ISD::FTANH,  ISD::STRICT_FTANH,
           // TODO: Add ISD:::STRICT_FMODF too once implemented.
           ISD::FMODF})
-      if (isOperationExpand(Op, MVT::f32))
+      if (isOperationExpand(Op, MVT::f32) ||
+          isOperationLibCall(Op, MVT::f32))
         setOperationAction(Op, MVT::f32, Promote);
   // clang-format on
 
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index c211777e69894..e57171fbab16c 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -216,7 +216,7 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::FSQRT, VT, Expand);
       setOperationAction(ISD::FSIN, VT, Expand);
       setOperationAction(ISD::FCOS, VT, Expand);
-      setOperationAction(ISD::FREM, VT, Expand);
+      setOperationAction(ISD::FREM, VT, LibCall);
       setOperationAction(ISD::FDIV, VT, Expand);
       setOperationAction(ISD::FPOW, VT, Expand);
       setOperationAction(ISD::FSQRT, VT, Expand);
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
index c208d03ff94b7..f4721f1468ddf 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -478,7 +478,7 @@ define void @frem() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %F128 = frem fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V2F128 = frem <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2F128 = frem <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F32 = frem float undef, undef
diff --git a/llvm/test/Analysis/CostModel/ARM/divrem.ll b/llvm/test/Analysis/CostModel/ARM/divrem.ll
index 76f80da55af64..461ad9d7d253b 100644
--- a/llvm/test/Analysis/CostModel/ARM/divrem.ll
+++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll
@@ -278,9 +278,9 @@ define void @i64() {
 define void @f16() {
 ; CHECK-NEON-LABEL: 'f16'
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv half undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem half undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %2 = frem half undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv half undef, 0xH4000
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem half undef, 0xH4000
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %4 = frem half undef, 0xH4000
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'f16'
@@ -306,9 +306,9 @@ define void @f16() {
 ;
 ; CHECK-V8R-LABEL: 'f16'
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv half undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem half undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 4 for: %2 = frem half undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv half undef, 0xH4000
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem half undef, 0xH4000
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 4 for: %4 = frem half undef, 0xH4000
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv half undef, undef
@@ -321,9 +321,9 @@ define void @f16() {
 define void @f32() {
 ; CHECK-NEON-LABEL: 'f32'
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv float undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem float undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %2 = frem float undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv float undef, 2.000000e+00
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem float undef, 2.000000e+00
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %4 = frem float undef, 2.000000e+00
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'f32'
@@ -349,9 +349,9 @@ define void @f32() {
 ;
 ; CHECK-V8R-LABEL: 'f32'
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv float undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem float undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 4 for: %2 = frem float undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv float undef, 2.000000e+00
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem float undef, 2.000000e+00
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 4 for: %4 = frem float undef, 2.000000e+00
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv float undef, undef
@@ -364,9 +364,9 @@ define void @f32() {
 define void @f64() {
 ; CHECK-NEON-LABEL: 'f64'
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv double undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem double undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %2 = frem double undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv double undef, 2.000000e+00
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem double undef, 2.000000e+00
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %4 = frem double undef, 2.000000e+00
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'f64'
@@ -392,9 +392,9 @@ define void @f64() {
 ;
 ; CHECK-V8R-LABEL: 'f64'
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv double undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem double undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 4 for: %2 = frem double undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv double undef, 2.000000e+00
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem double undef, 2.000000e+00
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 4 for: %4 = frem double undef, 2.000000e+00
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv double undef, undef
@@ -867,11 +867,11 @@ define void @vi64() {
 define void @vf16() {
 ; CHECK-NEON-LABEL: 'vf16'
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf16'
@@ -903,11 +903,11 @@ define void @vf16() {
 ;
 ; CHECK-V8R-LABEL: 'vf16'
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv <2 x half> undef, undef
@@ -922,11 +922,11 @@ define void @vf16() {
 define void @vf32() {
 ; CHECK-NEON-LABEL: 'vf32'
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf32'
@@ -958,11 +958,11 @@ define void @vf32() {
 ;
 ; CHECK-V8R-LABEL: 'vf32'
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv <2 x float> undef, undef
@@ -977,11 +977,11 @@ define void @vf32() {
 define void @vf64() {
 ; CHECK-NEON-LABEL: 'vf64'
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf64'
@@ -1013,11 +1013,11 @@ define void @vf64() {
 ;
 ; CHECK-V8R-LABEL: 'vf64'
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv <2 x double> undef, undef
@@ -1492,11 +1492,11 @@ define void @vi64_2() {
 define void @vf16_2() {
 ; CHECK-NEON-LABEL: 'vf16_2'
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf16_2'
@@ -1528,11 +1528,11 @@ define void @vf16_2() {
 ;
 ; CHECK-V8R-LABEL: 'vf16_2'
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000)
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000)
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000)
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv <2 x half> undef, <half 2., half 2.>
@@ -1547,11 +1547,11 @@ define void @vf16_2() {
 define void @vf32_2() {
 ; CHECK-NEON-LABEL: 'vf32_2'
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf32_2'
@@ -1583,11 +1583,11 @@ define void @vf32_2() {
 ;
 ; CHECK-V8R-LABEL: 'vf32_2'
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv <2 x float> undef, <float 2., float 2.>
@@ -1602,11 +1602,11 @@ define void @vf32_2() {
 define void @vf64_2() {
 ; CHECK-NEON-LABEL: 'vf64_2'
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf64_2'
@@ -1638,11 +1638,11 @@ define void @vf64_2() {
 ;
 ; CHECK-V8R-LABEL: 'vf64_2'
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
 ; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv <2 x double> undef, <double 2., double 2.>
diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
index 673bf38d44876..03bdd439ba2fc 100644
--- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
@@ -1248,35 +1248,35 @@ define void @fdiv_f16() {
 
 define void @frem() {
 ; CHECK-LABEL: 'frem'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = frem <1 x float> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = frem <2 x float> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = frem <4 x float> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16F32 = frem <16 x float> poison, poison
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float poison, poison
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double poison, poison
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1F32 = frem <1 x float> poison, poison
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = frem <2 x float> poison, poison
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = frem <4 x float> poison, poison
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V8F32 = frem <8 x float> poison, poison
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V16F32 = frem <16 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F32 = frem <vscale x 1 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F32 = frem <vscale x 2 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F32 = frem <vscale x 4 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F32 = frem <vscale x 8 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F32 = frem <vscale x 16 x float> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = frem <1 x double> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8F64 = frem <8 x double> poison, poison
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1F64 = frem <1 x double> poison, poison
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = frem <2 x double> poison, poison
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4F64 = frem <4 x double> poison, poison
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V8F64 = frem <8 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F64 = frem <vscale x 1 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F64 = frem <vscale x 2 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F64 = frem <vscale x 4 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F64 = frem <vscale x 8 x double> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.frem.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.frem.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.frem.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.frem.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.frem.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.frem.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -1340,24 +1340,24 @@ define void @frem() {
 
 define void @frem_bf16() {
 ; ZVFH-LABEL: 'frem_bf16'
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %BF16 = frem bfloat poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison
 ; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
 ; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
 ; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
 ; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
 ; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
 ; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
 ; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
 ; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
 ; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -1366,24 +1366,24 @@ define void @frem_bf16() {
 ; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; ZVFHMIN-LABEL: 'frem_bf16'
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %BF16 = frem bfloat poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison
 ; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
 ; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
 ; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
 ; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
 ; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
 ; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
 ; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
 ; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
 ; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -1392,24 +1392,24 @@ define void @frem_bf16() {
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; NO-ZFHMIN-LABEL: 'frem_bf16'
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %BF16 = frem bfloat poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -1502,24 +1502,24 @@ define void @frem_f16() {
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; NO-ZFHMIN-LABEL: 'frem_f16'
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = frem <1 x half> poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = frem <2 x half> poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = frem <4 x half> poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = frem <8 x half> poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = frem <16 x half> poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = frem <32 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F16 = frem half poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1F16 = frem <1 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2F16 = frem <2 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4F16 = frem <4 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8F16 = frem <8 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16F16 = frem <16 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32F16 = frem <32 x half> poison, poison
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> poison, poison
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> poison, poison
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> poison, poison
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> poison, poison
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> poison, poison
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> poison, poison
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
-; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
 ; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
index a7a88b80b6670..8f9a47c7ef930 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
@@ -625,80 +625,80 @@ define i32 @fdiv(i32 %arg) {
 
 define i32 @frem(i32 %arg) {
 ; SSE1-LABEL: 'frem'
-; SSE1-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef
-; SSE1-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
-; SSE1-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
-; SSE1-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
-; SSE1-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef
-; SSE1-NEXT:  Cost Model: Found costs of 4 for: %V2F64 = frem <2 x double> undef, undef
-; SSE1-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
-; SSE1-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
+; SSE1-NEXT:  Cost Model: Found costs of 4 for: %F32 = frem float undef, undef
+; SSE1-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
+; SSE1-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
+; SSE1-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
+; SSE1-NEXT:  Cost Model: Found costs of 4 for: %F64 = frem double undef, undef
+; SSE1-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
+; SSE1-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
+; SSE1-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
 ; SSE1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; SSE2-LABEL: 'frem'
-; SSE2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef
-; SSE2-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
-; SSE2-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
-; SSE2-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
-; SSE2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef
-; SSE2-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
-; SSE2-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
-; SSE2-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
+; SSE2-NEXT:  Cost Model: Found costs of 4 for: %F32 = frem float undef, undef
+; SSE2-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
+; SSE2-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
+; SSE2-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
+; SSE2-NEXT:  Cost Model: Found costs of 4 for: %F64 = frem double undef, undef
+; SSE2-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
+; SSE2-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
+; SSE2-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
 ; SSE2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; SSE42-LABEL: 'frem'
-; SSE42-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef
-; SSE42-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
-; SSE42-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
-; SSE42-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
-; SSE42-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef
-; SSE42-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
-; SSE42-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
-; SSE42-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
+; SSE42-NEXT:  Cost Model: Found costs of 4 for: %F32 = frem float undef, undef
+; SSE42-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
+; SSE42-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
+; SSE42-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
+; SSE42-NEXT:  Cost Model: Found costs of 4 for: %F64 = frem double undef, undef
+; SSE42-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
+; SSE42-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
+; SSE42-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
 ; SSE42-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; AVX-LABEL: 'frem'
-; AVX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef
-; AVX-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
-; AVX-NEXT:  Cost Model: Found costs of RThru:23 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
-; AVX-NEXT:  Cost Model: Found costs of RThru:46 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
-; AVX-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef
-; AVX-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
-; AVX-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
-; AVX-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
+; AVX-NEXT:  Cost Model: Found costs of 4 for: %F32 = frem float undef, undef
+; AVX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
+; AVX-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
+; AVX-NEXT:  Cost Model: Found costs of RThru:78 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
+; AVX-NEXT:  Cost Model: Found costs of 4 for: %F64 = frem double undef, undef
+; AVX-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
+; AVX-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
+; AVX-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
 ; AVX-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; AVX512-LABEL: 'frem'
-; AVX512-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef
-; AVX512-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
-; AVX512-NEXT:  Cost Model: Found costs of RThru:23 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
-; AVX512-NEXT:  Cost Model: Found costs of RThru:47 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
-; AVX512-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef
-; AVX512-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
-; AVX512-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
-; AVX512-NEXT:  Cost Model: Found costs of RThru:23 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
+; AVX512-NEXT:  Cost Model: Found costs of 4 for: %F32 = frem float undef, undef
+; AVX512-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
+; AVX512-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
+; AVX512-NEXT:  Cost Model: Found costs of RThru:79 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
+; AVX512-NEXT:  Cost Model: Found costs of 4 for: %F64 = frem double undef, undef
+; AVX512-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
+; AVX512-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
+; AVX512-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
 ; AVX512-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; SLM-LABEL: 'frem'
-; SLM-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef
-; SLM-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
-; SLM-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
-; SLM-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
-; SLM-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef
-; SLM-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
-; SLM-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
-; SLM-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
+; SLM-NEXT:  Cost Model: Found costs of 4 for: %F32 = frem float undef, undef
+; SLM-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
+; SLM-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
+; SLM-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
+; SLM-NEXT:  Cost Model: Found costs of 4 for: %F64 = frem double undef, undef
+; SLM-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
+; SLM-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
+; SLM-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
 ; SLM-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; GLM-LABEL: 'frem'
-; GLM-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef
-; GLM-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
-; GLM-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
-; GLM-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
-; GLM-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef
-; GLM-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
-; GLM-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
-; GLM-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
+; GLM-NEXT:  Cost Model: Found costs of 4 for: %F32 = frem float undef, undef
+; GLM-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
+; GLM-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
+; GLM-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef
+; GLM-NEXT:  Cost Model: Found costs of 4 for: %F64 = frem double undef, undef
+; GLM-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
+; GLM-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
+; GLM-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef
 ; GLM-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %F32 = frem float undef, undef

From df7066c0204242be241bf3564b68a6f1013bed72 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Thu, 16 Oct 2025 07:52:38 -0400
Subject: [PATCH 09/17] expand-fp: always expand frem if legalization action is
 "Expand"

---
 llvm/lib/CodeGen/ExpandFp.cpp | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 291a6447fe36c..39451669b495d 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -104,25 +104,16 @@ class FRemExpander {
     return find(ExpandableTypes, VT.getSimpleVT()) != ExpandableTypes.end();
   }
 
-  /// Return true if the pass should expand a frem instruction of the
-  /// given \p Ty for the target represented by \p TLI. Expansion
-  /// should happen if the legalization for the scalar type uses a
-  /// non-existing libcall.
   static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
     assert(!VT.isVector() && "Cannot handle vector type; must scalarize first");
-
-    TargetLowering::LegalizeAction LA = TLI.getOperationAction(ISD::FREM, VT);
-    if (LA != TargetLowering::LegalizeAction::Expand)
-      return false;
-
-    auto Libcall = getFremLibcallForType(VT);
-    return Libcall.has_value() && !TLI.getLibcallName(*Libcall);
+    return (TLI.getOperationAction(ISD::FREM, VT) ==
+            TargetLowering::LegalizeAction::Expand);
   }
 
   static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
-    // Consider scalar type for simplicity.
-    // It is very unlikely that a vector type can be legalized without a libcall
-    // if the scalar type cannot.
+    // Consider scalar type for simplicity.  It seems unlikely that a
+    // vector type can be legalized without expansion if the scalar
+    // type cannot.
     return shouldExpandFremType(TLI, EVT::getEVT(Ty->getScalarType()));
   }
 

From a33ab1d35484e7aa7329f653b9f6872031770be3 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Thu, 16 Oct 2025 10:43:05 -0400
Subject: [PATCH 10/17] Revert deletion of comment line

---
 llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 81cf570e38b9a..8e423c4f83b38 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -17,6 +17,7 @@
 // expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
 // the operation must be unrolled, which introduces nodes with the illegal
 // type i8 which must be promoted.
+//
 // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
 // or operations that happen to take a vector which are custom-lowered;
 // the legalization for such operations never produces nodes

From a58e1c7ff50207a9ba02f412f2a1239230fc7085 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Thu, 16 Oct 2025 10:46:20 -0400
Subject: [PATCH 11/17] Remove stray blank line

---
 llvm/lib/CodeGen/ExpandFp.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 9b5a7246650a4..f043ff130a597 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -1055,7 +1055,6 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
       if (I.getOperand(0)->getType()->isScalableTy())
         continue;
 
-
       auto *IntTy = cast<IntegerType>(Ty->getScalarType());
 
       if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)

From 1ea0b3a9da909c1d0825336ee6f5eed5e2d44b25 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Thu, 16 Oct 2025 11:11:47 -0400
Subject: [PATCH 12/17] clang-format changes

---
 llvm/lib/Target/ARM/ARMISelLowering.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 7d3bcf26fd0a3..48f33555d8c02 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -895,7 +895,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
     setOperationAction(ISD::FMUL,       MVT::f64, Expand);
     setOperationAction(ISD::FMA,        MVT::f64, Expand);
     setOperationAction(ISD::FDIV,       MVT::f64, Expand);
-    setOperationAction(ISD::FREM,       MVT::f64, LibCall);
+    setOperationAction(ISD::FREM, MVT::f64, LibCall);
     setOperationAction(ISD::FCOPYSIGN,  MVT::f64, Expand);
     setOperationAction(ISD::FGETSIGN,   MVT::f64, Expand);
     setOperationAction(ISD::FNEG,       MVT::f64, Expand);
@@ -1260,8 +1260,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
   setOperationAction(ISD::FCOS,      MVT::f64, Expand);
   setOperationAction(ISD::FSINCOS,   MVT::f64, Expand);
   setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
-  setOperationAction(ISD::FREM,      MVT::f64, LibCall);
-  setOperationAction(ISD::FREM,      MVT::f32, LibCall);
+  setOperationAction(ISD::FREM, MVT::f64, LibCall);
+  setOperationAction(ISD::FREM, MVT::f32, LibCall);
   if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
       !Subtarget->isThumb1Only()) {
     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);

From 4d5d984a209c69536177b20892e4d891888940f8 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Fri, 17 Oct 2025 04:10:32 -0400
Subject: [PATCH 13/17] Replace two function uses with better LLVM alternatives

---
 llvm/lib/CodeGen/ExpandFp.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index f043ff130a597..6b49969aebff4 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -101,7 +101,7 @@ class FRemExpander {
     EVT VT = EVT::getEVT(Ty);
     assert(VT.isSimple() && "Can expand only simple types");
 
-    return find(ExpandableTypes, VT.getSimpleVT()) != ExpandableTypes.end();
+    return is_contained(ExpandableTypes, VT.getSimpleVT());
   }
 
   static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
@@ -120,9 +120,8 @@ class FRemExpander {
   /// Return true if the pass should expand "frem" instructions of some any for
   /// the target represented by \p TLI.
   static bool shouldExpandAnyFremType(const TargetLowering &TLI) {
-    return std::any_of(
-        ExpandableTypes.begin(), ExpandableTypes.end(),
-        [&](MVT V) { return shouldExpandFremType(TLI, EVT(V)); });
+    return any_of(ExpandableTypes,
+                  [&](MVT V) { return shouldExpandFremType(TLI, EVT(V)); });
   }
 
   static FRemExpander create(IRBuilder<> &B, Type *Ty) {

From 17f470bd805bbffe546e3a3e5664c43006fd20c4 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Fri, 17 Oct 2025 05:34:44 -0400
Subject: [PATCH 14/17] trigger CI


From 1f6811d55fe547d7218c39a5a94428ea601c94ed Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Mon, 20 Oct 2025 08:41:52 -0400
Subject: [PATCH 15/17] fixup! Merge remote-tracking branch 'upstream/main'
 into expand-fp-frem-expansion-criterion

---
 llvm/lib/CodeGen/ExpandFp.cpp | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index f86a65da08c52..371d08b47aa85 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -1029,25 +1029,15 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     case Instruction::FRem:
       return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
     case Instruction::FPToUI:
-    case Instruction::FPToSI: {
+    case Instruction::FPToSI:
       return !DisableExpandLargeFp &&
              cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >
                  MaxLegalFpConvertBitWidth;
-    }
-
     case Instruction::UIToFP:
-    case Instruction::SIToFP: {
-      if (DisableExpandLargeFp)
-        continue;
-
-      // TODO: This pass doesn't handle scalable vectors.
-      if (I.getOperand(0)->getType()->isScalableTy())
-        continue;
-
-      auto *IntTy =
-          cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
-      return IntTy->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
-    }
+    case Instruction::SIToFP:
+      return !DisableExpandLargeFp &&
+             cast<IntegerType>(I.getOperand(0)->getType()->getScalarType())
+                     ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
     }
 
     return false;

From 91cf7ce147ded7f10ff76ccabcfb8a90397e020d Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Mon, 20 Oct 2025 10:30:48 -0400
Subject: [PATCH 16/17] trigger build


From f2b71812f47b3e256d9862ba5f172400eb32708c Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath@amd.com>
Date: Tue, 21 Oct 2025 05:23:29 -0400
Subject: [PATCH 17/17] Trigger CI