diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index eaa1870f4be28..5a08e7d6db347 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1238,6 +1238,13 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//
 
+def FeatureDisableAggressiveFMAFusion : SubtargetFeature<
+  "disable-aggressive-fma-fusion",
+  "DisableAggressiveFMAFusion", // AMDGPUSubtarget field assigned the value below.
+  "true",
+  "Do not aggressively fuse fmul with fadd/fsub into fma (e.g. when the fmul has other uses)"
+>;
+
 // Ugly hack to accomodate assembling modules with mixed
 // wavesizes. Ideally we would have a mapping symbol in assembly which
 // would keep track of which sections of code should be treated as
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index ed03ef21b6dda..0c380a7e4dc84 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -76,6 +76,7 @@ class AMDGPUSubtarget {
   bool EnablePromoteAlloca = false;
   bool HasTrigReducedRange = false;
   bool FastFMAF32 = false;
+  bool DisableAggressiveFMAFusion = false;
   unsigned EUsPerCU = 4;
   unsigned MaxWavesPerEU = 10;
   unsigned LocalMemorySize = 0;
@@ -303,6 +304,10 @@ class AMDGPUSubtarget {
     return FastFMAF32;
   }
 
+  bool hasDisableAggressiveFMAFusion() const {
+    return DisableAggressiveFMAFusion; // Set by disable-aggressive-fma-fusion.
+  }
+
   bool isPromoteAllocaEnabled() const {
     return EnablePromoteAlloca;
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 03d16fdd54c42..554549063dbcc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -288,6 +288,7 @@ const FeatureBitset GCNTTIImpl::InlineFeatureIgnoreList = {
     AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal,
     AMDGPU::FeaturePromoteAlloca, AMDGPU::FeatureUnalignedScratchAccess,
     AMDGPU::FeatureUnalignedAccessMode,
+    AMDGPU::FeatureDisableAggressiveFMAFusion,
 
     AMDGPU::FeatureAutoWaitcntBeforeBarrier,
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 16530087444d2..f0ac08c3b69f9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6502,10 +6502,12 @@ bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
   // When fma is quarter rate, for f64 where add / sub are at best half rate,
   // most of these combines appear to be cycle neutral but save on instruction
   // count / code size.
-  return true;
+  return !Subtarget->hasDisableAggressiveFMAFusion();
 }
 
-bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const { return true; }
+bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const {
+  return !Subtarget->hasDisableAggressiveFMAFusion(); // Same for every Ty.
+}
 
 EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
                                          EVT VT) const {