llvm · LewisCrawford · May 30, 2025 · May 16, 2025
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -64,6 +64,11 @@
 
 using namespace llvm;
 
+static cl::opt<bool> DisableFPCallFolding(
+    "disable-fp-call-folding",
+    cl::desc("Disable constant-folding of FP intrinsics and libcalls."),
+    cl::init(false), cl::Hidden);
+
 namespace {
 
 //===----------------------------------------------------------------------===//
@@ -1576,6 +1581,17 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
     return false;
   if (Call->getFunctionType() != F->getFunctionType())
     return false;
+
+  // Allow FP calls (both libcalls and intrinsics) to avoid being folded.
+  // This can be useful for GPU targets or in cross-compilation scenarios
+  // when the exact target FP behaviour is required, and the host compiler's
+  // behaviour may be slightly different from the device's run-time behaviour.
+  if (DisableFPCallFolding && (F->getReturnType()->isFloatingPointTy() ||
+                               any_of(F->args(), [](const Argument &Arg) {
+                                 return Arg.getType()->isFloatingPointTy();
+                               })))
+    return false;
+
   switch (F->getIntrinsicID()) {
   // Operations that do not operate floating-point numbers and do not depend on
   // FP environment can be folded even in strictfp functions.
@@ -1700,7 +1716,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::x86_avx512_vcvtsd2usi64:
   case Intrinsic::x86_avx512_cvttsd2usi:
   case Intrinsic::x86_avx512_cvttsd2usi64:
-    return !Call->isStrictFP();
 
   // NVVM FMax intrinsics
   case Intrinsic::nvvm_fmax_d:
@@ -1775,6 +1790,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::nvvm_d2ull_rn:
   case Intrinsic::nvvm_d2ull_rp:
   case Intrinsic::nvvm_d2ull_rz:
+    return !Call->isStrictFP();
 
   // Sign operations are actually bitwise operations, they do not raise
   // exceptions even for SNANs.
@@ -3886,8 +3902,12 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
 Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
                                             Constant *RHS, Type *Ty,
                                             Instruction *FMFSource) {
-  return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS},
-                                    dyn_cast_if_present<CallBase>(FMFSource));
+  auto *Call = dyn_cast_if_present<CallBase>(FMFSource);
+  // Ensure we check flags like StrictFP that might prevent this from getting
+  // folded before generating a result.
+  if (Call && !canConstantFoldCallTo(Call, Call->getCalledFunction()))
+    return nullptr;
+  return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, Call);
 }
 
 Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,

diff --git a/llvm/test/Transforms/InstSimplify/disable_folding.ll b/llvm/test/Transforms/InstSimplify/disable_folding.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s --check-prefixes CHECK,FOLDING_ENABLED
+; RUN: opt < %s -disable-fp-call-folding -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s --check-prefixes CHECK,FOLDING_DISABLED
+
+; Check that we can disable folding of intrinsic calls via both the -disable-fp-call-folding flag and the strictfp attribute.
+
+; Should be folded by default unless -disable-fp-call-folding is set
+define float @test_fmax_ftz_nan_xorsign_abs_f() {
+; FOLDING_ENABLED-LABEL: define float @test_fmax_ftz_nan_xorsign_abs_f() {
+; FOLDING_ENABLED-NEXT:    ret float -2.000000e+00
+;
+; FOLDING_DISABLED-LABEL: define float @test_fmax_ftz_nan_xorsign_abs_f() {
+; FOLDING_DISABLED-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 1.250000e+00, float -2.000000e+00)
+; FOLDING_DISABLED-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+; Check that -disable-fp-call-folding triggers for LLVM instrincis, not just NVPTX target-specific ones.
+define float @test_llvm_sin() {
+; FOLDING_ENABLED-LABEL: define float @test_llvm_sin() {
+; FOLDING_ENABLED-NEXT:    ret float 0x3FDEAEE880000000
+;
+; FOLDING_DISABLED-LABEL: define float @test_llvm_sin() {
+; FOLDING_DISABLED-NEXT:    [[RES:%.*]] = call float @llvm.sin.f32(float 5.000000e-01)
+; FOLDING_DISABLED-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.sin.f32(float 0.5)
+  ret float %res
+}
+
+; Should not be folded, even when -disable-fp-call-folding is not set, as it is marked as strictfp.
+define float @test_fmax_ftz_nan_f_strictfp() {
+; CHECK-LABEL: define float @test_fmax_ftz_nan_f_strictfp() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.fmax.ftz.nan.f(float 1.250000e+00, float -2.000000e+00) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 1.25, float -2.0) #1
+  ret float %res
+}
+
+; Check that strictfp disables folding for LLVM math intrinsics like sin.f32
+; even when -disable-fp-call-folding is not set.
+define float @test_llvm_sin_strictfp() {
+; CHECK-LABEL: define float @test_llvm_sin_strictfp() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.sin.f32(float 5.000000e-01) #[[ATTR1]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.sin.f32(float 0.5) #1
+  ret float %res
+}
+
+attributes #1 = { strictfp }