diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 412a0e8979193..2b02db88e809d 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -64,6 +64,11 @@ using namespace llvm; +static cl::opt<bool> DisableFPCallFolding( + "disable-fp-call-folding", + cl::desc("Disable constant-folding of FP intrinsics and libcalls."), + cl::init(false), cl::Hidden); + namespace { //===----------------------------------------------------------------------===// @@ -1576,6 +1581,17 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { return false; if (Call->getFunctionType() != F->getFunctionType()) return false; + + // Allow FP calls (both libcalls and intrinsics) to avoid being folded. + // This can be useful for GPU targets or in cross-compilation scenarios + // when the exact target FP behaviour is required, and the host compiler's + // behaviour may be slightly different from the device's run-time behaviour. + if (DisableFPCallFolding && (F->getReturnType()->isFloatingPointTy() || + any_of(F->args(), [](const Argument &Arg) { + return Arg.getType()->isFloatingPointTy(); + }))) + return false; + switch (F->getIntrinsicID()) { // Operations that do not operate floating-point numbers and do not depend on // FP environment can be folded even in strictfp functions. @@ -1700,7 +1716,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::x86_avx512_vcvtsd2usi64: case Intrinsic::x86_avx512_cvttsd2usi: case Intrinsic::x86_avx512_cvttsd2usi64: - return !Call->isStrictFP(); // NVVM FMax intrinsics case Intrinsic::nvvm_fmax_d: @@ -1775,6 +1790,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::nvvm_d2ull_rn: case Intrinsic::nvvm_d2ull_rp: case Intrinsic::nvvm_d2ull_rz: + return !Call->isStrictFP(); // Sign operations are actually bitwise operations, they do not raise // exceptions even for SNANs. 
@@ -3886,8 +3902,12 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID, Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS, Constant *RHS, Type *Ty, Instruction *FMFSource) { - return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, - dyn_cast_if_present<CallBase>(FMFSource)); + auto *Call = dyn_cast_if_present<CallBase>(FMFSource); + // Ensure we check flags like StrictFP that might prevent this from getting + // folded before generating a result. + if (Call && !canConstantFoldCallTo(Call, Call->getCalledFunction())) + return nullptr; + return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, Call); } Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F, diff --git a/llvm/test/Transforms/InstSimplify/disable_folding.ll b/llvm/test/Transforms/InstSimplify/disable_folding.ll new file mode 100644 index 0000000000000..66adf6af1e97f --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/disable_folding.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s --check-prefixes CHECK,FOLDING_ENABLED +; RUN: opt < %s -disable-fp-call-folding -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s --check-prefixes CHECK,FOLDING_DISABLED +; Check that we can disable folding of intrinsic calls via both the -disable-fp-call-folding flag and the strictfp attribute. 
+ +; Should be folded by default unless -disable-fp-call-folding is set +define float @test_fmax_ftz_nan_xorsign_abs_f() { +; FOLDING_ENABLED-LABEL: define float @test_fmax_ftz_nan_xorsign_abs_f() { +; FOLDING_ENABLED-NEXT: ret float -2.000000e+00 +; +; FOLDING_DISABLED-LABEL: define float @test_fmax_ftz_nan_xorsign_abs_f() { +; FOLDING_DISABLED-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 1.250000e+00, float -2.000000e+00) +; FOLDING_DISABLED-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 1.25, float -2.0) + ret float %res +} + +; Check that -disable-fp-call-folding triggers for LLVM intrinsics, not just NVPTX target-specific ones. +define float @test_llvm_sin() { +; FOLDING_ENABLED-LABEL: define float @test_llvm_sin() { +; FOLDING_ENABLED-NEXT: ret float 0x3FDEAEE880000000 +; +; FOLDING_DISABLED-LABEL: define float @test_llvm_sin() { +; FOLDING_DISABLED-NEXT: [[RES:%.*]] = call float @llvm.sin.f32(float 5.000000e-01) +; FOLDING_DISABLED-NEXT: ret float [[RES]] +; + %res = call float @llvm.sin.f32(float 0.5) + ret float %res +} + +; Should not be folded, even when -disable-fp-call-folding is not set, as it is marked as strictfp. +define float @test_fmax_ftz_nan_f_strictfp() { +; CHECK-LABEL: define float @test_fmax_ftz_nan_f_strictfp() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fmax.ftz.nan.f(float 1.250000e+00, float -2.000000e+00) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 1.25, float -2.0) #1 + ret float %res +} + +; Check that strictfp disables folding for LLVM math intrinsics like sin.f32 +; even when -disable-fp-call-folding is not set. 
+define float @test_llvm_sin_strictfp() { +; CHECK-LABEL: define float @test_llvm_sin_strictfp() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.sin.f32(float 5.000000e-01) #[[ATTR1]] +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.sin.f32(float 0.5) #1 + ret float %res +} + +attributes #1 = { strictfp }