Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions llvm/lib/Analysis/ConstantFolding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@

using namespace llvm;

// Hidden command-line switch, off by default. When it is set,
// canConstantFoldCallTo() reports "not foldable" for any call whose callee has
// a floating-point return type or at least one floating-point argument type
// (as reported by Type::isFloatingPointTy), covering both libcalls and
// intrinsics.
static cl::opt<bool> DisableFPCallFolding(
"disable-fp-call-folding",
cl::desc("Disable constant-folding of FP intrinsics and libcalls."),
cl::init(false), cl::Hidden);

namespace {

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -1576,6 +1581,17 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
return false;
if (Call->getFunctionType() != F->getFunctionType())
return false;

// Allow FP calls (both libcalls and intrinsics) to avoid being folded.
// This can be useful for GPU targets or in cross-compilation scenarios
// when the exact target FP behaviour is required, and the host compiler's
// behaviour may be slightly different from the device's run-time behaviour.
if (DisableFPCallFolding && (F->getReturnType()->isFloatingPointTy() ||
any_of(F->args(), [](const Argument &Arg) {
return Arg.getType()->isFloatingPointTy();
})))
return false;

switch (F->getIntrinsicID()) {
// Operations that do not operate floating-point numbers and do not depend on
// FP environment can be folded even in strictfp functions.
Expand Down Expand Up @@ -1700,7 +1716,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::x86_avx512_vcvtsd2usi64:
case Intrinsic::x86_avx512_cvttsd2usi:
case Intrinsic::x86_avx512_cvttsd2usi64:
return !Call->isStrictFP();

// NVVM FMax intrinsics
case Intrinsic::nvvm_fmax_d:
Expand Down Expand Up @@ -1775,6 +1790,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::nvvm_d2ull_rn:
case Intrinsic::nvvm_d2ull_rp:
case Intrinsic::nvvm_d2ull_rz:
return !Call->isStrictFP();

// Sign operations are actually bitwise operations, they do not raise
// exceptions even for SNANs.
Expand Down Expand Up @@ -3886,8 +3902,12 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
/// Attempt to constant-fold the two-operand intrinsic \p ID applied to the
/// constant operands \p LHS and \p RHS, with result type \p Ty.
///
/// \p FMFSource is optional context for the fold. It is converted with
/// dyn_cast_if_present, so a null pointer or a non-call instruction simply
/// means "no call context". When it is a CallBase, the call is first vetted by
/// canConstantFoldCallTo() so that call-site properties that forbid folding
/// (for example strictfp) are honoured before any result is produced.
///
/// \returns nullptr when the call context forbids folding; otherwise whatever
/// ConstantFoldIntrinsicCall2() produces for these operands.
Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
                                            Constant *RHS, Type *Ty,
                                            Instruction *FMFSource) {
  auto *Call = dyn_cast_if_present<CallBase>(FMFSource);
  // Ensure we check flags like StrictFP that might prevent this from getting
  // folded before generating a result. This guard must run before the fold:
  // an unconditional early return ahead of it would make it unreachable.
  if (Call && !canConstantFoldCallTo(Call, Call->getCalledFunction()))
    return nullptr;
  return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, Call);
}

Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
Expand Down
54 changes: 54 additions & 0 deletions llvm/test/Transforms/InstSimplify/disable_folding.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s --check-prefixes CHECK,FOLDING_ENABLED
; RUN: opt < %s -disable-fp-call-folding -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s --check-prefixes CHECK,FOLDING_DISABLED

; Check that we can disable folding of intrinsic calls via both the -disable-fp-call-folding flag and the strictfp attribute.

; Should be folded by default unless -disable-fp-call-folding is set
define float @test_fmax_ftz_nan_xorsign_abs_f() {
; FOLDING_ENABLED-LABEL: define float @test_fmax_ftz_nan_xorsign_abs_f() {
; FOLDING_ENABLED-NEXT: ret float -2.000000e+00
;
; FOLDING_DISABLED-LABEL: define float @test_fmax_ftz_nan_xorsign_abs_f() {
; FOLDING_DISABLED-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 1.250000e+00, float -2.000000e+00)
; FOLDING_DISABLED-NEXT: ret float [[RES]]
;
; Both operands are literal constants and the call site has no strictfp
; attribute, so only the command-line flag decides whether the call survives.
%res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 1.25, float -2.0)
ret float %res
}

; Check that -disable-fp-call-folding triggers for LLVM intrinsics, not just NVPTX target-specific ones.
define float @test_llvm_sin() {
; FOLDING_ENABLED-LABEL: define float @test_llvm_sin() {
; FOLDING_ENABLED-NEXT: ret float 0x3FDEAEE880000000
;
; FOLDING_DISABLED-LABEL: define float @test_llvm_sin() {
; FOLDING_DISABLED-NEXT: [[RES:%.*]] = call float @llvm.sin.f32(float 5.000000e-01)
; FOLDING_DISABLED-NEXT: ret float [[RES]]
;
; Same shape as the NVVM case above, but with a target-independent intrinsic:
; a constant argument and no strictfp attribute on the call site.
%res = call float @llvm.sin.f32(float 0.5)
ret float %res
}

; Should not be folded, even when -disable-fp-call-folding is not set, as it is marked as strictfp.
define float @test_fmax_ftz_nan_f_strictfp() {
; CHECK-LABEL: define float @test_fmax_ftz_nan_f_strictfp() {
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fmax.ftz.nan.f(float 1.250000e+00, float -2.000000e+00) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: ret float [[RES]]
;
; The call site uses attribute group #1 (strictfp), so the call is expected to
; remain in the output under both RUN lines.
%res = call float @llvm.nvvm.fmax.ftz.nan.f(float 1.25, float -2.0) #1
ret float %res
}

; Check that strictfp disables folding for LLVM math intrinsics like sin.f32
; even when -disable-fp-call-folding is not set.
define float @test_llvm_sin_strictfp() {
; CHECK-LABEL: define float @test_llvm_sin_strictfp() {
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.sin.f32(float 5.000000e-01) #[[ATTR1]]
; CHECK-NEXT: ret float [[RES]]
;
; The call site uses attribute group #1 (strictfp), so the call is expected to
; remain in the output under both RUN lines.
%res = call float @llvm.sin.f32(float 0.5) #1
ret float %res
}

attributes #1 = { strictfp }