|
64 | 64 |
|
65 | 65 | using namespace llvm; |
66 | 66 |
|
| 67 | +static cl::opt<bool> DisableFPCallFolding( |
| 68 | + "disable-fp-call-folding", |
| 69 | + cl::desc("Disable constant-folding of FP intrinsics and libcalls."), |
| 70 | + cl::init(false), cl::Hidden); |
| 71 | + |
67 | 72 | namespace { |
68 | 73 |
|
69 | 74 | //===----------------------------------------------------------------------===// |
@@ -1576,6 +1581,17 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { |
1576 | 1581 | return false; |
1577 | 1582 | if (Call->getFunctionType() != F->getFunctionType()) |
1578 | 1583 | return false; |
| 1584 | + |
| 1585 | + // Allow folding of FP calls (both libcalls and intrinsics) to be disabled. |
| 1586 | + // This can be useful for GPU targets or in cross-compilation scenarios |
| 1587 | + // when the exact target FP behaviour is required, and the host compiler's |
| 1588 | + // behaviour may be slightly different from the device's run-time behaviour. |
| 1589 | + if (DisableFPCallFolding && (F->getReturnType()->isFloatingPointTy() || |
| 1590 | + any_of(F->args(), [](const Argument &Arg) { |
| 1591 | + return Arg.getType()->isFloatingPointTy(); |
| 1592 | + }))) |
| 1593 | + return false; |
| 1594 | + |
1579 | 1595 | switch (F->getIntrinsicID()) { |
1580 | 1596 | // Operations that do not operate on floating-point numbers and do not depend on |
1581 | 1597 | // FP environment can be folded even in strictfp functions. |
@@ -1700,7 +1716,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { |
1700 | 1716 | case Intrinsic::x86_avx512_vcvtsd2usi64: |
1701 | 1717 | case Intrinsic::x86_avx512_cvttsd2usi: |
1702 | 1718 | case Intrinsic::x86_avx512_cvttsd2usi64: |
1703 | | - return !Call->isStrictFP(); |
1704 | 1719 |
|
1705 | 1720 | // NVVM FMax intrinsics |
1706 | 1721 | case Intrinsic::nvvm_fmax_d: |
@@ -1775,6 +1790,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { |
1775 | 1790 | case Intrinsic::nvvm_d2ull_rn: |
1776 | 1791 | case Intrinsic::nvvm_d2ull_rp: |
1777 | 1792 | case Intrinsic::nvvm_d2ull_rz: |
| 1793 | + return !Call->isStrictFP(); |
1778 | 1794 |
|
1779 | 1795 | // Sign operations are actually bitwise operations, they do not raise |
1780 | 1796 | // exceptions even for SNANs. |
@@ -3909,8 +3925,12 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID, |
3909 | 3925 | Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS, |
3910 | 3926 | Constant *RHS, Type *Ty, |
3911 | 3927 | Instruction *FMFSource) { |
3912 | | - return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, |
3913 | | - dyn_cast_if_present<CallBase>(FMFSource)); |
| 3928 | + auto *Call = dyn_cast_if_present<CallBase>(FMFSource); |
| 3929 | + // Ensure we check flags like StrictFP that might prevent this from getting |
| 3930 | + // folded before generating a result. |
| 3931 | + if (Call && !canConstantFoldCallTo(Call, Call->getCalledFunction())) |
| 3932 | + return nullptr; |
| 3933 | + return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, Call); |
3914 | 3934 | } |
3915 | 3935 |
|
3916 | 3936 | Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F, |
|
0 commit comments