Skip to content

Commit e520c47

Browse files
committed
[NVPTX] Constant-folding for f2i, d2ui, f2ll etc.
Add constant-folding support for the NVVM intrinsics that convert float/double values to signed/unsigned int32/int64, covering all rounding modes and the ftz (flush-to-zero) modifiers.
1 parent 52e9f2c commit e520c47

File tree

3 files changed

+2523
-0
lines changed

3 files changed

+2523
-0
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "llvm/IR/IntrinsicsAArch64.h"
4646
#include "llvm/IR/IntrinsicsAMDGPU.h"
4747
#include "llvm/IR/IntrinsicsARM.h"
48+
#include "llvm/IR/IntrinsicsNVPTX.h"
4849
#include "llvm/IR/IntrinsicsWebAssembly.h"
4950
#include "llvm/IR/IntrinsicsX86.h"
5051
#include "llvm/IR/Operator.h"
@@ -1687,6 +1688,58 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
16871688
case Intrinsic::x86_avx512_cvttsd2usi64:
16881689
return !Call->isStrictFP();
16891690

1691+
// NVVM float/double to int32/uint32 conversion intrinsics
1692+
case Intrinsic::nvvm_f2i_rm:
1693+
case Intrinsic::nvvm_f2i_rn:
1694+
case Intrinsic::nvvm_f2i_rp:
1695+
case Intrinsic::nvvm_f2i_rz:
1696+
case Intrinsic::nvvm_f2i_rm_ftz:
1697+
case Intrinsic::nvvm_f2i_rn_ftz:
1698+
case Intrinsic::nvvm_f2i_rp_ftz:
1699+
case Intrinsic::nvvm_f2i_rz_ftz:
1700+
case Intrinsic::nvvm_f2ui_rm:
1701+
case Intrinsic::nvvm_f2ui_rn:
1702+
case Intrinsic::nvvm_f2ui_rp:
1703+
case Intrinsic::nvvm_f2ui_rz:
1704+
case Intrinsic::nvvm_f2ui_rm_ftz:
1705+
case Intrinsic::nvvm_f2ui_rn_ftz:
1706+
case Intrinsic::nvvm_f2ui_rp_ftz:
1707+
case Intrinsic::nvvm_f2ui_rz_ftz:
1708+
case Intrinsic::nvvm_d2i_rm:
1709+
case Intrinsic::nvvm_d2i_rn:
1710+
case Intrinsic::nvvm_d2i_rp:
1711+
case Intrinsic::nvvm_d2i_rz:
1712+
case Intrinsic::nvvm_d2ui_rm:
1713+
case Intrinsic::nvvm_d2ui_rn:
1714+
case Intrinsic::nvvm_d2ui_rp:
1715+
case Intrinsic::nvvm_d2ui_rz:
1716+
1717+
// NVVM float/double to int64/uint64 conversion intrinsics
1718+
case Intrinsic::nvvm_f2ll_rm:
1719+
case Intrinsic::nvvm_f2ll_rn:
1720+
case Intrinsic::nvvm_f2ll_rp:
1721+
case Intrinsic::nvvm_f2ll_rz:
1722+
case Intrinsic::nvvm_f2ll_rm_ftz:
1723+
case Intrinsic::nvvm_f2ll_rn_ftz:
1724+
case Intrinsic::nvvm_f2ll_rp_ftz:
1725+
case Intrinsic::nvvm_f2ll_rz_ftz:
1726+
case Intrinsic::nvvm_f2ull_rm:
1727+
case Intrinsic::nvvm_f2ull_rn:
1728+
case Intrinsic::nvvm_f2ull_rp:
1729+
case Intrinsic::nvvm_f2ull_rz:
1730+
case Intrinsic::nvvm_f2ull_rm_ftz:
1731+
case Intrinsic::nvvm_f2ull_rn_ftz:
1732+
case Intrinsic::nvvm_f2ull_rp_ftz:
1733+
case Intrinsic::nvvm_f2ull_rz_ftz:
1734+
case Intrinsic::nvvm_d2ll_rm:
1735+
case Intrinsic::nvvm_d2ll_rn:
1736+
case Intrinsic::nvvm_d2ll_rp:
1737+
case Intrinsic::nvvm_d2ll_rz:
1738+
case Intrinsic::nvvm_d2ull_rm:
1739+
case Intrinsic::nvvm_d2ull_rn:
1740+
case Intrinsic::nvvm_d2ull_rp:
1741+
case Intrinsic::nvvm_d2ull_rz:
1742+
16901743
// Sign operations are actually bitwise operations, they do not raise
16911744
// exceptions even for SNANs.
16921745
case Intrinsic::fabs:
@@ -1849,6 +1902,13 @@ inline bool llvm_fenv_testexcept() {
18491902
return false;
18501903
}
18511904

1905+
/// Flush a denormal value to zero while preserving its sign.
///
/// \param Ty Type whose floating-point semantics are used to build the zero
///           (presumably the type \p V was taken from -- confirm at callers).
/// \param V  Value to flush.
/// \returns A zero with the sign of \p V when \p V is denormal; otherwise a
///          copy of \p V unchanged.
///
/// The result is returned as a plain (non-const) value: a top-level const on
/// a by-value return type only prevents callers from moving out of the
/// temporary.
static APFloat FTZPreserveSign(Type *Ty, const APFloat &V) {
  // Only denormals are flushed; a negative denormal becomes negative zero.
  if (V.isDenormal())
    return APFloat::getZero(Ty->getFltSemantics(), V.isNegative());

  return V;
}
1911+
18521912
Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
18531913
Type *Ty) {
18541914
llvm_fenv_clearexcept();
@@ -2309,6 +2369,211 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
23092369
return ConstantFP::get(Ty->getContext(), U);
23102370
}
23112371

2372+
// NVVM float/double to signed/unsigned int32/int64 conversions:
2373+
switch (IntrinsicID) {
2374+
// f2i
2375+
case Intrinsic::nvvm_f2i_rm:
2376+
case Intrinsic::nvvm_f2i_rn:
2377+
case Intrinsic::nvvm_f2i_rp:
2378+
case Intrinsic::nvvm_f2i_rz:
2379+
case Intrinsic::nvvm_f2i_rm_ftz:
2380+
case Intrinsic::nvvm_f2i_rn_ftz:
2381+
case Intrinsic::nvvm_f2i_rp_ftz:
2382+
case Intrinsic::nvvm_f2i_rz_ftz:
2383+
// f2ui
2384+
case Intrinsic::nvvm_f2ui_rm:
2385+
case Intrinsic::nvvm_f2ui_rn:
2386+
case Intrinsic::nvvm_f2ui_rp:
2387+
case Intrinsic::nvvm_f2ui_rz:
2388+
case Intrinsic::nvvm_f2ui_rm_ftz:
2389+
case Intrinsic::nvvm_f2ui_rn_ftz:
2390+
case Intrinsic::nvvm_f2ui_rp_ftz:
2391+
case Intrinsic::nvvm_f2ui_rz_ftz:
2392+
// d2i
2393+
case Intrinsic::nvvm_d2i_rm:
2394+
case Intrinsic::nvvm_d2i_rn:
2395+
case Intrinsic::nvvm_d2i_rp:
2396+
case Intrinsic::nvvm_d2i_rz:
2397+
// d2ui
2398+
case Intrinsic::nvvm_d2ui_rm:
2399+
case Intrinsic::nvvm_d2ui_rn:
2400+
case Intrinsic::nvvm_d2ui_rp:
2401+
case Intrinsic::nvvm_d2ui_rz:
2402+
// f2ll
2403+
case Intrinsic::nvvm_f2ll_rm:
2404+
case Intrinsic::nvvm_f2ll_rn:
2405+
case Intrinsic::nvvm_f2ll_rp:
2406+
case Intrinsic::nvvm_f2ll_rz:
2407+
case Intrinsic::nvvm_f2ll_rm_ftz:
2408+
case Intrinsic::nvvm_f2ll_rn_ftz:
2409+
case Intrinsic::nvvm_f2ll_rp_ftz:
2410+
case Intrinsic::nvvm_f2ll_rz_ftz:
2411+
// f2ull
2412+
case Intrinsic::nvvm_f2ull_rm:
2413+
case Intrinsic::nvvm_f2ull_rn:
2414+
case Intrinsic::nvvm_f2ull_rp:
2415+
case Intrinsic::nvvm_f2ull_rz:
2416+
case Intrinsic::nvvm_f2ull_rm_ftz:
2417+
case Intrinsic::nvvm_f2ull_rn_ftz:
2418+
case Intrinsic::nvvm_f2ull_rp_ftz:
2419+
case Intrinsic::nvvm_f2ull_rz_ftz:
2420+
// d2ll
2421+
case Intrinsic::nvvm_d2ll_rm:
2422+
case Intrinsic::nvvm_d2ll_rn:
2423+
case Intrinsic::nvvm_d2ll_rp:
2424+
case Intrinsic::nvvm_d2ll_rz:
2425+
// d2ull
2426+
case Intrinsic::nvvm_d2ull_rm:
2427+
case Intrinsic::nvvm_d2ull_rn:
2428+
case Intrinsic::nvvm_d2ull_rp:
2429+
case Intrinsic::nvvm_d2ull_rz: {
2430+
// In float-to-integer conversion, NaN inputs are converted to 0.
2431+
if (U.isNaN())
2432+
return ConstantInt::get(Ty, 0);
2433+
2434+
APFloat::roundingMode RMode = APFloat::roundingMode::Invalid;
2435+
switch (IntrinsicID) {
2436+
// i_rm
2437+
case Intrinsic::nvvm_f2i_rm:
2438+
case Intrinsic::nvvm_f2ui_rm:
2439+
case Intrinsic::nvvm_f2i_rm_ftz:
2440+
case Intrinsic::nvvm_f2ui_rm_ftz:
2441+
case Intrinsic::nvvm_d2i_rm:
2442+
case Intrinsic::nvvm_d2ui_rm:
2443+
// ll_rm
2444+
case Intrinsic::nvvm_f2ll_rm:
2445+
case Intrinsic::nvvm_f2ull_rm:
2446+
case Intrinsic::nvvm_f2ll_rm_ftz:
2447+
case Intrinsic::nvvm_f2ull_rm_ftz:
2448+
case Intrinsic::nvvm_d2ll_rm:
2449+
case Intrinsic::nvvm_d2ull_rm:
2450+
RMode = APFloat::rmTowardNegative;
2451+
break;
2452+
2453+
// i_rn
2454+
case Intrinsic::nvvm_f2i_rn:
2455+
case Intrinsic::nvvm_f2ui_rn:
2456+
case Intrinsic::nvvm_f2i_rn_ftz:
2457+
case Intrinsic::nvvm_f2ui_rn_ftz:
2458+
case Intrinsic::nvvm_d2i_rn:
2459+
case Intrinsic::nvvm_d2ui_rn:
2460+
// ll_rn
2461+
case Intrinsic::nvvm_f2ll_rn:
2462+
case Intrinsic::nvvm_f2ull_rn:
2463+
case Intrinsic::nvvm_f2ll_rn_ftz:
2464+
case Intrinsic::nvvm_f2ull_rn_ftz:
2465+
case Intrinsic::nvvm_d2ll_rn:
2466+
case Intrinsic::nvvm_d2ull_rn:
2467+
RMode = APFloat::rmNearestTiesToEven;
2468+
break;
2469+
2470+
// i_rp
2471+
case Intrinsic::nvvm_f2i_rp:
2472+
case Intrinsic::nvvm_f2ui_rp:
2473+
case Intrinsic::nvvm_f2i_rp_ftz:
2474+
case Intrinsic::nvvm_f2ui_rp_ftz:
2475+
case Intrinsic::nvvm_d2i_rp:
2476+
case Intrinsic::nvvm_d2ui_rp:
2477+
// ll_rp
2478+
case Intrinsic::nvvm_f2ll_rp:
2479+
case Intrinsic::nvvm_f2ull_rp:
2480+
case Intrinsic::nvvm_f2ll_rp_ftz:
2481+
case Intrinsic::nvvm_f2ull_rp_ftz:
2482+
case Intrinsic::nvvm_d2ll_rp:
2483+
case Intrinsic::nvvm_d2ull_rp:
2484+
RMode = APFloat::rmTowardPositive;
2485+
break;
2486+
2487+
// i_rz
2488+
case Intrinsic::nvvm_f2i_rz:
2489+
case Intrinsic::nvvm_f2ui_rz:
2490+
case Intrinsic::nvvm_f2i_rz_ftz:
2491+
case Intrinsic::nvvm_f2ui_rz_ftz:
2492+
case Intrinsic::nvvm_d2i_rz:
2493+
case Intrinsic::nvvm_d2ui_rz:
2494+
// ll_rz
2495+
case Intrinsic::nvvm_f2ll_rz:
2496+
case Intrinsic::nvvm_f2ull_rz:
2497+
case Intrinsic::nvvm_f2ll_rz_ftz:
2498+
case Intrinsic::nvvm_f2ull_rz_ftz:
2499+
case Intrinsic::nvvm_d2ll_rz:
2500+
case Intrinsic::nvvm_d2ull_rz:
2501+
RMode = APFloat::rmTowardZero;
2502+
break;
2503+
default:
2504+
llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic");
2505+
}
2506+
// RMode starts out as Invalid and must have been set by the switch above;
// check the variable that switch actually assigns (RMode, not RM).
assert(RMode != APFloat::roundingMode::Invalid);
2507+
2508+
bool IsFTZ = false;
2509+
switch (IntrinsicID) {
2510+
case Intrinsic::nvvm_f2i_rm_ftz:
2511+
case Intrinsic::nvvm_f2i_rn_ftz:
2512+
case Intrinsic::nvvm_f2i_rp_ftz:
2513+
case Intrinsic::nvvm_f2i_rz_ftz:
2514+
case Intrinsic::nvvm_f2ui_rm_ftz:
2515+
case Intrinsic::nvvm_f2ui_rn_ftz:
2516+
case Intrinsic::nvvm_f2ui_rp_ftz:
2517+
case Intrinsic::nvvm_f2ui_rz_ftz:
2518+
case Intrinsic::nvvm_f2ll_rm_ftz:
2519+
case Intrinsic::nvvm_f2ll_rn_ftz:
2520+
case Intrinsic::nvvm_f2ll_rp_ftz:
2521+
case Intrinsic::nvvm_f2ll_rz_ftz:
2522+
case Intrinsic::nvvm_f2ull_rm_ftz:
2523+
case Intrinsic::nvvm_f2ull_rn_ftz:
2524+
case Intrinsic::nvvm_f2ull_rp_ftz:
2525+
case Intrinsic::nvvm_f2ull_rz_ftz:
2526+
IsFTZ = true;
2527+
break;
2528+
}
2529+
2530+
bool IsSigned = false;
2531+
switch (IntrinsicID) {
2532+
// f2i
2533+
case Intrinsic::nvvm_f2i_rm:
2534+
case Intrinsic::nvvm_f2i_rm_ftz:
2535+
case Intrinsic::nvvm_f2i_rn:
2536+
case Intrinsic::nvvm_f2i_rn_ftz:
2537+
case Intrinsic::nvvm_f2i_rp:
2538+
case Intrinsic::nvvm_f2i_rp_ftz:
2539+
case Intrinsic::nvvm_f2i_rz:
2540+
case Intrinsic::nvvm_f2i_rz_ftz:
2541+
// d2i
2542+
case Intrinsic::nvvm_d2i_rm:
2543+
case Intrinsic::nvvm_d2i_rn:
2544+
case Intrinsic::nvvm_d2i_rp:
2545+
case Intrinsic::nvvm_d2i_rz:
2546+
// f2ll
2547+
case Intrinsic::nvvm_f2ll_rm:
2548+
case Intrinsic::nvvm_f2ll_rm_ftz:
2549+
case Intrinsic::nvvm_f2ll_rn:
2550+
case Intrinsic::nvvm_f2ll_rn_ftz:
2551+
case Intrinsic::nvvm_f2ll_rp:
2552+
case Intrinsic::nvvm_f2ll_rp_ftz:
2553+
case Intrinsic::nvvm_f2ll_rz:
2554+
case Intrinsic::nvvm_f2ll_rz_ftz:
2555+
// d2ll
2556+
case Intrinsic::nvvm_d2ll_rm:
2557+
case Intrinsic::nvvm_d2ll_rn:
2558+
case Intrinsic::nvvm_d2ll_rp:
2559+
case Intrinsic::nvvm_d2ll_rz:
2560+
IsSigned = true;
2561+
break;
2562+
}
2563+
2564+
APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
2565+
auto FloatToRound = IsFTZ ? FTZPreserveSign(Op->getType(), U) : U;
2566+
2567+
bool IsExact = false;
2568+
APFloat::opStatus Status =
2569+
FloatToRound.convertToInteger(ResInt, RMode, &IsExact);
2570+
2571+
if (Status != APFloat::opInvalidOp)
2572+
return ConstantInt::get(Ty, ResInt);
2573+
return nullptr;
2574+
}
2575+
}
2576+
23122577
/// We only fold functions with finite arguments. Folding NaN and inf is
23132578
/// likely to be aborted with an exception anyway, and some host libms
23142579
/// have known errors raising exceptions.

0 commit comments

Comments
 (0)