|
45 | 45 | #include "llvm/IR/IntrinsicsAArch64.h" |
46 | 46 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
47 | 47 | #include "llvm/IR/IntrinsicsARM.h" |
| 48 | +#include "llvm/IR/IntrinsicsNVPTX.h" |
48 | 49 | #include "llvm/IR/IntrinsicsWebAssembly.h" |
49 | 50 | #include "llvm/IR/IntrinsicsX86.h" |
50 | 51 | #include "llvm/IR/Operator.h" |
@@ -1687,6 +1688,58 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { |
1687 | 1688 | case Intrinsic::x86_avx512_cvttsd2usi64: |
1688 | 1689 | return !Call->isStrictFP(); |
1689 | 1690 |
|
| 1691 | + // NVVM float/double to int32/uint32 conversion intrinsics |
| 1692 | + case Intrinsic::nvvm_f2i_rm: |
| 1693 | + case Intrinsic::nvvm_f2i_rn: |
| 1694 | + case Intrinsic::nvvm_f2i_rp: |
| 1695 | + case Intrinsic::nvvm_f2i_rz: |
| 1696 | + case Intrinsic::nvvm_f2i_rm_ftz: |
| 1697 | + case Intrinsic::nvvm_f2i_rn_ftz: |
| 1698 | + case Intrinsic::nvvm_f2i_rp_ftz: |
| 1699 | + case Intrinsic::nvvm_f2i_rz_ftz: |
| 1700 | + case Intrinsic::nvvm_f2ui_rm: |
| 1701 | + case Intrinsic::nvvm_f2ui_rn: |
| 1702 | + case Intrinsic::nvvm_f2ui_rp: |
| 1703 | + case Intrinsic::nvvm_f2ui_rz: |
| 1704 | + case Intrinsic::nvvm_f2ui_rm_ftz: |
| 1705 | + case Intrinsic::nvvm_f2ui_rn_ftz: |
| 1706 | + case Intrinsic::nvvm_f2ui_rp_ftz: |
| 1707 | + case Intrinsic::nvvm_f2ui_rz_ftz: |
| 1708 | + case Intrinsic::nvvm_d2i_rm: |
| 1709 | + case Intrinsic::nvvm_d2i_rn: |
| 1710 | + case Intrinsic::nvvm_d2i_rp: |
| 1711 | + case Intrinsic::nvvm_d2i_rz: |
| 1712 | + case Intrinsic::nvvm_d2ui_rm: |
| 1713 | + case Intrinsic::nvvm_d2ui_rn: |
| 1714 | + case Intrinsic::nvvm_d2ui_rp: |
| 1715 | + case Intrinsic::nvvm_d2ui_rz: |
| 1716 | + |
| 1717 | + // NVVM float/double to int64/uint64 conversion intrinsics |
| 1718 | + case Intrinsic::nvvm_f2ll_rm: |
| 1719 | + case Intrinsic::nvvm_f2ll_rn: |
| 1720 | + case Intrinsic::nvvm_f2ll_rp: |
| 1721 | + case Intrinsic::nvvm_f2ll_rz: |
| 1722 | + case Intrinsic::nvvm_f2ll_rm_ftz: |
| 1723 | + case Intrinsic::nvvm_f2ll_rn_ftz: |
| 1724 | + case Intrinsic::nvvm_f2ll_rp_ftz: |
| 1725 | + case Intrinsic::nvvm_f2ll_rz_ftz: |
| 1726 | + case Intrinsic::nvvm_f2ull_rm: |
| 1727 | + case Intrinsic::nvvm_f2ull_rn: |
| 1728 | + case Intrinsic::nvvm_f2ull_rp: |
| 1729 | + case Intrinsic::nvvm_f2ull_rz: |
| 1730 | + case Intrinsic::nvvm_f2ull_rm_ftz: |
| 1731 | + case Intrinsic::nvvm_f2ull_rn_ftz: |
| 1732 | + case Intrinsic::nvvm_f2ull_rp_ftz: |
| 1733 | + case Intrinsic::nvvm_f2ull_rz_ftz: |
| 1734 | + case Intrinsic::nvvm_d2ll_rm: |
| 1735 | + case Intrinsic::nvvm_d2ll_rn: |
| 1736 | + case Intrinsic::nvvm_d2ll_rp: |
| 1737 | + case Intrinsic::nvvm_d2ll_rz: |
| 1738 | + case Intrinsic::nvvm_d2ull_rm: |
| 1739 | + case Intrinsic::nvvm_d2ull_rn: |
| 1740 | + case Intrinsic::nvvm_d2ull_rp: |
| 1741 | + case Intrinsic::nvvm_d2ull_rz: |
| 1742 | + |
1690 | 1743 | // Sign operations are actually bitwise operations, they do not raise |
1691 | 1744 | // exceptions even for SNANs. |
1692 | 1745 | case Intrinsic::fabs: |
@@ -1849,6 +1902,13 @@ inline bool llvm_fenv_testexcept() { |
1849 | 1902 | return false; |
1850 | 1903 | } |
1851 | 1904 |
|
| 1905 | +static const APFloat FTZPreserveSign(Type *Ty, const APFloat &V) { |
| 1906 | + if (V.isDenormal()) |
| 1907 | + return APFloat::getZero(Ty->getFltSemantics(), V.isNegative()); |
| 1908 | + |
| 1909 | + return V; |
| 1910 | +} |
| 1911 | + |
1852 | 1912 | Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, |
1853 | 1913 | Type *Ty) { |
1854 | 1914 | llvm_fenv_clearexcept(); |
@@ -2309,6 +2369,211 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, |
2309 | 2369 | return ConstantFP::get(Ty->getContext(), U); |
2310 | 2370 | } |
2311 | 2371 |
|
| 2372 | + // NVVM float/double to signed/unsigned int32/int64 conversions: |
| 2373 | + switch (IntrinsicID) { |
| 2374 | + // f2i |
| 2375 | + case Intrinsic::nvvm_f2i_rm: |
| 2376 | + case Intrinsic::nvvm_f2i_rn: |
| 2377 | + case Intrinsic::nvvm_f2i_rp: |
| 2378 | + case Intrinsic::nvvm_f2i_rz: |
| 2379 | + case Intrinsic::nvvm_f2i_rm_ftz: |
| 2380 | + case Intrinsic::nvvm_f2i_rn_ftz: |
| 2381 | + case Intrinsic::nvvm_f2i_rp_ftz: |
| 2382 | + case Intrinsic::nvvm_f2i_rz_ftz: |
| 2383 | + // f2ui |
| 2384 | + case Intrinsic::nvvm_f2ui_rm: |
| 2385 | + case Intrinsic::nvvm_f2ui_rn: |
| 2386 | + case Intrinsic::nvvm_f2ui_rp: |
| 2387 | + case Intrinsic::nvvm_f2ui_rz: |
| 2388 | + case Intrinsic::nvvm_f2ui_rm_ftz: |
| 2389 | + case Intrinsic::nvvm_f2ui_rn_ftz: |
| 2390 | + case Intrinsic::nvvm_f2ui_rp_ftz: |
| 2391 | + case Intrinsic::nvvm_f2ui_rz_ftz: |
| 2392 | + // d2i |
| 2393 | + case Intrinsic::nvvm_d2i_rm: |
| 2394 | + case Intrinsic::nvvm_d2i_rn: |
| 2395 | + case Intrinsic::nvvm_d2i_rp: |
| 2396 | + case Intrinsic::nvvm_d2i_rz: |
| 2397 | + // d2ui |
| 2398 | + case Intrinsic::nvvm_d2ui_rm: |
| 2399 | + case Intrinsic::nvvm_d2ui_rn: |
| 2400 | + case Intrinsic::nvvm_d2ui_rp: |
| 2401 | + case Intrinsic::nvvm_d2ui_rz: |
| 2402 | + // f2ll |
| 2403 | + case Intrinsic::nvvm_f2ll_rm: |
| 2404 | + case Intrinsic::nvvm_f2ll_rn: |
| 2405 | + case Intrinsic::nvvm_f2ll_rp: |
| 2406 | + case Intrinsic::nvvm_f2ll_rz: |
| 2407 | + case Intrinsic::nvvm_f2ll_rm_ftz: |
| 2408 | + case Intrinsic::nvvm_f2ll_rn_ftz: |
| 2409 | + case Intrinsic::nvvm_f2ll_rp_ftz: |
| 2410 | + case Intrinsic::nvvm_f2ll_rz_ftz: |
| 2411 | + // f2ull |
| 2412 | + case Intrinsic::nvvm_f2ull_rm: |
| 2413 | + case Intrinsic::nvvm_f2ull_rn: |
| 2414 | + case Intrinsic::nvvm_f2ull_rp: |
| 2415 | + case Intrinsic::nvvm_f2ull_rz: |
| 2416 | + case Intrinsic::nvvm_f2ull_rm_ftz: |
| 2417 | + case Intrinsic::nvvm_f2ull_rn_ftz: |
| 2418 | + case Intrinsic::nvvm_f2ull_rp_ftz: |
| 2419 | + case Intrinsic::nvvm_f2ull_rz_ftz: |
| 2420 | + // d2ll |
| 2421 | + case Intrinsic::nvvm_d2ll_rm: |
| 2422 | + case Intrinsic::nvvm_d2ll_rn: |
| 2423 | + case Intrinsic::nvvm_d2ll_rp: |
| 2424 | + case Intrinsic::nvvm_d2ll_rz: |
| 2425 | + // d2ull |
| 2426 | + case Intrinsic::nvvm_d2ull_rm: |
| 2427 | + case Intrinsic::nvvm_d2ull_rn: |
| 2428 | + case Intrinsic::nvvm_d2ull_rp: |
| 2429 | + case Intrinsic::nvvm_d2ull_rz: { |
| 2430 | + // In float-to-integer conversion, NaN inputs are converted to 0. |
| 2431 | + if (U.isNaN()) |
| 2432 | + return ConstantInt::get(Ty, 0); |
| 2433 | + |
| 2434 | + APFloat::roundingMode RMode = APFloat::roundingMode::Invalid; |
| 2435 | + switch (IntrinsicID) { |
| 2436 | + // i_rm |
| 2437 | + case Intrinsic::nvvm_f2i_rm: |
| 2438 | + case Intrinsic::nvvm_f2ui_rm: |
| 2439 | + case Intrinsic::nvvm_f2i_rm_ftz: |
| 2440 | + case Intrinsic::nvvm_f2ui_rm_ftz: |
| 2441 | + case Intrinsic::nvvm_d2i_rm: |
| 2442 | + case Intrinsic::nvvm_d2ui_rm: |
| 2443 | + // ll_rm |
| 2444 | + case Intrinsic::nvvm_f2ll_rm: |
| 2445 | + case Intrinsic::nvvm_f2ull_rm: |
| 2446 | + case Intrinsic::nvvm_f2ll_rm_ftz: |
| 2447 | + case Intrinsic::nvvm_f2ull_rm_ftz: |
| 2448 | + case Intrinsic::nvvm_d2ll_rm: |
| 2449 | + case Intrinsic::nvvm_d2ull_rm: |
| 2450 | + RMode = APFloat::rmTowardNegative; |
| 2451 | + break; |
| 2452 | + |
| 2453 | + // i_rn |
| 2454 | + case Intrinsic::nvvm_f2i_rn: |
| 2455 | + case Intrinsic::nvvm_f2ui_rn: |
| 2456 | + case Intrinsic::nvvm_f2i_rn_ftz: |
| 2457 | + case Intrinsic::nvvm_f2ui_rn_ftz: |
| 2458 | + case Intrinsic::nvvm_d2i_rn: |
| 2459 | + case Intrinsic::nvvm_d2ui_rn: |
| 2460 | + // ll_rn |
| 2461 | + case Intrinsic::nvvm_f2ll_rn: |
| 2462 | + case Intrinsic::nvvm_f2ull_rn: |
| 2463 | + case Intrinsic::nvvm_f2ll_rn_ftz: |
| 2464 | + case Intrinsic::nvvm_f2ull_rn_ftz: |
| 2465 | + case Intrinsic::nvvm_d2ll_rn: |
| 2466 | + case Intrinsic::nvvm_d2ull_rn: |
| 2467 | + RMode = APFloat::rmNearestTiesToEven; |
| 2468 | + break; |
| 2469 | + |
| 2470 | + // i_rp |
| 2471 | + case Intrinsic::nvvm_f2i_rp: |
| 2472 | + case Intrinsic::nvvm_f2ui_rp: |
| 2473 | + case Intrinsic::nvvm_f2i_rp_ftz: |
| 2474 | + case Intrinsic::nvvm_f2ui_rp_ftz: |
| 2475 | + case Intrinsic::nvvm_d2i_rp: |
| 2476 | + case Intrinsic::nvvm_d2ui_rp: |
| 2477 | + // ll_rp |
| 2478 | + case Intrinsic::nvvm_f2ll_rp: |
| 2479 | + case Intrinsic::nvvm_f2ull_rp: |
| 2480 | + case Intrinsic::nvvm_f2ll_rp_ftz: |
| 2481 | + case Intrinsic::nvvm_f2ull_rp_ftz: |
| 2482 | + case Intrinsic::nvvm_d2ll_rp: |
| 2483 | + case Intrinsic::nvvm_d2ull_rp: |
| 2484 | + RMode = APFloat::rmTowardPositive; |
| 2485 | + break; |
| 2486 | + |
| 2487 | + // i_rz |
| 2488 | + case Intrinsic::nvvm_f2i_rz: |
| 2489 | + case Intrinsic::nvvm_f2ui_rz: |
| 2490 | + case Intrinsic::nvvm_f2i_rz_ftz: |
| 2491 | + case Intrinsic::nvvm_f2ui_rz_ftz: |
| 2492 | + case Intrinsic::nvvm_d2i_rz: |
| 2493 | + case Intrinsic::nvvm_d2ui_rz: |
| 2494 | + // ll_rz |
| 2495 | + case Intrinsic::nvvm_f2ll_rz: |
| 2496 | + case Intrinsic::nvvm_f2ull_rz: |
| 2497 | + case Intrinsic::nvvm_f2ll_rz_ftz: |
| 2498 | + case Intrinsic::nvvm_f2ull_rz_ftz: |
| 2499 | + case Intrinsic::nvvm_d2ll_rz: |
| 2500 | + case Intrinsic::nvvm_d2ull_rz: |
| 2501 | + RMode = APFloat::rmTowardZero; |
| 2502 | + break; |
| 2503 | + default: |
| 2504 | + llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic"); |
| 2505 | + } |
| 2506 | + assert(RM != APFloat::roundingMode::Invalid); |
| 2507 | + |
| 2508 | + bool IsFTZ = false; |
| 2509 | + switch (IntrinsicID) { |
| 2510 | + case Intrinsic::nvvm_f2i_rm_ftz: |
| 2511 | + case Intrinsic::nvvm_f2i_rn_ftz: |
| 2512 | + case Intrinsic::nvvm_f2i_rp_ftz: |
| 2513 | + case Intrinsic::nvvm_f2i_rz_ftz: |
| 2514 | + case Intrinsic::nvvm_f2ui_rm_ftz: |
| 2515 | + case Intrinsic::nvvm_f2ui_rn_ftz: |
| 2516 | + case Intrinsic::nvvm_f2ui_rp_ftz: |
| 2517 | + case Intrinsic::nvvm_f2ui_rz_ftz: |
| 2518 | + case Intrinsic::nvvm_f2ll_rm_ftz: |
| 2519 | + case Intrinsic::nvvm_f2ll_rn_ftz: |
| 2520 | + case Intrinsic::nvvm_f2ll_rp_ftz: |
| 2521 | + case Intrinsic::nvvm_f2ll_rz_ftz: |
| 2522 | + case Intrinsic::nvvm_f2ull_rm_ftz: |
| 2523 | + case Intrinsic::nvvm_f2ull_rn_ftz: |
| 2524 | + case Intrinsic::nvvm_f2ull_rp_ftz: |
| 2525 | + case Intrinsic::nvvm_f2ull_rz_ftz: |
| 2526 | + IsFTZ = true; |
| 2527 | + break; |
| 2528 | + } |
| 2529 | + |
| 2530 | + bool IsSigned = false; |
| 2531 | + switch (IntrinsicID) { |
| 2532 | + // f2i |
| 2533 | + case Intrinsic::nvvm_f2i_rm: |
| 2534 | + case Intrinsic::nvvm_f2i_rm_ftz: |
| 2535 | + case Intrinsic::nvvm_f2i_rn: |
| 2536 | + case Intrinsic::nvvm_f2i_rn_ftz: |
| 2537 | + case Intrinsic::nvvm_f2i_rp: |
| 2538 | + case Intrinsic::nvvm_f2i_rp_ftz: |
| 2539 | + case Intrinsic::nvvm_f2i_rz: |
| 2540 | + case Intrinsic::nvvm_f2i_rz_ftz: |
| 2541 | + // d2i |
| 2542 | + case Intrinsic::nvvm_d2i_rm: |
| 2543 | + case Intrinsic::nvvm_d2i_rn: |
| 2544 | + case Intrinsic::nvvm_d2i_rp: |
| 2545 | + case Intrinsic::nvvm_d2i_rz: |
| 2546 | + // f2ll |
| 2547 | + case Intrinsic::nvvm_f2ll_rm: |
| 2548 | + case Intrinsic::nvvm_f2ll_rm_ftz: |
| 2549 | + case Intrinsic::nvvm_f2ll_rn: |
| 2550 | + case Intrinsic::nvvm_f2ll_rn_ftz: |
| 2551 | + case Intrinsic::nvvm_f2ll_rp: |
| 2552 | + case Intrinsic::nvvm_f2ll_rp_ftz: |
| 2553 | + case Intrinsic::nvvm_f2ll_rz: |
| 2554 | + case Intrinsic::nvvm_f2ll_rz_ftz: |
| 2555 | + // d2ll |
| 2556 | + case Intrinsic::nvvm_d2ll_rm: |
| 2557 | + case Intrinsic::nvvm_d2ll_rn: |
| 2558 | + case Intrinsic::nvvm_d2ll_rp: |
| 2559 | + case Intrinsic::nvvm_d2ll_rz: |
| 2560 | + IsSigned = true; |
| 2561 | + break; |
| 2562 | + } |
| 2563 | + |
| 2564 | + APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned); |
| 2565 | + auto FloatToRound = IsFTZ ? FTZPreserveSign(Op->getType(), U) : U; |
| 2566 | + |
| 2567 | + bool IsExact = false; |
| 2568 | + APFloat::opStatus Status = |
| 2569 | + FloatToRound.convertToInteger(ResInt, RMode, &IsExact); |
| 2570 | + |
| 2571 | + if (Status != APFloat::opInvalidOp) |
| 2572 | + return ConstantInt::get(Ty, ResInt); |
| 2573 | + return nullptr; |
| 2574 | + } |
| 2575 | + } |
| 2576 | + |
2312 | 2577 | /// We only fold functions with finite arguments. Folding NaN and inf is |
2313 | 2578 | /// likely to be aborted with an exception anyway, and some host libms |
2314 | 2579 | /// have known errors raising exceptions. |
|
0 commit comments