@@ -1801,6 +1801,44 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
1801
1801
case Intrinsic::nvvm_d2ull_rn:
1802
1802
case Intrinsic::nvvm_d2ull_rp:
1803
1803
case Intrinsic::nvvm_d2ull_rz:
1804
+
1805
+ // NVVM math intrinsics:
1806
+ case Intrinsic::nvvm_ceil_d:
1807
+ case Intrinsic::nvvm_ceil_f:
1808
+ case Intrinsic::nvvm_ceil_ftz_f:
1809
+
1810
+ case Intrinsic::nvvm_fabs:
1811
+ case Intrinsic::nvvm_fabs_ftz:
1812
+
1813
+ case Intrinsic::nvvm_floor_d:
1814
+ case Intrinsic::nvvm_floor_f:
1815
+ case Intrinsic::nvvm_floor_ftz_f:
1816
+
1817
+ case Intrinsic::nvvm_rcp_rm_d:
1818
+ case Intrinsic::nvvm_rcp_rm_f:
1819
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
1820
+ case Intrinsic::nvvm_rcp_rn_d:
1821
+ case Intrinsic::nvvm_rcp_rn_f:
1822
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
1823
+ case Intrinsic::nvvm_rcp_rp_d:
1824
+ case Intrinsic::nvvm_rcp_rp_f:
1825
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
1826
+ case Intrinsic::nvvm_rcp_rz_d:
1827
+ case Intrinsic::nvvm_rcp_rz_f:
1828
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
1829
+
1830
+ case Intrinsic::nvvm_round_d:
1831
+ case Intrinsic::nvvm_round_f:
1832
+ case Intrinsic::nvvm_round_ftz_f:
1833
+
1834
+ case Intrinsic::nvvm_saturate_d:
1835
+ case Intrinsic::nvvm_saturate_f:
1836
+ case Intrinsic::nvvm_saturate_ftz_f:
1837
+
1838
+ case Intrinsic::nvvm_sqrt_f:
1839
+ case Intrinsic::nvvm_sqrt_rn_d:
1840
+ case Intrinsic::nvvm_sqrt_rn_f:
1841
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
1804
1842
return !Call->isStrictFP ();
1805
1843
1806
1844
// Sign operations are actually bitwise operations, they do not raise
@@ -1818,6 +1856,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
1818
1856
case Intrinsic::nearbyint:
1819
1857
case Intrinsic::rint:
1820
1858
case Intrinsic::canonicalize:
1859
+
1821
1860
// Constrained intrinsics can be folded if FP environment is known
1822
1861
// to compiler.
1823
1862
case Intrinsic::experimental_constrained_fma:
@@ -1965,22 +2004,56 @@ inline bool llvm_fenv_testexcept() {
1965
2004
return false ;
1966
2005
}
1967
2006
1968
- static APFloat FTZPreserveSign (const APFloat &V) {
2007
+ static const APFloat FTZPreserveSign (const APFloat &V) {
1969
2008
if (V.isDenormal ())
1970
2009
return APFloat::getZero (V.getSemantics (), V.isNegative ());
1971
2010
return V;
1972
2011
}
1973
2012
1974
- Constant *ConstantFoldFP (double (*NativeFP)(double ), const APFloat &V,
1975
- Type *Ty) {
2013
+ static const APFloat FlushToPositiveZero (const APFloat &V) {
2014
+ if (V.isDenormal ())
2015
+ return APFloat::getZero (V.getSemantics (), false );
2016
+ return V;
2017
+ }
2018
+
2019
+ static const APFloat
2020
+ FlushWithDenormKind (const APFloat &V,
2021
+ DenormalMode::DenormalModeKind DenormKind) {
2022
+ assert (DenormKind != DenormalMode::DenormalModeKind::Invalid &&
2023
+ DenormKind != DenormalMode::DenormalModeKind::Dynamic);
2024
+ switch (DenormKind) {
2025
+ case DenormalMode::DenormalModeKind::IEEE:
2026
+ return V;
2027
+ case DenormalMode::DenormalModeKind::PreserveSign:
2028
+ return FTZPreserveSign (V);
2029
+ case DenormalMode::DenormalModeKind::PositiveZero:
2030
+ return FlushToPositiveZero (V);
2031
+ default :
2032
+ llvm_unreachable (" Invalid denormal mode!" );
2033
+ }
2034
+ }
2035
+
2036
+ Constant *ConstantFoldFP (double (*NativeFP)(double ), const APFloat &V, Type *Ty,
2037
+ DenormalMode DenormMode = DenormalMode::getIEEE()) {
2038
+ if (!DenormMode.isValid () ||
2039
+ DenormMode.Input == DenormalMode::DenormalModeKind::Dynamic ||
2040
+ DenormMode.Output == DenormalMode::DenormalModeKind::Dynamic)
2041
+ return nullptr ;
2042
+
1976
2043
llvm_fenv_clearexcept ();
1977
- double Result = NativeFP (V.convertToDouble ());
2044
+ auto Input = FlushWithDenormKind (V, DenormMode.Input );
2045
+ double Result = NativeFP (Input.convertToDouble ());
1978
2046
if (llvm_fenv_testexcept ()) {
1979
2047
llvm_fenv_clearexcept ();
1980
2048
return nullptr ;
1981
2049
}
1982
2050
1983
- return GetConstantFoldFPValue (Result, Ty);
2051
+ Constant *Output = GetConstantFoldFPValue (Result, Ty);
2052
+ if (DenormMode.Output == DenormalMode::DenormalModeKind::IEEE)
2053
+ return Output;
2054
+ const auto *CFP = static_cast <ConstantFP *>(Output);
2055
+ const auto Res = FlushWithDenormKind (CFP->getValueAPF (), DenormMode.Output );
2056
+ return ConstantFP::get (Ty->getContext (), Res);
1984
2057
}
1985
2058
1986
2059
#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
@@ -2550,6 +2623,91 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
2550
2623
return ConstantFoldFP (atan, APF, Ty);
2551
2624
case Intrinsic::sqrt:
2552
2625
return ConstantFoldFP (sqrt, APF, Ty);
2626
+
2627
+ // NVVM Intrinsics:
2628
+ case Intrinsic::nvvm_ceil_ftz_f:
2629
+ case Intrinsic::nvvm_ceil_f:
2630
+ case Intrinsic::nvvm_ceil_d:
2631
+ return ConstantFoldFP (
2632
+ ceil, APF, Ty,
2633
+ nvvm::GetNVVMDenromMode (
2634
+ nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID)));
2635
+
2636
+ case Intrinsic::nvvm_fabs_ftz:
2637
+ case Intrinsic::nvvm_fabs:
2638
+ return ConstantFoldFP (
2639
+ fabs, APF, Ty,
2640
+ nvvm::GetNVVMDenromMode (
2641
+ nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID)));
2642
+
2643
+ case Intrinsic::nvvm_floor_ftz_f:
2644
+ case Intrinsic::nvvm_floor_f:
2645
+ case Intrinsic::nvvm_floor_d:
2646
+ return ConstantFoldFP (
2647
+ floor, APF, Ty,
2648
+ nvvm::GetNVVMDenromMode (
2649
+ nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID)));
2650
+
2651
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
2652
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
2653
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
2654
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
2655
+ case Intrinsic::nvvm_rcp_rm_d:
2656
+ case Intrinsic::nvvm_rcp_rm_f:
2657
+ case Intrinsic::nvvm_rcp_rn_d:
2658
+ case Intrinsic::nvvm_rcp_rn_f:
2659
+ case Intrinsic::nvvm_rcp_rp_d:
2660
+ case Intrinsic::nvvm_rcp_rp_f:
2661
+ case Intrinsic::nvvm_rcp_rz_d:
2662
+ case Intrinsic::nvvm_rcp_rz_f: {
2663
+ APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode (IntrinsicID);
2664
+ bool IsFTZ = nvvm::RCPShouldFTZ (IntrinsicID);
2665
+
2666
+ auto Denominator = IsFTZ ? FTZPreserveSign (APF) : APF;
2667
+ APFloat Res = APFloat::getOne (APF.getSemantics ());
2668
+ APFloat::opStatus Status = Res.divide (Denominator, RoundMode);
2669
+
2670
+ if (Status == APFloat::opOK || Status == APFloat::opInexact) {
2671
+ if (IsFTZ)
2672
+ Res = FTZPreserveSign (Res);
2673
+ return ConstantFP::get (Ty->getContext (), Res);
2674
+ }
2675
+ return nullptr ;
2676
+ }
2677
+
2678
+ case Intrinsic::nvvm_round_ftz_f:
2679
+ case Intrinsic::nvvm_round_f:
2680
+ case Intrinsic::nvvm_round_d:
2681
+ return ConstantFoldFP (
2682
+ round, APF, Ty,
2683
+ nvvm::GetNVVMDenromMode (
2684
+ nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID)));
2685
+
2686
+ case Intrinsic::nvvm_saturate_ftz_f:
2687
+ case Intrinsic::nvvm_saturate_d:
2688
+ case Intrinsic::nvvm_saturate_f: {
2689
+ bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID);
2690
+ auto V = IsFTZ ? FTZPreserveSign (APF) : APF;
2691
+ if (V.isNegative () || V.isZero () || V.isNaN ())
2692
+ return ConstantFP::getZero (Ty);
2693
+ APFloat One = APFloat::getOne (APF.getSemantics ());
2694
+ if (V > One)
2695
+ return ConstantFP::get (Ty->getContext (), One);
2696
+ return ConstantFP::get (Ty->getContext (), APF);
2697
+ }
2698
+
2699
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
2700
+ case Intrinsic::nvvm_sqrt_f:
2701
+ case Intrinsic::nvvm_sqrt_rn_d:
2702
+ case Intrinsic::nvvm_sqrt_rn_f:
2703
+ if (APF.isNegative ())
2704
+ return nullptr ;
2705
+ return ConstantFoldFP (
2706
+ sqrt, APF, Ty,
2707
+ nvvm::GetNVVMDenromMode (
2708
+ nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID)));
2709
+
2710
+ // AMDGCN Intrinsics:
2553
2711
case Intrinsic::amdgcn_cos:
2554
2712
case Intrinsic::amdgcn_sin: {
2555
2713
double V = getValueAsDouble (Op);
0 commit comments