@@ -2006,6 +2006,15 @@ static const APFloat FTZPreserveSign(const APFloat &V) {
20062006 return V;
20072007}
20082008
2009+ // Get only the upper word of the input double in 1.11.20 format
2010+ // by making the lower 32-bits of the mantissa all 0.
2011+ static const APFloat ZeroLower32Bits (const APFloat &V) {
2012+ assert (V.getSizeInBits (V.getSemantics ()) == 64 );
2013+ uint64_t DoubleBits = V.bitcastToAPInt ().getZExtValue ();
2014+ DoubleBits &= 0xffffffff00000000 ;
2015+ return APFloat (V.getSemantics (), APInt (64 , DoubleBits, false , false ));
2016+ }
2017+
20092018Constant *ConstantFoldFP (double (*NativeFP)(double ), const APFloat &V, Type *Ty,
20102019 bool ShouldFTZPreservingSign = false) {
20112020 llvm_fenv_clearexcept ();
@@ -2651,6 +2660,8 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
26512660 bool IsFTZ = nvvm::RCPShouldFTZ (IntrinsicID);
26522661
26532662 auto Denominator = IsFTZ ? FTZPreserveSign (APF) : APF;
2663+ if (IntrinsicID == Intrinsic::nvvm_rcp_approx_ftz_d)
2664+ Denominator = ZeroLower32Bits (Denominator);
26542665 if (IsApprox && Denominator.isZero ()) {
26552666 // According to the PTX spec, approximate rcp should return infinity
26562667 // with the same sign as the denominator when dividing by 0.
@@ -2663,6 +2674,8 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
26632674 if (Status == APFloat::opOK || Status == APFloat::opInexact) {
26642675 if (IsFTZ)
26652676 Res = FTZPreserveSign (Res);
2677+ if (IntrinsicID == Intrinsic::nvvm_rcp_approx_ftz_d)
2678+ Res = ZeroLower32Bits (Res);
26662679 return ConstantFP::get (Ty->getContext (), Res);
26672680 }
26682681 return nullptr ;
@@ -2680,14 +2693,24 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
26802693 case Intrinsic::nvvm_rsqrt_approx_f: {
26812694 bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID);
26822695 auto V = IsFTZ ? FTZPreserveSign (APF) : APF;
2696+
2697+ if (IntrinsicID == Intrinsic::nvvm_rsqrt_approx_ftz_d)
2698+ V = ZeroLower32Bits (V);
2699+
26832700 APFloat SqrtV (sqrt (V.convertToDouble ()));
26842701
2685- bool lost;
2686- SqrtV.convert (APF.getSemantics (), APFloat::rmNearestTiesToEven, &lost);
2702+ if (Ty->isFloatTy ()) {
2703+ bool lost;
2704+ SqrtV.convert (APF.getSemantics (), APFloat::rmNearestTiesToEven,
2705+ &lost);
2706+ }
26872707
26882708 APFloat Res = APFloat::getOne (APF.getSemantics ());
26892709 Res.divide (SqrtV, APFloat::rmNearestTiesToEven);
26902710
2711+ if (IntrinsicID == Intrinsic::nvvm_rsqrt_approx_ftz_d)
2712+ Res = ZeroLower32Bits (Res);
2713+
26912714 // We do not need to flush the output for ftz because it is impossible
26922715 // for 1/sqrt(x) to be a denormal value. If x is the largest fp value,
26932716 // sqrt(x) will be a number with the exponent approximately halved and
0 commit comments