Skip to content

Commit 9854230

Browse files
committed
Remove all approximate intrinsic folding
1 parent ca39bbc commit 9854230

File tree

3 files changed

+1
-515
lines changed

3 files changed

+1
-515
lines changed

llvm/include/llvm/IR/NVVMIntrinsicUtils.h

Lines changed: 0 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -340,40 +340,24 @@ inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
340340
inline bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
341341
switch (IntrinsicID) {
342342
case Intrinsic::nvvm_ceil_ftz_f:
343-
case Intrinsic::nvvm_cos_approx_ftz_f:
344-
case Intrinsic::nvvm_ex2_approx_ftz_f:
345343
case Intrinsic::nvvm_fabs_ftz:
346344
case Intrinsic::nvvm_floor_ftz_f:
347-
case Intrinsic::nvvm_lg2_approx_ftz_f:
348345
case Intrinsic::nvvm_round_ftz_f:
349-
case Intrinsic::nvvm_rsqrt_approx_ftz_d:
350-
case Intrinsic::nvvm_rsqrt_approx_ftz_f:
351346
case Intrinsic::nvvm_saturate_ftz_f:
352-
case Intrinsic::nvvm_sin_approx_ftz_f:
353347
case Intrinsic::nvvm_sqrt_rn_ftz_f:
354-
case Intrinsic::nvvm_sqrt_approx_ftz_f:
355348
return true;
356349
case Intrinsic::nvvm_ceil_f:
357350
case Intrinsic::nvvm_ceil_d:
358-
case Intrinsic::nvvm_cos_approx_f:
359-
case Intrinsic::nvvm_ex2_approx_d:
360-
case Intrinsic::nvvm_ex2_approx_f:
361351
case Intrinsic::nvvm_fabs:
362352
case Intrinsic::nvvm_floor_f:
363353
case Intrinsic::nvvm_floor_d:
364-
case Intrinsic::nvvm_lg2_approx_d:
365-
case Intrinsic::nvvm_lg2_approx_f:
366354
case Intrinsic::nvvm_round_f:
367355
case Intrinsic::nvvm_round_d:
368-
case Intrinsic::nvvm_rsqrt_approx_d:
369-
case Intrinsic::nvvm_rsqrt_approx_f:
370356
case Intrinsic::nvvm_saturate_d:
371357
case Intrinsic::nvvm_saturate_f:
372-
case Intrinsic::nvvm_sin_approx_f:
373358
case Intrinsic::nvvm_sqrt_f:
374359
case Intrinsic::nvvm_sqrt_rn_d:
375360
case Intrinsic::nvvm_sqrt_rn_f:
376-
case Intrinsic::nvvm_sqrt_approx_f:
377361
return false;
378362
}
379363
llvm_unreachable("Checking FTZ flag for invalid unary intrinsic");
@@ -385,8 +369,6 @@ inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) {
385369
case Intrinsic::nvvm_rcp_rn_ftz_f:
386370
case Intrinsic::nvvm_rcp_rp_ftz_f:
387371
case Intrinsic::nvvm_rcp_rz_ftz_f:
388-
case Intrinsic::nvvm_rcp_approx_ftz_f:
389-
case Intrinsic::nvvm_rcp_approx_ftz_d:
390372
return true;
391373
case Intrinsic::nvvm_rcp_rm_d:
392374
case Intrinsic::nvvm_rcp_rm_f:
@@ -408,8 +390,6 @@ inline APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID) {
408390
case Intrinsic::nvvm_rcp_rm_ftz_f:
409391
return APFloat::rmTowardNegative;
410392

411-
case Intrinsic::nvvm_rcp_approx_ftz_f:
412-
case Intrinsic::nvvm_rcp_approx_ftz_d:
413393
case Intrinsic::nvvm_rcp_rn_f:
414394
case Intrinsic::nvvm_rcp_rn_d:
415395
case Intrinsic::nvvm_rcp_rn_ftz_f:
@@ -428,32 +408,6 @@ inline APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID) {
428408
llvm_unreachable("Checking rounding mode for invalid rcp intrinsic");
429409
}
430410

431-
inline bool RCPIsApprox(Intrinsic::ID IntrinsicID) {
432-
switch (IntrinsicID) {
433-
case Intrinsic::nvvm_rcp_approx_ftz_f:
434-
case Intrinsic::nvvm_rcp_approx_ftz_d:
435-
return true;
436-
437-
case Intrinsic::nvvm_rcp_rm_f:
438-
case Intrinsic::nvvm_rcp_rm_d:
439-
case Intrinsic::nvvm_rcp_rm_ftz_f:
440-
441-
case Intrinsic::nvvm_rcp_rn_f:
442-
case Intrinsic::nvvm_rcp_rn_d:
443-
case Intrinsic::nvvm_rcp_rn_ftz_f:
444-
445-
case Intrinsic::nvvm_rcp_rp_f:
446-
case Intrinsic::nvvm_rcp_rp_d:
447-
case Intrinsic::nvvm_rcp_rp_ftz_f:
448-
449-
case Intrinsic::nvvm_rcp_rz_f:
450-
case Intrinsic::nvvm_rcp_rz_d:
451-
case Intrinsic::nvvm_rcp_rz_ftz_f:
452-
return false;
453-
}
454-
llvm_unreachable("Checking approx flag for invalid rcp intrinsic");
455-
}
456-
457411
inline DenormalMode GetNVVMDenromMode(bool ShouldFTZ) {
458412
if (ShouldFTZ)
459413
return DenormalMode::getPreserveSign();

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 0 additions & 111 deletions
Original file line number | Diff line number | Diff line change
@@ -1805,24 +1805,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
18051805
case Intrinsic::nvvm_ceil_f:
18061806
case Intrinsic::nvvm_ceil_ftz_f:
18071807

1808-
case Intrinsic::nvvm_cos_approx_f:
1809-
case Intrinsic::nvvm_cos_approx_ftz_f:
1810-
1811-
case Intrinsic::nvvm_ex2_approx_d:
1812-
case Intrinsic::nvvm_ex2_approx_f:
1813-
case Intrinsic::nvvm_ex2_approx_ftz_f:
1814-
18151808
case Intrinsic::nvvm_fabs:
18161809
case Intrinsic::nvvm_fabs_ftz:
18171810

18181811
case Intrinsic::nvvm_floor_d:
18191812
case Intrinsic::nvvm_floor_f:
18201813
case Intrinsic::nvvm_floor_ftz_f:
18211814

1822-
case Intrinsic::nvvm_lg2_approx_d:
1823-
case Intrinsic::nvvm_lg2_approx_f:
1824-
case Intrinsic::nvvm_lg2_approx_ftz_f:
1825-
18261815
case Intrinsic::nvvm_rcp_rm_d:
18271816
case Intrinsic::nvvm_rcp_rm_f:
18281817
case Intrinsic::nvvm_rcp_rm_ftz_f:
@@ -1835,31 +1824,19 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
18351824
case Intrinsic::nvvm_rcp_rz_d:
18361825
case Intrinsic::nvvm_rcp_rz_f:
18371826
case Intrinsic::nvvm_rcp_rz_ftz_f:
1838-
case Intrinsic::nvvm_rcp_approx_ftz_d:
1839-
case Intrinsic::nvvm_rcp_approx_ftz_f:
18401827

18411828
case Intrinsic::nvvm_round_d:
18421829
case Intrinsic::nvvm_round_f:
18431830
case Intrinsic::nvvm_round_ftz_f:
18441831

1845-
case Intrinsic::nvvm_rsqrt_approx_d:
1846-
case Intrinsic::nvvm_rsqrt_approx_f:
1847-
case Intrinsic::nvvm_rsqrt_approx_ftz_d:
1848-
case Intrinsic::nvvm_rsqrt_approx_ftz_f:
1849-
18501832
case Intrinsic::nvvm_saturate_d:
18511833
case Intrinsic::nvvm_saturate_f:
18521834
case Intrinsic::nvvm_saturate_ftz_f:
18531835

1854-
case Intrinsic::nvvm_sin_approx_f:
1855-
case Intrinsic::nvvm_sin_approx_ftz_f:
1856-
18571836
case Intrinsic::nvvm_sqrt_f:
18581837
case Intrinsic::nvvm_sqrt_rn_d:
18591838
case Intrinsic::nvvm_sqrt_rn_f:
18601839
case Intrinsic::nvvm_sqrt_rn_ftz_f:
1861-
case Intrinsic::nvvm_sqrt_approx_f:
1862-
case Intrinsic::nvvm_sqrt_approx_ftz_f:
18631840
return !Call->isStrictFP();
18641841

18651842
// Sign operations are actually bitwise operations, they do not raise
@@ -2025,15 +2002,6 @@ inline bool llvm_fenv_testexcept() {
20252002
return false;
20262003
}
20272004

2028-
// Get only the upper word of the input double in 1.11.20 format
2029-
// by making the lower 32-bits of the mantissa all 0.
2030-
static const APFloat ZeroLower32Bits(const APFloat &V) {
2031-
assert(V.getSizeInBits(V.getSemantics()) == 64);
2032-
uint64_t DoubleBits = V.bitcastToAPInt().getZExtValue();
2033-
DoubleBits &= 0xffffffff00000000;
2034-
return APFloat(V.getSemantics(), APInt(64, DoubleBits, false, false));
2035-
}
2036-
20372005
static const APFloat FTZPreserveSign(const APFloat &V) {
20382006
if (V.isDenormal())
20392007
return APFloat::getZero(V.getSemantics(), V.isNegative());
@@ -2663,21 +2631,6 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
26632631
nvvm::GetNVVMDenromMode(
26642632
nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
26652633

2666-
case Intrinsic::nvvm_cos_approx_ftz_f:
2667-
case Intrinsic::nvvm_cos_approx_f:
2668-
return ConstantFoldFP(
2669-
cos, APF, Ty,
2670-
nvvm::GetNVVMDenromMode(
2671-
nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2672-
2673-
case Intrinsic::nvvm_ex2_approx_ftz_f:
2674-
case Intrinsic::nvvm_ex2_approx_d:
2675-
case Intrinsic::nvvm_ex2_approx_f:
2676-
return ConstantFoldFP(
2677-
exp2, APF, Ty,
2678-
nvvm::GetNVVMDenromMode(
2679-
(nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID))));
2680-
26812634
case Intrinsic::nvvm_fabs_ftz:
26822635
case Intrinsic::nvvm_fabs:
26832636
return ConstantFoldFP(
@@ -2693,23 +2646,10 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
26932646
nvvm::GetNVVMDenromMode(
26942647
nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
26952648

2696-
case Intrinsic::nvvm_lg2_approx_ftz_f:
2697-
case Intrinsic::nvvm_lg2_approx_d:
2698-
case Intrinsic::nvvm_lg2_approx_f: {
2699-
if (APF.isNegative() || APF.isZero())
2700-
return nullptr;
2701-
return ConstantFoldFP(
2702-
log2, APF, Ty,
2703-
nvvm::GetNVVMDenromMode(
2704-
nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2705-
}
2706-
27072649
case Intrinsic::nvvm_rcp_rm_ftz_f:
27082650
case Intrinsic::nvvm_rcp_rn_ftz_f:
27092651
case Intrinsic::nvvm_rcp_rp_ftz_f:
27102652
case Intrinsic::nvvm_rcp_rz_ftz_f:
2711-
case Intrinsic::nvvm_rcp_approx_ftz_f:
2712-
case Intrinsic::nvvm_rcp_approx_ftz_d:
27132653
case Intrinsic::nvvm_rcp_rm_d:
27142654
case Intrinsic::nvvm_rcp_rm_f:
27152655
case Intrinsic::nvvm_rcp_rn_d:
@@ -2719,26 +2659,15 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
27192659
case Intrinsic::nvvm_rcp_rz_d:
27202660
case Intrinsic::nvvm_rcp_rz_f: {
27212661
APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode(IntrinsicID);
2722-
bool IsApprox = nvvm::RCPIsApprox(IntrinsicID);
27232662
bool IsFTZ = nvvm::RCPShouldFTZ(IntrinsicID);
27242663

27252664
auto Denominator = IsFTZ ? FTZPreserveSign(APF) : APF;
2726-
if (IntrinsicID == Intrinsic::nvvm_rcp_approx_ftz_d)
2727-
Denominator = ZeroLower32Bits(Denominator);
2728-
if (IsApprox && Denominator.isZero()) {
2729-
// According to the PTX spec, approximate rcp should return infinity
2730-
// with the same sign as the denominator when dividing by 0.
2731-
APFloat Inf = APFloat::getInf(APF.getSemantics(), APF.isNegative());
2732-
return ConstantFP::get(Ty->getContext(), Inf);
2733-
}
27342665
APFloat Res = APFloat::getOne(APF.getSemantics());
27352666
APFloat::opStatus Status = Res.divide(Denominator, RoundMode);
27362667

27372668
if (Status == APFloat::opOK || Status == APFloat::opInexact) {
27382669
if (IsFTZ)
27392670
Res = FTZPreserveSign(Res);
2740-
if (IntrinsicID == Intrinsic::nvvm_rcp_approx_ftz_d)
2741-
Res = ZeroLower32Bits(Res);
27422671
return ConstantFP::get(Ty->getContext(), Res);
27432672
}
27442673
return nullptr;
@@ -2752,37 +2681,6 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
27522681
nvvm::GetNVVMDenromMode(
27532682
nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
27542683

2755-
case Intrinsic::nvvm_rsqrt_approx_ftz_d:
2756-
case Intrinsic::nvvm_rsqrt_approx_ftz_f:
2757-
case Intrinsic::nvvm_rsqrt_approx_d:
2758-
case Intrinsic::nvvm_rsqrt_approx_f: {
2759-
bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
2760-
auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
2761-
2762-
if (IntrinsicID == Intrinsic::nvvm_rsqrt_approx_ftz_d)
2763-
V = ZeroLower32Bits(V);
2764-
2765-
APFloat SqrtV(sqrt(V.convertToDouble()));
2766-
2767-
if (Ty->isFloatTy()) {
2768-
bool lost;
2769-
SqrtV.convert(APF.getSemantics(), APFloat::rmNearestTiesToEven,
2770-
&lost);
2771-
}
2772-
2773-
APFloat Res = APFloat::getOne(APF.getSemantics());
2774-
Res.divide(SqrtV, APFloat::rmNearestTiesToEven);
2775-
2776-
if (IntrinsicID == Intrinsic::nvvm_rsqrt_approx_ftz_d)
2777-
Res = ZeroLower32Bits(Res);
2778-
2779-
// We do not need to flush the output for ftz because it is impossible
2780-
// for 1/sqrt(x) to be a denormal value. If x is the largest fp value,
2781-
// sqrt(x) will be a number with the exponent approximately halved and
2782-
// the reciprocal of that number can't be small enough to be denormal.
2783-
return ConstantFP::get(Ty->getContext(), Res);
2784-
}
2785-
27862684
case Intrinsic::nvvm_saturate_ftz_f:
27872685
case Intrinsic::nvvm_saturate_d:
27882686
case Intrinsic::nvvm_saturate_f: {
@@ -2796,19 +2694,10 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
27962694
return ConstantFP::get(Ty->getContext(), APF);
27972695
}
27982696

2799-
case Intrinsic::nvvm_sin_approx_ftz_f:
2800-
case Intrinsic::nvvm_sin_approx_f:
2801-
return ConstantFoldFP(
2802-
sin, APF, Ty,
2803-
nvvm::GetNVVMDenromMode(
2804-
nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2805-
28062697
case Intrinsic::nvvm_sqrt_rn_ftz_f:
2807-
case Intrinsic::nvvm_sqrt_approx_ftz_f:
28082698
case Intrinsic::nvvm_sqrt_f:
28092699
case Intrinsic::nvvm_sqrt_rn_d:
28102700
case Intrinsic::nvvm_sqrt_rn_f:
2811-
case Intrinsic::nvvm_sqrt_approx_f:
28122701
if (APF.isNegative())
28132702
return nullptr;
28142703
return ConstantFoldFP(

0 commit comments

Comments
 (0)