|
| 1 | +//===-- include/flang/Common/erfc-scaled.h-----------------------*- C++ -*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +#ifndef FORTRAN_COMMON_ERFC_SCALED_H_ |
| 10 | +#define FORTRAN_COMMON_ERFC_SCALED_H_ |
| 11 | + |
| 12 | +namespace Fortran::common { |
| 13 | +template <typename T> inline T ErfcScaled(T arg) { |
| 14 | + // Coefficients for approximation to erfc in the first interval. |
| 15 | + static const T a[5] = {3.16112374387056560e00, 1.13864154151050156e02, |
| 16 | + 3.77485237685302021e02, 3.20937758913846947e03, 1.85777706184603153e-1}; |
| 17 | + static const T b[4] = {2.36012909523441209e01, 2.44024637934444173e02, |
| 18 | + 1.28261652607737228e03, 2.84423683343917062e03}; |
| 19 | + |
| 20 | + // Coefficients for approximation to erfc in the second interval. |
| 21 | + static const T c[9] = {5.64188496988670089e-1, 8.88314979438837594e00, |
| 22 | + 6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02, |
| 23 | + 1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725e03, |
| 24 | + 2.15311535474403846e-8}; |
| 25 | + static const T d[8] = {1.57449261107098347e01, 1.17693950891312499e02, |
| 26 | + 5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03, |
| 27 | + 4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03}; |
| 28 | + |
| 29 | + // Coefficients for approximation to erfc in the third interval. |
| 30 | + static const T p[6] = {3.05326634961232344e-1, 3.60344899949804439e-1, |
| 31 | + 1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4, |
| 32 | + 1.63153871373020978e-2}; |
| 33 | + static const T q[5] = {2.56852019228982242e00, 1.87295284992346047e00, |
| 34 | + 5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3}; |
| 35 | + |
| 36 | + constexpr T sqrtpi{1.7724538509078120380404576221783883301349L}; |
| 37 | + constexpr T rsqrtpi{0.5641895835477562869480794515607725858440L}; |
| 38 | + constexpr T epsilonby2{std::numeric_limits<T>::epsilon() * 0.5}; |
| 39 | + constexpr T xneg{-26.628e0}; |
| 40 | + constexpr T xhuge{6.71e7}; |
| 41 | + constexpr T thresh{0.46875e0}; |
| 42 | + constexpr T zero{0.0}; |
| 43 | + constexpr T one{1.0}; |
| 44 | + constexpr T four{4.0}; |
| 45 | + constexpr T sixteen{16.0}; |
| 46 | + constexpr T xmax{1.0 / (sqrtpi * std::numeric_limits<T>::min())}; |
| 47 | + static_assert(xmax > xhuge, "xmax must be greater than xhuge"); |
| 48 | + |
| 49 | + T ysq; |
| 50 | + T xnum; |
| 51 | + T xden; |
| 52 | + T del; |
| 53 | + T result; |
| 54 | + |
| 55 | + auto x{arg}; |
| 56 | + auto y{std::fabs(x)}; |
| 57 | + |
| 58 | + if (y <= thresh) { |
| 59 | + // evaluate erf for |x| <= 0.46875 |
| 60 | + ysq = zero; |
| 61 | + if (y > epsilonby2) { |
| 62 | + ysq = y * y; |
| 63 | + } |
| 64 | + xnum = a[4] * ysq; |
| 65 | + xden = ysq; |
| 66 | + for (int i{0}; i < 3; i++) { |
| 67 | + xnum = (xnum + a[i]) * ysq; |
| 68 | + xden = (xden + b[i]) * ysq; |
| 69 | + } |
| 70 | + result = x * (xnum + a[3]) / (xden + b[3]); |
| 71 | + result = one - result; |
| 72 | + result = std::exp(ysq) * result; |
| 73 | + return result; |
| 74 | + } else if (y <= four) { |
| 75 | + // evaluate erfc for 0.46875 < |x| <= 4.0 |
| 76 | + xnum = c[8] * y; |
| 77 | + xden = y; |
| 78 | + for (int i{0}; i < 7; ++i) { |
| 79 | + xnum = (xnum + c[i]) * y; |
| 80 | + xden = (xden + d[i]) * y; |
| 81 | + } |
| 82 | + result = (xnum + c[7]) / (xden + d[7]); |
| 83 | + } else { |
| 84 | + // evaluate erfc for |x| > 4.0 |
| 85 | + result = zero; |
| 86 | + if (y >= xhuge) { |
| 87 | + if (y < xmax) { |
| 88 | + result = rsqrtpi / y; |
| 89 | + } |
| 90 | + } else { |
| 91 | + ysq = one / (y * y); |
| 92 | + xnum = p[5] * ysq; |
| 93 | + xden = ysq; |
| 94 | + for (int i{0}; i < 4; ++i) { |
| 95 | + xnum = (xnum + p[i]) * ysq; |
| 96 | + xden = (xden + q[i]) * ysq; |
| 97 | + } |
| 98 | + result = ysq * (xnum + p[4]) / (xden + q[4]); |
| 99 | + result = (rsqrtpi - result) / y; |
| 100 | + } |
| 101 | + } |
| 102 | + // fix up for negative argument, erf, etc. |
| 103 | + if (x < zero) { |
| 104 | + if (x < xneg) { |
| 105 | + result = std::numeric_limits<T>::max(); |
| 106 | + } else { |
| 107 | + ysq = trunc(x * sixteen) / sixteen; |
| 108 | + del = (x - ysq) * (x + ysq); |
| 109 | + y = std::exp((ysq * ysq)) * std::exp((del)); |
| 110 | + result = (y + y) - result; |
| 111 | + } |
| 112 | + } |
| 113 | + return result; |
| 114 | +} |
| 115 | +} // namespace Fortran::common |
| 116 | +#endif // FORTRAN_COMMON_ERFC_SCALED_H_ |
0 commit comments