|
10 | 10 | CONSTATTR double
|
11 | 11 | MATH_MANGLE(tgamma)(double x)
|
12 | 12 | {
|
13 |
| - const double pi = 3.14159265358979323846; |
14 |
| - |
15 |
| - |
16 | 13 | double ax = BUILTIN_ABS_F64(x);
|
17 | 14 | double ret;
|
18 | 15 |
|
19 |
| - if (ax > 0x1.0p-11) { |
20 |
| - // For x < 4, push to [1-3] range using gamma(x) = gamma(x+1) / x |
21 |
| - // For 4.5 < x < 6.5, push above 6.5 |
22 |
| - // [4,4.5) left alone |
23 |
| - double nterm = 1.0; |
24 |
| - double dterm = 1.0; |
25 |
| - double z = ax; |
26 |
| - if (ax < 4.5) { |
27 |
| - if (ax < 1.0) { |
28 |
| - dterm = z; |
29 |
| - z += 1.0; |
30 |
| - } else if (ax < 3.0) { |
31 |
| - ; // do nothing |
32 |
| - } else if (ax < 4.0) { |
33 |
| - z -= 1.0; |
34 |
| - nterm = z; |
| 16 | + if (ax < 16.0) { |
| 17 | + double n, d; |
| 18 | + double y = x; |
| 19 | + if (x > 0.0) { |
| 20 | + n = 1.0; |
| 21 | + while (y > 2.5) { |
| 22 | + n = MATH_MAD(n, y, -n); |
| 23 | + y = y - 1.0; |
| 24 | + n = MATH_MAD(n, y, -n); |
| 25 | + y = y - 1.0; |
35 | 26 | }
|
36 |
| - } else if (ax < 5.5) { |
37 |
| - dterm = MATH_MAD(z,z,z); |
38 |
| - z += 2.0; |
39 |
| - } else if (ax < 6.5) { |
40 |
| - dterm = z; |
41 |
| - z += 1.0; |
42 |
| - } |
43 |
| - |
44 |
| - double negadj = 1.0; |
45 |
| - if (x < 0.0) { |
46 |
| - negadj = -x * MATH_MANGLE(sinpi)(x); |
47 |
| - } |
48 |
| - |
49 |
| - double etonegz = MATH_MANGLE(exp)(-z); |
50 |
| - |
51 |
| - if (z < 4.5) { |
52 |
| - const double rn0 = 297.312130630940277; |
53 |
| - const double rn1 = 16926.1409177878806; |
54 |
| - const double rn2 = 131675.407800922036; |
55 |
| - const double rn3 = 344586.743316038732; |
56 |
| - const double rn4 = 440619.954224349898; |
57 |
| - const double rn5 = 275507.567385621460; |
58 |
| - const double rn6 = 84657.9644812230335; |
59 |
| - |
60 |
| - const double rd0 = 1.00000000000000000; |
61 |
| - const double rd1 = -13.3400904528209096; |
62 |
| - const double rd2 = 3270.94389286527964; |
63 |
| - const double rd3 = 41972.5365974090031; |
64 |
| - const double rd4 = 123293.896672792281; |
65 |
| - const double rd5 = 166739.899991898533; |
66 |
| - const double rd6 = 107097.146935059144; |
67 |
| - const double rd7 = 33773.6414083704053; |
68 |
| - |
69 |
| - double num = MATH_MAD(z, |
70 |
| - MATH_MAD(z, |
71 |
| - MATH_MAD(z, |
72 |
| - MATH_MAD(z, |
73 |
| - MATH_MAD(z, |
74 |
| - MATH_MAD(z, rn6, rn5), |
75 |
| - rn4), |
76 |
| - rn3), |
77 |
| - rn2), |
78 |
| - rn1), |
79 |
| - rn0) * nterm; |
80 |
| - |
81 |
| - double den = MATH_MAD(z, |
82 |
| - MATH_MAD(z, |
83 |
| - MATH_MAD(z, |
84 |
| - MATH_MAD(z, |
85 |
| - MATH_MAD(z, |
86 |
| - MATH_MAD(z, |
87 |
| - MATH_MAD(z, rd7, rd6), |
88 |
| - rd5), |
89 |
| - rd4), |
90 |
| - rd3), |
91 |
| - rd2), |
92 |
| - rd1), |
93 |
| - rd0) * dterm; |
94 |
| - |
95 |
| - double zpow = MATH_MANGLE(powr)(z, z+0.5); |
96 |
| - |
97 |
| - if (x >= 0.0) { |
98 |
| - ret = etonegz * zpow * MATH_DIV(num,den); |
99 |
| - } else { |
100 |
| - ret = MATH_DIV(den*pi, negadj*etonegz*zpow*num); |
101 |
| - ret = BUILTIN_FRACTION_F64(x) == 0.0 ? QNAN_F64 : ret; |
| 27 | + if (y > 1.5) { |
| 28 | + n = MATH_MAD(n, y, -n); |
| 29 | + y = y - 1.0; |
102 | 30 | }
|
| 31 | + if (x >= 0.5) |
| 32 | + y = y - 1.0; |
| 33 | + d = x < 0.5 ? x : 1.0; |
103 | 34 | } else {
|
104 |
| - const double c0 = 2.5066282746310007; |
105 |
| - const double c1 = 0.20888568955258338; |
106 |
| - const double c2 = 0.008703570398024307; |
107 |
| - const double c3 = -0.0067210904740298821; |
108 |
| - const double c4 = -0.00057520123811017124; |
109 |
| - const double c5 = 0.0019652948815832029; |
110 |
| - const double c6 = 0.00017478252120455912; |
111 |
| - const double c7 = -0.0014843411351582762; |
112 |
| - const double c8 = -0.00012963757321125544; |
113 |
| - const double c9 = 0.0021043112297532062; |
114 |
| - const double c10 = 0.00018059994565555043; |
115 |
| - const double c11 = -0.0047987856705463457; |
116 |
| - const double c12 = -0.0004073678593815252; |
117 |
| - const double c13 = 0.01605085033194459500; |
118 |
| - const double c14 = 0.0013539922801590941; |
119 |
| - const double c15 = -0.074015421268427375; |
120 |
| - const double c16 = -0.0062208086788087787; |
121 |
| - const double c17 = 0.45004033385625097; |
122 |
| - |
123 |
| - double rz = MATH_RCP(z); |
124 |
| - |
125 |
| - double poly = MATH_MAD(rz, MATH_MAD(rz, MATH_MAD(rz, MATH_MAD(rz, MATH_MAD(rz, |
126 |
| - MATH_MAD(rz, MATH_MAD(rz, MATH_MAD(rz, MATH_MAD(rz, MATH_MAD(rz, |
127 |
| - MATH_MAD(rz, MATH_MAD(rz, MATH_MAD(rz, MATH_MAD(rz, MATH_MAD(rz, |
128 |
| - MATH_MAD(rz, MATH_MAD(rz, |
129 |
| - c17, c16), c15), c14), c13), c12), c11), c10), c9), c8), |
130 |
| - c7), c6), c5), c4), c3), c2), c1), c0); |
131 |
| - |
132 |
| - double zpow = MATH_MANGLE(powr)(z, MATH_MAD(0.5, z, -0.25)); |
133 |
| - if (x >= 0.0) { |
134 |
| - ret = MATH_DIV(etonegz*zpow*zpow*poly, dterm); |
135 |
| - ret = x > 0x1.573fae561f647p+7 ? PINF_F64 : ret; |
136 |
| - } else if (x < 0.0) { |
137 |
| - if (x >= -170.5) { |
138 |
| - ret = MATH_DIV(pi*dterm, etonegz*zpow*zpow*poly*negadj); |
139 |
| - } else if (x >= -184.0) { |
140 |
| - ret = MATH_DIV(MATH_DIV(pi*dterm, etonegz*zpow*poly), zpow*negadj); |
141 |
| - } else { |
142 |
| - ret = BUILTIN_COPYSIGN_F64(0.0, negadj); |
143 |
| - } |
144 |
| - ret = BUILTIN_FRACTION_F64(x) == 0.0 ? QNAN_F64 : ret; |
145 |
| - } else { |
146 |
| - ret = x; |
| 35 | + d = x; |
| 36 | + while (y < -1.5) { |
| 37 | + d = MATH_MAD(d, y, d); |
| 38 | + y = y + 1.0; |
| 39 | + d = MATH_MAD(d, y, d); |
| 40 | + y = y + 1.0; |
147 | 41 | }
|
| 42 | + if (y < -0.5) { |
| 43 | + d = MATH_MAD(d, y, d); |
| 44 | + y = y + 1.0; |
| 45 | + } |
| 46 | + n = 1.0; |
148 | 47 | }
|
| 48 | + double qt = MATH_MAD(y, MATH_MAD(y, MATH_MAD(y, MATH_MAD(y, |
| 49 | + MATH_MAD(y, MATH_MAD(y, MATH_MAD(y, MATH_MAD(y, |
| 50 | + MATH_MAD(y, MATH_MAD(y, MATH_MAD(y, MATH_MAD(y, |
| 51 | + MATH_MAD(y, |
| 52 | + -0x1.aed75feec7b9ap-23, 0x1.31854a0be3cd3p-20), |
| 53 | + -0x1.5037d6a97a8b7p-20), -0x1.51d67f2cdbcfbp-16), |
| 54 | + 0x1.0c8ab2ac5112dp-13), -0x1.c364ce9b5e149p-13), |
| 55 | + -0x1.317113a39f929p-10), 0x1.d919c501178a3p-8), |
| 56 | + -0x1.3b4af282da690p-7), -0x1.59af103bf2cd0p-5), |
| 57 | + 0x1.5512320b432ccp-3), -0x1.5815e8fa28886p-5), |
| 58 | + -0x1.4fcf4026afa24p-1), 0x1.2788cfc6fb61cp-1); |
| 59 | + |
| 60 | + ret = MATH_DIV(n, MATH_MAD(d, y*qt, d)); |
| 61 | + ret = x == 0.0 ? BUILTIN_COPYSIGN_F64(PINF_F64, x) : ret; |
| 62 | + ret = x < 0.0 && BUILTIN_FRACTION_F64(x) == 0.0 ? QNAN_F64 : ret; |
149 | 63 | } else {
|
150 |
| - const double c0 = -0x1.2788cfc6fb619p-1; |
151 |
| - const double c1 = 0x1.fa658c23b1578p-1; |
152 |
| - const double c2 = -0x1.d0a118f324b63p-1; |
153 |
| - const double c3 = 0x1.f6a51055096b5p-1; |
154 |
| - |
155 |
| - ret = MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, c3, c2), c1), c0) + MATH_RCP(x); |
| 64 | + const double sqrt2pi = 0x1.40d931ff62706p+1; |
| 65 | + const double sqrtpiby2 = 0x1.40d931ff62706p+0; |
| 66 | + |
| 67 | + double t1 = MATH_MANGLE(powr)(ax, MATH_MAD(ax, 0.5, -0.25)); |
| 68 | + double t2 = MATH_MANGLE(exp)(-ax); |
| 69 | + double xr = MATH_FAST_RCP(ax); |
| 70 | + double pt = MATH_MAD(xr, MATH_MAD(xr, MATH_MAD(xr, MATH_MAD(xr, |
| 71 | + MATH_MAD(xr, MATH_MAD(xr, |
| 72 | + -0x1.2b04c5ea74bbfp-11, 0x1.14869344f1d9bp-14), |
| 73 | + 0x1.9b3457156ffefp-11), -0x1.e1427e86ee097p-13), |
| 74 | + -0x1.5f7266f67c4e0p-9), 0x1.c71c71c0f96adp-9), |
| 75 | + 0x1.5555555555a28p-4); |
| 76 | + |
| 77 | + if (x > 0.0) { |
| 78 | + double gt = sqrt2pi*t2*t1*t1; |
| 79 | + double g = MATH_MAD(gt, xr*pt, gt); |
| 80 | + ret = x > 0x1.573fae561f646p+7 ? PINF_F64 : g; |
| 81 | + } else { |
| 82 | + double s = -x * MATH_MANGLE(sinpi)(x); |
| 83 | + if (x > -170.5) { |
| 84 | + double d = s*t2*t1*t1; |
| 85 | + ret = MATH_DIV(sqrtpiby2, MATH_MAD(d, xr*pt, d)); |
| 86 | + } else if (x > -184.0) { |
| 87 | + double d = t2*t1; |
| 88 | + ret = MATH_DIV(MATH_DIV(sqrtpiby2, MATH_MAD(d, xr*pt, d)), s*t1); |
| 89 | + } else |
| 90 | + ret = BUILTIN_COPYSIGN_F64(0.0, s); |
| 91 | + ret = BUILTIN_FRACTION_F64(x) == 0.0 || BUILTIN_ISNAN_F64(x) ? QNAN_F64 : ret; |
| 92 | + } |
156 | 93 | }
|
157 | 94 |
|
158 | 95 | return ret;
|
|
0 commit comments