|
13 | 13 |
|
14 | 14 | static const struct data |
15 | 15 | { |
16 | | - double poly_even[2]; |
17 | | - double c0, c2, c1, c3, c5; |
| 16 | + double c1, c3, c2, c4, c0; |
18 | 17 | double shift, log10_2, log2_10_hi, log2_10_lo, scale_thres, special_bound; |
19 | 18 | } data = { |
20 | 19 | /* Coefficients generated using Remez algorithm. |
21 | 20 | rel error: 0x1.9fcb9b3p-60 |
22 | 21 | abs error: 0x1.a20d9598p-60 in [ -log10(2)/128, log10(2)/128 ] |
23 | 22 | max ulp err 0.52 +0.5. */ |
24 | | - .poly_even = { 0x1.53524c73cd32ap1, 0x1.2bd77b1361ef6p0 }, |
| 23 | + .c0 = 0x1.26bb1bbb55516p1, |
| 24 | + .c1 = 0x1.53524c73cd32ap1, |
| 25 | + .c2 = 0x1.0470591daeafbp1, |
| 26 | + .c3 = 0x1.2bd77b1361ef6p0, |
| 27 | + .c4 = 0x1.142b5d54e9621p-1, |
25 | 28 | /* 1.5*2^46+1023. This value is further explained below. */ |
26 | | - .c1 = 0x1.0470591daeafbp1, |
27 | | - .c3 = 0x1.142b5d54e9621p-1, |
28 | | - .c5 = 0x1.26bb1bbb55516p1, |
29 | 29 | .shift = 0x1.800000000ffc0p+46, |
30 | 30 | .log10_2 = 0x1.a934f0979a371p1, /* log2(10) = 1/log10(2). */
31 | 31 | .log2_10_hi = 0x1.34413509f79ffp-2, /* log10(2) = 1/log2(10), high part. */
@@ -95,14 +95,14 @@ svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg) |
95 | 95 | comes at significant performance cost. */ |
96 | 96 | svuint64_t u = svreinterpret_u64 (z); |
97 | 97 | svfloat64_t scale = svexpa (u); |
98 | | - svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); |
| 98 | + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); |
99 | 99 | /* Approximate exp10(r) using polynomial. */ |
100 | 100 | svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); |
101 | | - svfloat64_t p01 = svmla_lane (sv_f64 (d->poly_even[0]), r, c13, 0); |
102 | | - svfloat64_t p23 = svmla_lane (sv_f64 (d->poly_even[1]), r, c13, 1); |
103 | | - svfloat64_t p04 = svmla_x (pg, p01, p23, r2); |
| 101 | + svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); |
| 102 | + svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); |
| 103 | + svfloat64_t p14 = svmla_x (pg, p12, p34, r2); |
104 | 104 |
|
105 | | - svfloat64_t y = svmla_x (pg, svmul_x (svptrue_b64 (), r, d->c5), r2, p04); |
| 105 | + svfloat64_t y = svmla_x (pg, svmul_x (svptrue_b64 (), r, d->c0), r2, p14); |
106 | 106 |
|
107 | 107 | /* Assemble result as exp10(x) = 2^n * exp10(r). If |x| > SpecialBound |
108 | 108 | multiplication may overflow, so use special case routine. */ |
|
0 commit comments