Skip to content

Commit 37f38ee

Browse files
authored
Improve accuracy
1 parent 1e94e6d commit 37f38ee

File tree

1 file changed

+163
-89
lines changed

1 file changed

+163
-89
lines changed

glm/gtx/fast_exponential.inl

Lines changed: 163 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,144 @@
22

33
namespace glm
44
{
5+
template<>
6+
GLM_FUNC_QUALIFIER float fastExp2(float x)
7+
{
8+
/*
9+
approximated using "exp2(x)" formula
10+
for +x = exp2(x)
11+
for -x = 1/exp2(abs(x))
12+
*/
13+
float mx, out, C0, C1, C2, C3, C4, C5;
14+
bool reciprocal;
15+
uint32_t exponent, tmpx;
16+
/* coefficients interval [0, 1] */
17+
C0 = 0.00189646114543331e-00f;
18+
C1 = 0.00894282898410912e-00f;
19+
C2 = 0.05586624630452070e-00f;
20+
C3 = 0.24013971109076948e-00f;
21+
C4 = 0.69315475247516734e-00f;
22+
C5 = 0.99999989311082671e-00f;
23+
mx = x;
24+
reciprocal = (*(uint32_t*)&x) & 0x80000000;
25+
tmpx = (*(uint32_t*)&x) & 0x7FFFFFFF;
26+
mx = *(float*)&tmpx;
27+
exponent = (uint32_t)mx;
28+
mx = mx - (float)exponent;
29+
out = (((((C0 * mx + C1) * mx + C2) * mx + C3) * mx + C4) * mx + C5);
30+
tmpx = (exponent + 127) << 23;
31+
out = (*(float*)&tmpx) * out;
32+
out = reciprocal ? (1.0f / out) : out;
33+
return out;
34+
}
35+
36+
template<>
37+
GLM_FUNC_QUALIFIER double fastExp2(double x)
38+
{
39+
/*
40+
approximated using "exp2(x)" formula
41+
for +x = exp2(x)
42+
for -x = 1/exp2(abs(x))
43+
*/
44+
double mx, out, C0, C1, C2, C3, C4, C5;
45+
bool reciprocal;
46+
uint64_t exponent, tmpx;
47+
/* coefficients interval [0, 1] */
48+
C0 = 0.00189646114543331e-00;
49+
C1 = 0.00894282898410912e-00;
50+
C2 = 0.05586624630452070e-00;
51+
C3 = 0.24013971109076948e-00;
52+
C4 = 0.69315475247516734e-00;
53+
C5 = 0.99999989311082671e-00;
54+
mx = x;
55+
reciprocal = (*(uint64_t*)&x) & 0x8000000000000000;
56+
tmpx = (*(uint64_t*)&x) & 0x7FFFFFFFFFFFFFFF;
57+
mx = *(double*)&tmpx;
58+
exponent = (uint64_t)mx;
59+
mx = mx - (double)exponent;
60+
out = (((((C0 * mx + C1) * mx + C2) * mx + C3) * mx + C4) * mx + C5);
61+
tmpx = (exponent + 1023) << 52;
62+
out = (*(double*)&tmpx) * out;
63+
out = reciprocal ? (1.0 / out) : out;
64+
return out;
65+
}
66+
67+
template<>
68+
GLM_FUNC_QUALIFIER float fastLog2(float x)
69+
{
70+
/*
71+
approximated using this formula "log2(x)"
72+
interval [1, 1.5]
73+
for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2...
74+
for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5)
75+
for x <= 1.0 = -log2(1/x)
76+
*/
77+
float mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6;
78+
bool low, low_mantissa;
79+
uint32_t tmpx;
80+
/* coefficients */
81+
C0 = -0.067508561412635e-00f;
82+
C1 = 0.605786644896372e-00f;
83+
C2 = -2.351461115180550e-00f;
84+
C3 = 5.175006419193522e-00f;
85+
C4 = -7.182524695365589e-00f;
86+
C5 = 7.064678543395325e-00f;
87+
C6 = -3.243977190921664e-00f;
88+
/* constants */
89+
l2 = 0.5849625f; /* log2(1.5) */
90+
inv_three_half = 0.6666667f; /* 1.0 / 1.5 */
91+
mx = x;
92+
low = mx < 1.0f;
93+
mx = low ? (1.0f / mx) : mx;
94+
tmpx = 1065353216 | ((*(uint32_t*)&mx) & 0x007FFFFF);
95+
lx = *(float*)&tmpx;
96+
low_mantissa = lx <= 1.5f;
97+
lx = low_mantissa ? lx : (lx * inv_three_half);
98+
poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6);
99+
poly = low_mantissa ? poly : (poly + l2);
100+
out = (((*(uint32_t*)&mx) >> 23) - 127) + poly;
101+
out = low ? -out : out;
102+
return out;
103+
}
104+
105+
template<>
106+
GLM_FUNC_QUALIFIER double fastLog2(double x)
107+
{
108+
/*
109+
approximated using this formula "log2(x)"
110+
interval [1, 1.5]
111+
for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2...
112+
for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5)
113+
for x <= 1.0 = -log2(1/x)
114+
*/
115+
double mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6;
116+
bool low, low_mantissa;
117+
uint64_t tmpx;
118+
/* coefficients */
119+
C0 = -0.067508561412635e-00;
120+
C1 = 0.605786644896372e-00;
121+
C2 = -2.351461115180550e-00;
122+
C3 = 5.175006419193522e-00;
123+
C4 = -7.182524695365589e-00;
124+
C5 = 7.064678543395325e-00;
125+
C6 = -3.243977190921664e-00;
126+
/* constants */
127+
l2 = 0.5849625; /* log2(1.5) */
128+
inv_three_half = 0.6666667; /* 1.0 / 1.5 */
129+
mx = x;
130+
low = mx < 1.0;
131+
mx = low ? (1.0 / mx) : mx;
132+
tmpx = 4607182418800017408U | ((*(uint64_t*)&mx) & 0x000FFFFFFFFFFFFF);
133+
lx = *(double*)&tmpx;
134+
low_mantissa = lx <= 1.5;
135+
lx = low_mantissa ? lx : (lx * inv_three_half);
136+
poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6);
137+
poly = low_mantissa ? poly : (poly + l2);
138+
out = (((*(uint64_t*)&mx) >> 52) - 1023) + poly;
139+
out = low ? -out : out;
140+
return out;
141+
}
142+
5143
// fastPow
6144
template<typename genType>
7145
GLM_FUNC_QUALIFIER genType fastPow(genType x, genType y)
@@ -12,27 +150,13 @@ namespace glm
12150
template<>
13151
GLM_FUNC_QUALIFIER float fastPow(float x, float y)
14152
{
15-
//negative y always reciprocal, pow(x, -y) = 1 / pow(x, abs(-y))
16-
//when x is lower than 1.0, you can use this formula. pow(x, y) = 1 / glm::fastPow(1 / x, y)
17-
// ex: when x is '0.3'... pow(0.3, y) = 1 / glm::fastPow(1 / 0.3, y)
18-
uint32_t mantissa = 1065353216U | ((*(uint32_t*)&x) & 0x007FFFFF);
19-
float mx = *(float*)&mantissa;
20-
float lnx = y * float(1.44269504088896338700) * ((((*(uint32_t*)&x) >> 23)-127) * float(0.69314718055994528623) + ((((float(-0.056571767549593956) * mx + float(0.4471786103806078)) * mx - float(1.4699399880154467)) * mx + float(2.8211719569953488)) * mx - float(1.7417780977156199)));
21-
uint32_t out = ((127U + uint32_t(lnx)) << 23U);
22-
lnx -= uint32_t(lnx);
23-
return (*(float*)(&out)) * ((float(0.34400110689651969) * lnx + float(0.65104678030290897)) * lnx + float(1.0024760564002857));
153+
return fastExp2<float>(y * fastLog2<float>(x));
24154
}
25155

26156
template<>
27157
GLM_FUNC_QUALIFIER double fastPow(double x, double y)
28158
{
29-
//negative y always reciprocal, pow(x, -y) = 1 / pow(x, abs(-y))
30-
uint64_t mantissa = 4607182418800017408U | ((*(uint64_t*)&x) & 0x000FFFFFFFFFFFFF);
31-
double mx = *(double*)&mantissa;
32-
double lnx = y * double(1.44269504088896338700) * ((((*(uint64_t*)&x) >> 52)-1023) * double(0.69314718055994528623) + ((((double(-0.056571767549593956) * mx + double(0.4471786103806078)) * mx - double(1.4699399880154467)) * mx + double(2.8211719569953488)) * mx - double(1.7417780977156199)));
33-
uint64_t out = ((uint64_t)(1023U + uint64_t(lnx)) << 52U);
34-
lnx -= uint64_t(lnx);
35-
return (*(double*)(&out)) * ((double(0.34400110689651969) * lnx + double(0.65104678030290897)) * lnx + double(1.0024760564002857));
159+
return fastExp2<double>(y * fastLog2<double>(x));
36160
}
37161

38162
template<length_t L, typename T, qualifier Q>
@@ -63,27 +187,13 @@ namespace glm
63187
template<>
64188
GLM_FUNC_QUALIFIER float fastExp(float x)
65189
{
66-
//negative x always reciprocal, exp(-x) = 1 / exp(abs(-x))
67-
uint32_t out = ((127U + uint32_t(x *= 1.44269504088896338700f)) << 23U);
68-
float mx = x - uint32_t(x);
69-
/*
70-
//(accurate) 2 degree polynomial exp2(), interval [0, 1]
71-
return (*(float*)(&out)) * ((float(0.34400110689651969) * mx + float(0.65104678030290897)) * mx + float(1.0024760564002857));
72-
*/
73-
return (*(float*)(&out)) * (mx + 0.95696433397203284f);
190+
return fastExp2<float>(1.4426950409f * x);
74191
}
75192

76193
template<>
77194
GLM_FUNC_QUALIFIER double fastExp(double x)
78195
{
79-
//negative x always reciprocal, exp(-x) = 1 / exp(abs(-x))
80-
uint64_t out = ((uint64_t)(1023U + uint64_t(x *= 1.44269504088896338700)) << 52U);
81-
double mx = x - uint64_t(x);
82-
/*
83-
//(accurate) 2 degree polynomial exp2(), interval [0, 1]
84-
return (*(double*)(&out)) * ((double(0.34400110689651969) * mx + double(0.65104678030290897)) * mx + double(1.0024760564002857));
85-
*/
86-
return (*(double*)(&out)) * (mx + 0.95696433397203284);
196+
return fastExp2<double>(1.4426950409 * x);
87197
}
88198
/* // Try to handle all values of float... but often shower than std::exp, glm::floor and the loop kill the performance
89199
GLM_FUNC_QUALIFIER float fastExp(float x)
@@ -125,61 +235,13 @@ namespace glm
125235
return detail::functor1<vec, L, T, T, Q>::call(fastExp, x);
126236
}
127237

128-
// fastLog
129-
template<typename genType>
130-
GLM_FUNC_QUALIFIER genType fastLog(genType x)
131-
{
132-
return std::log(x);
133-
}
134-
135-
// Slower than the VC7.1 function...
136-
template<>
137-
GLM_FUNC_QUALIFIER float fastLog(float x)
138-
{
139-
//x less than 1 always negative log(0.2) = -log(1 / 0.2)
140-
uint32_t mantissa = 1065353216U | ((*(uint32_t*)&x) & 0x007FFFFF);
141-
return (((*(uint32_t*)&x) >> 23)-127) * float(0.69314718055994528623) + ((float(-0.2390307190544787) * (*(float*)&mantissa) + float(1.4033913763229589)) * (*(float*)&mantissa) - float(1.1609366765682689));
142-
}
143-
144-
template<>
145-
GLM_FUNC_QUALIFIER double fastLog(double x)
146-
{
147-
//x less than 1 always negative log(0.2) = -log(1 / 0.2)
148-
uint64_t mantissa = 4607182418800017408U | ((*(uint64_t*)&x) & 0x000FFFFFFFFFFFFF);
149-
return (((*(uint64_t*)&x) >> 52)-1023) * double(0.69314718055994528623) + ((double(-0.2390307190544787) * (*(double*)&mantissa) + double(1.4033913763229589)) * (*(double*)&mantissa) - double(1.1609366765682689));
150-
}
151-
152-
template<length_t L, typename T, qualifier Q>
153-
GLM_FUNC_QUALIFIER vec<L, T, Q> fastLog(vec<L, T, Q> const& x)
154-
{
155-
return detail::functor1<vec, L, T, T, Q>::call(fastLog, x);
156-
}
157-
158238
//fastExp2, ln2 = 0.69314718055994530941723212145818f
159239
template<typename genType>
160240
GLM_FUNC_QUALIFIER genType fastExp2(genType x)
161241
{
162242
return fastExp(static_cast<genType>(0.69314718055994530941723212145818) * x);
163243
}
164244

165-
template<>
166-
GLM_FUNC_QUALIFIER float fastExp2(float x)
167-
{
168-
//negative x always reciprocal, exp2(-x) = 1 / exp2(abs(-x))
169-
uint32_t out = ((127U + uint32_t(x)) << 23U);
170-
x -= uint32_t(x);
171-
return (*(float*)(&out)) * ((float(0.34400110689651969) * x + float(0.65104678030290897)) * x + float(1.0024760564002857));
172-
}
173-
174-
template<>
175-
GLM_FUNC_QUALIFIER double fastExp2(double x)
176-
{
177-
//negative x always reciprocal, exp2(-x) = 1 / exp2(abs(-x))
178-
uint64_t out = ((uint64_t)(1023U + uint64_t(x)) << 52U);
179-
x -= uint64_t(x);
180-
return (*(double*)(&out)) * ((double(0.34400110689651969) * x + double(0.65104678030290897)) * x + double(1.0024760564002857));
181-
}
182-
183245
template<length_t L, typename T, qualifier Q>
184246
GLM_FUNC_QUALIFIER vec<L, T, Q> fastExp2(vec<L, T, Q> const& x)
185247
{
@@ -193,23 +255,35 @@ namespace glm
193255
return fastLog(x) / static_cast<genType>(0.69314718055994530941723212145818);
194256
}
195257

196-
template<>
197-
GLM_FUNC_QUALIFIER float fastLog2(float x)
258+
template<length_t L, typename T, qualifier Q>
259+
GLM_FUNC_QUALIFIER vec<L, T, Q> fastLog2(vec<L, T, Q> const& x)
198260
{
199-
uint32_t mantissa = 1065353216U | ((*(uint32_t*)&x) & 0x007FFFFF);
200-
return (((*(uint32_t*)&x) >> 23)-127) + ((float(-0.34484843300001944) * (*(float*)&mantissa) + float(2.0246657790474698)) * (*(float*)&mantissa) - float(1.674877586071156));
261+
return detail::functor1<vec, L, T, T, Q>::call(fastLog2, x);
201262
}
202263

264+
// fastLog
265+
template<typename genType>
266+
GLM_FUNC_QUALIFIER genType fastLog(genType x)
267+
{
268+
return std::log(x);
269+
}
270+
271+
// Slower than the VC7.1 function...
203272
template<>
204-
GLM_FUNC_QUALIFIER double fastLog2(double x)
273+
GLM_FUNC_QUALIFIER float fastLog(float x)
205274
{
206-
uint64_t mantissa = 4607182418800017408U | ((*(uint64_t*)&x) & 0x000FFFFFFFFFFFFF);
207-
return (((*(uint64_t*)&x) >> 52)-1023) + ((double(-0.34484843300001944) * (*(double*)&mantissa) + double(2.0246657790474698)) * (*(double*)&mantissa) - double(1.674877586071156));
275+
return fastLog2<float>(x) * 0.69314718055994530941723212145818f;
276+
}
277+
278+
template<>
279+
GLM_FUNC_QUALIFIER double fastLog(double x)
280+
{
281+
return fastLog2<double>(x) * 0.69314718055994530941723212145818;
208282
}
209283

210284
template<length_t L, typename T, qualifier Q>
211-
GLM_FUNC_QUALIFIER vec<L, T, Q> fastLog2(vec<L, T, Q> const& x)
285+
GLM_FUNC_QUALIFIER vec<L, T, Q> fastLog(vec<L, T, Q> const& x)
212286
{
213-
return detail::functor1<vec, L, T, T, Q>::call(fastLog2, x);
287+
return detail::functor1<vec, L, T, T, Q>::call(fastLog, x);
214288
}
215289
}//namespace glm

0 commit comments

Comments
 (0)