22
33namespace glm
44{
5+ template <>
6+ GLM_FUNC_QUALIFIER float fastExp2 (float x)
7+ {
8+ /*
9+ approximated using "exp2(x)" formula
10+ for +x = exp2(x)
11+ for -x = 1/exp2(abs(x))
12+ */
13+ float mx, out, C0, C1, C2, C3, C4, C5;
14+ bool reciprocal;
15+ uint32_t exponent, tmpx;
16+ /* coefficients interval [0, 1] */
17+ C0 = 0 .00189646114543331e-00f ;
18+ C1 = 0 .00894282898410912e-00f ;
19+ C2 = 0 .05586624630452070e-00f ;
20+ C3 = 0 .24013971109076948e-00f ;
21+ C4 = 0 .69315475247516734e-00f ;
22+ C5 = 0 .99999989311082671e-00f ;
23+ mx = x;
24+ reciprocal = (*(uint32_t *)&x) & 0x80000000 ;
25+ tmpx = (*(uint32_t *)&x) & 0x7FFFFFFF ;
26+ mx = *(float *)&tmpx;
27+ exponent = (uint32_t )mx;
28+ mx = mx - (float )exponent;
29+ out = (((((C0 * mx + C1) * mx + C2) * mx + C3) * mx + C4) * mx + C5);
30+ tmpx = (exponent + 127 ) << 23 ;
31+ out = (*(float *)&tmpx) * out;
32+ out = reciprocal ? (1 .0f / out) : out;
33+ return out;
34+ }
35+
36+ template <>
37+ GLM_FUNC_QUALIFIER double fastExp2 (double x)
38+ {
39+ /*
40+ approximated using "exp2(x)" formula
41+ for +x = exp2(x)
42+ for -x = 1/exp2(abs(x))
43+ */
44+ double mx, out, C0, C1, C2, C3, C4, C5;
45+ bool reciprocal;
46+ uint64_t exponent, tmpx;
47+ /* coefficients interval [0, 1] */
48+ C0 = 0.00189646114543331e-00 ;
49+ C1 = 0.00894282898410912e-00 ;
50+ C2 = 0.05586624630452070e-00 ;
51+ C3 = 0.24013971109076948e-00 ;
52+ C4 = 0.69315475247516734e-00 ;
53+ C5 = 0.99999989311082671e-00 ;
54+ mx = x;
55+ reciprocal = (*(uint64_t *)&x) & 0x8000000000000000 ;
56+ tmpx = (*(uint64_t *)&x) & 0x7FFFFFFFFFFFFFFF ;
57+ mx = *(double *)&tmpx;
58+ exponent = (uint64_t )mx;
59+ mx = mx - (double )exponent;
60+ out = (((((C0 * mx + C1) * mx + C2) * mx + C3) * mx + C4) * mx + C5);
61+ tmpx = (exponent + 1023 ) << 52 ;
62+ out = (*(double *)&tmpx) * out;
63+ out = reciprocal ? (1.0 / out) : out;
64+ return out;
65+ }
66+
67+ template <>
68+ GLM_FUNC_QUALIFIER float fastLog2 (float x)
69+ {
70+ /*
71+ approximated using this formula "log2(x)"
72+ interval [1, 1.5]
73+ for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2...
74+ for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5)
75+ for x <= 1.0 = -log2(1/x)
76+ */
77+ float mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6;
78+ bool low, low_mantissa;
79+ uint32_t tmpx;
80+ /* coefficients */
81+ C0 = -0 .067508561412635e-00f ;
82+ C1 = 0 .605786644896372e-00f ;
83+ C2 = -2 .351461115180550e-00f ;
84+ C3 = 5 .175006419193522e-00f ;
85+ C4 = -7 .182524695365589e-00f ;
86+ C5 = 7 .064678543395325e-00f ;
87+ C6 = -3 .243977190921664e-00f ;
88+ /* constants */
89+ l2 = 0 .5849625f ; /* log2(1.5) */
90+ inv_three_half = 0 .6666667f ; /* 1.0 / 1.5 */
91+ mx = x;
92+ low = mx < 1 .0f ;
93+ mx = low ? (1 .0f / mx) : mx;
94+ tmpx = 1065353216 | ((*(uint32_t *)&mx) & 0x007FFFFF );
95+ lx = *(float *)&tmpx;
96+ low_mantissa = lx <= 1 .5f ;
97+ lx = low_mantissa ? lx : (lx * inv_three_half);
98+ poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6);
99+ poly = low_mantissa ? poly : (poly + l2);
100+ out = (((*(uint32_t *)&mx) >> 23 ) - 127 ) + poly;
101+ out = low ? -out : out;
102+ return out;
103+ }
104+
105+ template <>
106+ GLM_FUNC_QUALIFIER double fastLog2 (double x)
107+ {
108+ /*
109+ approximated using this formula "log2(x)"
110+ interval [1, 1.5]
111+ for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2...
112+ for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5)
113+ for x <= 1.0 = -log2(1/x)
114+ */
115+ double mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6;
116+ bool low, low_mantissa;
117+ uint64_t tmpx;
118+ /* coefficients */
119+ C0 = -0.067508561412635e-00 ;
120+ C1 = 0.605786644896372e-00 ;
121+ C2 = -2.351461115180550e-00 ;
122+ C3 = 5.175006419193522e-00 ;
123+ C4 = -7.182524695365589e-00 ;
124+ C5 = 7.064678543395325e-00 ;
125+ C6 = -3.243977190921664e-00 ;
126+ /* constants */
127+ l2 = 0.5849625 ; /* log2(1.5) */
128+ inv_three_half = 0.6666667 ; /* 1.0 / 1.5 */
129+ mx = x;
130+ low = mx < 1.0 ;
131+ mx = low ? (1.0 / mx) : mx;
132+ tmpx = 4607182418800017408U | ((*(uint64_t *)&mx) & 0x000FFFFFFFFFFFFF );
133+ lx = *(double *)&tmpx;
134+ low_mantissa = lx <= 1.5 ;
135+ lx = low_mantissa ? lx : (lx * inv_three_half);
136+ poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6);
137+ poly = low_mantissa ? poly : (poly + l2);
138+ out = (((*(uint64_t *)&mx) >> 52 ) - 1023 ) + poly;
139+ out = low ? -out : out;
140+ return out;
141+ }
142+
5143 // fastPow
6144 template <typename genType>
7145 GLM_FUNC_QUALIFIER genType fastPow (genType x, genType y)
@@ -12,27 +150,13 @@ namespace glm
12150 template <>
13151 GLM_FUNC_QUALIFIER float fastPow (float x, float y)
14152 {
15- // negative y always reciprocal, pow(x, -y) = 1 / pow(x, abs(-y))
16- // when x is lower than 1.0, you can use this formula. pow(x, y) = 1 / glm::fastPow(1 / x, y)
17- // ex: when x is '0.3'... pow(0.3, y) = 1 / glm::fastPow(1 / 0.3, y)
18- uint32_t mantissa = 1065353216U | ((*(uint32_t *)&x) & 0x007FFFFF );
19- float mx = *(float *)&mantissa;
20- float lnx = y * float (1.44269504088896338700 ) * ((((*(uint32_t *)&x) >> 23 )-127 ) * float (0.69314718055994528623 ) + ((((float (-0.056571767549593956 ) * mx + float (0.4471786103806078 )) * mx - float (1.4699399880154467 )) * mx + float (2.8211719569953488 )) * mx - float (1.7417780977156199 )));
21- uint32_t out = ((127U + uint32_t (lnx)) << 23U );
22- lnx -= uint32_t (lnx);
23- return (*(float *)(&out)) * ((float (0.34400110689651969 ) * lnx + float (0.65104678030290897 )) * lnx + float (1.0024760564002857 ));
153+ return fastExp2<float >(y * fastLog2<float >(x));
24154 }
25155
26156 template <>
27157 GLM_FUNC_QUALIFIER double fastPow (double x, double y)
28158 {
29- // negative y always reciprocal, pow(x, -y) = 1 / pow(x, abs(-y))
30- uint64_t mantissa = 4607182418800017408U | ((*(uint64_t *)&x) & 0x000FFFFFFFFFFFFF );
31- double mx = *(double *)&mantissa;
32- double lnx = y * double (1.44269504088896338700 ) * ((((*(uint64_t *)&x) >> 52 )-1023 ) * double (0.69314718055994528623 ) + ((((double (-0.056571767549593956 ) * mx + double (0.4471786103806078 )) * mx - double (1.4699399880154467 )) * mx + double (2.8211719569953488 )) * mx - double (1.7417780977156199 )));
33- uint64_t out = ((uint64_t )(1023U + uint64_t (lnx)) << 52U );
34- lnx -= uint64_t (lnx);
35- return (*(double *)(&out)) * ((double (0.34400110689651969 ) * lnx + double (0.65104678030290897 )) * lnx + double (1.0024760564002857 ));
159+ return fastExp2<double >(y * fastLog2<double >(x));
36160 }
37161
38162 template <length_t L, typename T, qualifier Q>
@@ -63,27 +187,13 @@ namespace glm
63187 template <>
64188 GLM_FUNC_QUALIFIER float fastExp (float x)
65189 {
66- // negative x always reciprocal, exp(-x) = 1 / exp(abs(-x))
67- uint32_t out = ((127U + uint32_t (x *= 1 .44269504088896338700f )) << 23U );
68- float mx = x - uint32_t (x);
69- /*
70- //(accurate) 2 degree polynomial exp2(), interval [0, 1]
71- return (*(float*)(&out)) * ((float(0.34400110689651969) * mx + float(0.65104678030290897)) * mx + float(1.0024760564002857));
72- */
73- return (*(float *)(&out)) * (mx + 0 .95696433397203284f );
190+ return fastExp2<float >(1 .4426950409f * x);
74191 }
75192
76193 template <>
77194 GLM_FUNC_QUALIFIER double fastExp (double x)
78195 {
79- // negative x always reciprocal, exp(-x) = 1 / exp(abs(-x))
80- uint64_t out = ((uint64_t )(1023U + uint64_t (x *= 1.44269504088896338700 )) << 52U );
81- double mx = x - uint64_t (x);
82- /*
83- //(accurate) 2 degree polynomial exp2(), interval [0, 1]
84- return (*(double*)(&out)) * ((double(0.34400110689651969) * mx + double(0.65104678030290897)) * mx + double(1.0024760564002857));
85- */
86- return (*(double *)(&out)) * (mx + 0.95696433397203284 );
196+ return fastExp2<double >(1.4426950409 * x);
87197 }
88198 /* // Try to handle all values of float... but often shower than std::exp, glm::floor and the loop kill the performance
89199 GLM_FUNC_QUALIFIER float fastExp(float x)
@@ -125,61 +235,13 @@ namespace glm
125235 return detail::functor1<vec, L, T, T, Q>::call (fastExp, x);
126236 }
127237
128- // fastLog
129- template <typename genType>
130- GLM_FUNC_QUALIFIER genType fastLog (genType x)
131- {
132- return std::log (x);
133- }
134-
135- // Slower than the VC7.1 function...
136- template <>
137- GLM_FUNC_QUALIFIER float fastLog (float x)
138- {
139- // x less than 1 always negative log(0.2) = -log(1 / 0.2)
140- uint32_t mantissa = 1065353216U | ((*(uint32_t *)&x) & 0x007FFFFF );
141- return (((*(uint32_t *)&x) >> 23 )-127 ) * float (0.69314718055994528623 ) + ((float (-0.2390307190544787 ) * (*(float *)&mantissa) + float (1.4033913763229589 )) * (*(float *)&mantissa) - float (1.1609366765682689 ));
142- }
143-
144- template <>
145- GLM_FUNC_QUALIFIER double fastLog (double x)
146- {
147- // x less than 1 always negative log(0.2) = -log(1 / 0.2)
148- uint64_t mantissa = 4607182418800017408U | ((*(uint64_t *)&x) & 0x000FFFFFFFFFFFFF );
149- return (((*(uint64_t *)&x) >> 52 )-1023 ) * double (0.69314718055994528623 ) + ((double (-0.2390307190544787 ) * (*(double *)&mantissa) + double (1.4033913763229589 )) * (*(double *)&mantissa) - double (1.1609366765682689 ));
150- }
151-
152- template <length_t L, typename T, qualifier Q>
153- GLM_FUNC_QUALIFIER vec<L, T, Q> fastLog (vec<L, T, Q> const & x)
154- {
155- return detail::functor1<vec, L, T, T, Q>::call (fastLog, x);
156- }
157-
158238 // fastExp2, ln2 = 0.69314718055994530941723212145818f
159239 template <typename genType>
160240 GLM_FUNC_QUALIFIER genType fastExp2 (genType x)
161241 {
162242 return fastExp (static_cast <genType>(0.69314718055994530941723212145818 ) * x);
163243 }
164244
165- template <>
166- GLM_FUNC_QUALIFIER float fastExp2 (float x)
167- {
168- // negative x always reciprocal, exp2(-x) = 1 / exp2(abs(-x))
169- uint32_t out = ((127U + uint32_t (x)) << 23U );
170- x -= uint32_t (x);
171- return (*(float *)(&out)) * ((float (0.34400110689651969 ) * x + float (0.65104678030290897 )) * x + float (1.0024760564002857 ));
172- }
173-
174- template <>
175- GLM_FUNC_QUALIFIER double fastExp2 (double x)
176- {
177- // negative x always reciprocal, exp2(-x) = 1 / exp2(abs(-x))
178- uint64_t out = ((uint64_t )(1023U + uint64_t (x)) << 52U );
179- x -= uint64_t (x);
180- return (*(double *)(&out)) * ((double (0.34400110689651969 ) * x + double (0.65104678030290897 )) * x + double (1.0024760564002857 ));
181- }
182-
183245 template <length_t L, typename T, qualifier Q>
184246 GLM_FUNC_QUALIFIER vec<L, T, Q> fastExp2 (vec<L, T, Q> const & x)
185247 {
@@ -193,23 +255,35 @@ namespace glm
193255 return fastLog (x) / static_cast <genType>(0.69314718055994530941723212145818 );
194256 }
195257
196- template <>
197- GLM_FUNC_QUALIFIER float fastLog2 (float x)
258+ template <length_t L, typename T, qualifier Q >
259+ GLM_FUNC_QUALIFIER vec<L, T, Q> fastLog2 (vec<L, T, Q> const & x)
198260 {
199- uint32_t mantissa = 1065353216U | ((*(uint32_t *)&x) & 0x007FFFFF );
200- return (((*(uint32_t *)&x) >> 23 )-127 ) + ((float (-0.34484843300001944 ) * (*(float *)&mantissa) + float (2.0246657790474698 )) * (*(float *)&mantissa) - float (1.674877586071156 ));
261+ return detail::functor1<vec, L, T, T, Q>::call (fastLog2, x);
201262 }
202263
264+ // fastLog
265+ template <typename genType>
266+ GLM_FUNC_QUALIFIER genType fastLog (genType x)
267+ {
268+ return std::log (x);
269+ }
270+
271+ // Slower than the VC7.1 function...
203272 template <>
204- GLM_FUNC_QUALIFIER double fastLog2 ( double x)
273+ GLM_FUNC_QUALIFIER float fastLog ( float x)
205274 {
206- uint64_t mantissa = 4607182418800017408U | ((*(uint64_t *)&x) & 0x000FFFFFFFFFFFFF );
207- return (((*(uint64_t *)&x) >> 52 )-1023 ) + ((double (-0.34484843300001944 ) * (*(double *)&mantissa) + double (2.0246657790474698 )) * (*(double *)&mantissa) - double (1.674877586071156 ));
275+ return fastLog2<float >(x) * 0 .69314718055994530941723212145818f ;
276+ }
277+
278+ template <>
279+ GLM_FUNC_QUALIFIER double fastLog (double x)
280+ {
281+ return fastLog2<double >(x) * 0.69314718055994530941723212145818 ;
208282 }
209283
210284 template <length_t L, typename T, qualifier Q>
211- GLM_FUNC_QUALIFIER vec<L, T, Q> fastLog2 (vec<L, T, Q> const & x)
285+ GLM_FUNC_QUALIFIER vec<L, T, Q> fastLog (vec<L, T, Q> const & x)
212286 {
213- return detail::functor1<vec, L, T, T, Q>::call (fastLog2 , x);
287+ return detail::functor1<vec, L, T, T, Q>::call (fastLog , x);
214288 }
215289}// namespace glm
0 commit comments