diff --git a/glm/gtx/fast_exponential.hpp b/glm/gtx/fast_exponential.hpp index 9fae32521..3df90401a 100644 --- a/glm/gtx/fast_exponential.hpp +++ b/glm/gtx/fast_exponential.hpp @@ -15,6 +15,7 @@ // Dependency: #include "../glm.hpp" +#include "../detail/type_float.hpp" #ifndef GLM_ENABLE_EXPERIMENTAL # error "GLM: GLM_GTX_fast_exponential is an experimental extension and may change in the future. Use #define GLM_ENABLE_EXPERIMENTAL before including it, if you really want to use it." diff --git a/glm/gtx/fast_exponential.inl b/glm/gtx/fast_exponential.inl index 5b1174246..10b6d2f8c 100644 --- a/glm/gtx/fast_exponential.inl +++ b/glm/gtx/fast_exponential.inl @@ -2,12 +2,154 @@ namespace glm { - // fastPow: + template<> + GLM_FUNC_QUALIFIER float fastExp2(float x) + { + //approximated using "exp2(x)" formula + //for +x = exp2(x) + //for -x = 1/exp2(abs(x)) + float out, C0, C1, C2, C3, C4, C5; + bool reciprocal; + int exponent; + detail::float_t mx, tmpx; + // coefficients interval [0, 1] + C0 = 0.001896461e-00f; + C1 = 0.008942828e-00f; + C2 = 0.055866246e-00f; + C3 = 0.240139711e-00f; + C4 = 0.693154752e-00f; + C5 = 0.999999893e-00f; + mx.f = x; + reciprocal = (mx.i & int(0x80000000)) != 0; + mx.i = mx.i & 0x7FFFFFFF; + exponent = static_cast(mx.f); + mx.f = mx.f - static_cast(exponent); + out = (((((C0 * mx.f + C1) * mx.f + C2) * mx.f + C3) * mx.f + C4) * mx.f + C5); + tmpx.i = (exponent + 127) << 23; + out = tmpx.f * out; + out = reciprocal ? (1.0f / out) : out; + return out; + } + + template<> + GLM_FUNC_QUALIFIER double fastExp2(double x) + { + //approximated using "exp2(x)" formula + //for +x = exp2(x) + //for -x = 1/exp2(abs(x)) + double out, C0, C1, C2, C3, C4, C5; + bool reciprocal; + detail::int64 exponent; + detail::float_t mx, tmpx; + //coefficients interval [0, 1] + C0 = 0.00189646114543331e-00; + C1 = 0.00894282898410912e-00; + C2 = 0.05586624630452070e-00; + C3 = 0.24013971109076948e-00; + C4 = 0.69315475247516734e-00; + C5 = 0.99999989311082671e-00; + mx.f = x; + reciprocal = (mx.i & detail::int64(0x8000000000000000)) != 0; + mx.i = mx.i & 0x7FFFFFFFFFFFFFFF; + exponent = static_cast(mx.f); + mx.f = mx.f - static_cast(exponent); + out = (((((C0 * mx.f + C1) * mx.f + C2) * mx.f + C3) * mx.f + C4) * mx.f + C5); + tmpx.i = (exponent + 1023) << 52; + out = tmpx.f * out; + out = reciprocal ? (1.0 / out) : out; + return out; + } + + template<> + GLM_FUNC_QUALIFIER float fastLog2(float x) + { + //approximated using this formula "log2(x)" + //interval [1, 1.5] + //for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2... + //for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5) + //for x <= 1.0 = -log2(1/x) + float l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6; + bool low, low_mantissa; + detail::float_t tmpx, mx; + // coefficients + C0 = -0.06750856e-00f; + C1 = 0.60578664e-00f; + C2 = -2.35146111e-00f; + C3 = 5.17500641e-00f; + C4 = -7.18252469e-00f; + C5 = 7.06467854e-00f; + C6 = -3.24397719e-00f; + // constants + l2 = 0.5849625f; //log2(1.5) + inv_three_half = 0.6666667f; // 1.0 / 1.5 + mx.f = x; + low = mx.f < 1.0f; + mx.f = low ? (1.0f / mx.f) : mx.f; + tmpx.i = 1065353216 | (mx.i & int(0x007FFFFF)); + lx = tmpx.f; + low_mantissa = lx <= 1.5f; + lx = low_mantissa ? lx : (lx * inv_three_half); + poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6); + poly = low_mantissa ? poly : (poly + l2); + out = static_cast((mx.i >> 23) - 127) + poly; + out = low ? -out : out; + return out; + } + + template<> + GLM_FUNC_QUALIFIER double fastLog2(double x) + { + //approximated using this formula "log2(x)" + //interval [1, 1.5] + //for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2... + //for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5) + //for x <= 1.0 = -log2(1/x) + double l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6; + bool low, low_mantissa; + detail::float_t tmpx, mx; + // coefficients + C0 = -0.067508561412635e-00; + C1 = 0.605786644896372e-00; + C2 = -2.351461115180550e-00; + C3 = 5.175006419193522e-00; + C4 = -7.182524695365589e-00; + C5 = 7.064678543395325e-00; + C6 = -3.243977190921664e-00; + // constants + l2 = 0.5849625; // log2(1.5) + inv_three_half = 0.6666667; // 1.0 / 1.5 + mx.f = x; + low = mx.f < 1.0; + mx.f = low ? (1.0 / mx.f) : mx.f; + tmpx.i = 4607182418800017408U | (mx.i & detail::int64(0x000FFFFFFFFFFFFF)); + lx = tmpx.f; + low_mantissa = lx <= 1.5; + lx = low_mantissa ? lx : (lx * inv_three_half); + poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6); + poly = low_mantissa ? poly : (poly + l2); + out = static_cast((mx.i >> 52) - 1023) + poly; + out = low ? -out : out; + return out; + } + + // fastPow template GLM_FUNC_QUALIFIER genType fastPow(genType x, genType y) { return exp(y * log(x)); } + + template<> + GLM_FUNC_QUALIFIER float fastPow(float x, float y) + { + return fastExp2(y * fastLog2(x)); + } + + template<> + GLM_FUNC_QUALIFIER double fastPow(double x, double y) + { + return fastExp2(y * fastLog2(x)); + } template GLM_FUNC_QUALIFIER vec fastPow(vec const& x, vec const& y) @@ -34,17 +176,16 @@ namespace glm } // fastExp - // Note: This function provides accurate results only for value between -1 and 1, else avoid it. - template - GLM_FUNC_QUALIFIER T fastExp(T x) + template<> + GLM_FUNC_QUALIFIER float fastExp(float x) { - // This has a better looking and same performance in release mode than the following code. However, in debug mode it's slower. - // return 1.0f + x * (1.0f + x * 0.5f * (1.0f + x * 0.3333333333f * (1.0f + x * 0.25 * (1.0f + x * 0.2f)))); - T x2 = x * x; - T x3 = x2 * x; - T x4 = x3 * x; - T x5 = x4 * x; - return T(1) + x + (x2 * T(0.5)) + (x3 * T(0.1666666667)) + (x4 * T(0.041666667)) + (x5 * T(0.008333333333)); + return fastExp2(1.4426950409f * x); + } + + template<> + GLM_FUNC_QUALIFIER double fastExp(double x) + { + return fastExp2(1.4426950409 * x); } /* // Try to handle all values of float... but often shower than std::exp, glm::floor and the loop kill the performance GLM_FUNC_QUALIFIER float fastExp(float x) @@ -86,28 +227,6 @@ namespace glm return detail::functor1::call(fastExp, x); } - // fastLog - template - GLM_FUNC_QUALIFIER genType fastLog(genType x) - { - return std::log(x); - } - - /* Slower than the VC7.1 function... - GLM_FUNC_QUALIFIER float fastLog(float x) - { - float y1 = (x - 1.0f) / (x + 1.0f); - float y2 = y1 * y1; - return 2.0f * y1 * (1.0f + y2 * (0.3333333333f + y2 * (0.2f + y2 * 0.1428571429f))); - } - */ - - template - GLM_FUNC_QUALIFIER vec fastLog(vec const& x) - { - return detail::functor1::call(fastLog, x); - } - //fastExp2, ln2 = 0.69314718055994530941723212145818f template GLM_FUNC_QUALIFIER genType fastExp2(genType x) @@ -127,10 +246,36 @@ namespace glm { return fastLog(x) / static_cast(0.69314718055994530941723212145818); } - + template GLM_FUNC_QUALIFIER vec fastLog2(vec const& x) { return detail::functor1::call(fastLog2, x); } + + // fastLog + template + GLM_FUNC_QUALIFIER genType fastLog(genType x) + { + return std::log(x); + } + + // Slower than the VC7.1 function... + template<> + GLM_FUNC_QUALIFIER float fastLog(float x) + { + return fastLog2(x) * 0.69314718055994530941723212145818f; + } + + template<> + GLM_FUNC_QUALIFIER double fastLog(double x) + { + return fastLog2(x) * 0.69314718055994530941723212145818; + } + + template + GLM_FUNC_QUALIFIER vec fastLog(vec const& x) + { + return detail::functor1::call(fastLog, x); + } }//namespace glm