From bfc01ce2666a18ec659b280acd2e6acf494d0504 Mon Sep 17 00:00:00 2001 From: ZvRzyan18 Date: Thu, 20 Feb 2025 14:01:44 +0800 Subject: [PATCH 1/5] glm::fastPow, glm::fastExp, glm::fastLog polynomial approximation --- glm/gtx/fast_exponential.inl | 111 ++++++++++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 16 deletions(-) diff --git a/glm/gtx/fast_exponential.inl b/glm/gtx/fast_exponential.inl index 5b1174246b..472960fa21 100644 --- a/glm/gtx/fast_exponential.inl +++ b/glm/gtx/fast_exponential.inl @@ -2,12 +2,38 @@ namespace glm { - // fastPow: + // fastPow template GLM_FUNC_QUALIFIER genType fastPow(genType x, genType y) { return exp(y * log(x)); } + + template<> + GLM_FUNC_QUALIFIER float fastPow(float x, float y) + { + //negative y always reciprocal, pow(x, -y) = 1 / pow(x, abs(-y)) + //when x is lower than 1.0, you can use this formula. pow(x, y) = 1 / glm::fastPow(1 / x, y) + // ex: when x is '0.3'... pow(0.3, y) = 1 / glm::fastPow(1 / 0.3, y) + uint32_t mantissa = 1065353216U | ((*(uint32_t*)&x) & 0x007FFFFF); + float mx = *(float*)&mantissa; + float lnx = y * float(1.44269504088896338700) * ((((*(uint32_t*)&x) >> 23)-127) * float(0.69314718055994528623) + ((((float(-0.056571767549593956) * mx + float(0.4471786103806078)) * mx - float(1.4699399880154467)) * mx + float(2.8211719569953488)) * mx - float(1.7417780977156199))); + uint32_t out = ((127U + uint32_t(lnx)) << 23U); + lnx -= uint32_t(lnx); + return (*(float*)(&out)) * ((float(0.34400110689651969) * lnx + float(0.65104678030290897)) * lnx + float(1.0024760564002857)); + } + + template<> + GLM_FUNC_QUALIFIER double fastPow(double x, double y) + { + //negative y always reciprocal, pow(x, -y) = 1 / pow(x, abs(-y)) + uint64_t mantissa = 4607182418800017408U | ((*(uint64_t*)&x) & 0x000FFFFFFFFFFFFF); + double mx = *(double*)&mantissa; + double lnx = y * double(1.44269504088896338700) * ((((*(uint64_t*)&x) >> 52)-1023) * double(0.69314718055994528623) + ((((double(-0.056571767549593956) * mx + double(0.4471786103806078)) * mx - double(1.4699399880154467)) * mx + double(2.8211719569953488)) * mx - double(1.7417780977156199))); + uint64_t out = ((uint64_t)(1023U + uint64_t(lnx)) << 52U); + lnx -= uint64_t(lnx); + return (*(double*)(&out)) * ((double(0.34400110689651969) * lnx + double(0.65104678030290897)) * lnx + double(1.0024760564002857)); + } template GLM_FUNC_QUALIFIER vec fastPow(vec const& x, vec const& y) @@ -34,17 +60,30 @@ namespace glm } // fastExp - // Note: This function provides accurate results only for value between -1 and 1, else avoid it. - template - GLM_FUNC_QUALIFIER T fastExp(T x) + template<> + GLM_FUNC_QUALIFIER float fastExp(float x) { - // This has a better looking and same performance in release mode than the following code. However, in debug mode it's slower. - // return 1.0f + x * (1.0f + x * 0.5f * (1.0f + x * 0.3333333333f * (1.0f + x * 0.25 * (1.0f + x * 0.2f)))); - T x2 = x * x; - T x3 = x2 * x; - T x4 = x3 * x; - T x5 = x4 * x; - return T(1) + x + (x2 * T(0.5)) + (x3 * T(0.1666666667)) + (x4 * T(0.041666667)) + (x5 * T(0.008333333333)); + //negative x always reciprocal, exp(-x) = 1 / exp(abs(-x)) + uint32_t out = ((127U + uint32_t(x *= 1.44269504088896338700f)) << 23U); + float mx = x - uint32_t(x); + /* + //(accurate) 2 degree polynomial exp2(), interval [0, 1] + return (*(float*)(&out)) * ((float(0.34400110689651969) * mx + float(0.65104678030290897)) * mx + float(1.0024760564002857)); + */ + return (*(float*)(&out)) * (mx + 0.95696433397203284f); + } + + template<> + GLM_FUNC_QUALIFIER double fastExp(double x) + { + //negative x always reciprocal, exp(-x) = 1 / exp(abs(-x)) + uint64_t out = ((uint64_t)(1023U + uint64_t(x *= 1.44269504088896338700)) << 52U); + double mx = x - uint64_t(x); + /* + //(accurate) 2 degree polynomial exp2(), interval [0, 1] + return (*(double*)(&out)) * ((double(0.34400110689651969) * mx + double(0.65104678030290897)) * mx + double(1.0024760564002857)); + */ + return (*(double*)(&out)) * (mx + 0.95696433397203284); } /* // Try to handle all values of float... but often shower than std::exp, glm::floor and the loop kill the performance GLM_FUNC_QUALIFIER float fastExp(float x) @@ -93,14 +132,22 @@ namespace glm return std::log(x); } - /* Slower than the VC7.1 function... + // Slower than the VC7.1 function... + template<> GLM_FUNC_QUALIFIER float fastLog(float x) { - float y1 = (x - 1.0f) / (x + 1.0f); - float y2 = y1 * y1; - return 2.0f * y1 * (1.0f + y2 * (0.3333333333f + y2 * (0.2f + y2 * 0.1428571429f))); + //x less than 1 always negative log(0.2) = -log(1 / 0.2) + uint32_t mantissa = 1065353216U | ((*(uint32_t*)&x) & 0x007FFFFF); + return (((*(uint32_t*)&x) >> 23)-127) * float(0.69314718055994528623) + ((float(-0.2390307190544787) * (*(float*)&mantissa) + float(1.4033913763229589)) * (*(float*)&mantissa) - float(1.1609366765682689)); + } + + template<> + GLM_FUNC_QUALIFIER double fastLog(double x) + { + //x less than 1 always negative log(0.2) = -log(1 / 0.2) + uint64_t mantissa = 4607182418800017408U | ((*(uint64_t*)&x) & 0x000FFFFFFFFFFFFF); + return (((*(uint64_t*)&x) >> 52)-1023) * double(0.69314718055994528623) + ((double(-0.2390307190544787) * (*(double*)&mantissa) + double(1.4033913763229589)) * (*(double*)&mantissa) - double(1.1609366765682689)); } - */ template GLM_FUNC_QUALIFIER vec fastLog(vec const& x) @@ -115,6 +162,24 @@ namespace glm return fastExp(static_cast(0.69314718055994530941723212145818) * x); } + template<> + GLM_FUNC_QUALIFIER float fastExp2(float x) + { + //negative x always reciprocal, exp2(-x) = 1 / exp2(abs(-x)) + uint32_t out = ((127U + uint32_t(x)) << 23U); + x -= uint32_t(x); + return (*(float*)(&out)) * ((float(0.34400110689651969) * x + float(0.65104678030290897)) * x + float(1.0024760564002857)); + } + + template<> + GLM_FUNC_QUALIFIER double fastExp2(double x) + { + //negative x always reciprocal, exp2(-x) = 1 / exp2(abs(-x)) + uint64_t out = ((uint64_t)(1023U + uint64_t(x)) << 52U); + x -= uint64_t(x); + return (*(double*)(&out)) * ((double(0.34400110689651969) * x + double(0.65104678030290897)) * x + double(1.0024760564002857)); + } + template GLM_FUNC_QUALIFIER vec fastExp2(vec const& x) { @@ -127,6 +192,20 @@ namespace glm { return fastLog(x) / static_cast(0.69314718055994530941723212145818); } + + template<> + GLM_FUNC_QUALIFIER float fastLog2(float x) + { + uint32_t mantissa = 1065353216U | ((*(uint32_t*)&x) & 0x007FFFFF); + return (((*(uint32_t*)&x) >> 23)-127) + ((float(-0.34484843300001944) * (*(float*)&mantissa) + float(2.0246657790474698)) * (*(float*)&mantissa) - float(1.674877586071156)); + } + + template<> + GLM_FUNC_QUALIFIER double fastLog2(double x) + { + uint64_t mantissa = 4607182418800017408U | ((*(uint64_t*)&x) & 0x000FFFFFFFFFFFFF); + return (((*(uint64_t*)&x) >> 52)-1023) + ((double(-0.34484843300001944) * (*(double*)&mantissa) + double(2.0246657790474698)) * (*(double*)&mantissa) - double(1.674877586071156)); + } template GLM_FUNC_QUALIFIER vec fastLog2(vec const& x) From 29609d92ba5b7483cbd98bf2b64776e79c512f3c Mon Sep 17 00:00:00 2001 From: ZvRzyan18 Date: Thu, 5 Jun 2025 09:35:06 +0800 Subject: [PATCH 2/5] Improve accuracy --- glm/gtx/fast_exponential.inl | 252 ++++++++++++++++++++++------------- 1 file changed, 163 insertions(+), 89 deletions(-) diff --git a/glm/gtx/fast_exponential.inl b/glm/gtx/fast_exponential.inl index 472960fa21..10aee72c51 100644 --- a/glm/gtx/fast_exponential.inl +++ b/glm/gtx/fast_exponential.inl @@ -2,6 +2,144 @@ namespace glm { + template<> + GLM_FUNC_QUALIFIER float fastExp2(float x) + { + /* + approximated using "exp2(x)" formula + for +x = exp2(x) + for -x = 1/exp2(abs(x)) + */ + float mx, out, C0, C1, C2, C3, C4, C5; + bool reciprocal; + uint32_t exponent, tmpx; + /* coefficients interval [0, 1] */ + C0 = 0.00189646114543331e-00f; + C1 = 0.00894282898410912e-00f; + C2 = 0.05586624630452070e-00f; + C3 = 0.24013971109076948e-00f; + C4 = 0.69315475247516734e-00f; + C5 = 0.99999989311082671e-00f; + mx = x; + reciprocal = (*(uint32_t*)&x) & 0x80000000; + tmpx = (*(uint32_t*)&x) & 0x7FFFFFFF; + mx = *(float*)&tmpx; + exponent = (uint32_t)mx; + mx = mx - (float)exponent; + out = (((((C0 * mx + C1) * mx + C2) * mx + C3) * mx + C4) * mx + C5); + tmpx = (exponent + 127) << 23; + out = (*(float*)&tmpx) * out; + out = reciprocal ? (1.0f / out) : out; + return out; + } + + template<> + GLM_FUNC_QUALIFIER double fastExp2(double x) + { + /* + approximated using "exp2(x)" formula + for +x = exp2(x) + for -x = 1/exp2(abs(x)) + */ + double mx, out, C0, C1, C2, C3, C4, C5; + bool reciprocal; + uint64_t exponent, tmpx; + /* coefficients interval [0, 1] */ + C0 = 0.00189646114543331e-00; + C1 = 0.00894282898410912e-00; + C2 = 0.05586624630452070e-00; + C3 = 0.24013971109076948e-00; + C4 = 0.69315475247516734e-00; + C5 = 0.99999989311082671e-00; + mx = x; + reciprocal = (*(uint64_t*)&x) & 0x8000000000000000; + tmpx = (*(uint64_t*)&x) & 0x7FFFFFFFFFFFFFFF; + mx = *(double*)&tmpx; + exponent = (uint64_t)mx; + mx = mx - (double)exponent; + out = (((((C0 * mx + C1) * mx + C2) * mx + C3) * mx + C4) * mx + C5); + tmpx = (exponent + 1023) << 52; + out = (*(double*)&tmpx) * out; + out = reciprocal ? (1.0 / out) : out; + return out; + } + + template<> + GLM_FUNC_QUALIFIER float fastLog2(float x) + { + /* + approximated using this formula "log2(x)" + interval [1, 1.5] + for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2... + for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5) + for x <= 1.0 = -log2(1/x) + */ + float mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6; + bool low, low_mantissa; + uint32_t tmpx; + /* coefficients */ + C0 = -0.067508561412635e-00f; + C1 = 0.605786644896372e-00f; + C2 = -2.351461115180550e-00f; + C3 = 5.175006419193522e-00f; + C4 = -7.182524695365589e-00f; + C5 = 7.064678543395325e-00f; + C6 = -3.243977190921664e-00f; + /* constants */ + l2 = 0.5849625f; /* log2(1.5) */ + inv_three_half = 0.6666667f; /* 1.0 / 1.5 */ + mx = x; + low = mx < 1.0f; + mx = low ? (1.0f / mx) : mx; + tmpx = 1065353216 | ((*(uint32_t*)&mx) & 0x007FFFFF); + lx = *(float*)&tmpx; + low_mantissa = lx <= 1.5f; + lx = low_mantissa ? lx : (lx * inv_three_half); + poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6); + poly = low_mantissa ? poly : (poly + l2); + out = (((*(uint32_t*)&mx) >> 23) - 127) + poly; + out = low ? -out : out; + return out; + } + + template<> + GLM_FUNC_QUALIFIER double fastLog2(double x) + { + /* + approximated using this formula "log2(x)" + interval [1, 1.5] + for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2... + for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5) + for x <= 1.0 = -log2(1/x) + */ + double mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6; + bool low, low_mantissa; + uint64_t tmpx; + /* coefficients */ + C0 = -0.067508561412635e-00; + C1 = 0.605786644896372e-00; + C2 = -2.351461115180550e-00; + C3 = 5.175006419193522e-00; + C4 = -7.182524695365589e-00; + C5 = 7.064678543395325e-00; + C6 = -3.243977190921664e-00; + /* constants */ + l2 = 0.5849625; /* log2(1.5) */ + inv_three_half = 0.6666667; /* 1.0 / 1.5 */ + mx = x; + low = mx < 1.0; + mx = low ? (1.0 / mx) : mx; + tmpx = 4607182418800017408U | ((*(uint64_t*)&mx) & 0x000FFFFFFFFFFFFF); + lx = *(double*)&tmpx; + low_mantissa = lx <= 1.5; + lx = low_mantissa ? lx : (lx * inv_three_half); + poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6); + poly = low_mantissa ? poly : (poly + l2); + out = (((*(uint64_t*)&mx) >> 52) - 1023) + poly; + out = low ? -out : out; + return out; + } + // fastPow template GLM_FUNC_QUALIFIER genType fastPow(genType x, genType y) @@ -12,27 +150,13 @@ namespace glm template<> GLM_FUNC_QUALIFIER float fastPow(float x, float y) { - //negative y always reciprocal, pow(x, -y) = 1 / pow(x, abs(-y)) - //when x is lower than 1.0, you can use this formula. pow(x, y) = 1 / glm::fastPow(1 / x, y) - // ex: when x is '0.3'... pow(0.3, y) = 1 / glm::fastPow(1 / 0.3, y) - uint32_t mantissa = 1065353216U | ((*(uint32_t*)&x) & 0x007FFFFF); - float mx = *(float*)&mantissa; - float lnx = y * float(1.44269504088896338700) * ((((*(uint32_t*)&x) >> 23)-127) * float(0.69314718055994528623) + ((((float(-0.056571767549593956) * mx + float(0.4471786103806078)) * mx - float(1.4699399880154467)) * mx + float(2.8211719569953488)) * mx - float(1.7417780977156199))); - uint32_t out = ((127U + uint32_t(lnx)) << 23U); - lnx -= uint32_t(lnx); - return (*(float*)(&out)) * ((float(0.34400110689651969) * lnx + float(0.65104678030290897)) * lnx + float(1.0024760564002857)); + return fastExp2(y * fastLog2(x)); } template<> GLM_FUNC_QUALIFIER double fastPow(double x, double y) { - //negative y always reciprocal, pow(x, -y) = 1 / pow(x, abs(-y)) - uint64_t mantissa = 4607182418800017408U | ((*(uint64_t*)&x) & 0x000FFFFFFFFFFFFF); - double mx = *(double*)&mantissa; - double lnx = y * double(1.44269504088896338700) * ((((*(uint64_t*)&x) >> 52)-1023) * double(0.69314718055994528623) + ((((double(-0.056571767549593956) * mx + double(0.4471786103806078)) * mx - double(1.4699399880154467)) * mx + double(2.8211719569953488)) * mx - double(1.7417780977156199))); - uint64_t out = ((uint64_t)(1023U + uint64_t(lnx)) << 52U); - lnx -= uint64_t(lnx); - return (*(double*)(&out)) * ((double(0.34400110689651969) * lnx + double(0.65104678030290897)) * lnx + double(1.0024760564002857)); + return fastExp2(y * fastLog2(x)); } template @@ -63,27 +187,13 @@ namespace glm template<> GLM_FUNC_QUALIFIER float fastExp(float x) { - //negative x always reciprocal, exp(-x) = 1 / exp(abs(-x)) - uint32_t out = ((127U + uint32_t(x *= 1.44269504088896338700f)) << 23U); - float mx = x - uint32_t(x); - /* - //(accurate) 2 degree polynomial exp2(), interval [0, 1] - return (*(float*)(&out)) * ((float(0.34400110689651969) * mx + float(0.65104678030290897)) * mx + float(1.0024760564002857)); - */ - return (*(float*)(&out)) * (mx + 0.95696433397203284f); + return fastExp2(1.4426950409f * x); } template<> GLM_FUNC_QUALIFIER double fastExp(double x) { - //negative x always reciprocal, exp(-x) = 1 / exp(abs(-x)) - uint64_t out = ((uint64_t)(1023U + uint64_t(x *= 1.44269504088896338700)) << 52U); - double mx = x - uint64_t(x); - /* - //(accurate) 2 degree polynomial exp2(), interval [0, 1] - return (*(double*)(&out)) * ((double(0.34400110689651969) * mx + double(0.65104678030290897)) * mx + double(1.0024760564002857)); - */ - return (*(double*)(&out)) * (mx + 0.95696433397203284); + return fastExp2(1.4426950409 * x); } /* // Try to handle all values of float... but often shower than std::exp, glm::floor and the loop kill the performance GLM_FUNC_QUALIFIER float fastExp(float x) @@ -125,36 +235,6 @@ namespace glm return detail::functor1::call(fastExp, x); } - // fastLog - template - GLM_FUNC_QUALIFIER genType fastLog(genType x) - { - return std::log(x); - } - - // Slower than the VC7.1 function... - template<> - GLM_FUNC_QUALIFIER float fastLog(float x) - { - //x less than 1 always negative log(0.2) = -log(1 / 0.2) - uint32_t mantissa = 1065353216U | ((*(uint32_t*)&x) & 0x007FFFFF); - return (((*(uint32_t*)&x) >> 23)-127) * float(0.69314718055994528623) + ((float(-0.2390307190544787) * (*(float*)&mantissa) + float(1.4033913763229589)) * (*(float*)&mantissa) - float(1.1609366765682689)); - } - - template<> - GLM_FUNC_QUALIFIER double fastLog(double x) - { - //x less than 1 always negative log(0.2) = -log(1 / 0.2) - uint64_t mantissa = 4607182418800017408U | ((*(uint64_t*)&x) & 0x000FFFFFFFFFFFFF); - return (((*(uint64_t*)&x) >> 52)-1023) * double(0.69314718055994528623) + ((double(-0.2390307190544787) * (*(double*)&mantissa) + double(1.4033913763229589)) * (*(double*)&mantissa) - double(1.1609366765682689)); - } - - template - GLM_FUNC_QUALIFIER vec fastLog(vec const& x) - { - return detail::functor1::call(fastLog, x); - } - //fastExp2, ln2 = 0.69314718055994530941723212145818f template GLM_FUNC_QUALIFIER genType fastExp2(genType x) @@ -162,24 +242,6 @@ namespace glm return fastExp(static_cast(0.69314718055994530941723212145818) * x); } - template<> - GLM_FUNC_QUALIFIER float fastExp2(float x) - { - //negative x always reciprocal, exp2(-x) = 1 / exp2(abs(-x)) - uint32_t out = ((127U + uint32_t(x)) << 23U); - x -= uint32_t(x); - return (*(float*)(&out)) * ((float(0.34400110689651969) * x + float(0.65104678030290897)) * x + float(1.0024760564002857)); - } - - template<> - GLM_FUNC_QUALIFIER double fastExp2(double x) - { - //negative x always reciprocal, exp2(-x) = 1 / exp2(abs(-x)) - uint64_t out = ((uint64_t)(1023U + uint64_t(x)) << 52U); - x -= uint64_t(x); - return (*(double*)(&out)) * ((double(0.34400110689651969) * x + double(0.65104678030290897)) * x + double(1.0024760564002857)); - } - template GLM_FUNC_QUALIFIER vec fastExp2(vec const& x) { @@ -193,23 +255,35 @@ namespace glm return fastLog(x) / static_cast(0.69314718055994530941723212145818); } - template<> - GLM_FUNC_QUALIFIER float fastLog2(float x) + template + GLM_FUNC_QUALIFIER vec fastLog2(vec const& x) { - uint32_t mantissa = 1065353216U | ((*(uint32_t*)&x) & 0x007FFFFF); - return (((*(uint32_t*)&x) >> 23)-127) + ((float(-0.34484843300001944) * (*(float*)&mantissa) + float(2.0246657790474698)) * (*(float*)&mantissa) - float(1.674877586071156)); + return detail::functor1::call(fastLog2, x); } + // fastLog + template + GLM_FUNC_QUALIFIER genType fastLog(genType x) + { + return std::log(x); + } + + // Slower than the VC7.1 function... template<> - GLM_FUNC_QUALIFIER double fastLog2(double x) + GLM_FUNC_QUALIFIER float fastLog(float x) { - uint64_t mantissa = 4607182418800017408U | ((*(uint64_t*)&x) & 0x000FFFFFFFFFFFFF); - return (((*(uint64_t*)&x) >> 52)-1023) + ((double(-0.34484843300001944) * (*(double*)&mantissa) + double(2.0246657790474698)) * (*(double*)&mantissa) - double(1.674877586071156)); + return fastLog2(x) * 0.69314718055994530941723212145818f; + } + + template<> + GLM_FUNC_QUALIFIER double fastLog(double x) + { + return fastLog2(x) * 0.69314718055994530941723212145818; } template - GLM_FUNC_QUALIFIER vec fastLog2(vec const& x) + GLM_FUNC_QUALIFIER vec fastLog(vec const& x) { - return detail::functor1::call(fastLog2, x); + return detail::functor1::call(fastLog, x); } }//namespace glm From 018945c2399e58c8d99774b58521934aa94cff77 Mon Sep 17 00:00:00 2001 From: ZvRzyan18 Date: Thu, 5 Jun 2025 14:58:03 +0800 Subject: [PATCH 3/5] use single line comments --- glm/gtx/fast_exponential.inl | 60 ++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/glm/gtx/fast_exponential.inl b/glm/gtx/fast_exponential.inl index 10aee72c51..be1f1787b3 100644 --- a/glm/gtx/fast_exponential.inl +++ b/glm/gtx/fast_exponential.inl @@ -5,15 +5,13 @@ namespace glm template<> GLM_FUNC_QUALIFIER float fastExp2(float x) { - /* - approximated using "exp2(x)" formula - for +x = exp2(x) - for -x = 1/exp2(abs(x)) - */ + //approximated using "exp2(x)" formula + //for +x = exp2(x) + //for -x = 1/exp2(abs(x)) float mx, out, C0, C1, C2, C3, C4, C5; bool reciprocal; uint32_t exponent, tmpx; - /* coefficients interval [0, 1] */ + // coefficients interval [0, 1] C0 = 0.00189646114543331e-00f; C1 = 0.00894282898410912e-00f; C2 = 0.05586624630452070e-00f; @@ -36,15 +34,13 @@ namespace glm template<> GLM_FUNC_QUALIFIER double fastExp2(double x) { - /* - approximated using "exp2(x)" formula - for +x = exp2(x) - for -x = 1/exp2(abs(x)) - */ + //approximated using "exp2(x)" formula + //for +x = exp2(x) + //for -x = 1/exp2(abs(x)) double mx, out, C0, C1, C2, C3, C4, C5; bool reciprocal; uint64_t exponent, tmpx; - /* coefficients interval [0, 1] */ + //coefficients interval [0, 1] C0 = 0.00189646114543331e-00; C1 = 0.00894282898410912e-00; C2 = 0.05586624630452070e-00; @@ -67,17 +63,15 @@ namespace glm template<> GLM_FUNC_QUALIFIER float fastLog2(float x) { - /* - approximated using this formula "log2(x)" - interval [1, 1.5] - for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2... - for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5) - for x <= 1.0 = -log2(1/x) - */ + //approximated using this formula "log2(x)" + //interval [1, 1.5] + //for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2... + //for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5) + //for x <= 1.0 = -log2(1/x) float mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6; bool low, low_mantissa; uint32_t tmpx; - /* coefficients */ + // coefficients C0 = -0.067508561412635e-00f; C1 = 0.605786644896372e-00f; C2 = -2.351461115180550e-00f; @@ -85,9 +79,9 @@ namespace glm C4 = -7.182524695365589e-00f; C5 = 7.064678543395325e-00f; C6 = -3.243977190921664e-00f; - /* constants */ - l2 = 0.5849625f; /* log2(1.5) */ - inv_three_half = 0.6666667f; /* 1.0 / 1.5 */ + // constants + l2 = 0.5849625f; //log2(1.5) + inv_three_half = 0.6666667f; // 1.0 / 1.5 mx = x; low = mx < 1.0f; mx = low ? (1.0f / mx) : mx; @@ -105,17 +99,15 @@ namespace glm template<> GLM_FUNC_QUALIFIER double fastLog2(double x) { - /* - approximated using this formula "log2(x)" - interval [1, 1.5] - for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2... - for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5) - for x <= 1.0 = -log2(1/x) - */ + //approximated using this formula "log2(x)" + //interval [1, 1.5] + //for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2... + //for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5) + //for x <= 1.0 = -log2(1/x) double mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6; bool low, low_mantissa; uint64_t tmpx; - /* coefficients */ + // coefficients C0 = -0.067508561412635e-00; C1 = 0.605786644896372e-00; C2 = -2.351461115180550e-00; @@ -123,9 +115,9 @@ namespace glm C4 = -7.182524695365589e-00; C5 = 7.064678543395325e-00; C6 = -3.243977190921664e-00; - /* constants */ - l2 = 0.5849625; /* log2(1.5) */ - inv_three_half = 0.6666667; /* 1.0 / 1.5 */ + // constants + l2 = 0.5849625; // log2(1.5) + inv_three_half = 0.6666667; // 1.0 / 1.5 mx = x; low = mx < 1.0; mx = low ? (1.0 / mx) : mx; From e4cca11c186453ca2e6873eb3a29cdf0210417d1 Mon Sep 17 00:00:00 2001 From: ZvRzyan18 Date: Mon, 20 Oct 2025 10:21:26 +0800 Subject: [PATCH 4/5] switch to static_cast, and use detail::float_t for type punning --- glm/gtx/fast_exponential.inl | 102 +++++++++++++++++------------------ 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/glm/gtx/fast_exponential.inl b/glm/gtx/fast_exponential.inl index be1f1787b3..10b6d2f8c8 100644 --- a/glm/gtx/fast_exponential.inl +++ b/glm/gtx/fast_exponential.inl @@ -8,25 +8,25 @@ namespace glm //approximated using "exp2(x)" formula //for +x = exp2(x) //for -x = 1/exp2(abs(x)) - float mx, out, C0, C1, C2, C3, C4, C5; + float out, C0, C1, C2, C3, C4, C5; bool reciprocal; - uint32_t exponent, tmpx; + int exponent; + detail::float_t mx, tmpx; // coefficients interval [0, 1] - C0 = 0.00189646114543331e-00f; - C1 = 0.00894282898410912e-00f; - C2 = 0.05586624630452070e-00f; - C3 = 0.24013971109076948e-00f; - C4 = 0.69315475247516734e-00f; - C5 = 0.99999989311082671e-00f; - mx = x; - reciprocal = (*(uint32_t*)&x) & 0x80000000; - tmpx = (*(uint32_t*)&x) & 0x7FFFFFFF; - mx = *(float*)&tmpx; - exponent = (uint32_t)mx; - mx = mx - (float)exponent; - out = (((((C0 * mx + C1) * mx + C2) * mx + C3) * mx + C4) * mx + C5); - tmpx = (exponent + 127) << 23; - out = (*(float*)&tmpx) * out; + C0 = 0.001896461e-00f; + C1 = 0.008942828e-00f; + C2 = 0.055866246e-00f; + C3 = 0.240139711e-00f; + C4 = 0.693154752e-00f; + C5 = 0.999999893e-00f; + mx.f = x; + reciprocal = (mx.i & int(0x80000000)) != 0; + mx.i = mx.i & 0x7FFFFFFF; + exponent = static_cast(mx.f); + mx.f = mx.f - static_cast(exponent); + out = (((((C0 * mx.f + C1) * mx.f + C2) * mx.f + C3) * mx.f + C4) * mx.f + C5); + tmpx.i = (exponent + 127) << 23; + out = tmpx.f * out; out = reciprocal ? (1.0f / out) : out; return out; } @@ -37,9 +37,10 @@ namespace glm //approximated using "exp2(x)" formula //for +x = exp2(x) //for -x = 1/exp2(abs(x)) - double mx, out, C0, C1, C2, C3, C4, C5; + double out, C0, C1, C2, C3, C4, C5; bool reciprocal; - uint64_t exponent, tmpx; + detail::int64 exponent; + detail::float_t mx, tmpx; //coefficients interval [0, 1] C0 = 0.00189646114543331e-00; C1 = 0.00894282898410912e-00; @@ -47,15 +48,14 @@ namespace glm C3 = 0.24013971109076948e-00; C4 = 0.69315475247516734e-00; C5 = 0.99999989311082671e-00; - mx = x; - reciprocal = (*(uint64_t*)&x) & 0x8000000000000000; - tmpx = (*(uint64_t*)&x) & 0x7FFFFFFFFFFFFFFF; - mx = *(double*)&tmpx; - exponent = (uint64_t)mx; - mx = mx - (double)exponent; - out = (((((C0 * mx + C1) * mx + C2) * mx + C3) * mx + C4) * mx + C5); - tmpx = (exponent + 1023) << 52; - out = (*(double*)&tmpx) * out; + mx.f = x; + reciprocal = (mx.i & detail::int64(0x8000000000000000)) != 0; + mx.i = mx.i & 0x7FFFFFFFFFFFFFFF; + exponent = static_cast(mx.f); + mx.f = mx.f - static_cast(exponent); + out = (((((C0 * mx.f + C1) * mx.f + C2) * mx.f + C3) * mx.f + C4) * mx.f + C5); + tmpx.i = (exponent + 1023) << 52; + out = tmpx.f * out; out = reciprocal ? (1.0 / out) : out; return out; } @@ -68,30 +68,30 @@ namespace glm //for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2... //for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5) //for x <= 1.0 = -log2(1/x) - float mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6; + float l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6; bool low, low_mantissa; - uint32_t tmpx; + detail::float_t tmpx, mx; // coefficients - C0 = -0.067508561412635e-00f; - C1 = 0.605786644896372e-00f; - C2 = -2.351461115180550e-00f; - C3 = 5.175006419193522e-00f; - C4 = -7.182524695365589e-00f; - C5 = 7.064678543395325e-00f; - C6 = -3.243977190921664e-00f; + C0 = -0.06750856e-00f; + C1 = 0.60578664e-00f; + C2 = -2.35146111e-00f; + C3 = 5.17500641e-00f; + C4 = -7.18252469e-00f; + C5 = 7.06467854e-00f; + C6 = -3.24397719e-00f; // constants l2 = 0.5849625f; //log2(1.5) inv_three_half = 0.6666667f; // 1.0 / 1.5 - mx = x; - low = mx < 1.0f; - mx = low ? (1.0f / mx) : mx; - tmpx = 1065353216 | ((*(uint32_t*)&mx) & 0x007FFFFF); - lx = *(float*)&tmpx; + mx.f = x; + low = mx.f < 1.0f; + mx.f = low ? (1.0f / mx.f) : mx.f; + tmpx.i = 1065353216 | (mx.i & int(0x007FFFFF)); + lx = tmpx.f; low_mantissa = lx <= 1.5f; lx = low_mantissa ? lx : (lx * inv_three_half); poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6); poly = low_mantissa ? poly : (poly + l2); - out = (((*(uint32_t*)&mx) >> 23) - 127) + poly; + out = static_cast((mx.i >> 23) - 127) + poly; out = low ? -out : out; return out; } @@ -104,9 +104,9 @@ namespace glm //for x mantissa <= 1.5 log2(x) ≈ C0 * x + C1 * x2... //for x mantissa >= 1.5 log2(x) ≈ log2(x / 1.5) + log2(1.5) //for x <= 1.0 = -log2(1/x) - double mx, l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6; + double l2, inv_three_half, lx, out, poly, C0, C1, C2, C3, C4, C5, C6; bool low, low_mantissa; - uint64_t tmpx; + detail::float_t tmpx, mx; // coefficients C0 = -0.067508561412635e-00; C1 = 0.605786644896372e-00; @@ -118,16 +118,16 @@ namespace glm // constants l2 = 0.5849625; // log2(1.5) inv_three_half = 0.6666667; // 1.0 / 1.5 - mx = x; - low = mx < 1.0; - mx = low ? (1.0 / mx) : mx; - tmpx = 4607182418800017408U | ((*(uint64_t*)&mx) & 0x000FFFFFFFFFFFFF); - lx = *(double*)&tmpx; + mx.f = x; + low = mx.f < 1.0; + mx.f = low ? (1.0 / mx.f) : mx.f; + tmpx.i = 4607182418800017408U | (mx.i & detail::int64(0x000FFFFFFFFFFFFF)); + lx = tmpx.f; low_mantissa = lx <= 1.5; lx = low_mantissa ? lx : (lx * inv_three_half); poly = ((((((C0 * lx + C1) * lx + C2) * lx + C3) * lx + C4) * lx + C5) * lx + C6); poly = low_mantissa ? poly : (poly + l2); - out = (((*(uint64_t*)&mx) >> 52) - 1023) + poly; + out = static_cast((mx.i >> 52) - 1023) + poly; out = low ? -out : out; return out; } From 1cc8b7e3c2a84e5f29bb549622ab5798963a3428 Mon Sep 17 00:00:00 2001 From: ZvRzyan18 Date: Mon, 20 Oct 2025 10:22:53 +0800 Subject: [PATCH 5/5] include type_float.hpp --- glm/gtx/fast_exponential.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/glm/gtx/fast_exponential.hpp b/glm/gtx/fast_exponential.hpp index 9fae32521f..3df90401a5 100644 --- a/glm/gtx/fast_exponential.hpp +++ b/glm/gtx/fast_exponential.hpp @@ -15,6 +15,7 @@ // Dependency: #include "../glm.hpp" +#include "../detail/type_float.hpp" #ifndef GLM_ENABLE_EXPERIMENTAL # error "GLM: GLM_GTX_fast_exponential is an experimental extension and may change in the future. Use #define GLM_ENABLE_EXPERIMENTAL before including it, if you really want to use it."