From 820b4478a7bbe3158f43244cfd69a1b0fd39e9d9 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 28 Aug 2025 12:22:01 +0800 Subject: [PATCH 1/2] [SYCL][NFC] Group all msvc specific math functions together Move double and float version of msvc specific functions in single file so that we won't forget to fix double version bug. Signed-off-by: jinge90 --- libdevice/cmath_wrapper_fp64.cpp | 308 ------------------------------ libdevice/msvc_math.cpp | 318 ++++++++++++++++++++++++++++++- 2 files changed, 311 insertions(+), 315 deletions(-) diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index 855317bcf3f4b..168211491337c 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -181,312 +181,4 @@ double scalbn(double x, int exp) { return __devicelib_scalbn(x, exp); } DEVICE_EXTERN_C_INLINE double rint(double x) { return __spirv_ocl_rint(x); } - -#if defined(_MSC_VER) -#include -// FLOAT PROPERTIES -#define _D0 3 // little-endian, small long doubles -#define _D1 2 -#define _D2 1 -#define _D3 0 - -// IEEE 754 double properties -#define HUGE_EXP (int)(_DMAX * 900L / 1000) - -#define NBITS (48 + _DOFF) - -#define INIT(w0) \ - { 0, 0, 0, w0 } - -// double declarations -union _Dval { // pun floating type as integer array - unsigned short _Sh[8]; - double _Val; -}; - -union _Dconst { // pun float types as integer array - unsigned short _Word[4]; // TRANSITION, ABI: Twice as large as necessary. - double _Double; -}; -#define DSIGN(x) (((_Dval *)(char *)&(x))->_Sh[_D0] & _DSIGN) - -#define _Xbig (double)((NBITS + 1) * 347L / 1000) - -DEVICE_EXTERN_C_INLINE -short _Dtest(double *px) { // categorize *px - _Dval *ps = (_Dval *)(char *)px; - - short ret = 0; - if ((ps->_Sh[_D0] & _DMASK) == _DMAX << _DOFF) { - ret = (short)((ps->_Sh[_D0] & _DFRAC) != 0 || ps->_Sh[_D1] != 0 || - ps->_Sh[_D2] != 0 || ps->_Sh[_D3] != 0 - ? _NANCODE - : _INFCODE); - } else if ((ps->_Sh[_D0] & ~_DSIGN) != 0 || ps->_Sh[_D1] != 0 || - ps->_Sh[_D2] != 0 || ps->_Sh[_D3] != 0) - ret = (ps->_Sh[_D0] & _DMASK) == 0 ? _DENORM : _FINITE; - - return ret; -} - -// Returns _FP_LT, _FP_GT or _FP_EQ based on the ordering -// relationship between x and y. -DEVICE_EXTERN_C_INLINE -int _dpcomp(double x, double y) { - int res = 0; - if (_Dtest(&x) == _NANCODE || _Dtest(&y) == _NANCODE) { - // '0' means unordered. - return res; - } - - if (x < y) - res |= _FP_LT; - else if (x > y) - res |= _FP_GT; - else - res |= _FP_EQ; - - return res; -} - -// Returns 0, if the sign bit is not set, and non-zero otherwise. -DEVICE_EXTERN_C_INLINE -int _dsign(double x) { return DSIGN(x); } - -// fpclassify() equivalent with a pointer argument. -DEVICE_EXTERN_C_INLINE -short _dtest(double *px) { - switch (_Dtest(px)) { - case _DENORM: - return FP_SUBNORMAL; - case _FINITE: - return FP_NORMAL; - case _INFCODE: - return FP_INFINITE; - case _NANCODE: - return FP_NAN; - } - - return FP_ZERO; -} - -DEVICE_EXTERN_C_INLINE -short _Dnorm(_Dval *ps) { // normalize double fraction - short xchar; - unsigned short sign = (unsigned short)(ps->_Sh[_D0] & _DSIGN); - - xchar = 1; - if ((ps->_Sh[_D0] &= _DFRAC) != 0 || ps->_Sh[_D1] || ps->_Sh[_D2] || - ps->_Sh[_D3]) { // nonzero, scale - for (; ps->_Sh[_D0] == 0; xchar -= 16) { // shift left by 16 - ps->_Sh[_D0] = ps->_Sh[_D1]; - ps->_Sh[_D1] = ps->_Sh[_D2]; - ps->_Sh[_D2] = ps->_Sh[_D3]; - ps->_Sh[_D3] = 0; - } - for (; ps->_Sh[_D0] < 1 << _DOFF; --xchar) { // shift left by 1 - ps->_Sh[_D0] = (unsigned short)(ps->_Sh[_D0] << 1 | ps->_Sh[_D1] >> 15); - ps->_Sh[_D1] = (unsigned short)(ps->_Sh[_D1] << 1 | ps->_Sh[_D2] >> 15); - ps->_Sh[_D2] = (unsigned short)(ps->_Sh[_D2] << 1 | ps->_Sh[_D3] >> 15); - ps->_Sh[_D3] <<= 1; - } - for (; 1 << (_DOFF + 1) <= ps->_Sh[_D0]; ++xchar) { // shift right by 1 - ps->_Sh[_D3] = (unsigned short)(ps->_Sh[_D3] >> 1 | ps->_Sh[_D2] << 15); - ps->_Sh[_D2] = (unsigned short)(ps->_Sh[_D2] >> 1 | ps->_Sh[_D1] << 15); - ps->_Sh[_D1] = (unsigned short)(ps->_Sh[_D1] >> 1 | ps->_Sh[_D0] << 15); - ps->_Sh[_D0] >>= 1; - } - ps->_Sh[_D0] &= _DFRAC; - } - ps->_Sh[_D0] |= sign; - return xchar; -} - -DEVICE_EXTERN_C_INLINE -short _Dscale(double *px, long lexp) { // scale *px by 2^xexp with checking - _Dval *ps = (_Dval *)(char *)px; - _Dconst _Inf = {INIT(_DMAX << _DOFF)}; - short xchar = (short)((ps->_Sh[_D0] & _DMASK) >> _DOFF); - - if (xchar == _DMAX) - return (short)((ps->_Sh[_D0] & _DFRAC) != 0 || ps->_Sh[_D1] != 0 || - ps->_Sh[_D2] != 0 || ps->_Sh[_D3] != 0 - ? _NANCODE - : _INFCODE); - if (xchar == 0 && 0 < (xchar = _Dnorm(ps))) - return 0; - - short ret = 0; - if (0 < lexp && _DMAX - xchar <= lexp) { // overflow, return +/-INF - *px = ps->_Sh[_D0] & _DSIGN ? -_Inf._Double : _Inf._Double; - ret = _INFCODE; - } else if (-xchar < lexp) { // finite result, repack - ps->_Sh[_D0] = - (unsigned short)(ps->_Sh[_D0] & ~_DMASK | (lexp + xchar) << _DOFF); - ret = _FINITE; - } else { // denormalized, scale - unsigned short sign = (unsigned short)(ps->_Sh[_D0] & _DSIGN); - - ps->_Sh[_D0] = (unsigned short)(1 << _DOFF | ps->_Sh[_D0] & _DFRAC); - lexp += xchar - 1; - if (lexp < -(48 + 1 + _DOFF) || - 0 <= lexp) { // certain underflow, return +/-0 - ps->_Sh[_D0] = sign; - ps->_Sh[_D1] = 0; - ps->_Sh[_D2] = 0; - ps->_Sh[_D3] = 0; - ret = 0; - } else { // nonzero, align fraction - short xexp = (short)lexp; - unsigned short psx = 0; - ret = _FINITE; - - for (; xexp <= -16; xexp += 16) { // scale by words - psx = ps->_Sh[_D3] | (psx != 0 ? 1 : 0); - ps->_Sh[_D3] = ps->_Sh[_D2]; - ps->_Sh[_D2] = ps->_Sh[_D1]; - ps->_Sh[_D1] = ps->_Sh[_D0]; - ps->_Sh[_D0] = 0; - } - if ((xexp = (short)-xexp) != 0) { // scale by bits - psx = (ps->_Sh[_D3] << (16 - xexp)) | (psx != 0 ? 1 : 0); - ps->_Sh[_D3] = (unsigned short)(ps->_Sh[_D3] >> xexp | - ps->_Sh[_D2] << (16 - xexp)); - ps->_Sh[_D2] = (unsigned short)(ps->_Sh[_D2] >> xexp | - ps->_Sh[_D1] << (16 - xexp)); - ps->_Sh[_D1] = (unsigned short)(ps->_Sh[_D1] >> xexp | - ps->_Sh[_D0] << (16 - xexp)); - ps->_Sh[_D0] >>= xexp; - } - - ps->_Sh[_D0] |= sign; - if ((0x8000 < psx || 0x8000 == psx && (ps->_Sh[_D3] & 0x0001) != 0) && - (++ps->_Sh[_D3] & 0xffff) == 0 && (++ps->_Sh[_D2] & 0xffff) == 0 && - (++ps->_Sh[_D1] & 0xffff) == 0) - ++ps->_Sh[_D0]; // round up - else if (ps->_Sh[_D0] == sign && ps->_Sh[_D1] == 0 && ps->_Sh[_D2] == 0 && - ps->_Sh[_D3] == 0) - ret = 0; - } - } - return ret; -} - -DEVICE_EXTERN_C_INLINE -short _Exp(double *px, double y, - short eoff) { // compute y * e^(*px), (*px) finite, |y| not huge - static const double invln2 = 1.4426950408889634073599246810018921; - static const double c1 = 22713.0 / 32768.0; - static const double c2 = 1.4286068203094172321214581765680755e-6; - static const double p[] = {1.0, 420.30235984910635, 15132.70094680474802}; - static const double q[] = {30.01511290683317, 3362.72154416553028, - 30265.40189360949691}; - - _Dconst _Eps = {INIT((_DBIAS - NBITS - 1) << _DOFF)}; - _Dconst _Inf = {INIT(_DMAX << _DOFF)}; - short ret = 0; - if (*px < -HUGE_EXP || y == 0.0) // certain underflow - *px = __spirv_ocl_copysign(0.0, y); - else if (HUGE_EXP < *px) { // certain overflow - *px = __spirv_ocl_copysign(_Inf._Double, y); - ret = _INFCODE; - } else { // xexp won't overflow - double g = *px * invln2; - short xexp = (short)(g + (g < 0.0 ? -0.5 : +0.5)); - g = xexp; - g = (*px - g * c1) - g * c2; - if (-_Eps._Double < g && g < _Eps._Double) - *px = y; - else { // g * g worth computing - const double z = g * g; - const double w = (q[0] * z + q[1]) * z + q[2]; - - g *= (z + p[1]) * z + p[2]; - *px = (w + g) / (w - g) * 2.0 * y; - --xexp; - } - ret = _Dscale(px, (long)xexp + eoff); - } - - return ret; -} - -DEVICE_EXTERN_C_INLINE -double _Cosh(double x, double y) { // compute y * cosh(x), |y| <= 1 - switch (_Dtest(&x)) { // test for special codes - case _NANCODE: - case _INFCODE: - return x; - case 0: - return y; - default: // finite - if (y == 0.0) - return y; - - if (x < 0.0) - x = -x; - - if (x < _Xbig) { // worth adding in exp(-x) - _Exp(&x, 1.0, -1); - return y * (x + 0.25 / x); - } - _Exp(&x, y, -1); - return x; - } -} - -DEVICE_EXTERN_C_INLINE -double _Poly(double x, const double *tab, int n) { // compute polynomial - double y; - - for (y = *tab; 0 <= --n;) - y = y * x + *++tab; - - return y; -} - -DEVICE_EXTERN_C_INLINE -double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1 - - short neg; - // coefficients - static const double p[] = {0.0000000001632881, 0.0000000250483893, - 0.0000027557344615, 0.0001984126975233, - 0.0083333333334816, 0.1666666666666574, - 1.0000000000000001}; - _Dconst _Rteps = {INIT((_DBIAS - NBITS / 2) << _DOFF)}; - switch (_Dtest(&x)) { // test for special codes - case _NANCODE: - return x; - case _INFCODE: - return y != 0.0 ? x : DSIGN(x) ? -y : y; - case 0: - return x * y; - default: // finite - if (y == 0.0) - return x < 0.0 ? -y : y; - - if (x < 0.0) { - x = -x; - neg = 1; - } else - neg = 0; - - if (x < _Rteps._Double) - x *= y; // x tiny - else if (x < 1.0) { - double w = x * x; - - x += x * w * _Poly(w, p, 5); - x *= y; - } else if (x < _Xbig) { // worth adding in exp(-x) - _Exp(&x, 1.0, -1); - x = y * (x - 0.25 / x); - } else - _Exp(&x, y, -1); - - return neg ? -x : x; - } -} -#endif // defined(_WIN32) #endif // __SPIR__ || __SPIRV__ diff --git a/libdevice/msvc_math.cpp b/libdevice/msvc_math.cpp index c4619dacdce01..d448047bbb903 100644 --- a/libdevice/msvc_math.cpp +++ b/libdevice/msvc_math.cpp @@ -15,7 +15,7 @@ union _Fval { // pun floating type as integer array float _Val; }; -union _Dconst { // pun float types as integer array +union _Dconst2 { // pun float types as integer array unsigned short _Word[2]; // TRANSITION, ABI: Twice as large as necessary. float _Float; }; @@ -26,13 +26,13 @@ union _Dconst { // pun float types as integer array // IEEE 754 float properties #define FHUGE_EXP (int)(_FMAX * 900L / 1000) -#define NBITS (16 + _FOFF) +#define F_NBITS (16 + _FOFF) #define FSIGN(x) (((_Fval *)(char *)&(x))->_Sh[_F0] & _FSIGN) -#define INIT(w0) \ +#define INIT2(w0) \ { 0, w0 } -#define _FXbig (float)((NBITS + 1) * 347L / 1000) +#define _FXbig (float)((F_NBITS + 1) * 347L / 1000) DEVICE_EXTERN_C_INLINE float __devicelib_hypotf(float, float); @@ -123,7 +123,7 @@ short _FDnorm(_Fval *ps) { // normalize float fraction DEVICE_EXTERN_C_INLINE short _FDscale(float *px, long lexp) { // scale *px by 2^xexp with checking - _Dconst _FInf = {INIT(_FMAX << _FOFF)}; + _Dconst2 _FInf = {INIT2(_FMAX << _FOFF)}; _Fval *ps = (_Fval *)(char *)px; short xchar = (short)((ps->_Sh[_F0] & _FMASK) >> _FOFF); @@ -185,7 +185,7 @@ DEVICE_EXTERN_C_INLINE short _FExp(float *px, float y, short eoff) { // compute y * e^(*px), (*px) finite, |y| not huge static const float hugexp = FHUGE_EXP; - _Dconst _FInf = {INIT(_FMAX << _FOFF)}; + _Dconst2 _FInf = {INIT2(_FMAX << _FOFF)}; static const float p[] = {1.0F, 60.09114349F}; static const float q[] = {12.01517514F, 120.18228722F}; static const float c1 = (22713.0F / 32768.0F); @@ -244,7 +244,7 @@ float _FCosh(float x, float y) { // compute y * cosh(x), |y| <= 1 DEVICE_EXTERN_C_INLINE float _FSinh(float x, float y) { // compute y * sinh(x), |y| <= 1 - _Dconst _FRteps = {INIT((_FBIAS - NBITS / 2) << _FOFF)}; + _Dconst2 _FRteps = {INIT2((_FBIAS - F_NBITS / 2) << _FOFF)}; static const float p[] = {0.00020400F, 0.00832983F, 0.16666737F, 0.99999998F}; short neg; @@ -281,4 +281,308 @@ float _FSinh(float x, float y) { // compute y * sinh(x), |y| <= 1 return neg ? -x : x; } } + +#define _D0 3 // little-endian, small long doubles +#define _D1 2 +#define _D2 1 +#define _D3 0 + +// IEEE 754 double properties +#define HUGE_EXP (int)(_DMAX * 900L / 1000) + +#define D_NBITS (48 + _DOFF) + +#define INIT4(w0) \ + { 0, 0, 0, w0 } + +// double declarations +union _Dval { // pun floating type as integer array + unsigned short _Sh[8]; + double _Val; +}; + +union _Dconst4 { // pun float types as integer array + unsigned short _Word[4]; // TRANSITION, ABI: Twice as large as necessary. + double _Double; +}; +#define DSIGN(x) (((_Dval *)(char *)&(x))->_Sh[_D0] & _DSIGN) + +#define _Xbig (double)((D_NBITS + 1) * 347L / 1000) + +DEVICE_EXTERN_C_INLINE +short _Dtest(double *px) { // categorize *px + _Dval *ps = (_Dval *)(char *)px; + + short ret = 0; + if ((ps->_Sh[_D0] & _DMASK) == _DMAX << _DOFF) { + ret = (short)((ps->_Sh[_D0] & _DFRAC) != 0 || ps->_Sh[_D1] != 0 || + ps->_Sh[_D2] != 0 || ps->_Sh[_D3] != 0 + ? _NANCODE + : _INFCODE); + } else if ((ps->_Sh[_D0] & ~_DSIGN) != 0 || ps->_Sh[_D1] != 0 || + ps->_Sh[_D2] != 0 || ps->_Sh[_D3] != 0) + ret = (ps->_Sh[_D0] & _DMASK) == 0 ? _DENORM : _FINITE; + + return ret; +} + +// Returns _FP_LT, _FP_GT or _FP_EQ based on the ordering +// relationship between x and y. +DEVICE_EXTERN_C_INLINE +int _dpcomp(double x, double y) { + int res = 0; + if (_Dtest(&x) == _NANCODE || _Dtest(&y) == _NANCODE) { + // '0' means unordered. + return res; + } + + if (x < y) + res |= _FP_LT; + else if (x > y) + res |= _FP_GT; + else + res |= _FP_EQ; + + return res; +} + +// Returns 0, if the sign bit is not set, and non-zero otherwise. +DEVICE_EXTERN_C_INLINE +int _dsign(double x) { return DSIGN(x); } + +// fpclassify() equivalent with a pointer argument. +DEVICE_EXTERN_C_INLINE +short _dtest(double *px) { + switch (_Dtest(px)) { + case _DENORM: + return FP_SUBNORMAL; + case _FINITE: + return FP_NORMAL; + case _INFCODE: + return FP_INFINITE; + case _NANCODE: + return FP_NAN; + } + + return FP_ZERO; +} + +DEVICE_EXTERN_C_INLINE +short _Dnorm(_Dval *ps) { // normalize double fraction + short xchar; + unsigned short sign = (unsigned short)(ps->_Sh[_D0] & _DSIGN); + + xchar = 1; + if ((ps->_Sh[_D0] &= _DFRAC) != 0 || ps->_Sh[_D1] || ps->_Sh[_D2] || + ps->_Sh[_D3]) { // nonzero, scale + for (; ps->_Sh[_D0] == 0; xchar -= 16) { // shift left by 16 + ps->_Sh[_D0] = ps->_Sh[_D1]; + ps->_Sh[_D1] = ps->_Sh[_D2]; + ps->_Sh[_D2] = ps->_Sh[_D3]; + ps->_Sh[_D3] = 0; + } + for (; ps->_Sh[_D0] < 1 << _DOFF; --xchar) { // shift left by 1 + ps->_Sh[_D0] = (unsigned short)(ps->_Sh[_D0] << 1 | ps->_Sh[_D1] >> 15); + ps->_Sh[_D1] = (unsigned short)(ps->_Sh[_D1] << 1 | ps->_Sh[_D2] >> 15); + ps->_Sh[_D2] = (unsigned short)(ps->_Sh[_D2] << 1 | ps->_Sh[_D3] >> 15); + ps->_Sh[_D3] <<= 1; + } + for (; 1 << (_DOFF + 1) <= ps->_Sh[_D0]; ++xchar) { // shift right by 1 + ps->_Sh[_D3] = (unsigned short)(ps->_Sh[_D3] >> 1 | ps->_Sh[_D2] << 15); + ps->_Sh[_D2] = (unsigned short)(ps->_Sh[_D2] >> 1 | ps->_Sh[_D1] << 15); + ps->_Sh[_D1] = (unsigned short)(ps->_Sh[_D1] >> 1 | ps->_Sh[_D0] << 15); + ps->_Sh[_D0] >>= 1; + } + ps->_Sh[_D0] &= _DFRAC; + } + ps->_Sh[_D0] |= sign; + return xchar; +} + +DEVICE_EXTERN_C_INLINE +short _Dscale(double *px, long lexp) { // scale *px by 2^xexp with checking + _Dval *ps = (_Dval *)(char *)px; + _Dconst4 _Inf = {INIT4(_DMAX << _DOFF)}; + short xchar = (short)((ps->_Sh[_D0] & _DMASK) >> _DOFF); + + if (xchar == _DMAX) + return (short)((ps->_Sh[_D0] & _DFRAC) != 0 || ps->_Sh[_D1] != 0 || + ps->_Sh[_D2] != 0 || ps->_Sh[_D3] != 0 + ? _NANCODE + : _INFCODE); + if (xchar == 0 && 0 < (xchar = _Dnorm(ps))) + return 0; + + short ret = 0; + if (0 < lexp && _DMAX - xchar <= lexp) { // overflow, return +/-INF + *px = ps->_Sh[_D0] & _DSIGN ? -_Inf._Double : _Inf._Double; + ret = _INFCODE; + } else if (-xchar < lexp) { // finite result, repack + ps->_Sh[_D0] = + (unsigned short)(ps->_Sh[_D0] & ~_DMASK | (lexp + xchar) << _DOFF); + ret = _FINITE; + } else { // denormalized, scale + unsigned short sign = (unsigned short)(ps->_Sh[_D0] & _DSIGN); + + ps->_Sh[_D0] = (unsigned short)(1 << _DOFF | ps->_Sh[_D0] & _DFRAC); + lexp += xchar - 1; + if (lexp < -(48 + 1 + _DOFF) || + 0 <= lexp) { // certain underflow, return +/-0 + ps->_Sh[_D0] = sign; + ps->_Sh[_D1] = 0; + ps->_Sh[_D2] = 0; + ps->_Sh[_D3] = 0; + ret = 0; + } else { // nonzero, align fraction + short xexp = (short)lexp; + unsigned short psx = 0; + ret = _FINITE; + + for (; xexp <= -16; xexp += 16) { // scale by words + psx = ps->_Sh[_D3] | (psx != 0 ? 1 : 0); + ps->_Sh[_D3] = ps->_Sh[_D2]; + ps->_Sh[_D2] = ps->_Sh[_D1]; + ps->_Sh[_D1] = ps->_Sh[_D0]; + ps->_Sh[_D0] = 0; + } + if ((xexp = (short)-xexp) != 0) { // scale by bits + psx = (ps->_Sh[_D3] << (16 - xexp)) | (psx != 0 ? 1 : 0); + ps->_Sh[_D3] = (unsigned short)(ps->_Sh[_D3] >> xexp | + ps->_Sh[_D2] << (16 - xexp)); + ps->_Sh[_D2] = (unsigned short)(ps->_Sh[_D2] >> xexp | + ps->_Sh[_D1] << (16 - xexp)); + ps->_Sh[_D1] = (unsigned short)(ps->_Sh[_D1] >> xexp | + ps->_Sh[_D0] << (16 - xexp)); + ps->_Sh[_D0] >>= xexp; + } + + ps->_Sh[_D0] |= sign; + if ((0x8000 < psx || 0x8000 == psx && (ps->_Sh[_D3] & 0x0001) != 0) && + (++ps->_Sh[_D3] & 0xffff) == 0 && (++ps->_Sh[_D2] & 0xffff) == 0 && + (++ps->_Sh[_D1] & 0xffff) == 0) + ++ps->_Sh[_D0]; // round up + else if (ps->_Sh[_D0] == sign && ps->_Sh[_D1] == 0 && ps->_Sh[_D2] == 0 && + ps->_Sh[_D3] == 0) + ret = 0; + } + } + return ret; +} + +DEVICE_EXTERN_C_INLINE +short _Exp(double *px, double y, + short eoff) { // compute y * e^(*px), (*px) finite, |y| not huge + static const double invln2 = 1.4426950408889634073599246810018921; + static const double c1 = 22713.0 / 32768.0; + static const double c2 = 1.4286068203094172321214581765680755e-6; + static const double p[] = {1.0, 420.30235984910635, 15132.70094680474802}; + static const double q[] = {30.01511290683317, 3362.72154416553028, + 30265.40189360949691}; + + _Dconst4 _Eps = {INIT4((_DBIAS - D_NBITS - 1) << _DOFF)}; + _Dconst4 _Inf = {INIT4(_DMAX << _DOFF)}; + short ret = 0; + if (*px < -HUGE_EXP || y == 0.0) // certain underflow + *px = __spirv_ocl_copysign(0.0, y); + else if (HUGE_EXP < *px) { // certain overflow + *px = __spirv_ocl_copysign(_Inf._Double, y); + ret = _INFCODE; + } else { // xexp won't overflow + double g = *px * invln2; + short xexp = (short)(g + (g < 0.0 ? -0.5 : +0.5)); + g = xexp; + g = (*px - g * c1) - g * c2; + if (-_Eps._Double < g && g < _Eps._Double) + *px = y; + else { // g * g worth computing + const double z = g * g; + const double w = (q[0] * z + q[1]) * z + q[2]; + + g *= (z + p[1]) * z + p[2]; + *px = (w + g) / (w - g) * 2.0 * y; + --xexp; + } + ret = _Dscale(px, (long)xexp + eoff); + } + + return ret; +} + +DEVICE_EXTERN_C_INLINE +double _Cosh(double x, double y) { // compute y * cosh(x), |y| <= 1 + switch (_Dtest(&x)) { // test for special codes + case _NANCODE: + case _INFCODE: + return x; + case 0: + return y; + default: // finite + if (y == 0.0) + return y; + + if (x < 0.0) + x = -x; + + if (x < _Xbig) { // worth adding in exp(-x) + _Exp(&x, 1.0, -1); + return y * (x + 0.25 / x); + } + _Exp(&x, y, -1); + return x; + } +} + +DEVICE_EXTERN_C_INLINE +double _Poly(double x, const double *tab, int n) { // compute polynomial + double y; + + for (y = *tab; 0 <= --n;) + y = y * x + *++tab; + + return y; +} + +DEVICE_EXTERN_C_INLINE +double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1 + + short neg; + // coefficients + static const double p[] = {0.0000000001632881, 0.0000000250483893, + 0.0000027557344615, 0.0001984126975233, + 0.0083333333334816, 0.1666666666666574, + 1.0000000000000001}; + _Dconst4 _Rteps = {INIT4((_DBIAS - D_NBITS / 2) << _DOFF)}; + switch (_Dtest(&x)) { // test for special codes + case _NANCODE: + return x; + case _INFCODE: + return y != 0.0 ? x : DSIGN(x) ? -y : y; + case 0: + return x * y; + default: // finite + if (y == 0.0) + return x < 0.0 ? -y : y; + + if (x < 0.0) { + x = -x; + neg = 1; + } else + neg = 0; + + if (x < _Rteps._Double) + x *= y; // x tiny + else if (x < 1.0) { + double w = x * x; + + x += x * w * _Poly(w, p, 5); + x *= y; + } else if (x < _Xbig) { // worth adding in exp(-x) + _Exp(&x, 1.0, -1); + x = y * (x - 0.25 / x); + } else + _Exp(&x, y, -1); + + return neg ? -x : x; + } +} #endif // __SPIR__ || __SPIRV__ From 02cd6bfd0d26e2e145d10f740b0c4a29e1292cc9 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 28 Aug 2025 13:38:37 +0800 Subject: [PATCH 2/2] fix clang format Signed-off-by: jinge90 --- libdevice/msvc_math.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/libdevice/msvc_math.cpp b/libdevice/msvc_math.cpp index d448047bbb903..ab234eb8e25e3 100644 --- a/libdevice/msvc_math.cpp +++ b/libdevice/msvc_math.cpp @@ -15,7 +15,7 @@ union _Fval { // pun floating type as integer array float _Val; }; -union _Dconst2 { // pun float types as integer array +union _Dconst2 { // pun float types as integer array unsigned short _Word[2]; // TRANSITION, ABI: Twice as large as necessary. float _Float; }; @@ -29,8 +29,7 @@ union _Dconst2 { // pun float types as integer array #define F_NBITS (16 + _FOFF) #define FSIGN(x) (((_Fval *)(char *)&(x))->_Sh[_F0] & _FSIGN) -#define INIT2(w0) \ - { 0, w0 } +#define INIT2(w0) {0, w0} #define _FXbig (float)((F_NBITS + 1) * 347L / 1000) @@ -292,8 +291,7 @@ float _FSinh(float x, float y) { // compute y * sinh(x), |y| <= 1 #define D_NBITS (48 + _DOFF) -#define INIT4(w0) \ - { 0, 0, 0, w0 } +#define INIT4(w0) {0, 0, 0, w0} // double declarations union _Dval { // pun floating type as integer array @@ -301,7 +299,7 @@ union _Dval { // pun floating type as integer array double _Val; }; -union _Dconst4 { // pun float types as integer array +union _Dconst4 { // pun float types as integer array unsigned short _Word[4]; // TRANSITION, ABI: Twice as large as necessary. double _Double; };