Skip to content

Commit 820b447

Browse files
committed
[SYCL][NFC] Group all msvc specific math functions together
Move double and float version of msvc specific functions in single file so that we won't forget to fix double version bug. Signed-off-by: jinge90 <[email protected]>
1 parent 1d3074d commit 820b447

File tree

2 files changed

+311
-315
lines changed

2 files changed

+311
-315
lines changed

libdevice/cmath_wrapper_fp64.cpp

Lines changed: 0 additions & 308 deletions
Original file line numberDiff line numberDiff line change
@@ -181,312 +181,4 @@ double scalbn(double x, int exp) { return __devicelib_scalbn(x, exp); }
181181

182182
DEVICE_EXTERN_C_INLINE
183183
double rint(double x) { return __spirv_ocl_rint(x); }
184-
185-
#if defined(_MSC_VER)
186-
#include <math.h>
187-
// FLOAT PROPERTIES
188-
#define _D0 3 // little-endian, small long doubles
189-
#define _D1 2
190-
#define _D2 1
191-
#define _D3 0
192-
193-
// IEEE 754 double properties
194-
#define HUGE_EXP (int)(_DMAX * 900L / 1000)
195-
196-
#define NBITS (48 + _DOFF)
197-
198-
#define INIT(w0) \
199-
{ 0, 0, 0, w0 }
200-
201-
// double declarations
202-
union _Dval { // pun floating type as integer array
203-
unsigned short _Sh[8];
204-
double _Val;
205-
};
206-
207-
union _Dconst { // pun float types as integer array
208-
unsigned short _Word[4]; // TRANSITION, ABI: Twice as large as necessary.
209-
double _Double;
210-
};
211-
#define DSIGN(x) (((_Dval *)(char *)&(x))->_Sh[_D0] & _DSIGN)
212-
213-
#define _Xbig (double)((NBITS + 1) * 347L / 1000)
214-
215-
DEVICE_EXTERN_C_INLINE
216-
short _Dtest(double *px) { // categorize *px
217-
_Dval *ps = (_Dval *)(char *)px;
218-
219-
short ret = 0;
220-
if ((ps->_Sh[_D0] & _DMASK) == _DMAX << _DOFF) {
221-
ret = (short)((ps->_Sh[_D0] & _DFRAC) != 0 || ps->_Sh[_D1] != 0 ||
222-
ps->_Sh[_D2] != 0 || ps->_Sh[_D3] != 0
223-
? _NANCODE
224-
: _INFCODE);
225-
} else if ((ps->_Sh[_D0] & ~_DSIGN) != 0 || ps->_Sh[_D1] != 0 ||
226-
ps->_Sh[_D2] != 0 || ps->_Sh[_D3] != 0)
227-
ret = (ps->_Sh[_D0] & _DMASK) == 0 ? _DENORM : _FINITE;
228-
229-
return ret;
230-
}
231-
232-
// Returns _FP_LT, _FP_GT or _FP_EQ based on the ordering
233-
// relationship between x and y.
234-
DEVICE_EXTERN_C_INLINE
235-
int _dpcomp(double x, double y) {
236-
int res = 0;
237-
if (_Dtest(&x) == _NANCODE || _Dtest(&y) == _NANCODE) {
238-
// '0' means unordered.
239-
return res;
240-
}
241-
242-
if (x < y)
243-
res |= _FP_LT;
244-
else if (x > y)
245-
res |= _FP_GT;
246-
else
247-
res |= _FP_EQ;
248-
249-
return res;
250-
}
251-
252-
// Returns 0, if the sign bit is not set, and non-zero otherwise.
253-
DEVICE_EXTERN_C_INLINE
254-
int _dsign(double x) { return DSIGN(x); }
255-
256-
// fpclassify() equivalent with a pointer argument.
257-
DEVICE_EXTERN_C_INLINE
258-
short _dtest(double *px) {
259-
switch (_Dtest(px)) {
260-
case _DENORM:
261-
return FP_SUBNORMAL;
262-
case _FINITE:
263-
return FP_NORMAL;
264-
case _INFCODE:
265-
return FP_INFINITE;
266-
case _NANCODE:
267-
return FP_NAN;
268-
}
269-
270-
return FP_ZERO;
271-
}
272-
273-
DEVICE_EXTERN_C_INLINE
274-
short _Dnorm(_Dval *ps) { // normalize double fraction
275-
short xchar;
276-
unsigned short sign = (unsigned short)(ps->_Sh[_D0] & _DSIGN);
277-
278-
xchar = 1;
279-
if ((ps->_Sh[_D0] &= _DFRAC) != 0 || ps->_Sh[_D1] || ps->_Sh[_D2] ||
280-
ps->_Sh[_D3]) { // nonzero, scale
281-
for (; ps->_Sh[_D0] == 0; xchar -= 16) { // shift left by 16
282-
ps->_Sh[_D0] = ps->_Sh[_D1];
283-
ps->_Sh[_D1] = ps->_Sh[_D2];
284-
ps->_Sh[_D2] = ps->_Sh[_D3];
285-
ps->_Sh[_D3] = 0;
286-
}
287-
for (; ps->_Sh[_D0] < 1 << _DOFF; --xchar) { // shift left by 1
288-
ps->_Sh[_D0] = (unsigned short)(ps->_Sh[_D0] << 1 | ps->_Sh[_D1] >> 15);
289-
ps->_Sh[_D1] = (unsigned short)(ps->_Sh[_D1] << 1 | ps->_Sh[_D2] >> 15);
290-
ps->_Sh[_D2] = (unsigned short)(ps->_Sh[_D2] << 1 | ps->_Sh[_D3] >> 15);
291-
ps->_Sh[_D3] <<= 1;
292-
}
293-
for (; 1 << (_DOFF + 1) <= ps->_Sh[_D0]; ++xchar) { // shift right by 1
294-
ps->_Sh[_D3] = (unsigned short)(ps->_Sh[_D3] >> 1 | ps->_Sh[_D2] << 15);
295-
ps->_Sh[_D2] = (unsigned short)(ps->_Sh[_D2] >> 1 | ps->_Sh[_D1] << 15);
296-
ps->_Sh[_D1] = (unsigned short)(ps->_Sh[_D1] >> 1 | ps->_Sh[_D0] << 15);
297-
ps->_Sh[_D0] >>= 1;
298-
}
299-
ps->_Sh[_D0] &= _DFRAC;
300-
}
301-
ps->_Sh[_D0] |= sign;
302-
return xchar;
303-
}
304-
305-
DEVICE_EXTERN_C_INLINE
306-
short _Dscale(double *px, long lexp) { // scale *px by 2^xexp with checking
307-
_Dval *ps = (_Dval *)(char *)px;
308-
_Dconst _Inf = {INIT(_DMAX << _DOFF)};
309-
short xchar = (short)((ps->_Sh[_D0] & _DMASK) >> _DOFF);
310-
311-
if (xchar == _DMAX)
312-
return (short)((ps->_Sh[_D0] & _DFRAC) != 0 || ps->_Sh[_D1] != 0 ||
313-
ps->_Sh[_D2] != 0 || ps->_Sh[_D3] != 0
314-
? _NANCODE
315-
: _INFCODE);
316-
if (xchar == 0 && 0 < (xchar = _Dnorm(ps)))
317-
return 0;
318-
319-
short ret = 0;
320-
if (0 < lexp && _DMAX - xchar <= lexp) { // overflow, return +/-INF
321-
*px = ps->_Sh[_D0] & _DSIGN ? -_Inf._Double : _Inf._Double;
322-
ret = _INFCODE;
323-
} else if (-xchar < lexp) { // finite result, repack
324-
ps->_Sh[_D0] =
325-
(unsigned short)(ps->_Sh[_D0] & ~_DMASK | (lexp + xchar) << _DOFF);
326-
ret = _FINITE;
327-
} else { // denormalized, scale
328-
unsigned short sign = (unsigned short)(ps->_Sh[_D0] & _DSIGN);
329-
330-
ps->_Sh[_D0] = (unsigned short)(1 << _DOFF | ps->_Sh[_D0] & _DFRAC);
331-
lexp += xchar - 1;
332-
if (lexp < -(48 + 1 + _DOFF) ||
333-
0 <= lexp) { // certain underflow, return +/-0
334-
ps->_Sh[_D0] = sign;
335-
ps->_Sh[_D1] = 0;
336-
ps->_Sh[_D2] = 0;
337-
ps->_Sh[_D3] = 0;
338-
ret = 0;
339-
} else { // nonzero, align fraction
340-
short xexp = (short)lexp;
341-
unsigned short psx = 0;
342-
ret = _FINITE;
343-
344-
for (; xexp <= -16; xexp += 16) { // scale by words
345-
psx = ps->_Sh[_D3] | (psx != 0 ? 1 : 0);
346-
ps->_Sh[_D3] = ps->_Sh[_D2];
347-
ps->_Sh[_D2] = ps->_Sh[_D1];
348-
ps->_Sh[_D1] = ps->_Sh[_D0];
349-
ps->_Sh[_D0] = 0;
350-
}
351-
if ((xexp = (short)-xexp) != 0) { // scale by bits
352-
psx = (ps->_Sh[_D3] << (16 - xexp)) | (psx != 0 ? 1 : 0);
353-
ps->_Sh[_D3] = (unsigned short)(ps->_Sh[_D3] >> xexp |
354-
ps->_Sh[_D2] << (16 - xexp));
355-
ps->_Sh[_D2] = (unsigned short)(ps->_Sh[_D2] >> xexp |
356-
ps->_Sh[_D1] << (16 - xexp));
357-
ps->_Sh[_D1] = (unsigned short)(ps->_Sh[_D1] >> xexp |
358-
ps->_Sh[_D0] << (16 - xexp));
359-
ps->_Sh[_D0] >>= xexp;
360-
}
361-
362-
ps->_Sh[_D0] |= sign;
363-
if ((0x8000 < psx || 0x8000 == psx && (ps->_Sh[_D3] & 0x0001) != 0) &&
364-
(++ps->_Sh[_D3] & 0xffff) == 0 && (++ps->_Sh[_D2] & 0xffff) == 0 &&
365-
(++ps->_Sh[_D1] & 0xffff) == 0)
366-
++ps->_Sh[_D0]; // round up
367-
else if (ps->_Sh[_D0] == sign && ps->_Sh[_D1] == 0 && ps->_Sh[_D2] == 0 &&
368-
ps->_Sh[_D3] == 0)
369-
ret = 0;
370-
}
371-
}
372-
return ret;
373-
}
374-
375-
DEVICE_EXTERN_C_INLINE
376-
short _Exp(double *px, double y,
377-
short eoff) { // compute y * e^(*px), (*px) finite, |y| not huge
378-
static const double invln2 = 1.4426950408889634073599246810018921;
379-
static const double c1 = 22713.0 / 32768.0;
380-
static const double c2 = 1.4286068203094172321214581765680755e-6;
381-
static const double p[] = {1.0, 420.30235984910635, 15132.70094680474802};
382-
static const double q[] = {30.01511290683317, 3362.72154416553028,
383-
30265.40189360949691};
384-
385-
_Dconst _Eps = {INIT((_DBIAS - NBITS - 1) << _DOFF)};
386-
_Dconst _Inf = {INIT(_DMAX << _DOFF)};
387-
short ret = 0;
388-
if (*px < -HUGE_EXP || y == 0.0) // certain underflow
389-
*px = __spirv_ocl_copysign(0.0, y);
390-
else if (HUGE_EXP < *px) { // certain overflow
391-
*px = __spirv_ocl_copysign(_Inf._Double, y);
392-
ret = _INFCODE;
393-
} else { // xexp won't overflow
394-
double g = *px * invln2;
395-
short xexp = (short)(g + (g < 0.0 ? -0.5 : +0.5));
396-
g = xexp;
397-
g = (*px - g * c1) - g * c2;
398-
if (-_Eps._Double < g && g < _Eps._Double)
399-
*px = y;
400-
else { // g * g worth computing
401-
const double z = g * g;
402-
const double w = (q[0] * z + q[1]) * z + q[2];
403-
404-
g *= (z + p[1]) * z + p[2];
405-
*px = (w + g) / (w - g) * 2.0 * y;
406-
--xexp;
407-
}
408-
ret = _Dscale(px, (long)xexp + eoff);
409-
}
410-
411-
return ret;
412-
}
413-
414-
DEVICE_EXTERN_C_INLINE
415-
double _Cosh(double x, double y) { // compute y * cosh(x), |y| <= 1
416-
switch (_Dtest(&x)) { // test for special codes
417-
case _NANCODE:
418-
case _INFCODE:
419-
return x;
420-
case 0:
421-
return y;
422-
default: // finite
423-
if (y == 0.0)
424-
return y;
425-
426-
if (x < 0.0)
427-
x = -x;
428-
429-
if (x < _Xbig) { // worth adding in exp(-x)
430-
_Exp(&x, 1.0, -1);
431-
return y * (x + 0.25 / x);
432-
}
433-
_Exp(&x, y, -1);
434-
return x;
435-
}
436-
}
437-
438-
DEVICE_EXTERN_C_INLINE
439-
double _Poly(double x, const double *tab, int n) { // compute polynomial
440-
double y;
441-
442-
for (y = *tab; 0 <= --n;)
443-
y = y * x + *++tab;
444-
445-
return y;
446-
}
447-
448-
DEVICE_EXTERN_C_INLINE
449-
double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1
450-
451-
short neg;
452-
// coefficients
453-
static const double p[] = {0.0000000001632881, 0.0000000250483893,
454-
0.0000027557344615, 0.0001984126975233,
455-
0.0083333333334816, 0.1666666666666574,
456-
1.0000000000000001};
457-
_Dconst _Rteps = {INIT((_DBIAS - NBITS / 2) << _DOFF)};
458-
switch (_Dtest(&x)) { // test for special codes
459-
case _NANCODE:
460-
return x;
461-
case _INFCODE:
462-
return y != 0.0 ? x : DSIGN(x) ? -y : y;
463-
case 0:
464-
return x * y;
465-
default: // finite
466-
if (y == 0.0)
467-
return x < 0.0 ? -y : y;
468-
469-
if (x < 0.0) {
470-
x = -x;
471-
neg = 1;
472-
} else
473-
neg = 0;
474-
475-
if (x < _Rteps._Double)
476-
x *= y; // x tiny
477-
else if (x < 1.0) {
478-
double w = x * x;
479-
480-
x += x * w * _Poly(w, p, 5);
481-
x *= y;
482-
} else if (x < _Xbig) { // worth adding in exp(-x)
483-
_Exp(&x, 1.0, -1);
484-
x = y * (x - 0.25 / x);
485-
} else
486-
_Exp(&x, y, -1);
487-
488-
return neg ? -x : x;
489-
}
490-
}
491-
#endif // defined(_WIN32)
492184
#endif // __SPIR__ || __SPIRV__

0 commit comments

Comments
 (0)