2323#define __DEVICE__ __device__ inline
2424#endif
2525
26- // To make the algorithms available for C and C++ in CUDA and OpenMP we select
27- // different but equivalent function versions. TODO: For OpenMP we currently
28- // select the native builtins as the overload support for templates is lacking.
29- #if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__)
30- #define _ISNANd std::isnan
31- #define _ISNANf std::isnan
32- #define _ISINFd std::isinf
33- #define _ISINFf std::isinf
34- #define _ISFINITEd std::isfinite
35- #define _ISFINITEf std::isfinite
36- #define _COPYSIGNd std::copysign
37- #define _COPYSIGNf std::copysign
38- #define _SCALBNd std::scalbn
39- #define _SCALBNf std::scalbn
40- #define _ABSd std::abs
41- #define _ABSf std::abs
42- #define _LOGBd std::logb
43- #define _LOGBf std::logb
44- // Rather than pulling in std::max from algorithm everytime, use available ::max.
45- #define _fmaxd max
46- #define _fmaxf max
47- #else
48- #ifdef __AMDGCN__
49- #define _ISNANd __ocml_isnan_f64
50- #define _ISNANf __ocml_isnan_f32
51- #define _ISINFd __ocml_isinf_f64
52- #define _ISINFf __ocml_isinf_f32
53- #define _ISFINITEd __ocml_isfinite_f64
54- #define _ISFINITEf __ocml_isfinite_f32
55- #define _COPYSIGNd __ocml_copysign_f64
56- #define _COPYSIGNf __ocml_copysign_f32
57- #define _SCALBNd __ocml_scalbn_f64
58- #define _SCALBNf __ocml_scalbn_f32
59- #define _ABSd __ocml_fabs_f64
60- #define _ABSf __ocml_fabs_f32
61- #define _LOGBd __ocml_logb_f64
62- #define _LOGBf __ocml_logb_f32
63- #define _fmaxd __ocml_fmax_f64
64- #define _fmaxf __ocml_fmax_f32
65- #else
66- #define _ISNANd __nv_isnand
67- #define _ISNANf __nv_isnanf
68- #define _ISINFd __nv_isinfd
69- #define _ISINFf __nv_isinff
70- #define _ISFINITEd __nv_isfinited
71- #define _ISFINITEf __nv_finitef
72- #define _COPYSIGNd __nv_copysign
73- #define _COPYSIGNf __nv_copysignf
26+ #ifdef __NVPTX__
27+ // FIXME: NVPTX should use generic builtins.
7428#define _SCALBNd __nv_scalbn
7529#define _SCALBNf __nv_scalbnf
76- #define _ABSd __nv_fabs
77- #define _ABSf __nv_fabsf
7830#define _LOGBd __nv_logb
7931#define _LOGBf __nv_logbf
80- #define _fmaxd __nv_fmax
81- #define _fmaxf __nv_fmaxf
82- #endif
32+ #else
33+ #define _SCALBNd __builtin_scalbn
34+ #define _SCALBNf __builtin_scalbnf
35+ #define _LOGBd __builtin_logb
36+ #define _LOGBf __builtin_logbf
8337#endif
8438
8539#if defined(__cplusplus)
@@ -95,36 +49,36 @@ __DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
9549 double _Complex z;
9650 __real__ (z) = __ac - __bd;
9751 __imag__ (z) = __ad + __bc;
98- if (_ISNANd (__real__ (z)) && _ISNANd (__imag__ (z))) {
52+ if (__builtin_isnan (__real__ (z)) && __builtin_isnan (__imag__ (z))) {
9953 int __recalc = 0 ;
100- if (_ISINFd (__a) || _ISINFd (__b)) {
101- __a = _COPYSIGNd ( _ISINFd (__a) ? 1 : 0 , __a);
102- __b = _COPYSIGNd ( _ISINFd (__b) ? 1 : 0 , __b);
103- if (_ISNANd (__c))
104- __c = _COPYSIGNd (0 , __c);
105- if (_ISNANd (__d))
106- __d = _COPYSIGNd (0 , __d);
54+ if (__builtin_isinf (__a) || __builtin_isinf (__b)) {
55+ __a = __builtin_copysign ( __builtin_isinf (__a) ? 1 : 0 , __a);
56+ __b = __builtin_copysign ( __builtin_isinf (__b) ? 1 : 0 , __b);
57+ if (__builtin_isnan (__c))
58+ __c = __builtin_copysign (0 , __c);
59+ if (__builtin_isnan (__d))
60+ __d = __builtin_copysign (0 , __d);
10761 __recalc = 1 ;
10862 }
109- if (_ISINFd (__c) || _ISINFd (__d)) {
110- __c = _COPYSIGNd ( _ISINFd (__c) ? 1 : 0 , __c);
111- __d = _COPYSIGNd ( _ISINFd (__d) ? 1 : 0 , __d);
112- if (_ISNANd (__a))
113- __a = _COPYSIGNd (0 , __a);
114- if (_ISNANd (__b))
115- __b = _COPYSIGNd (0 , __b);
63+ if (__builtin_isinf (__c) || __builtin_isinf (__d)) {
64+ __c = __builtin_copysign ( __builtin_isinf (__c) ? 1 : 0 , __c);
65+ __d = __builtin_copysign ( __builtin_isinf (__d) ? 1 : 0 , __d);
66+ if (__builtin_isnan (__a))
67+ __a = __builtin_copysign (0 , __a);
68+ if (__builtin_isnan (__b))
69+ __b = __builtin_copysign (0 , __b);
11670 __recalc = 1 ;
11771 }
118- if (!__recalc &&
119- ( _ISINFd (__ac) || _ISINFd (__bd) || _ISINFd (__ad) || _ISINFd (__bc))) {
120- if (_ISNANd (__a))
121- __a = _COPYSIGNd (0 , __a);
122- if (_ISNANd (__b))
123- __b = _COPYSIGNd (0 , __b);
124- if (_ISNANd (__c))
125- __c = _COPYSIGNd (0 , __c);
126- if (_ISNANd (__d))
127- __d = _COPYSIGNd (0 , __d);
72+ if (!__recalc && ( __builtin_isinf (__ac) || __builtin_isinf (__bd) ||
73+ __builtin_isinf (__ad) || __builtin_isinf (__bc))) {
74+ if (__builtin_isnan (__a))
75+ __a = __builtin_copysign (0 , __a);
76+ if (__builtin_isnan (__b))
77+ __b = __builtin_copysign (0 , __b);
78+ if (__builtin_isnan (__c))
79+ __c = __builtin_copysign (0 , __c);
80+ if (__builtin_isnan (__d))
81+ __d = __builtin_copysign (0 , __d);
12882 __recalc = 1 ;
12983 }
13084 if (__recalc) {
@@ -145,36 +99,36 @@ __DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
14599 float _Complex z;
146100 __real__ (z) = __ac - __bd;
147101 __imag__ (z) = __ad + __bc;
148- if (_ISNANf (__real__ (z)) && _ISNANf (__imag__ (z))) {
102+ if (__builtin_isnan (__real__ (z)) && __builtin_isnan (__imag__ (z))) {
149103 int __recalc = 0 ;
150- if (_ISINFf (__a) || _ISINFf (__b)) {
151- __a = _COPYSIGNf ( _ISINFf (__a) ? 1 : 0 , __a);
152- __b = _COPYSIGNf ( _ISINFf (__b) ? 1 : 0 , __b);
153- if (_ISNANf (__c))
154- __c = _COPYSIGNf (0 , __c);
155- if (_ISNANf (__d))
156- __d = _COPYSIGNf (0 , __d);
104+ if (__builtin_isinf (__a) || __builtin_isinf (__b)) {
105+ __a = __builtin_copysignf ( __builtin_isinf (__a) ? 1 : 0 , __a);
106+ __b = __builtin_copysignf ( __builtin_isinf (__b) ? 1 : 0 , __b);
107+ if (__builtin_isnan (__c))
108+ __c = __builtin_copysignf (0 , __c);
109+ if (__builtin_isnan (__d))
110+ __d = __builtin_copysignf (0 , __d);
157111 __recalc = 1 ;
158112 }
159- if (_ISINFf (__c) || _ISINFf (__d)) {
160- __c = _COPYSIGNf ( _ISINFf (__c) ? 1 : 0 , __c);
161- __d = _COPYSIGNf ( _ISINFf (__d) ? 1 : 0 , __d);
162- if (_ISNANf (__a))
163- __a = _COPYSIGNf (0 , __a);
164- if (_ISNANf (__b))
165- __b = _COPYSIGNf (0 , __b);
113+ if (__builtin_isinf (__c) || __builtin_isinf (__d)) {
114+ __c = __builtin_copysignf ( __builtin_isinf (__c) ? 1 : 0 , __c);
115+ __d = __builtin_copysignf ( __builtin_isinf (__d) ? 1 : 0 , __d);
116+ if (__builtin_isnan (__a))
117+ __a = __builtin_copysignf (0 , __a);
118+ if (__builtin_isnan (__b))
119+ __b = __builtin_copysignf (0 , __b);
166120 __recalc = 1 ;
167121 }
168- if (!__recalc &&
169- ( _ISINFf (__ac) || _ISINFf (__bd) || _ISINFf (__ad) || _ISINFf (__bc))) {
170- if (_ISNANf (__a))
171- __a = _COPYSIGNf (0 , __a);
172- if (_ISNANf (__b))
173- __b = _COPYSIGNf (0 , __b);
174- if (_ISNANf (__c))
175- __c = _COPYSIGNf (0 , __c);
176- if (_ISNANf (__d))
177- __d = _COPYSIGNf (0 , __d);
122+ if (!__recalc && ( __builtin_isinf (__ac) || __builtin_isinf (__bd) ||
123+ __builtin_isinf (__ad) || __builtin_isinf (__bc))) {
124+ if (__builtin_isnan (__a))
125+ __a = __builtin_copysignf (0 , __a);
126+ if (__builtin_isnan (__b))
127+ __b = __builtin_copysignf (0 , __b);
128+ if (__builtin_isnan (__c))
129+ __c = __builtin_copysignf (0 , __c);
130+ if (__builtin_isnan (__d))
131+ __d = __builtin_copysignf (0 , __d);
178132 __recalc = 1 ;
179133 }
180134 if (__recalc) {
@@ -191,8 +145,9 @@ __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
191145 // Can't use std::max, because that's defined in <algorithm>, and we don't
192146 // want to pull that in for every compile. The CUDA headers define
193147 // ::max(float, float) and ::max(double, double), which is sufficient for us.
194- double __logbw = _LOGBd (_fmaxd (_ABSd (__c), _ABSd (__d)));
195- if (_ISFINITEd (__logbw)) {
148+ double __logbw =
149+ _LOGBd (__builtin_fmax (__builtin_fabs (__c), __builtin_fabs (__d)));
150+ if (__builtin_isfinite (__logbw)) {
196151 __ilogbw = (int )__logbw;
197152 __c = _SCALBNd (__c, -__ilogbw);
198153 __d = _SCALBNd (__d, -__ilogbw);
@@ -201,20 +156,20 @@ __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
201156 double _Complex z;
202157 __real__ (z) = _SCALBNd ((__a * __c + __b * __d) / __denom, -__ilogbw);
203158 __imag__ (z) = _SCALBNd ((__b * __c - __a * __d) / __denom, -__ilogbw);
204- if (_ISNANd (__real__ (z)) && _ISNANd (__imag__ (z))) {
205- if ((__denom == 0.0 ) && (!_ISNANd (__a) || !_ISNANd (__b))) {
206- __real__ (z) = _COPYSIGNd (__builtin_huge_val (), __c) * __a;
207- __imag__ (z) = _COPYSIGNd (__builtin_huge_val (), __c) * __b;
208- } else if ((_ISINFd (__a) || _ISINFd (__b)) && _ISFINITEd (__c ) &&
209- _ISFINITEd (__d)) {
210- __a = _COPYSIGNd ( _ISINFd (__a) ? 1.0 : 0.0 , __a);
211- __b = _COPYSIGNd ( _ISINFd (__b) ? 1.0 : 0.0 , __b);
159+ if (__builtin_isnan (__real__ (z)) && __builtin_isnan (__imag__ (z))) {
160+ if ((__denom == 0.0 ) && (!__builtin_isnan (__a) || !__builtin_isnan (__b))) {
161+ __real__ (z) = __builtin_copysign (__builtin_huge_val (), __c) * __a;
162+ __imag__ (z) = __builtin_copysign (__builtin_huge_val (), __c) * __b;
163+ } else if ((__builtin_isinf (__a) || __builtin_isinf (__b)) &&
164+ __builtin_isfinite (__c) && __builtin_isfinite (__d)) {
165+ __a = __builtin_copysign ( __builtin_isinf (__a) ? 1.0 : 0.0 , __a);
166+ __b = __builtin_copysign ( __builtin_isinf (__b) ? 1.0 : 0.0 , __b);
212167 __real__ (z) = __builtin_huge_val () * (__a * __c + __b * __d);
213168 __imag__ (z) = __builtin_huge_val () * (__b * __c - __a * __d);
214- } else if (_ISINFd (__logbw) && __logbw > 0.0 && _ISFINITEd (__a) &&
215- _ISFINITEd (__b)) {
216- __c = _COPYSIGNd ( _ISINFd (__c) ? 1.0 : 0.0 , __c);
217- __d = _COPYSIGNd ( _ISINFd (__d) ? 1.0 : 0.0 , __d);
169+ } else if (__builtin_isinf (__logbw) && __logbw > 0.0 &&
170+ __builtin_isfinite (__a) && __builtin_isfinite (__b)) {
171+ __c = __builtin_copysign ( __builtin_isinf (__c) ? 1.0 : 0.0 , __c);
172+ __d = __builtin_copysign ( __builtin_isinf (__d) ? 1.0 : 0.0 , __d);
218173 __real__ (z) = 0.0 * (__a * __c + __b * __d);
219174 __imag__ (z) = 0.0 * (__b * __c - __a * __d);
220175 }
@@ -224,8 +179,9 @@ __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
224179
225180__DEVICE__ float _Complex __divsc3 (float __a, float __b, float __c, float __d) {
226181 int __ilogbw = 0 ;
227- float __logbw = _LOGBf (_fmaxf (_ABSf (__c), _ABSf (__d)));
228- if (_ISFINITEf (__logbw)) {
182+ float __logbw =
183+ _LOGBf (__builtin_fmaxf (__builtin_fabsf (__c), __builtin_fabsf (__d)));
184+ if (__builtin_isfinite (__logbw)) {
229185 __ilogbw = (int )__logbw;
230186 __c = _SCALBNf (__c, -__ilogbw);
231187 __d = _SCALBNf (__d, -__ilogbw);
@@ -234,20 +190,20 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
234190 float _Complex z;
235191 __real__ (z) = _SCALBNf ((__a * __c + __b * __d) / __denom, -__ilogbw);
236192 __imag__ (z) = _SCALBNf ((__b * __c - __a * __d) / __denom, -__ilogbw);
237- if (_ISNANf (__real__ (z)) && _ISNANf (__imag__ (z))) {
238- if ((__denom == 0 ) && (!_ISNANf (__a) || !_ISNANf (__b))) {
239- __real__ (z) = _COPYSIGNf (__builtin_huge_valf (), __c) * __a;
240- __imag__ (z) = _COPYSIGNf (__builtin_huge_valf (), __c) * __b;
241- } else if ((_ISINFf (__a) || _ISINFf (__b)) && _ISFINITEf (__c ) &&
242- _ISFINITEf (__d)) {
243- __a = _COPYSIGNf ( _ISINFf (__a) ? 1 : 0 , __a);
244- __b = _COPYSIGNf ( _ISINFf (__b) ? 1 : 0 , __b);
193+ if (__builtin_isnan (__real__ (z)) && __builtin_isnan (__imag__ (z))) {
194+ if ((__denom == 0 ) && (!__builtin_isnan (__a) || !__builtin_isnan (__b))) {
195+ __real__ (z) = __builtin_copysignf (__builtin_huge_valf (), __c) * __a;
196+ __imag__ (z) = __builtin_copysignf (__builtin_huge_valf (), __c) * __b;
197+ } else if ((__builtin_isinf (__a) || __builtin_isinf (__b)) &&
198+ __builtin_isfinite (__c) && __builtin_isfinite (__d)) {
199+ __a = __builtin_copysignf ( __builtin_isinf (__a) ? 1 : 0 , __a);
200+ __b = __builtin_copysignf ( __builtin_isinf (__b) ? 1 : 0 , __b);
245201 __real__ (z) = __builtin_huge_valf () * (__a * __c + __b * __d);
246202 __imag__ (z) = __builtin_huge_valf () * (__b * __c - __a * __d);
247- } else if (_ISINFf (__logbw) && __logbw > 0 && _ISFINITEf (__a) &&
248- _ISFINITEf (__b)) {
249- __c = _COPYSIGNf ( _ISINFf (__c) ? 1 : 0 , __c);
250- __d = _COPYSIGNf ( _ISINFf (__d) ? 1 : 0 , __d);
203+ } else if (__builtin_isinf (__logbw) && __logbw > 0 &&
204+ __builtin_isfinite (__a) && __builtin_isfinite (__b)) {
205+ __c = __builtin_copysignf ( __builtin_isinf (__c) ? 1 : 0 , __c);
206+ __d = __builtin_copysignf ( __builtin_isinf (__d) ? 1 : 0 , __d);
251207 __real__ (z) = 0 * (__a * __c + __b * __d);
252208 __imag__ (z) = 0 * (__b * __c - __a * __d);
253209 }
@@ -259,22 +215,10 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
259215} // extern "C"
260216#endif
261217
262- #undef _ISNANd
263- #undef _ISNANf
264- #undef _ISINFd
265- #undef _ISINFf
266- #undef _COPYSIGNd
267- #undef _COPYSIGNf
268- #undef _ISFINITEd
269- #undef _ISFINITEf
270218#undef _SCALBNd
271219#undef _SCALBNf
272- #undef _ABSd
273- #undef _ABSf
274220#undef _LOGBd
275221#undef _LOGBf
276- #undef _fmaxd
277- #undef _fmaxf
278222
279223#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
280224#pragma omp end declare target
0 commit comments