Skip to content

Commit 9919301

Browse files
[Offload][Conformance] Add randomized tests for double-precision math functions (#155003)
This patch adds a set of randomized conformance tests for double-precision math functions. The functions included in this set were selected based on the following criteria:

- An implementation exists in `libc/src/math/generic` (i.e., it is not just a wrapper around a compiler built-in).
- The corresponding LLVM CPU libm implementation is correctly rounded.
- The function is listed in Table 68 of the OpenCL C Specification v3.0.19.
1 parent 2ca0526 commit 9919301

20 files changed

+1303
-5
lines changed

offload/unittests/Conformance/device_code/CUDAMath.cpp

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,18 @@ static inline float powfRoundedExponent(float Base, float Exponent) {
3030
return __nv_powf(Base, __nv_roundf(Exponent));
3131
}
3232

33+
/// Calls __nv_sincos on X and returns the value written through the first
/// output pointer (the sine slot); the value written through the second
/// (cosine) pointer is discarded.
static inline double sincosSin(double X) {
  double Sine;
  double Cosine;
  __nv_sincos(X, &Sine, &Cosine);
  return Sine;
}
38+
39+
/// Calls __nv_sincos on X and returns the value written through the second
/// output pointer (the cosine slot); the value written through the first
/// (sine) pointer is discarded.
static inline double sincosCos(double X) {
  double Sine;
  double Cosine;
  __nv_sincos(X, &Sine, &Cosine);
  return Cosine;
}
44+
3345
static inline float sincosfSin(float X) {
3446
float SinX, CosX;
3547
__nv_sincosf(X, &SinX, &CosX);
@@ -48,6 +60,11 @@ static inline float sincosfCos(float X) {
4860

4961
extern "C" {
5062

63+
/// Kernel entry point for double-precision acos: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_acos.
__gpu_kernel void
acosKernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_acos>(NumElements, Out, X);
}
67+
5168
__gpu_kernel void acosfKernel(const float *X, float *Out,
5269
size_t NumElements) noexcept {
5370
runKernelBody<__nv_acosf>(NumElements, Out, X);
@@ -58,6 +75,11 @@ __gpu_kernel void acoshfKernel(const float *X, float *Out,
5875
runKernelBody<__nv_acoshf>(NumElements, Out, X);
5976
}
6077

78+
/// Kernel entry point for double-precision asin: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_asin.
__gpu_kernel void
asinKernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_asin>(NumElements, Out, X);
}
82+
6183
__gpu_kernel void asinfKernel(const float *X, float *Out,
6284
size_t NumElements) noexcept {
6385
runKernelBody<__nv_asinf>(NumElements, Out, X);
@@ -83,11 +105,21 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out,
83105
runKernelBody<__nv_atanhf>(NumElements, Out, X);
84106
}
85107

108+
/// Kernel entry point for double-precision cbrt: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_cbrt.
__gpu_kernel void
cbrtKernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_cbrt>(NumElements, Out, X);
}
112+
86113
/// Kernel entry point for single-precision cbrt: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_cbrtf.
__gpu_kernel void
cbrtfKernel(const float *X, float *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_cbrtf>(NumElements, Out, X);
}
90117

118+
/// Kernel entry point for double-precision cos: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_cos.
__gpu_kernel void
cosKernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_cos>(NumElements, Out, X);
}
122+
91123
__gpu_kernel void cosfKernel(const float *X, float *Out,
92124
size_t NumElements) noexcept {
93125
runKernelBody<__nv_cosf>(NumElements, Out, X);
@@ -108,27 +140,52 @@ __gpu_kernel void erffKernel(const float *X, float *Out,
108140
runKernelBody<__nv_erff>(NumElements, Out, X);
109141
}
110142

143+
/// Kernel entry point for double-precision exp: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_exp.
__gpu_kernel void
expKernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_exp>(NumElements, Out, X);
}
147+
111148
/// Kernel entry point for single-precision exp: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_expf.
__gpu_kernel void
expfKernel(const float *X, float *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_expf>(NumElements, Out, X);
}
115152

153+
/// Kernel entry point for double-precision exp10: forwards NumElements, Out
/// and X to runKernelBody instantiated with __nv_exp10.
__gpu_kernel void
exp10Kernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_exp10>(NumElements, Out, X);
}
157+
116158
/// Kernel entry point for single-precision exp10: forwards NumElements, Out
/// and X to runKernelBody instantiated with __nv_exp10f.
__gpu_kernel void
exp10fKernel(const float *X, float *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_exp10f>(NumElements, Out, X);
}
120162

163+
/// Kernel entry point for double-precision exp2: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_exp2.
__gpu_kernel void
exp2Kernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_exp2>(NumElements, Out, X);
}
167+
121168
/// Kernel entry point for single-precision exp2: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_exp2f.
__gpu_kernel void
exp2fKernel(const float *X, float *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_exp2f>(NumElements, Out, X);
}
125172

173+
/// Kernel entry point for double-precision expm1: forwards NumElements, Out
/// and X to runKernelBody instantiated with __nv_expm1.
__gpu_kernel void
expm1Kernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_expm1>(NumElements, Out, X);
}
177+
126178
/// Kernel entry point for single-precision expm1: forwards NumElements, Out
/// and X to runKernelBody instantiated with __nv_expm1f.
__gpu_kernel void
expm1fKernel(const float *X, float *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_expm1f>(NumElements, Out, X);
}
130182

131-
__gpu_kernel void hypotfKernel(const float *X, float *Y, float *Out,
183+
/// Kernel entry point for double-precision hypot: forwards NumElements, Out
/// and the two read-only operand pointers X and Y to runKernelBody
/// instantiated with __nv_hypot.
__gpu_kernel void hypotKernel(const double *X,
                              const double *Y,
                              double *Out,
                              size_t NumElements) noexcept {
  runKernelBody<__nv_hypot>(NumElements, Out, X, Y);
}
187+
188+
/// Kernel entry point for single-precision hypot: forwards NumElements, Out
/// and the two read-only operand pointers X and Y to runKernelBody
/// instantiated with __nv_hypotf.
__gpu_kernel void hypotfKernel(const float *X,
                               const float *Y,
                               float *Out,
                               size_t NumElements) noexcept {
  runKernelBody<__nv_hypotf>(NumElements, Out, X, Y);
}
@@ -143,16 +200,31 @@ __gpu_kernel void logfKernel(const float *X, float *Out,
143200
runKernelBody<__nv_logf>(NumElements, Out, X);
144201
}
145202

203+
/// Kernel entry point for double-precision log10: forwards NumElements, Out
/// and X to runKernelBody instantiated with __nv_log10.
__gpu_kernel void
log10Kernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_log10>(NumElements, Out, X);
}
207+
146208
/// Kernel entry point for single-precision log10: forwards NumElements, Out
/// and X to runKernelBody instantiated with __nv_log10f.
__gpu_kernel void
log10fKernel(const float *X, float *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_log10f>(NumElements, Out, X);
}
150212

213+
/// Kernel entry point for double-precision log1p: forwards NumElements, Out
/// and X to runKernelBody instantiated with __nv_log1p.
__gpu_kernel void
log1pKernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_log1p>(NumElements, Out, X);
}
217+
151218
/// Kernel entry point for single-precision log1p: forwards NumElements, Out
/// and X to runKernelBody instantiated with __nv_log1pf.
__gpu_kernel void
log1pfKernel(const float *X, float *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_log1pf>(NumElements, Out, X);
}
155222

223+
/// Kernel entry point for double-precision log2: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_log2.
__gpu_kernel void
log2Kernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_log2>(NumElements, Out, X);
}
227+
156228
__gpu_kernel void log2fKernel(const float *X, float *Out,
157229
size_t NumElements) noexcept {
158230
runKernelBody<__nv_log2f>(NumElements, Out, X);
@@ -169,11 +241,26 @@ __gpu_kernel void powfRoundedExponentKernel(const float *X, float *Y,
169241
runKernelBody<powfRoundedExponent>(NumElements, Out, X, Y);
170242
}
171243

244+
/// Kernel entry point for double-precision sin: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_sin.
__gpu_kernel void
sinKernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_sin>(NumElements, Out, X);
}
248+
172249
/// Kernel entry point for single-precision sin: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_sinf.
__gpu_kernel void
sinfKernel(const float *X, float *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_sinf>(NumElements, Out, X);
}
176253

254+
/// Kernel entry point that exercises the sincosSin wrapper: forwards
/// NumElements, Out and X to runKernelBody instantiated with sincosSin.
__gpu_kernel void
sincosSinKernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<sincosSin>(NumElements, Out, X);
}
258+
259+
/// Kernel entry point that exercises the sincosCos wrapper: forwards
/// NumElements, Out and X to runKernelBody instantiated with sincosCos.
__gpu_kernel void
sincosCosKernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<sincosCos>(NumElements, Out, X);
}
263+
177264
__gpu_kernel void sincosfSinKernel(const float *X, float *Out,
178265
size_t NumElements) noexcept {
179266
runKernelBody<sincosfSin>(NumElements, Out, X);
@@ -194,6 +281,11 @@ __gpu_kernel void sinpifKernel(const float *X, float *Out,
194281
runKernelBody<__nv_sinpif>(NumElements, Out, X);
195282
}
196283

284+
/// Kernel entry point for double-precision tan: forwards NumElements, Out and
/// X to runKernelBody instantiated with __nv_tan.
__gpu_kernel void
tanKernel(const double *X, double *Out, size_t NumElements) noexcept {
  runKernelBody<__nv_tan>(NumElements, Out, X);
}
288+
197289
__gpu_kernel void tanfKernel(const float *X, float *Out,
198290
size_t NumElements) noexcept {
199291
runKernelBody<__nv_tanf>(NumElements, Out, X);

offload/unittests/Conformance/device_code/DeviceAPIs.hpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,34 +48,49 @@ extern const inline uint32_t __oclc_ISA_version = 9000;
4848

4949
extern "C" {

// Prototypes for the __nv_-prefixed math routines referenced by the CUDA
// device code. They are declared here only; the definitions are presumably
// supplied by the CUDA device math library at link time (confirm against the
// build configuration).
double __nv_acos(double X);
float __nv_acosf(float X);
float __nv_acoshf(float X);
double __nv_asin(double X);
float __nv_asinf(float X);
float __nv_asinhf(float X);
float __nv_atanf(float X);
float __nv_atan2f(float, float);
float __nv_atanhf(float X);
double __nv_cbrt(double X);
float __nv_cbrtf(float X);
double __nv_cos(double X);
float __nv_cosf(float X);
float __nv_coshf(float X);
float __nv_cospif(float X);
float __nv_erff(float X);
double __nv_exp(double X);
float __nv_expf(float X);
double __nv_exp10(double X);
float __nv_exp10f(float X);
double __nv_exp2(double X);
float __nv_exp2f(float X);
double __nv_expm1(double X);
float __nv_expm1f(float X);
double __nv_hypot(double, double);
float __nv_hypotf(float, float);
double __nv_log(double X);
float __nv_logf(float X);
double __nv_log10(double X);
float __nv_log10f(float X);
double __nv_log1p(double X);
float __nv_log1pf(float X);
double __nv_log2(double X);
float __nv_log2f(float X);
float __nv_powf(float Base, float Exponent);
float __nv_roundf(float X);
double __nv_sin(double X);
float __nv_sinf(float X);
// The sincos variants return nothing and write both results through the two
// pointer arguments (sine slot first, cosine slot second, as the callers in
// CUDAMath.cpp use them).
void __nv_sincos(double X, double *SinOut, double *CosOut);
void __nv_sincosf(float X, float *SinOut, float *CosOut);
float __nv_sinhf(float X);
float __nv_sinpif(float X);
double __nv_tan(double X);
float __nv_tanf(float X);
float __nv_tanhf(float X);
} // extern "C"
@@ -86,34 +101,49 @@ float __nv_tanhf(float);
86101

87102
extern "C" {

// Prototypes for the __ocml_-prefixed math routines; the _f64 suffix marks the
// double overloads and _f32 the float overloads. They are declared here only;
// the definitions are presumably supplied by the AMD device libraries at link
// time (confirm against the build configuration).
double __ocml_acos_f64(double X);
float __ocml_acos_f32(float X);
float __ocml_acosh_f32(float X);
double __ocml_asin_f64(double X);
float __ocml_asin_f32(float X);
float __ocml_asinh_f32(float X);
float __ocml_atan_f32(float X);
float __ocml_atan2_f32(float, float);
float __ocml_atanh_f32(float X);
double __ocml_cbrt_f64(double X);
float __ocml_cbrt_f32(float X);
double __ocml_cos_f64(double X);
float __ocml_cos_f32(float X);
float __ocml_cosh_f32(float X);
float __ocml_cospi_f32(float X);
float __ocml_erf_f32(float X);
double __ocml_exp_f64(double X);
float __ocml_exp_f32(float X);
double __ocml_exp10_f64(double X);
float __ocml_exp10_f32(float X);
double __ocml_exp2_f64(double X);
float __ocml_exp2_f32(float X);
double __ocml_expm1_f64(double X);
float __ocml_expm1_f32(float X);
double __ocml_hypot_f64(double, double);
float __ocml_hypot_f32(float, float);
double __ocml_log_f64(double X);
float __ocml_log_f32(float X);
double __ocml_log10_f64(double X);
float __ocml_log10_f32(float X);
double __ocml_log1p_f64(double X);
float __ocml_log1p_f32(float X);
double __ocml_log2_f64(double X);
float __ocml_log2_f32(float X);
float __ocml_pow_f32(float, float);
float __ocml_round_f32(float X);
double __ocml_sin_f64(double X);
float __ocml_sin_f32(float X);
// Unlike the other unary routines, the sincos variants take one extra pointer
// argument in addition to returning a value.
double __ocml_sincos_f64(double X, double *);
float __ocml_sincos_f32(float X, float *);
float __ocml_sinh_f32(float X);
float __ocml_sinpi_f32(float X);
double __ocml_tan_f64(double X);
float __ocml_tan_f32(float X);
float __ocml_tanh_f32(float X);
} // extern "C"

0 commit comments

Comments
 (0)