@@ -30,6 +30,18 @@ static inline float powfRoundedExponent(float Base, float Exponent) {
3030 return __nv_powf (Base, __nv_roundf (Exponent));
3131}
3232
33+ static inline double sincosSin (double X) {
34+ double SinX, CosX;
35+ __nv_sincos (X, &SinX, &CosX);
36+ return SinX;
37+ }
38+
39+ static inline double sincosCos (double X) {
40+ double SinX, CosX;
41+ __nv_sincos (X, &SinX, &CosX);
42+ return CosX;
43+ }
44+
3345static inline float sincosfSin (float X) {
3446 float SinX, CosX;
3547 __nv_sincosf (X, &SinX, &CosX);
@@ -48,6 +60,11 @@ static inline float sincosfCos(float X) {
4860
4961extern " C" {
5062
63+ __gpu_kernel void acosKernel (const double *X, double *Out,
64+ size_t NumElements) noexcept {
65+ runKernelBody<__nv_acos>(NumElements, Out, X);
66+ }
67+
5168__gpu_kernel void acosfKernel (const float *X, float *Out,
5269 size_t NumElements) noexcept {
5370 runKernelBody<__nv_acosf>(NumElements, Out, X);
@@ -58,6 +75,11 @@ __gpu_kernel void acoshfKernel(const float *X, float *Out,
5875 runKernelBody<__nv_acoshf>(NumElements, Out, X);
5976}
6077
78+ __gpu_kernel void asinKernel (const double *X, double *Out,
79+ size_t NumElements) noexcept {
80+ runKernelBody<__nv_asin>(NumElements, Out, X);
81+ }
82+
6183__gpu_kernel void asinfKernel (const float *X, float *Out,
6284 size_t NumElements) noexcept {
6385 runKernelBody<__nv_asinf>(NumElements, Out, X);
@@ -83,11 +105,21 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out,
83105 runKernelBody<__nv_atanhf>(NumElements, Out, X);
84106}
85107
108+ __gpu_kernel void cbrtKernel (const double *X, double *Out,
109+ size_t NumElements) noexcept {
110+ runKernelBody<__nv_cbrt>(NumElements, Out, X);
111+ }
112+
86113__gpu_kernel void cbrtfKernel (const float *X, float *Out,
87114 size_t NumElements) noexcept {
88115 runKernelBody<__nv_cbrtf>(NumElements, Out, X);
89116}
90117
118+ __gpu_kernel void cosKernel (const double *X, double *Out,
119+ size_t NumElements) noexcept {
120+ runKernelBody<__nv_cos>(NumElements, Out, X);
121+ }
122+
91123__gpu_kernel void cosfKernel (const float *X, float *Out,
92124 size_t NumElements) noexcept {
93125 runKernelBody<__nv_cosf>(NumElements, Out, X);
@@ -108,27 +140,52 @@ __gpu_kernel void erffKernel(const float *X, float *Out,
108140 runKernelBody<__nv_erff>(NumElements, Out, X);
109141}
110142
143+ __gpu_kernel void expKernel (const double *X, double *Out,
144+ size_t NumElements) noexcept {
145+ runKernelBody<__nv_exp>(NumElements, Out, X);
146+ }
147+
111148__gpu_kernel void expfKernel (const float *X, float *Out,
112149 size_t NumElements) noexcept {
113150 runKernelBody<__nv_expf>(NumElements, Out, X);
114151}
115152
153+ __gpu_kernel void exp10Kernel (const double *X, double *Out,
154+ size_t NumElements) noexcept {
155+ runKernelBody<__nv_exp10>(NumElements, Out, X);
156+ }
157+
116158__gpu_kernel void exp10fKernel (const float *X, float *Out,
117159 size_t NumElements) noexcept {
118160 runKernelBody<__nv_exp10f>(NumElements, Out, X);
119161}
120162
163+ __gpu_kernel void exp2Kernel (const double *X, double *Out,
164+ size_t NumElements) noexcept {
165+ runKernelBody<__nv_exp2>(NumElements, Out, X);
166+ }
167+
121168__gpu_kernel void exp2fKernel (const float *X, float *Out,
122169 size_t NumElements) noexcept {
123170 runKernelBody<__nv_exp2f>(NumElements, Out, X);
124171}
125172
173+ __gpu_kernel void expm1Kernel (const double *X, double *Out,
174+ size_t NumElements) noexcept {
175+ runKernelBody<__nv_expm1>(NumElements, Out, X);
176+ }
177+
126178__gpu_kernel void expm1fKernel (const float *X, float *Out,
127179 size_t NumElements) noexcept {
128180 runKernelBody<__nv_expm1f>(NumElements, Out, X);
129181}
130182
131- __gpu_kernel void hypotfKernel (const float *X, float *Y, float *Out,
183+ __gpu_kernel void hypotKernel (const double *X, const double *Y, double *Out,
184+ size_t NumElements) noexcept {
185+ runKernelBody<__nv_hypot>(NumElements, Out, X, Y);
186+ }
187+
188+ __gpu_kernel void hypotfKernel (const float *X, const float *Y, float *Out,
132189 size_t NumElements) noexcept {
133190 runKernelBody<__nv_hypotf>(NumElements, Out, X, Y);
134191}
@@ -143,16 +200,31 @@ __gpu_kernel void logfKernel(const float *X, float *Out,
143200 runKernelBody<__nv_logf>(NumElements, Out, X);
144201}
145202
203+ __gpu_kernel void log10Kernel (const double *X, double *Out,
204+ size_t NumElements) noexcept {
205+ runKernelBody<__nv_log10>(NumElements, Out, X);
206+ }
207+
146208__gpu_kernel void log10fKernel (const float *X, float *Out,
147209 size_t NumElements) noexcept {
148210 runKernelBody<__nv_log10f>(NumElements, Out, X);
149211}
150212
213+ __gpu_kernel void log1pKernel (const double *X, double *Out,
214+ size_t NumElements) noexcept {
215+ runKernelBody<__nv_log1p>(NumElements, Out, X);
216+ }
217+
151218__gpu_kernel void log1pfKernel (const float *X, float *Out,
152219 size_t NumElements) noexcept {
153220 runKernelBody<__nv_log1pf>(NumElements, Out, X);
154221}
155222
223+ __gpu_kernel void log2Kernel (const double *X, double *Out,
224+ size_t NumElements) noexcept {
225+ runKernelBody<__nv_log2>(NumElements, Out, X);
226+ }
227+
156228__gpu_kernel void log2fKernel (const float *X, float *Out,
157229 size_t NumElements) noexcept {
158230 runKernelBody<__nv_log2f>(NumElements, Out, X);
@@ -169,11 +241,26 @@ __gpu_kernel void powfRoundedExponentKernel(const float *X, float *Y,
169241 runKernelBody<powfRoundedExponent>(NumElements, Out, X, Y);
170242}
171243
244+ __gpu_kernel void sinKernel (const double *X, double *Out,
245+ size_t NumElements) noexcept {
246+ runKernelBody<__nv_sin>(NumElements, Out, X);
247+ }
248+
172249__gpu_kernel void sinfKernel (const float *X, float *Out,
173250 size_t NumElements) noexcept {
174251 runKernelBody<__nv_sinf>(NumElements, Out, X);
175252}
176253
254+ __gpu_kernel void sincosSinKernel (const double *X, double *Out,
255+ size_t NumElements) noexcept {
256+ runKernelBody<sincosSin>(NumElements, Out, X);
257+ }
258+
259+ __gpu_kernel void sincosCosKernel (const double *X, double *Out,
260+ size_t NumElements) noexcept {
261+ runKernelBody<sincosCos>(NumElements, Out, X);
262+ }
263+
177264__gpu_kernel void sincosfSinKernel (const float *X, float *Out,
178265 size_t NumElements) noexcept {
179266 runKernelBody<sincosfSin>(NumElements, Out, X);
@@ -194,6 +281,11 @@ __gpu_kernel void sinpifKernel(const float *X, float *Out,
194281 runKernelBody<__nv_sinpif>(NumElements, Out, X);
195282}
196283
284+ __gpu_kernel void tanKernel (const double *X, double *Out,
285+ size_t NumElements) noexcept {
286+ runKernelBody<__nv_tan>(NumElements, Out, X);
287+ }
288+
197289__gpu_kernel void tanfKernel (const float *X, float *Out,
198290 size_t NumElements) noexcept {
199291 runKernelBody<__nv_tanf>(NumElements, Out, X);
0 commit comments