@@ -70,11 +70,21 @@ __gpu_kernel void acosfKernel(const float *X, float *Out,
7070 runKernelBody<__ocml_acos_f32>(NumElements, Out, X);
7171}
7272
73+ __gpu_kernel void acosf16Kernel (const float16 *X, float16 *Out,
74+ size_t NumElements) noexcept {
75+ runKernelBody<__ocml_acos_f16>(NumElements, Out, X);
76+ }
77+
7378__gpu_kernel void acoshfKernel (const float *X, float *Out,
7479 size_t NumElements) noexcept {
7580 runKernelBody<__ocml_acosh_f32>(NumElements, Out, X);
7681}
7782
83+ __gpu_kernel void acoshf16Kernel (const float16 *X, float16 *Out,
84+ size_t NumElements) noexcept {
85+ runKernelBody<__ocml_acosh_f16>(NumElements, Out, X);
86+ }
87+
7888__gpu_kernel void asinKernel (const double *X, double *Out,
7989 size_t NumElements) noexcept {
8090 runKernelBody<__ocml_asin_f64>(NumElements, Out, X);
@@ -85,16 +95,31 @@ __gpu_kernel void asinfKernel(const float *X, float *Out,
8595 runKernelBody<__ocml_asin_f32>(NumElements, Out, X);
8696}
8797
98+ __gpu_kernel void asinf16Kernel (const float16 *X, float16 *Out,
99+ size_t NumElements) noexcept {
100+ runKernelBody<__ocml_asin_f16>(NumElements, Out, X);
101+ }
102+
88103__gpu_kernel void asinhfKernel (const float *X, float *Out,
89104 size_t NumElements) noexcept {
90105 runKernelBody<__ocml_asinh_f32>(NumElements, Out, X);
91106}
92107
108+ __gpu_kernel void asinhf16Kernel (const float16 *X, float16 *Out,
109+ size_t NumElements) noexcept {
110+ runKernelBody<__ocml_asinh_f16>(NumElements, Out, X);
111+ }
112+
93113__gpu_kernel void atanfKernel (const float *X, float *Out,
94114 size_t NumElements) noexcept {
95115 runKernelBody<__ocml_atan_f32>(NumElements, Out, X);
96116}
97117
118+ __gpu_kernel void atanf16Kernel (const float16 *X, float16 *Out,
119+ size_t NumElements) noexcept {
120+ runKernelBody<__ocml_atan_f16>(NumElements, Out, X);
121+ }
122+
98123__gpu_kernel void atan2fKernel (const float *X, const float *Y, float *Out,
99124 size_t NumElements) noexcept {
100125 runKernelBody<__ocml_atan2_f32>(NumElements, Out, X, Y);
@@ -105,6 +130,11 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out,
105130 runKernelBody<__ocml_atanh_f32>(NumElements, Out, X);
106131}
107132
133+ __gpu_kernel void atanhf16Kernel (const float16 *X, float16 *Out,
134+ size_t NumElements) noexcept {
135+ runKernelBody<__ocml_atanh_f16>(NumElements, Out, X);
136+ }
137+
108138__gpu_kernel void cbrtKernel (const double *X, double *Out,
109139 size_t NumElements) noexcept {
110140 runKernelBody<__ocml_cbrt_f64>(NumElements, Out, X);
@@ -125,11 +155,21 @@ __gpu_kernel void cosfKernel(const float *X, float *Out,
125155 runKernelBody<__ocml_cos_f32>(NumElements, Out, X);
126156}
127157
158+ __gpu_kernel void cosf16Kernel (const float16 *X, float16 *Out,
159+ size_t NumElements) noexcept {
160+ runKernelBody<__ocml_cos_f16>(NumElements, Out, X);
161+ }
162+
128163__gpu_kernel void coshfKernel (const float *X, float *Out,
129164 size_t NumElements) noexcept {
130165 runKernelBody<__ocml_cosh_f32>(NumElements, Out, X);
131166}
132167
168+ __gpu_kernel void coshf16Kernel (const float16 *X, float16 *Out,
169+ size_t NumElements) noexcept {
170+ runKernelBody<__ocml_cosh_f16>(NumElements, Out, X);
171+ }
172+
133173__gpu_kernel void cospifKernel (const float *X, float *Out,
134174 size_t NumElements) noexcept {
135175 runKernelBody<__ocml_cospi_f32>(NumElements, Out, X);
@@ -150,6 +190,11 @@ __gpu_kernel void expfKernel(const float *X, float *Out,
150190 runKernelBody<__ocml_exp_f32>(NumElements, Out, X);
151191}
152192
193+ __gpu_kernel void expf16Kernel (const float16 *X, float16 *Out,
194+ size_t NumElements) noexcept {
195+ runKernelBody<__ocml_exp_f16>(NumElements, Out, X);
196+ }
197+
153198__gpu_kernel void exp10Kernel (const double *X, double *Out,
154199 size_t NumElements) noexcept {
155200 runKernelBody<__ocml_exp10_f64>(NumElements, Out, X);
@@ -160,6 +205,11 @@ __gpu_kernel void exp10fKernel(const float *X, float *Out,
160205 runKernelBody<__ocml_exp10_f32>(NumElements, Out, X);
161206}
162207
208+ __gpu_kernel void exp10f16Kernel (const float16 *X, float16 *Out,
209+ size_t NumElements) noexcept {
210+ runKernelBody<__ocml_exp10_f16>(NumElements, Out, X);
211+ }
212+
163213__gpu_kernel void exp2Kernel (const double *X, double *Out,
164214 size_t NumElements) noexcept {
165215 runKernelBody<__ocml_exp2_f64>(NumElements, Out, X);
@@ -170,6 +220,11 @@ __gpu_kernel void exp2fKernel(const float *X, float *Out,
170220 runKernelBody<__ocml_exp2_f32>(NumElements, Out, X);
171221}
172222
223+ __gpu_kernel void exp2f16Kernel (const float16 *X, float16 *Out,
224+ size_t NumElements) noexcept {
225+ runKernelBody<__ocml_exp2_f16>(NumElements, Out, X);
226+ }
227+
173228__gpu_kernel void expm1Kernel (const double *X, double *Out,
174229 size_t NumElements) noexcept {
175230 runKernelBody<__ocml_expm1_f64>(NumElements, Out, X);
@@ -180,6 +235,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
180235 runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
181236}
182237
238+ __gpu_kernel void expm1f16Kernel (const float16 *X, float16 *Out,
239+ size_t NumElements) noexcept {
240+ runKernelBody<__ocml_expm1_f16>(NumElements, Out, X);
241+ }
242+
183243__gpu_kernel void hypotKernel (const double *X, const double *Y, double *Out,
184244 size_t NumElements) noexcept {
185245 runKernelBody<__ocml_hypot_f64>(NumElements, Out, X, Y);
@@ -200,6 +260,11 @@ __gpu_kernel void logfKernel(const float *X, float *Out,
200260 runKernelBody<__ocml_log_f32>(NumElements, Out, X);
201261}
202262
263+ __gpu_kernel void logf16Kernel (const float16 *X, float16 *Out,
264+ size_t NumElements) noexcept {
265+ runKernelBody<__ocml_log_f16>(NumElements, Out, X);
266+ }
267+
203268__gpu_kernel void log10Kernel (const double *X, double *Out,
204269 size_t NumElements) noexcept {
205270 runKernelBody<__ocml_log10_f64>(NumElements, Out, X);
@@ -210,6 +275,11 @@ __gpu_kernel void log10fKernel(const float *X, float *Out,
210275 runKernelBody<__ocml_log10_f32>(NumElements, Out, X);
211276}
212277
278+ __gpu_kernel void log10f16Kernel (const float16 *X, float16 *Out,
279+ size_t NumElements) noexcept {
280+ runKernelBody<__ocml_log10_f16>(NumElements, Out, X);
281+ }
282+
213283__gpu_kernel void log1pKernel (const double *X, double *Out,
214284 size_t NumElements) noexcept {
215285 runKernelBody<__ocml_log1p_f64>(NumElements, Out, X);
@@ -230,6 +300,11 @@ __gpu_kernel void log2fKernel(const float *X, float *Out,
230300 runKernelBody<__ocml_log2_f32>(NumElements, Out, X);
231301}
232302
303+ __gpu_kernel void log2f16Kernel (const float16 *X, float16 *Out,
304+ size_t NumElements) noexcept {
305+ runKernelBody<__ocml_log2_f16>(NumElements, Out, X);
306+ }
307+
233308__gpu_kernel void powfKernel (const float *X, float *Y, float *Out,
234309 size_t NumElements) noexcept {
235310 runKernelBody<__ocml_pow_f32>(NumElements, Out, X, Y);
@@ -251,6 +326,11 @@ __gpu_kernel void sinfKernel(const float *X, float *Out,
251326 runKernelBody<__ocml_sin_f32>(NumElements, Out, X);
252327}
253328
329+ __gpu_kernel void sinf16Kernel (const float16 *X, float16 *Out,
330+ size_t NumElements) noexcept {
331+ runKernelBody<__ocml_sin_f16>(NumElements, Out, X);
332+ }
333+
254334__gpu_kernel void sincosSinKernel (const double *X, double *Out,
255335 size_t NumElements) noexcept {
256336 runKernelBody<sincosSin>(NumElements, Out, X);
@@ -276,6 +356,11 @@ __gpu_kernel void sinhfKernel(const float *X, float *Out,
276356 runKernelBody<__ocml_sinh_f32>(NumElements, Out, X);
277357}
278358
359+ __gpu_kernel void sinhf16Kernel (const float16 *X, float16 *Out,
360+ size_t NumElements) noexcept {
361+ runKernelBody<__ocml_sinh_f16>(NumElements, Out, X);
362+ }
363+
279364__gpu_kernel void sinpifKernel (const float *X, float *Out,
280365 size_t NumElements) noexcept {
281366 runKernelBody<__ocml_sinpi_f32>(NumElements, Out, X);
@@ -291,10 +376,20 @@ __gpu_kernel void tanfKernel(const float *X, float *Out,
291376 runKernelBody<__ocml_tan_f32>(NumElements, Out, X);
292377}
293378
379+ __gpu_kernel void tanf16Kernel (const float16 *X, float16 *Out,
380+ size_t NumElements) noexcept {
381+ runKernelBody<__ocml_tan_f16>(NumElements, Out, X);
382+ }
383+
294384__gpu_kernel void tanhfKernel (const float *X, float *Out,
295385 size_t NumElements) noexcept {
296386 runKernelBody<__ocml_tanh_f32>(NumElements, Out, X);
297387}
388+
389+ __gpu_kernel void tanhf16Kernel (const float16 *X, float16 *Out,
390+ size_t NumElements) noexcept {
391+ runKernelBody<__ocml_tanh_f16>(NumElements, Out, X);
392+ }
298393} // extern "C"
299394
300395#endif // HIP_MATH_FOUND
0 commit comments