19
19
#endif
20
20
#include < limits.h>
21
21
#include < stdint.h>
22
+ #ifdef __OPENMP_AMDGCN__
23
+ #include < omp.h>
24
+ #endif
22
25
#endif // !defined(__HIPCC_RTC__)
23
26
24
27
#pragma push_macro("__DEVICE__")
@@ -258,6 +261,9 @@ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
258
261
__DEVICE__
259
262
float frexpf (float __x, int *__nptr) {
260
263
int __tmp;
264
+ #ifdef __OPENMP_AMDGCN__
265
+ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
266
+ #endif
261
267
float __r =
262
268
__ocml_frexp_f32 (__x, (__attribute__ ((address_space (5 ))) int *)&__tmp);
263
269
*__nptr = __tmp;
@@ -343,6 +349,9 @@ long int lroundf(float __x) { return __ocml_round_f32(__x); }
343
349
__DEVICE__
344
350
float modff (float __x, float *__iptr) {
345
351
float __tmp;
352
+ #ifdef __OPENMP_AMDGCN__
353
+ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
354
+ #endif
346
355
float __r =
347
356
__ocml_modf_f32 (__x, (__attribute__ ((address_space (5 ))) float *)&__tmp);
348
357
*__iptr = __tmp;
@@ -423,6 +432,9 @@ float remainderf(float __x, float __y) {
423
432
__DEVICE__
424
433
float remquof (float __x, float __y, int *__quo) {
425
434
int __tmp;
435
+ #ifdef __OPENMP_AMDGCN__
436
+ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
437
+ #endif
426
438
float __r = __ocml_remquo_f32 (
427
439
__x, __y, (__attribute__ ((address_space (5 ))) int *)&__tmp);
428
440
*__quo = __tmp;
@@ -479,6 +491,9 @@ __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
479
491
__DEVICE__
480
492
void sincosf (float __x, float *__sinptr, float *__cosptr) {
481
493
float __tmp;
494
+ #ifdef __OPENMP_AMDGCN__
495
+ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
496
+ #endif
482
497
*__sinptr =
483
498
__ocml_sincos_f32 (__x, (__attribute__ ((address_space (5 ))) float *)&__tmp);
484
499
*__cosptr = __tmp;
@@ -487,6 +502,9 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
487
502
__DEVICE__
488
503
void sincospif (float __x, float *__sinptr, float *__cosptr) {
489
504
float __tmp;
505
+ #ifdef __OPENMP_AMDGCN__
506
+ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
507
+ #endif
490
508
*__sinptr = __ocml_sincospi_f32 (
491
509
__x, (__attribute__ ((address_space (5 ))) float *)&__tmp);
492
510
*__cosptr = __tmp;
@@ -799,6 +817,9 @@ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
799
817
__DEVICE__
800
818
double frexp (double __x, int *__nptr) {
801
819
int __tmp;
820
+ #ifdef __OPENMP_AMDGCN__
821
+ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
822
+ #endif
802
823
double __r =
803
824
__ocml_frexp_f64 (__x, (__attribute__ ((address_space (5 ))) int *)&__tmp);
804
825
*__nptr = __tmp;
@@ -883,6 +904,9 @@ long int lround(double __x) { return __ocml_round_f64(__x); }
883
904
__DEVICE__
884
905
double modf (double __x, double *__iptr) {
885
906
double __tmp;
907
+ #ifdef __OPENMP_AMDGCN__
908
+ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
909
+ #endif
886
910
double __r =
887
911
__ocml_modf_f64 (__x, (__attribute__ ((address_space (5 ))) double *)&__tmp);
888
912
*__iptr = __tmp;
@@ -971,6 +995,9 @@ double remainder(double __x, double __y) {
971
995
__DEVICE__
972
996
double remquo (double __x, double __y, int *__quo) {
973
997
int __tmp;
998
+ #ifdef __OPENMP_AMDGCN__
999
+ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1000
+ #endif
974
1001
double __r = __ocml_remquo_f64 (
975
1002
__x, __y, (__attribute__ ((address_space (5 ))) int *)&__tmp);
976
1003
*__quo = __tmp;
@@ -1029,6 +1056,9 @@ double sin(double __x) { return __ocml_sin_f64(__x); }
1029
1056
__DEVICE__
1030
1057
void sincos (double __x, double *__sinptr, double *__cosptr) {
1031
1058
double __tmp;
1059
+ #ifdef __OPENMP_AMDGCN__
1060
+ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1061
+ #endif
1032
1062
*__sinptr = __ocml_sincos_f64 (
1033
1063
__x, (__attribute__ ((address_space (5 ))) double *)&__tmp);
1034
1064
*__cosptr = __tmp;
@@ -1037,6 +1067,9 @@ void sincos(double __x, double *__sinptr, double *__cosptr) {
1037
1067
__DEVICE__
1038
1068
void sincospi (double __x, double *__sinptr, double *__cosptr) {
1039
1069
double __tmp;
1070
+ #ifdef __OPENMP_AMDGCN__
1071
+ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1072
+ #endif
1040
1073
*__sinptr = __ocml_sincospi_f64 (
1041
1074
__x, (__attribute__ ((address_space (5 ))) double *)&__tmp);
1042
1075
*__cosptr = __tmp;
0 commit comments