@@ -612,11 +612,17 @@ float atomicMin(float* addr, float val) {
612612#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
613613 return unsafeAtomicMin (addr, val);
614614#else
615+ typedef union u_hold {
616+ float a;
617+ unsigned int b;
618+ } u_hold_t ;
619+ u_hold_t u{val};
620+ bool neg_zero = 0x80000000U == u.b ;
615621 #if __has_builtin(__hip_atomic_load) && \
616622 __has_builtin (__hip_atomic_compare_exchange_strong)
617623 float value = __hip_atomic_load (addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
618624 bool done = false ;
619- while (!done && value > val) {
625+ while (!done && ( value > val || (neg_zero && value == 0 . 0f )) ) {
620626 done = __hip_atomic_compare_exchange_strong (addr, &value, val,
621627 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
622628 }
@@ -625,7 +631,7 @@ float atomicMin(float* addr, float val) {
625631 unsigned int *uaddr = (unsigned int *)addr;
626632 unsigned int value = __atomic_load_n (uaddr, __ATOMIC_RELAXED);
627633 bool done = false ;
628- while (!done && __uint_as_float (value) > val) {
634+ while (!done && ( __uint_as_float (value) > val || (neg_zero && __uint_as_float (value) == 0 . 0f )) ) {
629635 done = __atomic_compare_exchange_n (uaddr, &value, __float_as_uint (val), false ,
630636 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
631637 }
@@ -658,11 +664,17 @@ double atomicMin(double* addr, double val) {
658664#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
659665 return unsafeAtomicMin (addr, val);
660666#else
667+ typedef union u_hold {
668+ double a;
669+ unsigned long long b;
670+ } u_hold_t ;
671+ u_hold_t u{val};
672+ bool neg_zero = 0x8000000000000000ULL == u.b ;
661673 #if __has_builtin(__hip_atomic_load) && \
662674 __has_builtin (__hip_atomic_compare_exchange_strong)
663675 double value = __hip_atomic_load (addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
664676 bool done = false ;
665- while (!done && value > val) {
677+ while (!done && ( value > val || (neg_zero && value == 0.0 ))) {
666678 done = __hip_atomic_compare_exchange_strong (addr, &value, val,
667679 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
668680 }
@@ -671,7 +683,8 @@ double atomicMin(double* addr, double val) {
671683 unsigned long long *uaddr = (unsigned long long *)addr;
672684 unsigned long long value = __atomic_load_n (uaddr, __ATOMIC_RELAXED);
673685 bool done = false ;
674- while (!done && __longlong_as_double (value) > val) {
686+ while (!done &&
687+ (__longlong_as_double (value) > val || (neg_zero && __longlong_as_double (value) == 0.0 ))) {
675688 done = __atomic_compare_exchange_n (uaddr, &value, __double_as_longlong (val), false ,
676689 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
677690 }
@@ -856,11 +869,17 @@ float atomicMax(float* addr, float val) {
856869#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
857870 return unsafeAtomicMax (addr, val);
858871#else
872+ typedef union u_hold {
873+ float a;
874+ unsigned int b;
875+ } u_hold_t ;
876+ u_hold_t u{val};
877+ bool neg_zero = 0x80000000U == u.b ;
859878 #if __has_builtin(__hip_atomic_load) && \
860879 __has_builtin (__hip_atomic_compare_exchange_strong)
861880 float value = __hip_atomic_load (addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
862881 bool done = false ;
863- while (!done && value < val) {
882+ while (!done && ( value < val || (neg_zero && value == 0 . 0f )) ) {
864883 done = __hip_atomic_compare_exchange_strong (addr, &value, val,
865884 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
866885 }
@@ -869,7 +888,7 @@ float atomicMax(float* addr, float val) {
869888 unsigned int *uaddr = (unsigned int *)addr;
870889 unsigned int value = __atomic_load_n (uaddr, __ATOMIC_RELAXED);
871890 bool done = false ;
872- while (!done && __uint_as_float (value) < val) {
891+ while (!done && ( __uint_as_float (value) < val || (neg_zero && __uint_as_float (value) == 0 . 0f )) ) {
873892 done = __atomic_compare_exchange_n (uaddr, &value, __float_as_uint (val), false ,
874893 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
875894 }
@@ -902,11 +921,17 @@ double atomicMax(double* addr, double val) {
902921#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
903922 return unsafeAtomicMax (addr, val);
904923#else
924+ typedef union u_hold {
925+ double a;
926+ unsigned long long b;
927+ } u_hold_t ;
928+ u_hold_t u{val};
929+ bool neg_zero = 0x8000000000000000ULL == u.b ;
905930 #if __has_builtin(__hip_atomic_load) && \
906931 __has_builtin (__hip_atomic_compare_exchange_strong)
907932 double value = __hip_atomic_load (addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
908933 bool done = false ;
909- while (!done && value < val) {
934+ while (!done && ( value < val || (neg_zero && value == 0.0 )) ) {
910935 done = __hip_atomic_compare_exchange_strong (addr, &value, val,
911936 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
912937 }
@@ -915,7 +940,8 @@ double atomicMax(double* addr, double val) {
915940 unsigned long long *uaddr = (unsigned long long *)addr;
916941 unsigned long long value = __atomic_load_n (uaddr, __ATOMIC_RELAXED);
917942 bool done = false ;
918- while (!done && __longlong_as_double (value) < val) {
943+ while (!done &&
944+ (__longlong_as_double (value) < val || (neg_zero && __longlong_as_double (value) == 0.0 ))) {
919945 done = __atomic_compare_exchange_n (uaddr, &value, __double_as_longlong (val), false ,
920946 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
921947 }
@@ -977,7 +1003,7 @@ unsigned int atomicDec(unsigned int* address, unsigned int val)
9771003#else
9781004 return __builtin_amdgcn_atomic_dec32 (address, val, __ATOMIC_RELAXED, " agent" );
9791005#endif // __gfx941__
980-
1006+
9811007}
9821008
9831009__device__
0 commit comments