@@ -56,7 +56,6 @@ DEVICE double __kmpc_impl_get_wtime() {
56
56
}
57
57
58
58
// In CUDA 9.0, __activemask() replaces the __ballot(1) idiom used in CUDA 8.0.
59
-
60
59
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask () {
61
60
#if CUDA_VERSION >= 9000
62
61
return __activemask ();
@@ -66,7 +65,6 @@ DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
66
65
}
67
66
68
67
// In CUDA 9.0, the *_sync() variants take an extra 'mask' argument.
69
-
70
68
DEVICE int32_t __kmpc_impl_shfl_sync (__kmpc_impl_lanemask_t Mask, int32_t Var,
71
69
int32_t SrcLane) {
72
70
#if CUDA_VERSION >= 9000
@@ -86,14 +84,7 @@ DEVICE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask,
86
84
#endif // CUDA_VERSION
87
85
}
88
86
89
- DEVICE void __kmpc_impl_syncthreads () {
90
- // Use original __syncthreads if compiled by nvcc or clang >= 9.0.
91
- #if !defined(__clang__) || __clang_major__ >= 9
92
- __syncthreads ();
93
- #else
94
- asm volatile (" bar.sync %0;" : : " r" (0 ) : " memory" );
95
- #endif // __clang__
96
- }
87
+ DEVICE void __kmpc_impl_syncthreads () { __syncthreads (); } // Thin wrapper: forwards directly to CUDA's __syncthreads(); the inline "bar.sync" fallback for clang < 9 is no longer used.
97
88
98
89
DEVICE void __kmpc_impl_syncwarp (__kmpc_impl_lanemask_t Mask) {
99
90
#if CUDA_VERSION >= 9000
@@ -145,11 +136,11 @@ DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock) {
145
136
DEVICE void __kmpc_impl_set_lock (omp_lock_t *lock) {
146
137
// TODO: Not sure spinning is a good idea here.
147
138
while (__kmpc_atomic_cas (lock, UNSET, SET) != UNSET) {
148
- clock_t start = clock ();
149
- clock_t now;
139
+ int32_t start = __nvvm_read_ptx_sreg_clock ();
140
+ int32_t now;
150
141
for (;;) {
151
- now = clock ();
152
- clock_t cycles = now > start ? now - start : now + (0xffffffff - start);
142
+ now = __nvvm_read_ptx_sreg_clock ();
143
+ int32_t cycles = now > start ? now - start : now + (0xffffffff - start);
153
144
if (cycles >= __OMP_SPIN * GetBlockIdInKernel ()) {
154
145
break ;
155
146
}
0 commit comments