55 * reserved.
66 * Copyright (c) 2016 Los Alamos National Security, LLC. All rights
77 * reserved.
8+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
89 * $COPYRIGHT$
910 *
1011 * Additional copyrights may follow
@@ -24,26 +25,50 @@ typedef struct ompi_wait_sync_t {
2425 pthread_mutex_t lock ;
2526 struct ompi_wait_sync_t * next ;
2627 struct ompi_wait_sync_t * prev ;
28+ volatile bool signaling ;
2729} ompi_wait_sync_t ;
2830
2931#define REQUEST_PENDING (void*)0L
3032#define REQUEST_COMPLETED (void*)1L
3133
3234#define SYNC_WAIT (sync ) (opal_using_threads() ? sync_wait_mt (sync) : sync_wait_st (sync))
3335
36+ /* The loop in release handles a race condition between the signaling
37+ * thread and the destruction of the condition variable. The signaling
38+ * member will be set to false after the final signaling thread has
39+ * finished operating on the sync object. This is done to avoid
40+ * extra atomics in the signalling function and keep it as fast
41+ * as possible. Note that the race window is small so spinning here
42+ * is more optimal than sleeping since this macro is called in
43+ * the critical path. */
3444#define WAIT_SYNC_RELEASE (sync ) \
3545 if (opal_using_threads()) { \
36- pthread_cond_destroy(&(sync)->condition); \
37- pthread_mutex_destroy(&(sync)->lock); \
46+ while ((sync)->signaling) { \
47+ continue; \
48+ } \
49+ pthread_cond_destroy(&(sync)->condition); \
50+ pthread_mutex_destroy(&(sync)->lock); \
3851 }
3952
53+ #define WAIT_SYNC_RELEASE_NOWAIT (sync ) \
54+ if (opal_using_threads()) { \
55+ pthread_cond_destroy(&(sync)->condition); \
56+ pthread_mutex_destroy(&(sync)->lock); \
57+ }
58+
59+
4060#define WAIT_SYNC_SIGNAL (sync ) \
4161 if (opal_using_threads()) { \
4262 pthread_mutex_lock(&(sync->lock)); \
4363 pthread_cond_signal(&sync->condition); \
4464 pthread_mutex_unlock(&(sync->lock)); \
65+ sync->signaling = false; \
4566 }
4667
68+ #define WAIT_SYNC_SIGNALLED (sync ){ \
69+ (sync)->signaling = false; \
70+ }
71+
4772OPAL_DECLSPEC int sync_wait_mt (ompi_wait_sync_t * sync );
4873static inline int sync_wait_st (ompi_wait_sync_t * sync )
4974{
@@ -61,6 +86,7 @@ static inline int sync_wait_st (ompi_wait_sync_t *sync)
6186 (sync)->next = NULL; \
6287 (sync)->prev = NULL; \
6388 (sync)->status = 0; \
89+ (sync)->signaling = true; \
6490 if (opal_using_threads()) { \
6591 pthread_cond_init (&(sync)->condition, NULL); \
6692 pthread_mutex_init (&(sync)->lock, NULL); \
@@ -81,8 +107,9 @@ static inline void wait_sync_update(ompi_wait_sync_t *sync, int updates, int sta
81107 }
82108 } else {
83109 /* this is an error path so just use the atomic */
84- opal_atomic_swap_32 (& sync -> count , 0 );
85110 sync -> status = OPAL_ERROR ;
111+ opal_atomic_wmb ();
112+ opal_atomic_swap_32 (& sync -> count , 0 );
86113 }
87114 WAIT_SYNC_SIGNAL (sync );
88115}
0 commit comments