55 * reserved.
66 * Copyright (c) 2016 Los Alamos National Security, LLC. All rights
77 * reserved.
8+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
89 * $COPYRIGHT$
910 *
1011 * Additional copyrights may follow
@@ -26,26 +27,50 @@ typedef struct ompi_wait_sync_t {
2627 pthread_mutex_t lock ;
2728 struct ompi_wait_sync_t * next ;
2829 struct ompi_wait_sync_t * prev ;
30+ volatile bool signaling ;
2931} ompi_wait_sync_t ;
3032
/* Sentinel pointer values for a request's completion state.
 * Each expansion is fully parenthesized so that the cast cannot bind
 * unexpectedly to a neighbouring operator (e.g. `sizeof REQUEST_PENDING`). */
#define REQUEST_PENDING   ((void *)0L)
#define REQUEST_COMPLETED ((void *)1L)
3335
/* Wait on a sync object: dispatches to sync_wait_mt() when
 * opal_using_threads() is true and to sync_wait_st() otherwise.
 * Expands to an expression yielding the int result of the chosen call;
 * the argument is evaluated exactly once (only one branch is taken). */
#define SYNC_WAIT(sync) \
    (opal_using_threads() ? sync_wait_mt(sync) : sync_wait_st(sync))
3537
/* Tear down the condition variable and the mutex of a sync object.
 *
 * The spin loop handles a race between the signaling thread and the
 * destruction of the condition variable. The signaling member is set to
 * false only after the final signaling thread has finished operating on
 * the sync object, so waiting for the flag to drop keeps the destroy
 * calls from running underneath the signaler. This is done to avoid
 * extra atomics in the signaling macro and keep it as fast as possible.
 * The race window is small, so spinning here is preferable to sleeping:
 * this macro is called in the critical path.
 *
 * Nothing is done when opal_using_threads() is false.
 *
 * The argument is evaluated more than once and must therefore be free of
 * side effects. The body is wrapped in do { } while (0) so the macro
 * behaves as one statement, including as the unbraced arm of an if/else. */
#define WAIT_SYNC_RELEASE(sync)                       \
    do {                                              \
        if (opal_using_threads()) {                   \
            while ((sync)->signaling) {               \
                continue;                             \
            }                                         \
            pthread_cond_destroy(&(sync)->condition); \
            pthread_mutex_destroy(&(sync)->lock);     \
        }                                             \
    } while (0)
4154
/* Destroy the condition variable and the mutex of a sync object
 * immediately, without first polling the signaling flag.
 * NOTE(review): presumably intended for sync objects that no other thread
 * can still be signaling -- confirm against the callers.
 *
 * Nothing is done when opal_using_threads() is false. The argument is
 * evaluated twice and must be free of side effects. Wrapped in
 * do { } while (0) so it is usable as a single statement. */
#define WAIT_SYNC_RELEASE_NOWAIT(sync)                \
    do {                                              \
        if (opal_using_threads()) {                   \
            pthread_cond_destroy(&(sync)->condition); \
            pthread_mutex_destroy(&(sync)->lock);     \
        }                                             \
    } while (0)
60+
61+
/* Wake the thread waiting on a sync object.
 *
 * When opal_using_threads() is true: take the sync's mutex, signal its
 * condition variable, drop the mutex, and only then clear the signaling
 * flag. Clearing the flag must stay the last access to the object,
 * because WAIT_SYNC_RELEASE spins on that flag before destroying the
 * condition variable and mutex.
 * When opal_using_threads() is false nothing is done (the flag is left
 * untouched).
 *
 * Every use of the argument is parenthesized so expressions such as
 * `&obj` work; the argument is evaluated several times and must be free
 * of side effects. Wrapped in do { } while (0) so the macro acts as one
 * statement. */
#define WAIT_SYNC_SIGNAL(sync)                        \
    do {                                              \
        if (opal_using_threads()) {                   \
            pthread_mutex_lock(&(sync)->lock);        \
            pthread_cond_signal(&(sync)->condition);  \
            pthread_mutex_unlock(&(sync)->lock);      \
            (sync)->signaling = false;                \
        }                                             \
    } while (0)
4869
/* Mark a sync object as no longer being signaled by clearing its
 * signaling flag, without touching the mutex or condition variable.
 * The flag is cleared regardless of opal_using_threads().
 * Wrapped in do { } while (0) so that `WAIT_SYNC_SIGNALLED(x);` is a
 * single statement and can sit in an unbraced if/else arm. */
#define WAIT_SYNC_SIGNALLED(sync)      \
    do {                               \
        (sync)->signaling = false;     \
    } while (0)
73+
4974OPAL_DECLSPEC int sync_wait_mt (ompi_wait_sync_t * sync );
5075static inline int sync_wait_st (ompi_wait_sync_t * sync )
5176{
@@ -63,6 +88,7 @@ static inline int sync_wait_st (ompi_wait_sync_t *sync)
6388 (sync)->next = NULL; \
6489 (sync)->prev = NULL; \
6590 (sync)->status = 0; \
91+ (sync)->signaling = true; \
6692 if (opal_using_threads()) { \
6793 pthread_cond_init (&(sync)->condition, NULL); \
6894 pthread_mutex_init (&(sync)->lock, NULL); \
@@ -83,8 +109,9 @@ static inline void wait_sync_update(ompi_wait_sync_t *sync, int updates, int sta
83109 }
84110 } else {
85111 /* this is an error path so just use the atomic */
86- opal_atomic_swap_32 (& sync -> count , 0 );
87112 sync -> status = OPAL_ERROR ;
113+ opal_atomic_wmb ();
114+ opal_atomic_swap_32 (& sync -> count , 0 );
88115 }
89116 WAIT_SYNC_SIGNAL (sync );
90117}
0 commit comments