55 * reserved.
66 * Copyright (c) 2016 Los Alamos National Security, LLC. All rights
77 * reserved.
8+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
89 * $COPYRIGHT$
910 *
1011 * Additional copyrights may follow
1314 */
1415#include "opal/sys/atomic.h"
1516#include "opal/threads/condition.h"
17+ #include "opal/constants.h"
18+ #include "opal/prefetch.h"
1619#include <pthread.h>
1720
1821BEGIN_C_DECLS
@@ -33,17 +36,29 @@ typedef struct ompi_wait_sync_t {
3336
3437#define WAIT_SYNC_RELEASE (sync ) \
3538 if (opal_using_threads()) { \
39+ pthread_mutex_lock(&(sync)->lock); \
3640 pthread_cond_destroy(&(sync)->condition); \
41+ pthread_mutex_unlock(&(sync)->lock); \
3742 pthread_mutex_destroy(&(sync)->lock); \
3843 }
3944
40- #define WAIT_SYNC_SIGNAL (sync ) \
45+ #define WAIT_SYNC_LOCK (sync ) \
4146 if (opal_using_threads()) { \
42- pthread_mutex_lock(&(sync->lock)); \
43- pthread_cond_signal(&sync->condition); \
44- pthread_mutex_unlock(&(sync->lock)); \
47+ pthread_mutex_lock(&((sync)->lock)); \
4548 }
4649
50+ #define WAIT_SYNC_SIGNAL_UNLOCK (sync ) \
51+ if (opal_using_threads()) { \
52+ pthread_cond_signal(&((sync)->condition)); \
53+ pthread_mutex_unlock(&((sync)->lock)); \
54+ }
55+
56+ #define WAIT_SYNC_UNLOCK (sync ) \
57+ if (opal_using_threads()) { \
58+ pthread_mutex_unlock(&((sync)->lock)); \
59+ }
60+
61+
4762OPAL_DECLSPEC int sync_wait_mt (ompi_wait_sync_t * sync );
4863static inline int sync_wait_st (ompi_wait_sync_t * sync )
4964{
@@ -75,16 +90,39 @@ static inline int sync_wait_st (ompi_wait_sync_t *sync)
7590 */
7691static inline void wait_sync_update (ompi_wait_sync_t * sync , int updates , int status )
7792{
93+ /* Fast path: if we can decrement the sync->count without
94+ * dropping it to 0 - just return
95+ * Consider that there might be concurrent decrements
96+ */
97+ if ( OPAL_LIKELY (OPAL_SUCCESS == status ) ) {
98+ /* we know that our contribution is not yet there
99+ * so we can safely check if the count will still be above 0
100+ * after the change */
101+ while ( (sync -> count - updates > 0 ) ){
102+ int tmp = sync -> count ;
103+ if ( OPAL_ATOMIC_CMPSET_32 (& sync -> count , tmp , tmp - updates ) ){
104+ /* fastpath succeeds */
105+ return ;
106+ }
107+ }
108+ }
109+
110+ /* Slow path */
111+ WAIT_SYNC_LOCK (sync );
112+
78113 if ( OPAL_LIKELY (OPAL_SUCCESS == status ) ) {
79114 if ( 0 != (OPAL_THREAD_ADD32 (& sync -> count , - updates )) ) {
80- return ;
115+ goto unlock ;
81116 }
82117 } else {
83118 /* this is an error path so just use the atomic */
84119 opal_atomic_swap_32 (& sync -> count , 0 );
85120 sync -> status = OPAL_ERROR ;
86121 }
87- WAIT_SYNC_SIGNAL (sync );
122+ WAIT_SYNC_SIGNAL_UNLOCK (sync );
123+ return ;
124+ unlock :
125+ WAIT_SYNC_UNLOCK (sync );
88126}
89127
90128END_C_DECLS
0 commit comments