@@ -58,11 +58,14 @@ static int ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_
 static inline int ompi_osc_pt2pt_lock_self (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock)
 {
     const int my_rank = ompi_comm_rank (module->comm);
+    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, my_rank);
     int lock_type = lock->sync.lock.type;
     bool acquired = false;
 
     assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
 
+    (void) OPAL_THREAD_ADD32 (&lock->sync_expected, 1);
+
     acquired = ompi_osc_pt2pt_lock_try_acquire (module, my_rank, lock_type, (uint64_t) (uintptr_t) lock);
     if (!acquired) {
         /* queue the lock */
@@ -73,6 +76,9 @@ static inline int ompi_osc_pt2pt_lock_self (ompi_osc_pt2pt_module_t *module, omp
         ompi_osc_pt2pt_sync_wait_expected (lock);
     }
 
+    ompi_osc_pt2pt_peer_set_locked (peer, true);
+    ompi_osc_pt2pt_peer_set_eager_active (peer, true);
+
     OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                          "local lock aquired"));
 
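The per-peer flag helpers used above (ompi_osc_pt2pt_peer_set_locked, ompi_osc_pt2pt_peer_set_eager_active, and the ompi_osc_pt2pt_peer_locked predicate used later) are declared elsewhere and are not shown in this diff. The following is a minimal sketch of the behavior they are assumed to provide; the flags field and bit names are hypothetical placeholders, only the helper names and their use come from the diff itself.

/* Sketch only: assumed shape of the per-peer flag helpers.
 * The 'flags' field and the bit values below are hypothetical. */
enum {
    OSC_PT2PT_PEER_FLAG_LOCKED = 0x1,  /* a lock is held (or acked) for this peer */
    OSC_PT2PT_PEER_FLAG_EAGER  = 0x2   /* eager sends to this peer may proceed */
};

static inline void ompi_osc_pt2pt_peer_set_locked (ompi_osc_pt2pt_peer_t *peer, bool value)
{
    /* set or clear the locked bit for this peer */
    peer->flags = value ? (peer->flags | OSC_PT2PT_PEER_FLAG_LOCKED)
                        : (peer->flags & ~OSC_PT2PT_PEER_FLAG_LOCKED);
}

static inline bool ompi_osc_pt2pt_peer_locked (ompi_osc_pt2pt_peer_t *peer)
{
    return 0 != (peer->flags & OSC_PT2PT_PEER_FLAG_LOCKED);
}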
@@ -81,8 +87,12 @@ static inline int ompi_osc_pt2pt_lock_self (ompi_osc_pt2pt_module_t *module, omp
 
 static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock)
 {
+    const int my_rank = ompi_comm_rank (module->comm);
+    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, my_rank);
     int lock_type = lock->sync.lock.type;
 
+    (void) OPAL_THREAD_ADD32 (&lock->sync_expected, 1);
+
     assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
 
     OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
@@ -98,16 +108,22 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module,
     /* need to ensure we make progress */
     opal_progress ();
 
+    ompi_osc_pt2pt_peer_set_locked (peer, false);
+    ompi_osc_pt2pt_peer_set_eager_active (peer, false);
+
     ompi_osc_pt2pt_sync_expected (lock);
 }
 
-static inline int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_sync_t *lock)
+int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_sync_t *lock)
 {
+    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
     int lock_type = lock->sync.lock.type;
     ompi_osc_pt2pt_header_lock_t lock_req;
 
     int ret;
 
+    (void) OPAL_THREAD_ADD32 (&lock->sync_expected, 1);
+
     assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
 
     /* generate a lock request */
@@ -128,6 +144,9 @@ static inline int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, i
 
     /* make sure the request gets sent, so we can start eager sending... */
     ret = ompi_osc_pt2pt_frag_flush_target (module, target);
+    if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
+        ompi_osc_pt2pt_peer_set_locked (peer, true);
+    }
 
     return ret;
 }
@@ -140,6 +159,8 @@ static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module,
     ompi_osc_pt2pt_header_unlock_t unlock_req;
     int ret;
 
+    (void) OPAL_THREAD_ADD32 (&lock->sync_expected, 1);
+
     assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
 
     unlock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ;
@@ -169,6 +190,9 @@ static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module,
         return ret;
     }
 
+    ompi_osc_pt2pt_peer_set_locked (peer, false);
+    ompi_osc_pt2pt_peer_set_eager_active (peer, false);
+
     return ompi_osc_pt2pt_frag_flush_target (module, target);
 }
 
@@ -179,6 +203,8 @@ static inline int ompi_osc_pt2pt_flush_remote (ompi_osc_pt2pt_module_t *module,
     int32_t frag_count = opal_atomic_swap_32 ((int32_t *) module->epoch_outgoing_frag_count + target, -1);
     int ret;
 
+    (void) OPAL_THREAD_ADD32 (&lock->sync_expected, 1);
+
     assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
 
     flush_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ;
@@ -218,8 +244,6 @@ static int ompi_osc_pt2pt_lock_internal_execute (ompi_osc_pt2pt_module_t *module
     assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK);
 
     if (0 == (assert & MPI_MODE_NOCHECK)) {
-        lock->sync_expected = (-1 == target) ? ompi_comm_size (module->comm) : 1;
-
         if (my_rank != target && target != -1) {
             ret = ompi_osc_pt2pt_lock_remote (module, target, lock);
         } else {
@@ -231,19 +255,7 @@ static int ompi_osc_pt2pt_lock_internal_execute (ompi_osc_pt2pt_module_t *module
             return ret;
         }
 
-        if (-1 == target) {
-            for (int i = 0 ; i < ompi_comm_size (module->comm) ; ++i) {
-                if (my_rank == i) {
-                    continue;
-                }
-
-                ret = ompi_osc_pt2pt_lock_remote (module, i, lock);
-                if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
-                    return ret;
-                }
-            }
-
-        }
+        /* for lock_all there is nothing more to do. we will lock peers on demand */
     } else {
         lock->eager_send_active = true;
     }
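The added comment above is the crux of the change: under MPI_Win_lock_all, no lock requests are sent at lock time any more. Below is a hedged sketch of how a communication path could then acquire the per-peer lock lazily, using only functions visible in this diff; the helper name lock_peer_on_demand_sketch is hypothetical and not part of the commit.

/* Hypothetical illustration, not part of this commit: lazily acquire the
 * per-peer lock the first time an RMA operation targets 'target' during a
 * lock_all epoch. */
static int lock_peer_on_demand_sketch (ompi_osc_pt2pt_module_t *module,
                                       ompi_osc_pt2pt_sync_t *lock, int target)
{
    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);

    if (ompi_osc_pt2pt_peer_locked (peer)) {
        /* already locked by an earlier operation in this epoch */
        return OMPI_SUCCESS;
    }

    if (ompi_comm_rank (module->comm) == target) {
        /* local target: take the lock directly */
        return ompi_osc_pt2pt_lock_self (module, lock);
    }

    /* sends the lock request, bumps lock->sync_expected, and flushes the
     * fragment so eager sends can start once the ack arrives */
    return ompi_osc_pt2pt_lock_remote (module, target, lock);
}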
@@ -312,7 +324,7 @@ static int ompi_osc_pt2pt_lock_internal (int lock_type, int target, int assert,
     lock->sync.lock.target = target;
     lock->sync.lock.type = lock_type;
     lock->sync.lock.assert = assert;
-
+    lock->num_peers = (-1 == target) ? ompi_comm_size (module->comm) : 1;
     lock->sync_expected = 0;
 
     /* delay all eager sends until we've heard back.. */
@@ -376,13 +388,13 @@ static int ompi_osc_pt2pt_unlock_internal (int target, ompi_win_t *win)
                          "ompi_osc_pt2pt_unlock_internal: all lock acks received"));
 
     if (!(lock->sync.lock.assert & MPI_MODE_NOCHECK)) {
-        lock->sync_expected = (-1 == target) ? ompi_comm_size (module->comm) : 1;
-
         if (my_rank != target) {
             if (-1 == target) {
                 /* send unlock messages to all of my peers */
                 for (int i = 0 ; i < ompi_comm_size (module->comm) ; ++i) {
-                    if (my_rank == i) {
+                    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, i);
+
+                    if (my_rank == i || !ompi_osc_pt2pt_peer_locked (peer)) {
                         continue;
                     }
 
@@ -469,12 +481,6 @@ static int ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_
        able to eager send before we can transfer all the data... */
     ompi_osc_pt2pt_sync_wait_expected (lock);
 
-    if (-1 == target) {
-        lock->sync_expected = ompi_comm_size (module->comm) - 1;
-    } else {
-        lock->sync_expected = 1;
-    }
-
     if (-1 == target) {
         /* NTH: no local flush */
         for (int i = 0 ; i < ompi_comm_size (module->comm) ; ++i) {
@@ -488,7 +494,6 @@ static int ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_
             }
         }
     } else {
-
         /* send control message with flush request and count */
         ret = ompi_osc_pt2pt_flush_remote (module, target, lock);
         if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
@@ -787,6 +792,7 @@ int ompi_osc_pt2pt_process_lock (ompi_osc_pt2pt_module_t* module, int source,
 void ompi_osc_pt2pt_process_lock_ack (ompi_osc_pt2pt_module_t *module,
                                       ompi_osc_pt2pt_header_lock_ack_t *lock_ack_header)
 {
+    ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, lock_ack_header->source);
     ompi_osc_pt2pt_sync_t *lock;
 
     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
@@ -796,6 +802,8 @@ void ompi_osc_pt2pt_process_lock_ack (ompi_osc_pt2pt_module_t *module,
     lock = (ompi_osc_pt2pt_sync_t *) (uintptr_t) lock_ack_header->lock_ptr;
     assert (NULL != lock);
 
+    ompi_osc_pt2pt_peer_set_eager_active (peer, true);
+
     ompi_osc_pt2pt_sync_expected (lock);
 }
 
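At the MPI level, the intended effect (as read from this diff) is that a lock_all epoch no longer pays per-rank lock traffic up front; each peer is locked only when it is first targeted, and unlock_all skips peers that were never locked. A small usage sketch, assuming 'win' exposes at least one int at displacement 0 on every rank and the communicator has at least four ranks:

#include <mpi.h>

/* Usage sketch under the stated assumptions; not taken from the commit. */
static void passive_target_example (MPI_Win win, int *value)
{
    MPI_Win_lock_all (0, win);                          /* no per-peer lock requests sent here */
    MPI_Put (value, 1, MPI_INT, 3, 0, 1, MPI_INT, win); /* rank 3 is locked on demand */
    MPI_Win_flush (3, win);                             /* flush_remote accounts for its own ack */
    MPI_Win_unlock_all (win);                           /* only peers actually locked get unlock messages */
}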