@@ -168,7 +168,6 @@ int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
168168 MPI_SUM , module -> comm ,
169169 module -> comm -> c_coll .coll_reduce_scatter_block_module );
170170 if (OMPI_SUCCESS != ret ) {
171- OPAL_THREAD_UNLOCK (& module -> lock );
172171 return ret ;
173172 }
174173
@@ -181,11 +180,10 @@ int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
181180 incoming_reqs ));
182181
183182 /* set our complete condition for incoming requests */
184- module -> active_incoming_frag_signal_count += incoming_reqs ;
183+ OPAL_THREAD_ADD32 ( & module -> active_incoming_frag_count , - incoming_reqs ) ;
185184
186185 /* wait for completion */
187- while (module -> outgoing_frag_count != module -> outgoing_frag_signal_count ||
188- module -> active_incoming_frag_count < module -> active_incoming_frag_signal_count ) {
186+ while (module -> outgoing_frag_count < 0 || module -> active_incoming_frag_count < 0 ) {
189187 opal_condition_wait (& module -> cond , & module -> lock );
190188 }
191189
@@ -196,10 +194,10 @@ int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
196194 }
197195
198196 module -> all_sync .epoch_active = false;
199-
200- opal_condition_broadcast (& module -> cond );
201197 OPAL_THREAD_UNLOCK (& module -> lock );
202198
199+ module -> comm -> c_coll .coll_barrier (module -> comm , module -> comm -> c_coll .coll_barrier_module );
200+
203201 OPAL_OUTPUT_VERBOSE ((25 , ompi_osc_base_framework .framework_output ,
204202 "osc pt2pt: fence end: %d" , ret ));
205203
@@ -212,11 +210,11 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
212210 ompi_osc_pt2pt_module_t * module = GET_MODULE (win );
213211 ompi_osc_pt2pt_sync_t * sync = & module -> all_sync ;
214212
215- OPAL_THREAD_LOCK (& module -> lock );
213+ OPAL_THREAD_LOCK (& sync -> lock );
216214
217215 /* check if we are already in an access epoch */
218216 if (ompi_osc_pt2pt_access_epoch_active (module )) {
219- OPAL_THREAD_UNLOCK (& module -> lock );
217+ OPAL_THREAD_UNLOCK (& sync -> lock );
220218 return OMPI_ERR_RMA_SYNC ;
221219 }
222220
@@ -249,7 +247,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
249247 if (0 == ompi_group_size (group )) {
250248 /* nothing more to do. this is an empty start epoch */
251249 sync -> eager_send_active = true;
252- OPAL_THREAD_UNLOCK (& module -> lock );
250+ OPAL_THREAD_UNLOCK (& sync -> lock );
253251 return OMPI_SUCCESS ;
254252 }
255253
@@ -258,12 +256,11 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
258256 /* translate the group ranks into the communicator */
259257 sync -> peer_list .peers = ompi_osc_pt2pt_get_peers (module , group );
260258 if (NULL == sync -> peer_list .peers ) {
261- OPAL_THREAD_UNLOCK (& module -> lock );
259+ OPAL_THREAD_UNLOCK (& sync -> lock );
262260 return OMPI_ERR_OUT_OF_RESOURCE ;
263261 }
264262
265263 if (!(assert & MPI_MODE_NOCHECK )) {
266- OPAL_THREAD_LOCK (& sync -> lock );
267264 for (int i = 0 ; i < sync -> num_peers ; ++ i ) {
268265 ompi_osc_pt2pt_peer_t * peer = sync -> peer_list .peers [i ];
269266
@@ -276,7 +273,6 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
276273 ompi_osc_pt2pt_peer_set_unex (peer , false);
277274 }
278275 }
279- OPAL_THREAD_UNLOCK (& sync -> lock );
280276 } else {
281277 sync -> sync_expected = 0 ;
282278 }
@@ -295,7 +291,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
295291 "ompi_osc_pt2pt_start complete. eager sends active: %d" ,
296292 sync -> eager_send_active ));
297293
298- OPAL_THREAD_UNLOCK (& module -> lock );
294+ OPAL_THREAD_UNLOCK (& sync -> lock );
299295 return OMPI_SUCCESS ;
300296}
301297
@@ -313,14 +309,14 @@ int ompi_osc_pt2pt_complete (ompi_win_t *win)
313309 OPAL_OUTPUT_VERBOSE ((50 , ompi_osc_base_framework .framework_output ,
314310 "ompi_osc_pt2pt_complete entering..." ));
315311
316- OPAL_THREAD_LOCK (& module -> lock );
312+ OPAL_THREAD_LOCK (& sync -> lock );
317313 if (OMPI_OSC_PT2PT_SYNC_TYPE_PSCW != sync -> type ) {
318- OPAL_THREAD_UNLOCK (& module -> lock );
314+ OPAL_THREAD_UNLOCK (& sync -> lock );
319315 return OMPI_ERR_RMA_SYNC ;
320316 }
321317
322318 /* wait for all the post messages */
323- ompi_osc_pt2pt_sync_wait (sync );
319+ ompi_osc_pt2pt_sync_wait_nolock (sync );
324320
325321 /* phase 1 cleanup sync object */
326322 group = sync -> sync .pscw .group ;
@@ -330,8 +326,7 @@ int ompi_osc_pt2pt_complete (ompi_win_t *win)
330326
331327 /* need to reset the sync here to avoid processing incorrect post messages */
332328 ompi_osc_pt2pt_sync_reset (sync );
333-
334- OPAL_THREAD_UNLOCK (& module -> lock );
329+ OPAL_THREAD_UNLOCK (& sync -> lock );
335330
336331 OPAL_OUTPUT_VERBOSE ((50 , ompi_osc_base_framework .framework_output ,
337332 "ompi_osc_pt2pt_complete all posts received. sending complete messages..." ));
@@ -403,7 +398,7 @@ int ompi_osc_pt2pt_complete (ompi_win_t *win)
403398 OPAL_THREAD_LOCK (& module -> lock );
404399 /* wait for outgoing requests to complete. Don't wait for incoming, as
405400 we're only completing the access epoch, not the exposure epoch */
406- while (module -> outgoing_frag_count != module -> outgoing_frag_signal_count ) {
401+ while (module -> outgoing_frag_count < 0 ) {
407402 opal_condition_wait (& module -> cond , & module -> lock );
408403 }
409404
@@ -513,15 +508,13 @@ int ompi_osc_pt2pt_wait (ompi_win_t *win)
513508 }
514509
515510 OPAL_OUTPUT_VERBOSE ((25 , ompi_osc_base_framework .framework_output ,
516- "ompi_osc_pt2pt_wait entering..." ));
511+ "ompi_osc_pt2pt_wait entering... module %p" , ( void * ) module ));
517512
518513 OPAL_THREAD_LOCK (& module -> lock );
519- while (0 != module -> num_complete_msgs ||
520- module -> active_incoming_frag_count != module -> active_incoming_frag_signal_count ) {
521- OPAL_OUTPUT_VERBOSE ((25 , ompi_osc_base_framework .framework_output , "num_complete_msgs = %d, "
522- "active_incoming_frag_count = %d, active_incoming_frag_signal_count = %d" ,
523- module -> num_complete_msgs , module -> active_incoming_frag_count ,
524- module -> active_incoming_frag_signal_count ));
514+ while (0 != module -> num_complete_msgs || module -> active_incoming_frag_count < 0 ) {
515+ OPAL_OUTPUT_VERBOSE ((25 , ompi_osc_base_framework .framework_output , "module %p, num_complete_msgs = %d, "
516+ "active_incoming_frag_count = %d" , (void * ) module , module -> num_complete_msgs ,
517+ module -> active_incoming_frag_count ));
525518 opal_condition_wait (& module -> cond , & module -> lock );
526519 }
527520
@@ -554,21 +547,15 @@ int ompi_osc_pt2pt_test (ompi_win_t *win, int *flag)
554547
555548 OPAL_THREAD_LOCK (& (module -> lock ));
556549
557- if (0 != module -> num_complete_msgs ||
558- module -> active_incoming_frag_count != module -> active_incoming_frag_signal_count ) {
550+ if (0 != module -> num_complete_msgs || module -> active_incoming_frag_count < 0 ) {
559551 * flag = 0 ;
560- ret = OMPI_SUCCESS ;
561552 } else {
562553 * flag = 1 ;
563554
564555 group = module -> pw_group ;
565556 module -> pw_group = NULL ;
566557
567- OPAL_THREAD_UNLOCK (& (module -> lock ));
568-
569558 OBJ_RELEASE (group );
570-
571- return OMPI_SUCCESS ;
572559 }
573560
574561 OPAL_THREAD_UNLOCK (& (module -> lock ));
@@ -580,15 +567,19 @@ void osc_pt2pt_incoming_complete (ompi_osc_pt2pt_module_t *module, int source, i
580567{
581568 OPAL_OUTPUT_VERBOSE ((50 , ompi_osc_base_framework .framework_output ,
582569 "osc pt2pt: process_complete got complete message from %d. expected fragment count %d. "
583- "current signal count %d. current incomming count: %d. expected complete msgs: %d" ,
584- source , frag_count , module -> active_incoming_frag_signal_count ,
585- module -> active_incoming_frag_count , module -> num_complete_msgs ));
570+ "current incomming count: %d. expected complete msgs: %d" , source ,
571+ frag_count , module -> active_incoming_frag_count , module -> num_complete_msgs ));
586572
587573 /* the current fragment is not part of the frag_count so we need to add it here */
588- OPAL_THREAD_ADD32 (( int32_t * ) & module -> active_incoming_frag_signal_count , frag_count );
574+ OPAL_THREAD_ADD32 (& module -> active_incoming_frag_count , - frag_count );
589575
590- if (0 == OPAL_THREAD_ADD32 ((int32_t * ) & module -> num_complete_msgs , 1 )) {
576+ /* make sure the incoming fragment count update is written before changing the complete message count */
577+ opal_atomic_wmb ();
578+
579+ if (0 == OPAL_THREAD_ADD32 (& module -> num_complete_msgs , 1 )) {
580+ OPAL_THREAD_LOCK (& module -> lock );
591581 opal_condition_broadcast (& module -> cond );
582+ OPAL_THREAD_UNLOCK (& module -> lock );
592583 }
593584}
594585
0 commit comments