@@ -168,7 +168,6 @@ int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
168
168
MPI_SUM , module -> comm ,
169
169
module -> comm -> c_coll .coll_reduce_scatter_block_module );
170
170
if (OMPI_SUCCESS != ret ) {
171
- OPAL_THREAD_UNLOCK (& module -> lock );
172
171
return ret ;
173
172
}
174
173
@@ -181,11 +180,10 @@ int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
181
180
incoming_reqs ));
182
181
183
182
/* set our complete condition for incoming requests */
184
- module -> active_incoming_frag_signal_count += incoming_reqs ;
183
+ OPAL_THREAD_ADD32 ( & module -> active_incoming_frag_count , - incoming_reqs ) ;
185
184
186
185
/* wait for completion */
187
- while (module -> outgoing_frag_count != module -> outgoing_frag_signal_count ||
188
- module -> active_incoming_frag_count < module -> active_incoming_frag_signal_count ) {
186
+ while (module -> outgoing_frag_count < 0 || module -> active_incoming_frag_count < 0 ) {
189
187
opal_condition_wait (& module -> cond , & module -> lock );
190
188
}
191
189
@@ -196,10 +194,10 @@ int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
196
194
}
197
195
198
196
module -> all_sync .epoch_active = false;
199
-
200
- opal_condition_broadcast (& module -> cond );
201
197
OPAL_THREAD_UNLOCK (& module -> lock );
202
198
199
+ module -> comm -> c_coll .coll_barrier (module -> comm , module -> comm -> c_coll .coll_barrier_module );
200
+
203
201
OPAL_OUTPUT_VERBOSE ((25 , ompi_osc_base_framework .framework_output ,
204
202
"osc pt2pt: fence end: %d" , ret ));
205
203
@@ -212,11 +210,11 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
212
210
ompi_osc_pt2pt_module_t * module = GET_MODULE (win );
213
211
ompi_osc_pt2pt_sync_t * sync = & module -> all_sync ;
214
212
215
- OPAL_THREAD_LOCK (& module -> lock );
213
+ OPAL_THREAD_LOCK (& sync -> lock );
216
214
217
215
/* check if we are already in an access epoch */
218
216
if (ompi_osc_pt2pt_access_epoch_active (module )) {
219
- OPAL_THREAD_UNLOCK (& module -> lock );
217
+ OPAL_THREAD_UNLOCK (& sync -> lock );
220
218
return OMPI_ERR_RMA_SYNC ;
221
219
}
222
220
@@ -249,7 +247,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
249
247
if (0 == ompi_group_size (group )) {
250
248
/* nothing more to do. this is an empty start epoch */
251
249
sync -> eager_send_active = true;
252
- OPAL_THREAD_UNLOCK (& module -> lock );
250
+ OPAL_THREAD_UNLOCK (& sync -> lock );
253
251
return OMPI_SUCCESS ;
254
252
}
255
253
@@ -258,12 +256,11 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
258
256
/* translate the group ranks into the communicator */
259
257
sync -> peer_list .peers = ompi_osc_pt2pt_get_peers (module , group );
260
258
if (NULL == sync -> peer_list .peers ) {
261
- OPAL_THREAD_UNLOCK (& module -> lock );
259
+ OPAL_THREAD_UNLOCK (& sync -> lock );
262
260
return OMPI_ERR_OUT_OF_RESOURCE ;
263
261
}
264
262
265
263
if (!(assert & MPI_MODE_NOCHECK )) {
266
- OPAL_THREAD_LOCK (& sync -> lock );
267
264
for (int i = 0 ; i < sync -> num_peers ; ++ i ) {
268
265
ompi_osc_pt2pt_peer_t * peer = sync -> peer_list .peers [i ];
269
266
@@ -276,7 +273,6 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
276
273
ompi_osc_pt2pt_peer_set_unex (peer , false);
277
274
}
278
275
}
279
- OPAL_THREAD_UNLOCK (& sync -> lock );
280
276
} else {
281
277
sync -> sync_expected = 0 ;
282
278
}
@@ -295,7 +291,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
295
291
"ompi_osc_pt2pt_start complete. eager sends active: %d" ,
296
292
sync -> eager_send_active ));
297
293
298
- OPAL_THREAD_UNLOCK (& module -> lock );
294
+ OPAL_THREAD_UNLOCK (& sync -> lock );
299
295
return OMPI_SUCCESS ;
300
296
}
301
297
@@ -313,14 +309,14 @@ int ompi_osc_pt2pt_complete (ompi_win_t *win)
313
309
OPAL_OUTPUT_VERBOSE ((50 , ompi_osc_base_framework .framework_output ,
314
310
"ompi_osc_pt2pt_complete entering..." ));
315
311
316
- OPAL_THREAD_LOCK (& module -> lock );
312
+ OPAL_THREAD_LOCK (& sync -> lock );
317
313
if (OMPI_OSC_PT2PT_SYNC_TYPE_PSCW != sync -> type ) {
318
- OPAL_THREAD_UNLOCK (& module -> lock );
314
+ OPAL_THREAD_UNLOCK (& sync -> lock );
319
315
return OMPI_ERR_RMA_SYNC ;
320
316
}
321
317
322
318
/* wait for all the post messages */
323
- ompi_osc_pt2pt_sync_wait (sync );
319
+ ompi_osc_pt2pt_sync_wait_nolock (sync );
324
320
325
321
/* phase 1 cleanup sync object */
326
322
group = sync -> sync .pscw .group ;
@@ -330,8 +326,7 @@ int ompi_osc_pt2pt_complete (ompi_win_t *win)
330
326
331
327
/* need to reset the sync here to avoid processing incorrect post messages */
332
328
ompi_osc_pt2pt_sync_reset (sync );
333
-
334
- OPAL_THREAD_UNLOCK (& module -> lock );
329
+ OPAL_THREAD_UNLOCK (& sync -> lock );
335
330
336
331
OPAL_OUTPUT_VERBOSE ((50 , ompi_osc_base_framework .framework_output ,
337
332
"ompi_osc_pt2pt_complete all posts received. sending complete messages..." ));
@@ -403,7 +398,7 @@ int ompi_osc_pt2pt_complete (ompi_win_t *win)
403
398
OPAL_THREAD_LOCK (& module -> lock );
404
399
/* wait for outgoing requests to complete. Don't wait for incoming, as
405
400
we're only completing the access epoch, not the exposure epoch */
406
- while (module -> outgoing_frag_count != module -> outgoing_frag_signal_count ) {
401
+ while (module -> outgoing_frag_count < 0 ) {
407
402
opal_condition_wait (& module -> cond , & module -> lock );
408
403
}
409
404
@@ -513,15 +508,13 @@ int ompi_osc_pt2pt_wait (ompi_win_t *win)
513
508
}
514
509
515
510
OPAL_OUTPUT_VERBOSE ((25 , ompi_osc_base_framework .framework_output ,
516
- "ompi_osc_pt2pt_wait entering..." ));
511
+ "ompi_osc_pt2pt_wait entering... module %p" , ( void * ) module ));
517
512
518
513
OPAL_THREAD_LOCK (& module -> lock );
519
- while (0 != module -> num_complete_msgs ||
520
- module -> active_incoming_frag_count != module -> active_incoming_frag_signal_count ) {
521
- OPAL_OUTPUT_VERBOSE ((25 , ompi_osc_base_framework .framework_output , "num_complete_msgs = %d, "
522
- "active_incoming_frag_count = %d, active_incoming_frag_signal_count = %d" ,
523
- module -> num_complete_msgs , module -> active_incoming_frag_count ,
524
- module -> active_incoming_frag_signal_count ));
514
+ while (0 != module -> num_complete_msgs || module -> active_incoming_frag_count < 0 ) {
515
+ OPAL_OUTPUT_VERBOSE ((25 , ompi_osc_base_framework .framework_output , "module %p, num_complete_msgs = %d, "
516
+ "active_incoming_frag_count = %d" , (void * ) module , module -> num_complete_msgs ,
517
+ module -> active_incoming_frag_count ));
525
518
opal_condition_wait (& module -> cond , & module -> lock );
526
519
}
527
520
@@ -554,21 +547,15 @@ int ompi_osc_pt2pt_test (ompi_win_t *win, int *flag)
554
547
555
548
OPAL_THREAD_LOCK (& (module -> lock ));
556
549
557
- if (0 != module -> num_complete_msgs ||
558
- module -> active_incoming_frag_count != module -> active_incoming_frag_signal_count ) {
550
+ if (0 != module -> num_complete_msgs || module -> active_incoming_frag_count < 0 ) {
559
551
* flag = 0 ;
560
- ret = OMPI_SUCCESS ;
561
552
} else {
562
553
* flag = 1 ;
563
554
564
555
group = module -> pw_group ;
565
556
module -> pw_group = NULL ;
566
557
567
- OPAL_THREAD_UNLOCK (& (module -> lock ));
568
-
569
558
OBJ_RELEASE (group );
570
-
571
- return OMPI_SUCCESS ;
572
559
}
573
560
574
561
OPAL_THREAD_UNLOCK (& (module -> lock ));
@@ -580,15 +567,19 @@ void osc_pt2pt_incoming_complete (ompi_osc_pt2pt_module_t *module, int source, i
580
567
{
581
568
OPAL_OUTPUT_VERBOSE ((50 , ompi_osc_base_framework .framework_output ,
582
569
"osc pt2pt: process_complete got complete message from %d. expected fragment count %d. "
583
- "current signal count %d. current incomming count: %d. expected complete msgs: %d" ,
584
- source , frag_count , module -> active_incoming_frag_signal_count ,
585
- module -> active_incoming_frag_count , module -> num_complete_msgs ));
570
+ "current incomming count: %d. expected complete msgs: %d" , source ,
571
+ frag_count , module -> active_incoming_frag_count , module -> num_complete_msgs ));
586
572
587
573
/* the current fragment is not part of the frag_count so we need to add it here */
588
- OPAL_THREAD_ADD32 (( int32_t * ) & module -> active_incoming_frag_signal_count , frag_count );
574
+ OPAL_THREAD_ADD32 (& module -> active_incoming_frag_count , - frag_count );
589
575
590
- if (0 == OPAL_THREAD_ADD32 ((int32_t * ) & module -> num_complete_msgs , 1 )) {
576
+ /* make sure the signal count is written before changing the complete message count */
577
+ opal_atomic_wmb ();
578
+
579
+ if (0 == OPAL_THREAD_ADD32 (& module -> num_complete_msgs , 1 )) {
580
+ OPAL_THREAD_LOCK (& module -> lock );
591
581
opal_condition_broadcast (& module -> cond );
582
+ OPAL_THREAD_UNLOCK (& module -> lock );
592
583
}
593
584
}
594
585
0 commit comments