@@ -303,6 +303,7 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request)
303303 ompi_request_t * subreq ;
304304 bool flag ;
305305 int ret ;
306+ int participate = (context -> newcomm -> c_local_group -> grp_my_rank != MPI_UNDEFINED );
306307
307308 if (OPAL_THREAD_TRYLOCK (& ompi_cid_lock )) {
308309 return ompi_comm_request_schedule_append (request , ompi_comm_allreduce_getnextcid , NULL , 0 );
@@ -318,54 +319,63 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request)
318319 /**
319320 * This is the real algorithm described in the doc
320321 */
321- flag = false;
322- context -> nextlocal_cid = mca_pml .pml_max_contextid ;
323- for (unsigned int i = context -> start ; i < mca_pml .pml_max_contextid ; ++ i ) {
324- flag = opal_pointer_array_test_and_set_item (& ompi_mpi_communicators , i ,
325- context -> comm );
326- if (true == flag ) {
327- context -> nextlocal_cid = i ;
328- break ;
322+ if ( participate ){
323+ flag = false;
324+ context -> nextlocal_cid = mca_pml .pml_max_contextid ;
325+ for (unsigned int i = context -> start ; i < mca_pml .pml_max_contextid ; ++ i ) {
326+ flag = opal_pointer_array_test_and_set_item (& ompi_mpi_communicators , i ,
327+ context -> comm );
328+ if (true == flag ) {
329+ context -> nextlocal_cid = i ;
330+ break ;
331+ }
329332 }
333+ } else {
334+ context -> nextlocal_cid = 0 ;
330335 }
331336
332337 ret = context -> allreduce_fn (& context -> nextlocal_cid , & context -> nextcid , 1 , MPI_MAX ,
333338 context , & subreq );
339+ /* there was a failure during non-blocking collective
340+ * all we can do is abort
341+ */
334342 if (OMPI_SUCCESS != ret ) {
335- ompi_comm_cid_lowest_id = INT64_MAX ;
336- OPAL_THREAD_UNLOCK (& ompi_cid_lock );
337- return ret ;
343+ goto err_exit ;
338344 }
339345
340- if ((unsigned int ) context -> nextlocal_cid == mca_pml .pml_max_contextid ) {
341- /* at least one peer ran out of CIDs */
342- if (flag ) {
343- opal_pointer_array_test_and_set_item (& ompi_mpi_communicators , context -> nextlocal_cid , NULL );
344- }
345-
346- ompi_comm_cid_lowest_id = INT64_MAX ;
347- OPAL_THREAD_UNLOCK (& ompi_cid_lock );
348- return OMPI_ERR_OUT_OF_RESOURCE ;
346+ if ( ((unsigned int ) context -> nextlocal_cid == mca_pml .pml_max_contextid ) ) {
347+ /* Our local CID space is out, others already aware (allreduce above) */
348+ ret = OMPI_ERR_OUT_OF_RESOURCE ;
349+ goto err_exit ;
349350 }
350351 OPAL_THREAD_UNLOCK (& ompi_cid_lock );
351352
352353 /* next we want to verify that the resulting commid is ok */
353354 return ompi_comm_request_schedule_append (request , ompi_comm_checkcid , & subreq , 1 );
355+ err_exit :
356+ if (participate && flag ) {
357+ opal_pointer_array_test_and_set_item (& ompi_mpi_communicators , context -> nextlocal_cid , NULL );
358+ }
359+ ompi_comm_cid_lowest_id = INT64_MAX ;
360+ OPAL_THREAD_UNLOCK (& ompi_cid_lock );
361+ return ret ;
362+
354363}
355364
356365static int ompi_comm_checkcid (ompi_comm_request_t * request )
357366{
358367 ompi_comm_cid_context_t * context = (ompi_comm_cid_context_t * ) request -> context ;
359368 ompi_request_t * subreq ;
360369 int ret ;
370+ int participate = (context -> newcomm -> c_local_group -> grp_my_rank != MPI_UNDEFINED );
361371
362372 if (OPAL_THREAD_TRYLOCK (& ompi_cid_lock )) {
363373 return ompi_comm_request_schedule_append (request , ompi_comm_checkcid , NULL , 0 );
364374 }
365375
366376 context -> flag = (context -> nextcid == context -> nextlocal_cid );
367377
368- if (!context -> flag ) {
378+ if ( participate && !context -> flag ) {
369379 opal_pointer_array_set_item (& ompi_mpi_communicators , context -> nextlocal_cid , NULL );
370380
371381 context -> flag = opal_pointer_array_test_and_set_item (& ompi_mpi_communicators ,
@@ -377,22 +387,45 @@ static int ompi_comm_checkcid (ompi_comm_request_t *request)
377387 ret = context -> allreduce_fn (& context -> flag , & context -> rflag , 1 , MPI_MIN , context , & subreq );
378388 if (OMPI_SUCCESS == ret ) {
379389 ompi_comm_request_schedule_append (request , ompi_comm_nextcid_check_flag , & subreq , 1 );
390+ } else {
391+ if (participate && context -> flag ) {
392+ opal_pointer_array_test_and_set_item (& ompi_mpi_communicators , context -> nextlocal_cid , NULL );
393+ }
394+ ompi_comm_cid_lowest_id = INT64_MAX ;
380395 }
381396
382397 OPAL_THREAD_UNLOCK (& ompi_cid_lock );
383-
384398 return ret ;
385399}
386400
387401static int ompi_comm_nextcid_check_flag (ompi_comm_request_t * request )
388402{
389403 ompi_comm_cid_context_t * context = (ompi_comm_cid_context_t * ) request -> context ;
404+ int participate = (context -> newcomm -> c_local_group -> grp_my_rank != MPI_UNDEFINED );
390405
391406 if (OPAL_THREAD_TRYLOCK (& ompi_cid_lock )) {
392407 return ompi_comm_request_schedule_append (request , ompi_comm_nextcid_check_flag , NULL , 0 );
393408 }
394409
395410 if (1 == context -> rflag ) {
411+ if ( !participate ) {
412+ /* we need to provide something sane here
413+ * but we cannot use `nextcid` as we may have it
414+ * in-use, go ahead with next locally-available CID
415+ */
416+ context -> nextlocal_cid = mca_pml .pml_max_contextid ;
417+ for (unsigned int i = context -> start ; i < mca_pml .pml_max_contextid ; ++ i ) {
418+ bool flag ;
419+ flag = opal_pointer_array_test_and_set_item (& ompi_mpi_communicators , i ,
420+ context -> comm );
421+ if (true == flag ) {
422+ context -> nextlocal_cid = i ;
423+ break ;
424+ }
425+ }
426+ context -> nextcid = context -> nextlocal_cid ;
427+ }
428+
396429 /* set the according values to the newcomm */
397430 context -> newcomm -> c_contextid = context -> nextcid ;
398431 opal_pointer_array_set_item (& ompi_mpi_communicators , context -> nextcid , context -> newcomm );
@@ -405,7 +438,7 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request)
405438 return OMPI_SUCCESS ;
406439 }
407440
408- if (1 == context -> flag ) {
441+ if (participate && ( 1 == context -> flag ) ) {
409442 /* we could use this cid, but other don't agree */
410443 opal_pointer_array_set_item (& ompi_mpi_communicators , context -> nextcid , NULL );
411444 context -> start = context -> nextcid + 1 ; /* that's where we can start the next round */
0 commit comments