@@ -303,6 +303,8 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request)
     ompi_request_t *subreq;
     bool flag;
     int ret;
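+    /* ranks outside the new communicator's local group take no part in CID selection */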
+    int participate = (context->newcomm->c_local_group->grp_my_rank != MPI_UNDEFINED);

     if (OPAL_THREAD_TRYLOCK (&ompi_cid_lock)) {
         return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0);
@@ -318,81 +320,123 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request)
     /**
     * This is the real algorithm described in the doc
     */
-    flag = false;
-    context->nextlocal_cid = mca_pml.pml_max_contextid;
-    for (unsigned int i = context->start ; i < mca_pml.pml_max_contextid ; ++i) {
-        flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i,
-                                                     context->comm);
-        if (true == flag) {
-            context->nextlocal_cid = i;
-            break;
+    if (participate) {
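+        /* reserve the lowest CID that is still free in the local communicator array */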
+        flag = false;
+        context->nextlocal_cid = mca_pml.pml_max_contextid;
+        for (unsigned int i = context->start ; i < mca_pml.pml_max_contextid ; ++i) {
+            flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i,
+                                                         context->comm);
+            if (true == flag) {
+                context->nextlocal_cid = i;
+                break;
+            }
         }
+    } else {
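+        /* contribute 0 so a non-participant never raises the MPI_MAX result below */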
+        context->nextlocal_cid = 0;
     }

     ret = context->allreduce_fn (&context->nextlocal_cid, &context->nextcid, 1, MPI_MAX,
                                  context, &subreq);
+    /* there was a failure during the non-blocking collective;
+     * all we can do is abort
+     */
     if (OMPI_SUCCESS != ret) {
-        ompi_comm_cid_lowest_id = INT64_MAX;
-        OPAL_THREAD_UNLOCK (&ompi_cid_lock);
-        return ret;
+        goto err_exit;
     }

-    if ((unsigned int) context->nextlocal_cid == mca_pml.pml_max_contextid) {
-        /* at least one peer ran out of CIDs */
-        if (flag) {
-            opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, context->nextlocal_cid, NULL);
-        }
-
-        ompi_comm_cid_lowest_id = INT64_MAX;
-        OPAL_THREAD_UNLOCK (&ompi_cid_lock);
-        return OMPI_ERR_OUT_OF_RESOURCE;
+    if ((unsigned int) context->nextlocal_cid == mca_pml.pml_max_contextid) {
+        /* our local CID space is exhausted; peers learn of it via the allreduce above */
+        ret = OMPI_ERR_OUT_OF_RESOURCE;
+        goto err_exit;
     }
     OPAL_THREAD_UNLOCK (&ompi_cid_lock);

     /* next we want to verify that the resulting commid is ok */
     return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, &subreq, 1);
+err_exit:
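+    /* on error, release any CID we reserved in the loop above */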
+    if (participate && flag) {
+        opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, context->nextlocal_cid, NULL);
+    }
+    ompi_comm_cid_lowest_id = INT64_MAX;
+    OPAL_THREAD_UNLOCK (&ompi_cid_lock);
+    return ret;
+
 }

 static int ompi_comm_checkcid (ompi_comm_request_t *request)
 {
     ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context;
     ompi_request_t *subreq;
     int ret;
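+    /* whether this rank reserved a CID in the previous step */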
+    int participate = (context->newcomm->c_local_group->grp_my_rank != MPI_UNDEFINED);

     if (OPAL_THREAD_TRYLOCK (&ompi_cid_lock)) {
         return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, NULL, 0);
     }

-    context->flag = (context->nextcid == context->nextlocal_cid);
-
-    if (!context->flag) {
-        opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextlocal_cid, NULL);
+    if (!participate) {
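+        /* a rank with no reservation always agrees with the proposed CID */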
+        context->flag = 1;
+    } else {
+        context->flag = (context->nextcid == context->nextlocal_cid);
+        if (!context->flag) {
+            opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextlocal_cid, NULL);

-        context->flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators,
-                                                              context->nextcid, context->comm);
+            context->flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators,
+                                                                  context->nextcid, context->comm);
+        }
     }

     ++context->iter;

     ret = context->allreduce_fn (&context->flag, &context->rflag, 1, MPI_MIN, context, &subreq);
     if (OMPI_SUCCESS == ret) {
         ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, &subreq, 1);
+    } else {
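+        /* the agreement collective failed: drop our reservation so the CID is not leaked */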
+        if (participate && context->flag) {
+            opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, context->nextlocal_cid, NULL);
+        }
+        ompi_comm_cid_lowest_id = INT64_MAX;
     }

     OPAL_THREAD_UNLOCK (&ompi_cid_lock);
-
     return ret;
 }

 static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request)
 {
     ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context;
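+    /* whether this rank took part in the CID agreement above */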
+    int participate = (context->newcomm->c_local_group->grp_my_rank != MPI_UNDEFINED);

     if (OPAL_THREAD_TRYLOCK (&ompi_cid_lock)) {
         return ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, NULL, 0);
     }

     if (1 == context->rflag) {
+        if (!participate) {
+            /* we need to provide something sane here,
+             * but we cannot use `nextcid` since we may already
+             * have it in use; go ahead with the next locally available CID
+             */
+            context->nextlocal_cid = mca_pml.pml_max_contextid;
+            for (unsigned int i = context->start ; i < mca_pml.pml_max_contextid ; ++i) {
+                bool flag;
+                flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i,
+                                                             context->comm);
+                if (true == flag) {
+                    context->nextlocal_cid = i;
+                    break;
+                }
+            }
+            context->nextcid = context->nextlocal_cid;
+        }
+
         /* set the according values to the newcomm */
         context->newcomm->c_contextid = context->nextcid;
         opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextcid, context->newcomm);
@@ -405,7 +449,7 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request)
         return OMPI_SUCCESS;
     }

-    if (1 == context->flag) {
+    if (participate && (1 == context->flag)) {
         /* we could use this cid, but other don't agree */
         opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextcid, NULL);
         context->start = context->nextcid + 1;  /* that's where we can start the next round */