@@ -524,6 +524,19 @@ struct _local_data {
524524 size_t size ;
525525};
526526
527+ static int synchronize_errorcode (int errorcode , ompi_communicator_t * comm )
528+ {
529+ int ret ;
530+ int err = errorcode ;
531+ /* Agree on a single status code across comm: every rank contributes its own
        errorcode and the allreduce leaves the MPI_MIN over all contributions in
        err (reduced in place, MPI_IN_PLACE with err as the buffer).
        This assumes that error codes are negative integers; presumably the
        success value is not negative, so an error reported by any rank wins the
        minimum -- NOTE(review): confirm against the definition of OMPI_SUCCESS.
        All ranks of comm are expected to make this call, since it is a
        collective operation. */
532+ ret = comm -> c_coll -> coll_allreduce (MPI_IN_PLACE , & err , 1 , MPI_INT , MPI_MIN ,
533+ comm , comm -> c_coll -> coll_allreduce_module );
534+ if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
535+ err = ret ; /* the allreduce itself failed: report its code instead of the reduced value */
536+ }
537+ return err ; /* reduced status code, or the allreduce's own failure code */
538+ }
539+
527540static int allocate_state_shared (ompi_osc_rdma_module_t * module , void * * base , size_t size )
528541{
529542 ompi_communicator_t * shared_comm ;
@@ -595,28 +608,35 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
595608 OMPI_PROC_MY_NAME -> jobid , ompi_comm_get_cid (module -> comm ));
596609 if (0 > ret ) {
597610 ret = OMPI_ERR_OUT_OF_RESOURCE ;
598- break ;
611+ } else {
612+ /* allocate enough space for the state + data for all local ranks */
613+ ret = opal_shmem_segment_create (& module -> seg_ds , data_file , total_size );
614+ free (data_file );
615+ if (OPAL_SUCCESS != ret ) {
616+ OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_ERROR , "failed to create shared memory segment" );
617+ }
599618 }
619+ }
600620
601- /* allocate enough space for the state + data for all local ranks */
602- ret = opal_shmem_segment_create (& module -> seg_ds , data_file , total_size );
603- free (data_file );
604- if (OPAL_SUCCESS != ret ) {
605- OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_ERROR , "failed to create shared memory segment" );
606- break ;
607- }
621+ ret = synchronize_errorcode (ret , shared_comm );
622+ if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
623+ break ;
608624 }
609625
610- ret = module -> comm -> c_coll -> coll_bcast (& module -> seg_ds , sizeof (module -> seg_ds ), MPI_BYTE , 0 ,
626+ ret = shared_comm -> c_coll -> coll_bcast (& module -> seg_ds , sizeof (module -> seg_ds ), MPI_BYTE , 0 ,
611627 shared_comm , shared_comm -> c_coll -> coll_bcast_module );
612- if (OMPI_SUCCESS != ret ) {
628+ if (OPAL_UNLIKELY ( OMPI_SUCCESS != ret ) ) {
613629 break ;
614630 }
615631
616632 module -> segment_base = opal_shmem_segment_attach (& module -> seg_ds );
617633 if (NULL == module -> segment_base ) {
618634 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_ERROR , "failed to attach to the shared memory segment" );
619635 ret = OPAL_ERROR ;
636+ }
637+
638+ ret = synchronize_errorcode (ret , shared_comm );
639+ if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
620640 break ;
621641 }
622642
@@ -636,35 +656,28 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
636656 memset (module -> state , 0 , module -> state_size );
637657
638658 if (0 == local_rank ) {
659+ /* unlink the shared memory backing file */
660+ opal_shmem_unlink (& module -> seg_ds );
639661 /* just go ahead and register the whole segment */
640662 ret = ompi_osc_rdma_register (module , MCA_BTL_ENDPOINT_ANY , module -> segment_base , total_size , MCA_BTL_REG_FLAG_ACCESS_ANY ,
641663 & module -> state_handle );
642- if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
643- break ;
664+ if (OPAL_LIKELY (OMPI_SUCCESS == ret )) {
665+ state_region -> base = (intptr_t ) module -> segment_base ;
666+ if (module -> state_handle ) {
667+ memcpy (state_region -> btl_handle_data , module -> state_handle , module -> selected_btl -> btl_registration_handle_size );
668+ }
644669 }
670+ }
645671
646- state_region -> base = ( intptr_t ) module -> segment_base ;
647- if ( module -> state_handle ) {
648- memcpy ( state_region -> btl_handle_data , module -> state_handle , module -> selected_btl -> btl_registration_handle_size );
649- }
672+ /* synchronization to make sure memory is registered */
673+ ret = synchronize_errorcode ( ret , shared_comm );
674+ if ( OPAL_UNLIKELY ( OMPI_SUCCESS != ret )) {
675+ break ;
650676 }
651677
652678 if (MPI_WIN_FLAVOR_CREATE == module -> flavor ) {
653679 ret = ompi_osc_rdma_initialize_region (module , base , size );
654- if (OMPI_SUCCESS != ret ) {
655- break ;
656- }
657- }
658-
659- /* barrier to make sure all ranks have attached */
660- shared_comm -> c_coll -> coll_barrier (shared_comm , shared_comm -> c_coll -> coll_barrier_module );
661-
662- /* unlink the shared memory backing file */
663- if (0 == local_rank ) {
664- opal_shmem_unlink (& module -> seg_ds );
665- }
666-
667- if (MPI_WIN_FLAVOR_ALLOCATE == module -> flavor ) {
680+ } else if (MPI_WIN_FLAVOR_ALLOCATE == module -> flavor ) {
668681 ompi_osc_rdma_region_t * region = (ompi_osc_rdma_region_t * ) module -> state -> regions ;
669682 module -> state -> disp_unit = module -> disp_unit ;
670683 module -> state -> region_count = 1 ;
@@ -675,8 +688,11 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
675688 }
676689 }
677690
678- /* barrier to make sure all ranks have set up their region data */
679- shared_comm -> c_coll -> coll_barrier (shared_comm , shared_comm -> c_coll -> coll_barrier_module );
691+ /* synchronization to make sure all ranks have set up their region data */
692+ ret = synchronize_errorcode (ret , shared_comm );
693+ if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
694+ break ;
695+ }
680696
681697 offset = data_base ;
682698 for (int i = 0 ; i < local_size ; ++ i ) {
@@ -995,13 +1011,7 @@ static int ompi_osc_rdma_share_data (ompi_osc_rdma_module_t *module)
9951011 free (temp );
9961012 } while (0 );
9971013
998-
999- ret = module -> comm -> c_coll -> coll_allreduce (& ret , & global_result , 1 , MPI_INT , MPI_MIN , module -> comm ,
1000- module -> comm -> c_coll -> coll_allreduce_module );
1001-
1002- if (OMPI_SUCCESS != ret ) {
1003- global_result = ret ;
1004- }
1014+ global_result = synchronize_errorcode (ret , module -> comm );
10051015
10061016 /* none of these communicators are needed anymore so free them now*/
10071017 if (MPI_COMM_NULL != module -> local_leaders ) {
@@ -1236,6 +1246,9 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
12361246
12371247 /* fill in our part */
12381248 ret = allocate_state_shared (module , base , size );
1249+
1250+ /* notify all others if something went wrong */
1251+ ret = synchronize_errorcode (ret , module -> comm );
12391252 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
12401253 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_ERROR , "failed to allocate internal state" );
12411254 ompi_osc_rdma_free (win );
0 commit comments