@@ -523,6 +523,19 @@ struct _local_data {
523523 size_t size ;
524524};
525525
526+ static int synchronize_errorcode (int errorcode , ompi_communicator_t * comm )
527+ {
528+ int ret ;
529+ int err = errorcode ;
530+ /* This assumes that error codes are negative integers */
531+ ret = comm -> c_coll -> coll_allreduce (MPI_IN_PLACE , & err , 1 , MPI_INT , MPI_MIN ,
532+ comm , comm -> c_coll -> coll_allreduce_module );
533+ if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
534+ err = ret ;
535+ }
536+ return err ;
537+ }
538+
526539static int allocate_state_shared (ompi_osc_rdma_module_t * module , void * * base , size_t size )
527540{
528541 ompi_communicator_t * shared_comm ;
@@ -593,28 +606,35 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
593606 OMPI_PROC_MY_NAME -> jobid , ompi_comm_get_cid (module -> comm ));
594607 if (0 > ret ) {
595608 ret = OMPI_ERR_OUT_OF_RESOURCE ;
596- break ;
609+ } else {
610+ /* allocate enough space for the state + data for all local ranks */
611+ ret = opal_shmem_segment_create (& module -> seg_ds , data_file , total_size );
612+ free (data_file );
613+ if (OPAL_SUCCESS != ret ) {
614+ OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_ERROR , "failed to create shared memory segment" );
615+ }
597616 }
617+ }
598618
599- /* allocate enough space for the state + data for all local ranks */
600- ret = opal_shmem_segment_create (& module -> seg_ds , data_file , total_size );
601- free (data_file );
602- if (OPAL_SUCCESS != ret ) {
603- OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_ERROR , "failed to create shared memory segment" );
604- break ;
605- }
619+ ret = synchronize_errorcode (ret , shared_comm );
620+ if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
621+ break ;
606622 }
607623
608- ret = module -> comm -> c_coll -> coll_bcast (& module -> seg_ds , sizeof (module -> seg_ds ), MPI_BYTE , 0 ,
624+ ret = shared_comm -> c_coll -> coll_bcast (& module -> seg_ds , sizeof (module -> seg_ds ), MPI_BYTE , 0 ,
609625 shared_comm , shared_comm -> c_coll -> coll_bcast_module );
610- if (OMPI_SUCCESS != ret ) {
626+ if (OPAL_UNLIKELY ( OMPI_SUCCESS != ret ) ) {
611627 break ;
612628 }
613629
614630 module -> segment_base = opal_shmem_segment_attach (& module -> seg_ds );
615631 if (NULL == module -> segment_base ) {
616632 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_ERROR , "failed to attach to the shared memory segment" );
617633 ret = OPAL_ERROR ;
634+ }
635+
636+ ret = synchronize_errorcode (ret , shared_comm );
637+ if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
618638 break ;
619639 }
620640
@@ -643,27 +663,23 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
643663 /* just go ahead and register the whole segment */
644664 ret = ompi_osc_rdma_register (module , MCA_BTL_ENDPOINT_ANY , module -> segment_base , total_size , MCA_BTL_REG_FLAG_ACCESS_ANY ,
645665 & module -> state_handle );
646- if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
647- break ;
648- }
649-
650- state_region -> base = (intptr_t ) module -> segment_base ;
651- if (module -> state_handle ) {
652- memcpy (state_region -> btl_handle_data , module -> state_handle , module -> selected_btl -> btl_registration_handle_size );
666+ if (OPAL_LIKELY (OMPI_SUCCESS == ret )) {
667+ state_region -> base = (intptr_t ) module -> segment_base ;
668+ if (module -> state_handle ) {
669+ memcpy (state_region -> btl_handle_data , module -> state_handle , module -> selected_btl -> btl_registration_handle_size );
670+ }
653671 }
654672 }
655673
656- /* barrier to make sure memory is registered */
657- shared_comm -> c_coll -> coll_barrier (shared_comm , shared_comm -> c_coll -> coll_barrier_module );
674+ /* synchronization to make sure memory is registered */
675+ ret = synchronize_errorcode (ret , shared_comm );
676+ if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
677+ break ;
678+ }
658679
659680 if (MPI_WIN_FLAVOR_CREATE == module -> flavor ) {
660681 ret = ompi_osc_rdma_initialize_region (module , base , size );
661- if (OMPI_SUCCESS != ret ) {
662- break ;
663- }
664- }
665-
666- if (MPI_WIN_FLAVOR_ALLOCATE == module -> flavor ) {
682+ } else if (MPI_WIN_FLAVOR_ALLOCATE == module -> flavor ) {
667683 ompi_osc_rdma_region_t * region = (ompi_osc_rdma_region_t * ) module -> state -> regions ;
668684 module -> state -> disp_unit = module -> disp_unit ;
669685 module -> state -> region_count = 1 ;
@@ -674,8 +690,11 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
674690 }
675691 }
676692
677- /* barrier to make sure all ranks have set up their region data */
678- shared_comm -> c_coll -> coll_barrier (shared_comm , shared_comm -> c_coll -> coll_barrier_module );
693+ /* synchronization to make sure all ranks have set up their region data */
694+ ret = synchronize_errorcode (ret , shared_comm );
695+ if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
696+ break ;
697+ }
679698
680699 offset = data_base ;
681700 for (int i = 0 ; i < local_size ; ++ i ) {
@@ -994,13 +1013,7 @@ static int ompi_osc_rdma_share_data (ompi_osc_rdma_module_t *module)
9941013 free (temp );
9951014 } while (0 );
9961015
997-
998- ret = module -> comm -> c_coll -> coll_allreduce (& ret , & global_result , 1 , MPI_INT , MPI_MIN , module -> comm ,
999- module -> comm -> c_coll -> coll_allreduce_module );
1000-
1001- if (OMPI_SUCCESS != ret ) {
1002- global_result = ret ;
1003- }
1016+ global_result = synchronize_errorcode (ret , module -> comm );
10041017
10051018 /* none of these communicators are needed anymore so free them now*/
10061019 if (MPI_COMM_NULL != module -> local_leaders ) {
@@ -1235,6 +1248,9 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
12351248
12361249 /* fill in our part */
12371250 ret = allocate_state_shared (module , base , size );
1251+
1252+ /* notify all others if something went wrong */
1253+ ret = synchronize_errorcode (ret , module -> comm );
12381254 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
12391255 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_ERROR , "failed to allocate internal state" );
12401256 ompi_osc_rdma_free (win );
0 commit comments