@@ -98,7 +98,7 @@ struct ompi_comm_cid_context_t {
9898 int remote_leader ;
9999 int iter ;
100100 /** storage for activate barrier */
101- int ok ;
101+ int max_local_peers ;
102102 char * port_string ;
103103 bool send_first ;
104104 int pml_tag ;
@@ -266,7 +266,7 @@ static ompi_comm_cid_context_t *mca_comm_cid_context_alloc (ompi_communicator_t
266266
267267 context -> send_first = send_first ;
268268 context -> iter = 0 ;
269- context -> ok = 1 ;
269+ context -> max_local_peers = ompi_group_count_local_peers ( newcomm -> c_local_group ) ;
270270
271271 return context ;
272272}
@@ -771,9 +771,33 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request)
771771/* Non-blocking version of ompi_comm_activate */
772772static int ompi_comm_activate_nb_complete (ompi_comm_request_t * request );
773773
774- static int ompi_comm_activate_complete (ompi_communicator_t * * newcomm , ompi_communicator_t * comm )
774+ /* Callback function to set communicator disjointness flags */
775+ static inline void ompi_comm_set_disjointness_nb_complete (ompi_comm_cid_context_t * context )
776+ {
777+ if (OMPI_COMM_IS_DISJOINT_SET (* context -> newcommp )) {
778+ opal_show_help ("help-comm.txt" , "disjointness-set-again" , true);
779+ return ;
780+ }
781+
782+ if (1 == context -> max_local_peers ) {
783+ (* context -> newcommp )-> c_flags |= OMPI_COMM_DISJOINT ;
784+ } else {
785+ (* context -> newcommp )-> c_flags &= ~OMPI_COMM_DISJOINT ;
786+ }
787+ (* context -> newcommp )-> c_flags |= OMPI_COMM_DISJOINT_SET ;
788+ }
789+
790+ static int ompi_comm_activate_complete (ompi_comm_cid_context_t * context )
775791{
776792 int ret ;
793+ ompi_communicator_t * * newcomm = context -> newcommp , * comm = context -> comm ;
794+
795+ /**
796+ * Determine the new communicator's disjointness based on
797+ * context->max_local_peers. It is reduced on the communicator
798+ * before ompi_comm_activate_nb_complete is called.
799+ */
800+ ompi_comm_set_disjointness_nb_complete (context );
777801
778802 /**
779803 * Check to see if this process is in the new communicator.
@@ -846,7 +870,7 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *c
846870 ompi_comm_cid_context_t * context ;
847871 ompi_comm_request_t * request ;
848872 ompi_request_t * subreq ;
849- int ret = 0 ;
873+ int ret = 0 , local_peers = -1 ;
850874
851875 /* the caller should not pass NULL for comm (it may be the same as *newcomm) */
852876 assert (NULL != comm );
@@ -878,10 +902,13 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *c
878902 OMPI_COMM_SET_PML_ADDED (* newcomm );
879903 }
880904
881- /* Step 1: the barrier, after which it is allowed to
882- * send messages over the new communicator
905+ /**
906+ * Dual-purpose barrier:
907+ * 1. The communicator's disjointness is inferred from max_local_peers.
908+ * 2. After the operation it is allowed to send messages over the new communicator.
883909 */
884- ret = context -> allreduce_fn (& context -> ok , & context -> ok , 1 , MPI_MIN , context ,
910+ local_peers = context -> max_local_peers ;
911+ ret = context -> allreduce_fn (& local_peers , & context -> max_local_peers , 1 , MPI_MAX , context ,
885912 & subreq );
886913 if (OMPI_SUCCESS != ret ) {
887914 ompi_comm_request_return (request );
@@ -920,7 +947,7 @@ int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm
/*
 * Request-stage wrapper around ompi_comm_activate_complete().
 *
 * Recovers the CID context stored in request->context and returns whatever
 * ompi_comm_activate_complete() returns for it.  This hunk replaces the old
 * two-argument call (context->newcommp, context->comm) with a call that
 * passes the whole context.
 */
920947static int ompi_comm_activate_nb_complete (ompi_comm_request_t * request )
921948{
922949 ompi_comm_cid_context_t * context = (ompi_comm_cid_context_t * ) request -> context ;
923- return ompi_comm_activate_complete (context -> newcommp , context -> comm );
950+ return ompi_comm_activate_complete (context );
924951}
925952
926953/**************************************************************************/
0 commit comments