@@ -952,88 +952,84 @@ int MPIR_Comm_set_info_impl(MPIR_Comm * comm_ptr, MPIR_Info * info_ptr)
952952 goto fn_exit ;
953953}
954954
955+ /* arbitrarily determine which group is the low_group by comparing
956+ * world namespaces and world ranks */
957+ static int determine_low_group (MPIR_Lpid remote_lpid , bool * is_low_group_out )
958+ {
959+ int mpi_errno = MPI_SUCCESS ;
960+
961+ int my_world_idx = 0 ;
962+ int my_world_rank = MPIR_Process .rank ;
963+ int remote_world_idx = MPIR_LPID_WORLD_INDEX (remote_lpid );
964+ int remote_world_rank = MPIR_LPID_WORLD_RANK (remote_lpid );
965+
966+ if (my_world_idx == remote_world_idx ) {
967+ /* same world, just compare world ranks */
968+ MPIR_Assert (my_world_idx == 0 );
969+ * is_low_group_out = (my_world_rank < remote_world_rank );
970+ } else {
971+ /* different world, compare namespace */
972+ int cmp_result = strncmp (MPIR_Worlds [my_world_idx ].namespace ,
973+ MPIR_Worlds [remote_world_idx ].namespace ,
974+ MPIR_NAMESPACE_MAX );
975+ MPIR_Assert (cmp_result != 0 );
976+ if (cmp_result < 0 )
977+ * is_low_group_out = false;
978+ else
979+ * is_low_group_out = true;
980+ }
981+
982+ return mpi_errno ;
983+ }
984+
955985int MPIR_Intercomm_create_impl (MPIR_Comm * local_comm_ptr , int local_leader ,
956986 MPIR_Comm * peer_comm_ptr , int remote_leader , int tag ,
957987 MPIR_Comm * * new_intercomm_ptr )
958988{
959989 int mpi_errno = MPI_SUCCESS ;
960- int final_context_id , recvcontext_id ;
961990 int remote_size = 0 ;
962991 MPIR_Lpid * remote_lpids = NULL ;
963- int comm_info [3 ];
964- int is_low_group = 0 ;
965992 MPIR_Session * session_ptr = local_comm_ptr -> session_ptr ;
966993
967994 MPIR_FUNC_ENTER ;
968995
969- /* Shift tag into the tagged coll space */
970- tag |= MPIR_TAG_COLL_BIT ;
971-
972- mpi_errno = MPID_Intercomm_exchange_map (local_comm_ptr , local_leader ,
973- peer_comm_ptr , remote_leader ,
974- & remote_size , & remote_lpids , & is_low_group );
975- MPIR_ERR_CHECK (mpi_errno );
976-
977996 /*
978997 * Create the contexts. Each group will have a context for sending
979998 * to the other group. All processes must be involved. Because
980999 * we know that the local and remote groups are disjoint, this
9811000 * step will complete
9821001 */
983- MPL_DBG_MSG_FMT (MPIR_DBG_COMM , VERBOSE ,
984- (MPL_DBG_FDEST , "About to get contextid (local_size=%d) on rank %d" ,
985- local_comm_ptr -> local_size , local_comm_ptr -> rank ));
9861002 /* In the multi-threaded case, MPIR_Get_contextid_sparse assumes that the
9871003 * calling routine already holds the single critical section */
9881004 /* TODO: Make sure this is tag-safe */
1005+ int recvcontext_id ;
9891006 mpi_errno = MPIR_Get_contextid_sparse (local_comm_ptr , & recvcontext_id , FALSE);
9901007 MPIR_ERR_CHECK (mpi_errno );
9911008 MPIR_Assert (recvcontext_id != 0 );
992- MPL_DBG_MSG_FMT (MPIR_DBG_COMM , VERBOSE , (MPL_DBG_FDEST , "Got contextid=%d" , recvcontext_id ));
993-
994- /* Leaders can now swap context ids and then broadcast the value
995- * to the local group of processes */
996- if (local_comm_ptr -> rank == local_leader ) {
997- int remote_context_id ;
9981009
999- mpi_errno =
1000- MPIC_Sendrecv (& recvcontext_id , 1 , MPIR_CONTEXT_ID_T_DATATYPE , remote_leader , tag ,
1001- & remote_context_id , 1 , MPIR_CONTEXT_ID_T_DATATYPE , remote_leader , tag ,
1002- peer_comm_ptr , MPI_STATUS_IGNORE , MPIR_ERR_NONE );
1003- MPIR_ERR_CHECK (mpi_errno );
1004-
1005- final_context_id = remote_context_id ;
1010+ /* Shift tag into the tagged coll space */
1011+ tag |= MPIR_TAG_COLL_BIT ;
10061012
1007- /* Now, send all of our local processes the remote_lpids,
1008- * along with the final context id */
1009- comm_info [0 ] = final_context_id ;
1010- MPL_DBG_MSG (MPIR_DBG_COMM , VERBOSE , "About to bcast on local_comm" );
1011- mpi_errno = MPIR_Bcast (comm_info , 1 , MPIR_INT_INTERNAL , local_leader ,
1012- local_comm_ptr , MPIR_ERR_NONE );
1013- MPIR_ERR_CHECK (mpi_errno );
1014- MPL_DBG_MSG_D (MPIR_DBG_COMM , VERBOSE , "end of bcast on local_comm of size %d" ,
1015- local_comm_ptr -> local_size );
1016- } else {
1017- /* we're the other processes */
1018- MPL_DBG_MSG (MPIR_DBG_COMM , VERBOSE , "About to receive bcast on local_comm" );
1019- mpi_errno = MPIR_Bcast (comm_info , 1 , MPIR_INT_INTERNAL , local_leader ,
1020- local_comm_ptr , MPIR_ERR_NONE );
1021- MPIR_ERR_CHECK (mpi_errno );
1013+ int remote_context_id ;
1014+ mpi_errno = MPID_Intercomm_exchange (local_comm_ptr , local_leader ,
1015+ peer_comm_ptr , remote_leader , tag ,
1016+ recvcontext_id , & remote_context_id ,
1017+ & remote_size , & remote_lpids );
1018+ MPIR_ERR_CHECK (mpi_errno );
10221019
1023- /* Extract the context and group sign information */
1024- final_context_id = comm_info [0 ];
1025- }
1020+ bool is_low_group ;
1021+ mpi_errno = determine_low_group ( remote_lpids [0 ], & is_low_group ) ;
1022+ MPIR_ERR_CHECK ( mpi_errno );
10261023
10271024 /* At last, we now have the information that we need to build the
10281025 * intercommunicator */
10291026
10301027 /* All processes in the local_comm now build the communicator */
10311028
10321029 mpi_errno = MPIR_Comm_create (new_intercomm_ptr );
1033- if (mpi_errno )
1034- goto fn_fail ;
1030+ MPIR_ERR_CHECK (mpi_errno );
10351031
1036- (* new_intercomm_ptr )-> context_id = final_context_id ;
1032+ (* new_intercomm_ptr )-> context_id = remote_context_id ;
10371033 (* new_intercomm_ptr )-> recvcontext_id = recvcontext_id ;
10381034 (* new_intercomm_ptr )-> remote_size = remote_size ;
10391035 (* new_intercomm_ptr )-> local_size = local_comm_ptr -> local_size ;
@@ -1055,6 +1051,7 @@ int MPIR_Intercomm_create_impl(MPIR_Comm * local_comm_ptr, int local_leader,
10551051 /* construct remote_group */
10561052 mpi_errno = MPIR_Group_create_map (remote_size , MPI_UNDEFINED , session_ptr , remote_lpids ,
10571053 & (* new_intercomm_ptr )-> remote_group );
1054+ MPIR_ERR_CHECK (mpi_errno );
10581055
10591056 MPIR_Comm_set_session_ptr (* new_intercomm_ptr , session_ptr );
10601057
0 commit comments