@@ -73,8 +73,7 @@ static int comm_create_local_group(MPIR_Comm * comm_ptr)
7373 MPIR_Lpid * map = MPL_malloc (n * sizeof (MPIR_Lpid ), MPL_MEM_GROUP );
7474
7575 for (int i = 0 ; i < n ; i ++ ) {
76- mpi_errno = MPID_Comm_get_lpid (comm_ptr , i , & map [i ], FALSE);
77- MPIR_ERR_CHECK (mpi_errno );
76+ map [i ] = MPIR_Group_rank_to_lpid (comm_ptr -> local_group , i );
7877 }
7978
8079 mpi_errno = MPIR_Group_create_map (n , comm_ptr -> rank , comm_ptr -> session_ptr , map ,
@@ -238,8 +237,7 @@ int MPII_Comm_create_calculate_mapping(MPIR_Group * group_ptr,
238237 /* FIXME : BUBBLE SORT */
239238 mapping [i ] = -1 ;
240239 for (j = 0 ; j < comm_ptr -> local_size ; j ++ ) {
241- MPIR_Lpid comm_lpid ;
242- MPID_Comm_get_lpid (comm_ptr , j , & comm_lpid , FALSE);
240+ MPIR_Lpid comm_lpid = MPIR_Group_rank_to_lpid (comm_ptr -> local_group , j );
243241 if (comm_lpid == MPIR_Group_rank_to_lpid (group_ptr , i )) {
244242 mapping [i ] = j ;
245243 break ;
@@ -918,8 +916,7 @@ int MPIR_Comm_remote_group_impl(MPIR_Comm * comm_ptr, MPIR_Group ** group_ptr)
918916 MPIR_Lpid * map = MPL_malloc (n * sizeof (MPIR_Lpid ), MPL_MEM_GROUP );
919917
920918 for (int i = 0 ; i < n ; i ++ ) {
921- mpi_errno = MPID_Comm_get_lpid (comm_ptr , i , & map [i ], TRUE);
922- MPIR_ERR_CHECK (mpi_errno );
919+ map [i ] = MPIR_Group_rank_to_lpid (comm_ptr -> remote_group , i );
923920 }
924921 mpi_errno = MPIR_Group_create_map (n , MPI_UNDEFINED , comm_ptr -> session_ptr , map ,
925922 & comm_ptr -> remote_group );
@@ -952,88 +949,112 @@ int MPIR_Comm_set_info_impl(MPIR_Comm * comm_ptr, MPIR_Info * info_ptr)
952949 goto fn_exit ;
953950}
954951
#if 0
/* Decide (arbitrarily but consistently on both sides) which group of the
 * new intercommunicator is the "low group".  Processes from the same world
 * are ordered by world rank; processes from different worlds are ordered
 * by comparing the world namespace strings. */
static int determine_low_group(MPIR_Lpid remote_lpid, bool *is_low_group_out)
{
    int mpi_errno = MPI_SUCCESS;

    int my_world_idx = 0;
    int my_world_rank = MPIR_Process.rank;
    int remote_world_idx = MPIR_LPID_WORLD_INDEX(remote_lpid);
    int remote_world_rank = MPIR_LPID_WORLD_RANK(remote_lpid);

    if (remote_world_idx == my_world_idx) {
        /* Same world: the smaller world rank becomes the low group. */
        MPIR_Assert(my_world_idx == 0);
        *is_low_group_out = (my_world_rank < remote_world_rank);
    } else {
        /* Different worlds: the namespace strings must differ, so their
         * ordering gives an unambiguous tie-break. */
        int cmp_result = strncmp(MPIR_Worlds[my_world_idx].namespace,
                                 MPIR_Worlds[remote_world_idx].namespace,
                                 MPIR_NAMESPACE_MAX);
        MPIR_Assert(cmp_result != 0);
        *is_low_group_out = (cmp_result >= 0);
    }

    return mpi_errno;
}
#endif
983+
955984int MPIR_Intercomm_create_impl (MPIR_Comm * local_comm_ptr , int local_leader ,
956985 MPIR_Comm * peer_comm_ptr , int remote_leader , int tag ,
957986 MPIR_Comm * * new_intercomm_ptr )
958987{
959988 int mpi_errno = MPI_SUCCESS ;
960- int final_context_id , recvcontext_id ;
961989 int remote_size = 0 ;
962990 MPIR_Lpid * remote_lpids = NULL ;
963- int comm_info [3 ];
964- int is_low_group = 0 ;
965991 MPIR_Session * session_ptr = local_comm_ptr -> session_ptr ;
966992
967993 MPIR_FUNC_ENTER ;
968994
969- /* Shift tag into the tagged coll space */
970- tag |= MPIR_TAG_COLL_BIT ;
971-
972- mpi_errno = MPID_Intercomm_exchange_map (local_comm_ptr , local_leader ,
973- peer_comm_ptr , remote_leader ,
974- & remote_size , & remote_lpids , & is_low_group );
975- MPIR_ERR_CHECK (mpi_errno );
976-
977995 /*
978996 * Create the contexts. Each group will have a context for sending
979997 * to the other group. All processes must be involved. Because
980998 * we know that the local and remote groups are disjoint, this
981999 * step will complete
9821000 */
983- MPL_DBG_MSG_FMT (MPIR_DBG_COMM , VERBOSE ,
984- (MPL_DBG_FDEST , "About to get contextid (local_size=%d) on rank %d" ,
985- local_comm_ptr -> local_size , local_comm_ptr -> rank ));
9861001 /* In the multi-threaded case, MPIR_Get_contextid_sparse assumes that the
9871002 * calling routine already holds the single critical section */
9881003 /* TODO: Make sure this is tag-safe */
1004+ int recvcontext_id ;
9891005 mpi_errno = MPIR_Get_contextid_sparse (local_comm_ptr , & recvcontext_id , FALSE);
9901006 MPIR_ERR_CHECK (mpi_errno );
9911007 MPIR_Assert (recvcontext_id != 0 );
992- MPL_DBG_MSG_FMT (MPIR_DBG_COMM , VERBOSE , (MPL_DBG_FDEST , "Got contextid=%d" , recvcontext_id ));
993-
994- /* Leaders can now swap context ids and then broadcast the value
995- * to the local group of processes */
996- if (local_comm_ptr -> rank == local_leader ) {
997- int remote_context_id ;
9981008
999- mpi_errno =
1000- MPIC_Sendrecv (& recvcontext_id , 1 , MPIR_CONTEXT_ID_T_DATATYPE , remote_leader , tag ,
1001- & remote_context_id , 1 , MPIR_CONTEXT_ID_T_DATATYPE , remote_leader , tag ,
1002- peer_comm_ptr , MPI_STATUS_IGNORE , MPIR_ERR_NONE );
1003- MPIR_ERR_CHECK (mpi_errno );
1004-
1005- final_context_id = remote_context_id ;
1009+ /* Shift tag into the tagged coll space */
1010+ tag |= MPIR_TAG_COLL_BIT ;
10061011
1007- /* Now, send all of our local processes the remote_lpids,
1008- * along with the final context id */
1009- comm_info [0 ] = final_context_id ;
1010- MPL_DBG_MSG (MPIR_DBG_COMM , VERBOSE , "About to bcast on local_comm" );
1011- mpi_errno = MPIR_Bcast (comm_info , 1 , MPIR_INT_INTERNAL , local_leader ,
1012- local_comm_ptr , MPIR_ERR_NONE );
1013- MPIR_ERR_CHECK (mpi_errno );
1014- MPL_DBG_MSG_D (MPIR_DBG_COMM , VERBOSE , "end of bcast on local_comm of size %d" ,
1015- local_comm_ptr -> local_size );
1016- } else {
1017- /* we're the other processes */
1018- MPL_DBG_MSG (MPIR_DBG_COMM , VERBOSE , "About to receive bcast on local_comm" );
1019- mpi_errno = MPIR_Bcast (comm_info , 1 , MPIR_INT_INTERNAL , local_leader ,
1020- local_comm_ptr , MPIR_ERR_NONE );
1021- MPIR_ERR_CHECK (mpi_errno );
1012+ int remote_context_id ;
1013+ mpi_errno = MPID_Intercomm_exchange (local_comm_ptr , local_leader ,
1014+ peer_comm_ptr , remote_leader , tag ,
1015+ recvcontext_id , & remote_context_id ,
1016+ & remote_size , & remote_lpids );
1017+ MPIR_ERR_CHECK (mpi_errno );
10221018
1023- /* Extract the context and group sign information */
1024- final_context_id = comm_info [0 ];
1019+ bool is_low_group ;
1020+ #if 0
1021+ mpi_errno = determine_low_group (remote_lpids [0 ], & is_low_group );
1022+ MPIR_ERR_CHECK (mpi_errno );
1023+ #else
1024+ if (local_comm_ptr -> rank == local_leader ) {
1025+ if (MPIR_LPID_WORLD_INDEX (remote_lpids [0 ]) == 0 ) {
1026+ is_low_group = (MPIR_Process .rank < MPIR_LPID_WORLD_RANK (remote_lpids [0 ]));
1027+ } else {
1028+ char remote_namespace [MPIR_NAMESPACE_MAX ];
1029+ mpi_errno = MPIC_Sendrecv (MPIR_Worlds [0 ].namespace , MPIR_NAMESPACE_MAX ,
1030+ MPIR_CHAR_INTERNAL , remote_leader , tag ,
1031+ remote_namespace , MPIR_NAMESPACE_MAX , MPIR_CHAR_INTERNAL ,
1032+ remote_leader , tag , peer_comm_ptr , MPI_STATUS_IGNORE ,
1033+ MPIR_ERR_NONE );
1034+ MPIR_ERR_CHECK (mpi_errno );
1035+ int cmp_result ;
1036+ cmp_result = strncmp (MPIR_Worlds [0 ].namespace , remote_namespace , MPIR_NAMESPACE_MAX );
1037+ MPIR_Assert (cmp_result != 0 );
1038+ if (cmp_result < 0 )
1039+ is_low_group = false;
1040+ else
1041+ is_low_group = true;
1042+ }
10251043 }
1044+ mpi_errno = MPIR_Bcast_impl (& is_low_group , 1 , MPIR_C_BOOL_INTERNAL ,
1045+ local_leader , local_comm_ptr , MPIR_ERR_NONE );
1046+ MPIR_ERR_CHECK (mpi_errno );
1047+ #endif
10261048
10271049 /* At last, we now have the information that we need to build the
10281050 * intercommunicator */
10291051
10301052 /* All processes in the local_comm now build the communicator */
10311053
10321054 mpi_errno = MPIR_Comm_create (new_intercomm_ptr );
1033- if (mpi_errno )
1034- goto fn_fail ;
1055+ MPIR_ERR_CHECK (mpi_errno );
10351056
1036- (* new_intercomm_ptr )-> context_id = final_context_id ;
1057+ (* new_intercomm_ptr )-> context_id = remote_context_id ;
10371058 (* new_intercomm_ptr )-> recvcontext_id = recvcontext_id ;
10381059 (* new_intercomm_ptr )-> remote_size = remote_size ;
10391060 (* new_intercomm_ptr )-> local_size = local_comm_ptr -> local_size ;
@@ -1055,6 +1076,7 @@ int MPIR_Intercomm_create_impl(MPIR_Comm * local_comm_ptr, int local_leader,
10551076 /* construct remote_group */
10561077 mpi_errno = MPIR_Group_create_map (remote_size , MPI_UNDEFINED , session_ptr , remote_lpids ,
10571078 & (* new_intercomm_ptr )-> remote_group );
1079+ MPIR_ERR_CHECK (mpi_errno );
10581080
10591081 MPIR_Comm_set_session_ptr (* new_intercomm_ptr , session_ptr );
10601082
@@ -1113,10 +1135,9 @@ int MPIR_peer_intercomm_create(int context_id, int recvcontext_id,
11131135
11141136 MPIR_Session * session_ptr = NULL ; /* Can we just use NULL session since peer_intercomm is always temporary? */
11151137 MPIR_Lpid my_lpid = MPIR_Group_rank_to_lpid (comm_self -> local_group , 0 );
1116- mpi_errno = MPIR_Group_create_stride (1 , 0 , session_ptr , my_lpid , 1 , 1 ,
1117- & (* newcomm )-> local_group );
1138+ mpi_errno = MPIR_Group_create_stride (1 , 0 , session_ptr , my_lpid , 1 , & (* newcomm )-> local_group );
11181139 MPIR_ERR_CHECK (mpi_errno );
1119- mpi_errno = MPIR_Group_create_stride (1 , 0 , session_ptr , remote_lpid , 1 , 1 ,
1140+ mpi_errno = MPIR_Group_create_stride (1 , 0 , session_ptr , remote_lpid , 1 ,
11201141 & (* newcomm )-> remote_group );
11211142 MPIR_ERR_CHECK (mpi_errno );
11221143
0 commit comments