@@ -308,21 +308,16 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
308308                                        const  void  * arg0 , const  void  * arg1 , bool  send_first , int  mode ,
309309                                        ompi_request_t  * * req )
310310{
311-     pmix_info_t  pinfo , * results  =  NULL ;
311+     pmix_info_t  * pinfo , * results  =  NULL ;
312312    size_t  nresults ;
313-     opal_process_name_t  * name_array  =  NULL ;
314-     char  * tag  =  NULL ;
315-     size_t  proc_count ;
316-     size_t  cid_base  =  0 ;
313+     opal_process_name_t  opal_proc_name ;
317314    bool  cid_base_set  =  false;
315+     char  * tag  =  NULL ;
316+     size_t  proc_count  =  0 , rproc_count  =  0 , tproc_count  =  0 , cid_base  =  0UL , ninfo ;
318317    int  rc , leader_rank ;
319-     int  ret  =  OMPI_SUCCESS ;
320-     pmix_proc_t  * procs  =  NULL ;
321- 
322-     rc  =  ompi_group_to_proc_name_array  (newcomm -> c_local_group , & name_array , & proc_count );
323-     if  (OPAL_UNLIKELY (OMPI_SUCCESS  !=  rc )) {
324-         return  rc ;
325-     }
318+     pmix_proc_t  * procs ;
319+     void  * grpinfo  =  NULL , * list  =  NULL ;
320+     pmix_data_array_t  darray ;
326321
327322    switch  (mode ) {
328323    case  OMPI_COMM_CID_GROUP_NEW :
@@ -339,15 +334,75 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
339334        break ;
340335    }
341336
342-     PMIX_INFO_LOAD (& pinfo , PMIX_GROUP_ASSIGN_CONTEXT_ID , NULL , PMIX_BOOL );
337+     grpinfo  =  PMIx_Info_list_start ();
338+     if  (NULL  ==  grpinfo ) {
339+         rc  =  OMPI_ERR_OUT_OF_RESOURCE ;
340+         goto fn_exit ;
341+     }
342+ 
343+     rc  =  PMIx_Info_list_add (grpinfo , PMIX_GROUP_ASSIGN_CONTEXT_ID , NULL , PMIX_BOOL );
344+     if  (PMIX_SUCCESS  !=  rc ) {
345+         OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Info_list_add failed %s %d" , PMIx_Error_string (rc ), __LINE__ ));
346+         rc  =  OMPI_ERR_OUT_OF_RESOURCE ;
347+         goto fn_exit ;
348+     }
349+ 
350+     list  =  PMIx_Info_list_start ();
351+ 
352+     size_t  c_index  =  (size_t )newcomm -> c_index ;
353+     rc  =  PMIx_Info_list_add (list , PMIX_GROUP_LOCAL_CID , & c_index , PMIX_SIZE );
354+     if  (PMIX_SUCCESS  !=  rc ) {
355+         OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Info_list_add failed %s %d" , PMIx_Error_string (rc ), __LINE__ ));
356+         rc  =  OMPI_ERR_OUT_OF_RESOURCE ;
357+         goto fn_exit ;
358+     }
359+ 
360+     rc  =  PMIx_Info_list_convert (list , & darray );
361+     if  (PMIX_SUCCESS  !=  rc ) {
362+         OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Info_list_convert failed %s %d" , PMIx_Error_string (rc ), __LINE__ ));
363+         rc  =  OMPI_ERR_OUT_OF_RESOURCE ;
364+         goto fn_exit ;
365+     }
366+     rc  =  PMIx_Info_list_add (grpinfo , PMIX_GROUP_INFO , & darray , PMIX_DATA_ARRAY );
367+     PMIX_DATA_ARRAY_DESTRUCT (& darray );
368+     if  (PMIX_SUCCESS  !=  rc ) {
369+         OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Info_list_add failed %s %d" , PMIx_Error_string (rc ), __LINE__ ));
370+         rc  =  OMPI_ERR_OUT_OF_RESOURCE ;
371+         goto fn_exit ;
372+     }
373+ 
374+     rc  =  PMIx_Info_list_convert (grpinfo , & darray );
375+     if  (PMIX_SUCCESS  !=  rc ) {
376+         OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Info_list_convert failed %s %d" , PMIx_Error_string (rc ), __LINE__ ));
377+         rc  =  OMPI_ERR_OUT_OF_RESOURCE ;
378+         goto fn_exit ;
379+     }
380+ 
381+     pinfo  =  (pmix_info_t * )darray .array ;
382+     ninfo  =  darray .size ;
383+ 
384+     proc_count  =  newcomm -> c_local_group -> grp_proc_count ;
385+     if  ( OMPI_COMM_IS_INTER  (newcomm ) ){
386+         rproc_count  =  newcomm -> c_remote_group -> grp_proc_count ;
387+     }
388+ 
389+     PMIX_PROC_CREATE (procs , proc_count  +  rproc_count );
343390
344-     PMIX_PROC_CREATE (procs , proc_count );
345391    for  (size_t  i  =  0  ; i  <  proc_count ; ++ i ) {
346-         OPAL_PMIX_CONVERT_NAME (& procs [i ],& name_array [i ]);
392+         opal_proc_name  =  ompi_group_get_proc_name (newcomm -> c_local_group , i );
393+         OPAL_PMIX_CONVERT_NAME (& procs [i ],& opal_proc_name );
394+     }
395+     for  (size_t  i  =  0 ; i  <  rproc_count ; ++ i ) {
396+         opal_proc_name  =  ompi_group_get_proc_name (newcomm -> c_remote_group , i );
397+         OPAL_PMIX_CONVERT_NAME (& procs [proc_count + i ],& opal_proc_name );
347398    }
348399
349-     rc  =  PMIx_Group_construct (tag , procs , proc_count , & pinfo , 1 , & results , & nresults );
350-     PMIX_INFO_DESTRUCT (& pinfo );
400+     tproc_count  =  proc_count  +  rproc_count ;
401+ 
402+     OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "calling PMIx_Group_construct - tag %s size %ld ninfo %ld cid_base %ld\n" ,
403+                          tag , tproc_count , ninfo , cid_base ));
404+     rc  =  PMIx_Group_construct (tag , procs , tproc_count , pinfo , ninfo , & results , & nresults );
405+     PMIX_DATA_ARRAY_DESTRUCT (& darray );
351406    if (PMIX_SUCCESS  !=  rc ) {
352407       char  msg_string [1024 ];
353408        switch  (rc ) {
@@ -359,7 +414,7 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
359414                           "MPI_Comm_create_from_group/MPI_Intercomm_create_from_groups" ,
360415                           msg_string );
361416
362-             ret  =  MPI_ERR_UNSUPPORTED_OPERATION ;
417+             rc  =  MPI_ERR_UNSUPPORTED_OPERATION ;
363418            break ;
364419        case  PMIX_ERR_NOT_SUPPORTED :
365420            sprintf (msg_string ,"PMIx server does not support PMIx Group operations" );
@@ -368,10 +423,10 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
368423                           true,
369424                           "MPI_Comm_create_from_group/MPI_Intercomm_create_from_groups" ,
370425                           msg_string );
371-             ret  =  MPI_ERR_UNSUPPORTED_OPERATION ;
426+             rc  =  MPI_ERR_UNSUPPORTED_OPERATION ;
372427            break ;
373428        default :
374-             ret  =  opal_pmix_convert_status (rc );
429+             rc  =  opal_pmix_convert_status (rc );
375430            break ;
376431        } 
377432        goto fn_exit ;
@@ -381,23 +436,28 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
381436        if  (PMIX_CHECK_KEY (& results [i ], PMIX_GROUP_CONTEXT_ID )) {
382437            PMIX_VALUE_GET_NUMBER (rc , & results [i ].value , cid_base , size_t );
383438            if (PMIX_SUCCESS  !=  rc ) {
384-                 ret  =  opal_pmix_convert_status (rc );
439+                 rc  =  opal_pmix_convert_status (rc );
385440                goto fn_exit ;
386441            }
387442            cid_base_set  =  true;
388443            break ;
389444        }
390445    }
391446
447+     OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Group_construct - tag %s size %ld ninfo %ld cid_base %ld\n" ,
448+                          tag , tproc_count , ninfo , cid_base ));
449+ 
450+     /* destruct the group */ 
392451    rc  =  PMIx_Group_destruct  (tag , NULL , 0 );
393452    if (PMIX_SUCCESS  !=  rc ) {
394-         ret  =  opal_pmix_convert_status (rc );
453+         OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Group_destruct failed %s" , PMIx_Error_string (rc )));
454+         rc  =  opal_pmix_convert_status (rc );
395455        goto fn_exit ;
396456    }
397457
398458    if  (!cid_base_set ) {
399459        opal_show_help ("help-comm.txt" , "cid-base-not-set" , true);
400-         ret  =  OMPI_ERROR ;
460+         rc  =  OMPI_ERROR ;
401461        goto fn_exit ;
402462    }
403463
@@ -410,16 +470,19 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
410470    }
411471
412472    if (NULL  !=  procs ) {
413-         PMIX_PROC_FREE (procs , proc_count );
473+         PMIX_PROC_FREE (procs , tproc_count );
414474        procs  =  NULL ;
415475    }
416476
417-     if (NULL  !=  name_array ) {
418-         free  (name_array );
419-         name_array  =  NULL ;
477+     if  (NULL  !=  grpinfo ) {
478+         PMIx_Info_list_release (grpinfo );
420479    }
421480
422-     return  ret ;
481+     if  (NULL  !=  list ) {
482+         PMIx_Info_list_release (list );
483+     }
484+ 
485+     return  rc ;
423486}
424487
425488static  int  ompi_comm_nextcid_ext_nb  (ompi_communicator_t  * newcomm , ompi_communicator_t  * comm ,
@@ -444,6 +507,15 @@ static int ompi_comm_nextcid_ext_nb (ompi_communicator_t *newcomm, ompi_communic
444507        block  =  & comm -> c_contextidb ;
445508    }
446509
510+     for  (unsigned int   i  =  ompi_mpi_communicators .lowest_free  ; i  <  mca_pml .pml_max_contextid  ; ++ i ) {
511+         bool  flag  =  opal_pointer_array_test_and_set_item  (& ompi_mpi_communicators , i , newcomm );
512+         if  (true ==  flag ) {
513+             newcomm -> c_index  =  i ;
514+             break ;
515+         }
516+     }
517+     assert (newcomm -> c_index  >  2 );
518+ 
447519    if  (NULL  ==  arg1 ) {
448520        if  (OMPI_COMM_CID_GROUP  ==  mode  ||  OMPI_COMM_CID_GROUP_NEW  ==  mode  || 
449521            !ompi_comm_extended_cid_block_available  (& comm -> c_contextidb )) {
@@ -466,14 +538,6 @@ static int ompi_comm_nextcid_ext_nb (ompi_communicator_t *newcomm, ompi_communic
466538        (void ) ompi_comm_extended_cid_block_new  (block , & newcomm -> c_contextidb , is_new_block );
467539    }
468540
469-     for  (unsigned int   i  =  ompi_mpi_communicators .lowest_free  ; i  <  mca_pml .pml_max_contextid  ; ++ i ) {
470-         bool  flag  =  opal_pointer_array_test_and_set_item  (& ompi_mpi_communicators , i , newcomm );
471-         if  (true ==  flag ) {
472-             newcomm -> c_index  =  i ;
473-             break ;
474-         }
475-     }
476- 
477541    newcomm -> c_contextid  =  newcomm -> c_contextidb .block_cid ;
478542
479543    opal_hash_table_set_value_ptr  (& ompi_comm_hash , & newcomm -> c_contextid ,
@@ -500,7 +564,7 @@ int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *com
500564       functions but the pml does not support these functions so return not supported */ 
501565    if  (NULL  ==  comm ) {
502566       char  msg_string [1024 ];
503-        sprintf (msg_string ,"The PML being used - %s - does not support MPI sessions related features" ,  
567+        sprintf (msg_string ,"The PML being used - %s - does not support MPI sessions related features" ,
504568               mca_pml_base_selected_component .pmlm_version .mca_component_name );
505569       opal_show_help ("help-comm.txt" ,
506570                      "MPI function not supported" ,
@@ -855,6 +919,7 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *c
855919    ompi_comm_cid_context_t  * context ;
856920    ompi_comm_request_t  * request ;
857921    ompi_request_t  * subreq ;
922+     uint32_t  comm_size ;
858923    int  ret  =  0 ;
859924
860925    /* the caller should not pass NULL for comm (it may be the same as *newcomm) */ 
@@ -876,6 +941,25 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *c
876941
877942    request -> context  =  & context -> super ;
878943
944+     /* Prep communicator for handling remote cids if needed */ 
945+ 
946+     if  (!OMPI_COMM_IS_GLOBAL_INDEX (* newcomm )) {
947+         if  (OMPI_COMM_IS_INTER (* newcomm )) {
948+             comm_size  =  ompi_comm_remote_size (* newcomm );
949+         } else  {
950+             comm_size  =  ompi_comm_size (* newcomm );
951+         }
952+ 
953+         (* newcomm )-> c_index_vec  =  (uint32_t  * )calloc (comm_size , sizeof (uint32_t ));
954+         if  (NULL  ==  (* newcomm )-> c_index_vec ) {
955+             return  OMPI_ERR_OUT_OF_RESOURCE ;
956+         }
957+ 
958+         if  (OMPI_COMM_IS_INTRA (* newcomm )) {
959+             (* newcomm )-> c_index_vec [(* newcomm )-> c_my_rank ] =  (* newcomm )-> c_index ;
960+         }
961+     }
962+ 
879963    if  (MPI_UNDEFINED  !=  (* newcomm )-> c_local_group -> grp_my_rank ) {
880964        /* Initialize the PML stuff in the newcomm  */ 
881965        if  ( OMPI_SUCCESS  !=  (ret  =  MCA_PML_CALL (add_comm (* newcomm ))) ) {
@@ -926,6 +1010,61 @@ int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm
9261010    return  rc ;
9271011}
9281012
1013+ int  ompi_comm_get_remote_cid_from_pmix  (ompi_communicator_t  * comm , int  dest , uint32_t  * remote_cid )
1014+ {
1015+     ompi_proc_t  * ompi_proc ;
1016+     pmix_proc_t   pmix_proc ;
1017+     pmix_info_t  tinfo [2 ];
1018+     pmix_value_t  * val  =  NULL ;
1019+     ompi_comm_extended_cid_t  excid ;
1020+     int  rc  =  OMPI_SUCCESS ;
1021+     size_t  remote_cid64 ;
1022+ 
1023+     assert (NULL  !=  remote_cid );
1024+ 
1025+     ompi_proc  =  ompi_comm_peer_lookup (comm , dest );
1026+     OPAL_PMIX_CONVERT_NAME (& pmix_proc , & ompi_proc -> super .proc_name );
1027+ 
1028+     PMIx_Info_construct (& tinfo [0 ]);
1029+     PMIX_INFO_LOAD (& tinfo [0 ], PMIX_TIMEOUT , & ompi_pmix_connect_timeout , PMIX_UINT32 );
1030+ 
1031+     excid  =  ompi_comm_get_extended_cid (comm );
1032+ 
1033+     PMIX_INFO_CONSTRUCT (& tinfo [1 ]);
1034+     PMIX_INFO_LOAD (& tinfo [1 ], PMIX_GROUP_CONTEXT_ID , & excid .cid_base , PMIX_SIZE );
1035+     PMIX_INFO_SET_QUALIFIER (& tinfo [1 ]);
1036+     if  (PMIX_SUCCESS  !=  (rc  =  PMIx_Get (& pmix_proc , PMIX_GROUP_LOCAL_CID , tinfo , 2 , & val ))) {
1037+         OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Get failed for PMIX_GROUP_LOCAL_CID cid_base %ld %s" , excid .cid_base , PMIx_Error_string (rc )));
1038+         rc  =  OMPI_ERR_NOT_FOUND ;
1039+         goto done ;
1040+     }
1041+ 
1042+     if  (NULL  ==  val ) {
1043+         OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Get failed for PMIX_GROUP_LOCAL_CID val returned NULL" ));
1044+         rc  =  OMPI_ERR_NOT_FOUND ;
1045+         goto done ;
1046+     }
1047+ 
1048+     if  (val -> type  !=  PMIX_SIZE ) {
1049+         OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Get failed for PMIX_GROUP_LOCAL_CID type mismatch" ));
1050+         rc  =  OMPI_ERR_TYPE_MISMATCH ;
1051+         goto done ;
1052+     }
1053+ 
1054+     PMIX_VALUE_GET_NUMBER (rc , val , remote_cid64 , size_t );
1055+     rc  =  OMPI_SUCCESS ;
1056+     * remote_cid  =  (uint32_t )remote_cid64 ;
1057+     comm -> c_index_vec [dest ] =  (uint32_t )remote_cid64 ;
1058+     OPAL_OUTPUT_VERBOSE ((10 , ompi_comm_output , "PMIx_Get PMIX_GROUP_LOCAL_CID %d for cid_base %ld" , * remote_cid , excid .cid_base ));
1059+ 
1060+ done :
1061+     if  (NULL  !=  val ) {
1062+         PMIX_VALUE_RELEASE (val );
1063+     }
1064+ 
1065+     return  rc ;
1066+ }
1067+ 
9291068static  int  ompi_comm_activate_nb_complete  (ompi_comm_request_t  * request )
9301069{
9311070    ompi_comm_cid_context_t  * context  =  (ompi_comm_cid_context_t  * ) request -> context ;
0 commit comments