2525// For converting comm_method strings to comm_method id# and back.
2626// This starts as our local set of strings, but gets Allreduced into
2727// a global mapping so all the strings at all the ranks are represented.
28- // If an MCA's name is more than 15 chars it gets truncated.
29- #define COMM_METHOD_STRING_SIZE 16
30- #define MAX_COMM_METHODS 50
28+ #define COMM_METHOD_STRING_SIZE 200
29+ #define MAX_COMM_METHODS 1000
30+ #define UCX_TAG "ucx="
31+
3132typedef struct {
3233 int n ;
3334 char str [MAX_COMM_METHODS ][COMM_METHOD_STRING_SIZE ];
@@ -87,27 +88,69 @@ lookup_btl_name_for_send(ompi_communicator_t* comm, int rank) {
8788static char *
8889comm_method_string (MPI_Comm comm , int rank , int * comm_mode ) {
8990 char * p , * btl ;
90- char * string = malloc (COMM_METHOD_STRING_SIZE );
91-
92- if (!string ) { return NULL ; }
93-
94- p = lookup_pml_name ();
95- if (p && 0 == strncmp ("ob1" , p , 4 )) { // BTL
96- if (comm_mode ) { * comm_mode = MODE_IS_BTL ; }
97- btl = lookup_btl_name_for_send (comm , rank );
98- if (NULL == btl ) {
99- strncpy (string , "n/a" , COMM_METHOD_STRING_SIZE );
100- } else {
101- strncpy (string , btl , COMM_METHOD_STRING_SIZE );
91+ char * string , * comma_delim = "" ;
92+ mca_pml_transports_t * transports = NULL ;
93+ int name_length ;
94+ unsigned int i ;
95+ if (NULL != mca_pml .pml_get_transports ) {
96+ transports = mca_pml .pml_get_transports (comm , rank );
97+ }
98+ if (NULL == transports ) {
99+ string = malloc (COMM_METHOD_STRING_SIZE );
100+ if (!string ) {
101+ return NULL ;
102+ }
103+ p = lookup_pml_name ();
104+ if (p && 0 == strncmp ("ob1" , p , 4 )) { // BTL
105+ if (comm_mode ) { * comm_mode = MODE_IS_BTL ; }
106+ btl = lookup_btl_name_for_send (comm , rank );
107+ if (NULL == btl ) {
108+ strncpy (string , "n/a" , COMM_METHOD_STRING_SIZE );
109+ } else {
110+ strncpy (string , btl , COMM_METHOD_STRING_SIZE );
111+ }
112+ }
113+ else if (p && 0 == strncmp ("cm" , p , 3 )) { // MTL
114+ if (comm_mode ) { * comm_mode = MODE_IS_MTL ; }
115+ strncpy (string , lookup_mtl_name (), COMM_METHOD_STRING_SIZE );
116+ } else { // PML
117+ if (comm_mode ) { * comm_mode = MODE_IS_PML ; }
118+ if (p ) {
119+ strncpy (string , p , COMM_METHOD_STRING_SIZE );
120+ }
121+ else {
122+ strncpy (string , "n/a" , COMM_METHOD_STRING_SIZE );
123+ }
102124 }
103125 }
104- else if (p && 0 == strncmp ("cm" , p , 3 )) { // MTL
105- if (comm_mode ) { * comm_mode = MODE_IS_MTL ; }
106- strncpy (string , lookup_mtl_name (), COMM_METHOD_STRING_SIZE );
107- } else { // PML
108- if (comm_mode ) { * comm_mode = MODE_IS_PML ; }
109- strncpy (string , p , COMM_METHOD_STRING_SIZE );
126+ else {
127+ /* Determine how much memory is needed to store UCX transport info */
128+ char * s = UCX_TAG ;
129+ name_length = strlen (s );
130+ for (i = 0 ; i < transports -> count ; i ++ ) {
131+ name_length = name_length + strlen (transports -> entries [i ].transport_name ) +
132+ strlen (transports -> entries [i ].device_name ) + 2 ;
133+ }
134+ /* Allocate storage to store UCX transport info then build the info string */
135+ string = malloc (name_length );
136+ if (!string ) {
137+ return NULL ;
138+ }
139+ strcpy (string , s );
140+ for (i = 0 ; i < transports -> count ; i ++ ) {
141+ strcat (string , comma_delim );
142+ comma_delim = "," ;
143+ strcat (string , transports -> entries [i ].transport_name );
144+ strcat (string , ";" );
145+ strcat (string , transports -> entries [i ].device_name );
146+ }
147+ }
148+ if (comm_mode ) {
149+ // UCX is used for PML mode only
150+ * comm_mode = MODE_IS_PML ;
110151 }
152+ free (transports -> entries );
153+ free (transports );
111154 return string ;
112155}
113156
@@ -135,7 +178,7 @@ lookup_string_in_conversion_struct(comm_method_string_conversion_t *data, char *
135178{
136179 int i ;
137180 for (i = 0 ; i < data -> n ; ++ i ) {
138- if (0 == strncmp (data -> str [i ], string , COMM_METHOD_STRING_SIZE )) {
181+ if (0 == strcmp (data -> str [i ], string )) {
139182 return i ;
140183 }
141184 }
@@ -160,7 +203,6 @@ add_string_to_conversion_struct(comm_method_string_conversion_t *data, char *str
160203 ++ (data -> n );
161204 }
162205 }
163- qsort (& data -> str [1 ], data -> n - 1 , COMM_METHOD_STRING_SIZE , & mycompar );
164206}
165207
166208// For MPI_Allreduce of a comm_method_string_conversion_t
@@ -174,7 +216,6 @@ static void myfn(void* invec, void* inoutvec, int *len, MPI_Datatype *dt) {
174216 for (j = 0 ; j < b -> n ; ++ j ) { // for each entry j in 'b', add it to 'a'
175217 add_string_to_conversion_struct (a , b -> str [j ]);
176218 }
177- qsort (& a -> str [1 ], a -> n - 1 , COMM_METHOD_STRING_SIZE , & mycompar );
178219 }
179220}
180221
@@ -321,14 +362,15 @@ abbreviate_list_into_string(char *str, int max, int *list, int nlist)
321362static void
322363ompi_report_comm_methods (int called_from_location )
323364{
324- int numhosts , i , j , k ;
365+ int numhosts , i , j , k , n ;
325366 int max2Dprottable = 12 ;
326367 int max2D1Cprottable = 36 ;
327368 int hpmp_myrank ;
328369 int mylocalrank , nlocalranks , myleaderrank , nleaderranks ;
329370 int ret ;
330371 ompi_communicator_t * local_comm , * leader_comm ;
331372 int * method ;
373+ unsigned char * methods_used ;
332374 char * hoststring ;
333375 char * * allhoststrings ;
334376 int comm_mode ; // MODE_IS_BTL / MTL / PML
@@ -423,17 +465,16 @@ ompi_report_comm_methods(int called_from_location)
423465
424466// If we're running during init, establish connections between all peers
425467// (in leader_comm, which is all the ranks that are here at this point)
426- if (CALLED_FROM_MPI_INIT == called_from_location ) {
468+ if (called_from_location == 1 ) {
469+ int speer = (myleaderrank + 1 ) % nleaderranks ;
470+ int rpeer = (myleaderrank - 1 + nleaderranks ) % nleaderranks ;
427471 for (i = 0 ; i <=nleaderranks /2 ; ++ i ) {
428472// (Examples to show why the loop is i<=nleaderranks/2)
429473// np4 : 0 1 2 3 i=0 0c0 i=1 0c0&1&3 i=2 0c0&1&3&2
430474// np5 : 0 1 2 3 4 i=0 0c0 i=1 0c0&1&4 i=2 0c0&1&4&2&3
431475 MPI_Request sreq , rreq ;
432476 MPI_Status status ;
433477 int sbuf , rbuf ;
434- int speer = (myleaderrank + 1 ) % nleaderranks ;
435- int rpeer = (myleaderrank - 1 + nleaderranks ) % nleaderranks ;
436-
437478 sbuf = rbuf = 0 ;
438479 MCA_PML_CALL (isend (& sbuf , 1 , MPI_INT , speer , 99 ,
439480 MCA_PML_BASE_SEND_STANDARD ,
@@ -442,6 +483,11 @@ ompi_report_comm_methods(int called_from_location)
442483 leader_comm , & rreq ));
443484 ompi_request_wait (& sreq , & status );
444485 ompi_request_wait (& rreq , & status );
486+ speer = (speer + 1 ) % nleaderranks ;
487+ rpeer = (rpeer - 1 ) % nleaderranks ;
488+ if (rpeer < 0 ) {
489+ rpeer = nleaderranks - 1 ;
490+ }
445491 }
446492 }
447493
@@ -471,19 +517,26 @@ ompi_report_comm_methods(int called_from_location)
471517 MPI_Op_free (& myop );
472518 MPI_Type_free (& mydt );
473519
520+ // Sort communication method string arrays after reduction
521+ qsort (& comm_method_string_conversion .str [1 ],
522+ comm_method_string_conversion .n - 1 , COMM_METHOD_STRING_SIZE , & mycompar );
523+
474524// Each host leader fills in a "numhosts" sized array method[] of
475525// how it communicates with each peer.
526+ // Use a bitmap to keep track of which communication methods are used
527+ n = ((comm_method_string_conversion .n + 7 ) / 8 ) * sizeof (unsigned char );
528+ methods_used = malloc (n );
529+ memset (methods_used , 0 , n );
530+
476531 for (i = 0 ; i < nleaderranks ; ++ i ) {
477532 method [i ] = comm_method (leader_comm , i );
478533
479534// For looking at our own local host though, we don't really want "self"
480535// unless there's only one rank and "self" is the best answer. So if
481536// there's more than one rank on our host, we get our local-host's
482537// communication method for a neighbor on this host.
483- if (i == myleaderrank ) {
484- if (nlocalranks > 1 ) {
485- method [i ] = comm_method (local_comm , 1 );
486- }
538+ if ((i == myleaderrank ) && (nlocalranks > 1 )) {
539+ method [i ] = comm_method (local_comm , 1 );
487540 }
488541 }
489542
@@ -493,6 +546,8 @@ ompi_report_comm_methods(int called_from_location)
493546 {
494547 int len , * lens , * disps ;
495548
549+ // First get the array of host strings (host names and task lists)
550+ // for all nodes.
496551 len = strlen (hoststring ) + 1 ;
497552 if (myleaderrank == 0 ) {
498553 lens = malloc (nleaderranks * sizeof (int ));
@@ -533,7 +588,9 @@ ompi_report_comm_methods(int called_from_location)
533588 free (lens );
534589 free (disps );
535590 }
536- // and a simpler gather for the methods
591+
592+ // and a simpler gather for the arrays of communication method indices
593+ // for all nodes.
537594 leader_comm -> c_coll -> coll_gather (
538595 method , nleaderranks , MPI_INT ,
539596 method , nleaderranks , MPI_INT ,
@@ -581,14 +638,22 @@ ompi_report_comm_methods(int called_from_location)
581638// 2: 2d table
582639 if (nleaderranks <= max2Dprottable ) {
583640 char * str , * p ;
584- int tmp , per ;
641+ int tmp , per , has_ucx_transport ;
585642 int strlens [NUM_COMM_METHODS ];
586643
587644 // characters per entry in the 2d table, must be large enough
588645 // for the digits needed for host numbers, and for whatever is
589646 // the longest string used in the table, plus a space.
590647 for (i = 0 ; i < NUM_COMM_METHODS ; ++ i ) {
591- strlens [i ] = strlen (comm_method_to_string (i ));
648+ p = comm_method_to_string (i );
649+ if (0 == strncmp (p , UCX_TAG , strlen (UCX_TAG ))) {
650+ // Assume no more than 1000 UCX transport strings
651+ // See PML_UCX_MAX_TRANSPORT_ENTRIES in pml_ucx.c
652+ strlens [i ] = strlen ("ucx[000]" );
653+ }
654+ else {
655+ strlens [i ] = strlen (p );
656+ }
592657 }
593658 per = 2 ;
594659 tmp = nleaderranks ;
@@ -610,19 +675,38 @@ ompi_report_comm_methods(int called_from_location)
610675 p [j ] = 0 ;
611676 p += j ;
612677 }
678+ // Use a bitmap to trace which UCX transport strings are used.
679+ n = (nleaderranks + 7 ) / 8 ;
680+ methods_used = malloc (n * sizeof (unsigned char ));
681+ memset (methods_used , 0 , n );
613682 tmp = (int )strlen (str );
614683 -- p ;
615684 while (p >=str && ((* p )== ' ' )) { * (p -- )= 0 ; }
616685 printf (" host | %s\n" , str );
617686 memset (str , (int )'=' , tmp );
618687 str [tmp ] = 0 ;
619688 printf ("======|=%s\n" , str );
689+ has_ucx_transport = 0 ;
620690
621691 for (i = 0 ; i < nleaderranks ; ++ i ) {
622692 str [0 ] = 0 ;
623693 p = str ;
624694 for (k = 0 ; k < nleaderranks ; ++ k ) {
625- strcat (p , comm_method_to_string (method [i * nleaderranks + k ]));
695+ char * method_string ;
696+ char ucx_label [10 ];
697+
698+ method_string = comm_method_to_string (method [i * nleaderranks + k ]);
699+ if (0 == strncmp (method_string , UCX_TAG , strlen (UCX_TAG ))) {
700+ n = lookup_string_in_conversion_struct (& comm_method_string_conversion ,
701+ method_string );
702+ sprintf (ucx_label , "ucx[%3d]" , n );
703+ strcat (p , ucx_label );
704+ methods_used [n / 8 ] |= (1 << (n % 8 ));
705+ has_ucx_transport = 1 ;
706+ }
707+ else {
708+ strcat (p , method_string );
709+ }
626710 for (j = (int )strlen (p ); j < per ; ++ j ) {
627711 p [j ] = ' ' ;
628712 }
@@ -635,6 +719,35 @@ ompi_report_comm_methods(int called_from_location)
635719 }
636720 printf ("\n" );
637721 free (str );
722+ if (has_ucx_transport ) {
723+ printf ("UCX Transport/Device\n" );
724+ for (i = 0 ; i < comm_method_string_conversion .n ; i ++ ) {
725+ // Check bitmap to check if method was used
726+ if (methods_used [i / 8 ] & (1 << (i % 8 ))) {
727+ p = comm_method_to_string (i );
728+ if (0 == strncmp (p , UCX_TAG , strlen (UCX_TAG ))) {
729+ char * temp_str , * token ;
730+ n = lookup_string_in_conversion_struct (& comm_method_string_conversion , p );
731+ printf ("ucx[%3d]:\n" , n );
732+ temp_str = strdup (p + 4 );
733+ token = strtok (temp_str , "," );
734+ while (NULL != token ) {
735+ p = strchr (token , ';' );
736+ if (NULL == p ) {
737+ printf (" %-16s\n" , token );
738+ }
739+ else {
740+ * p = '\0' ;
741+ printf (" %-16s %-16s\n" , token , p + 1 );
742+ }
743+ token = strtok (NULL , "," );
744+ }
745+ free (temp_str );
746+ }
747+ }
748+ }
749+ }
750+ free (methods_used );
638751 }
639752 else if (nleaderranks <= max2D1Cprottable ) {
640753 char * str , * p ;
0 commit comments