@@ -490,11 +490,59 @@ static int mca_btl_tcp_component_close(void)
490490 * Create a btl instance and add to modules list.
491491 */
492492
493- static int mca_btl_tcp_create (int if_kindex , const char * if_name )
493+ static int mca_btl_tcp_create (const int if_kindex , const char * if_name )
494494{
495495 struct mca_btl_tcp_module_t * btl ;
496496 char param [256 ];
497497 int i ;
498+ struct sockaddr_storage addr ;
499+ bool found = false;
500+
501+ /*
502+ * Look for an address on the given device (ie, kindex) which
503+ * isn't disabled by the disable_family option. If there isn't
504+ * one, skip creating the modules for this interface. We store
505+ * the address on the module both to publish in the modex and to
506+ * use as the source address of all packets sent by this module.
507+ *
508+ * This still isn't quite right. Better would be to pull apart
509+ * split_and_resolve and pass the address used to select the
510+ * device into mca_btl_tcp_create(). This is a cleanup of the
511+ * logic that's been in use for years, but the case it doesn't
512+ * cover is (say) only specifing mca_btl_if_include 10.0.0.0/16
513+ * when the interface has addresses of both 10.0.0.1 and 10.1.0.1;
514+ * there's absolutely nothing that keeps this code from picking
515+ * 10.1.0.1 as the one that is published in the modex and used for
516+ * connection.
517+ */
518+ for (i = opal_ifbegin () ; i >= 0 ; i = opal_ifnext (i )) {
519+ int ret ;
520+
521+ if (if_kindex != opal_ifindextokindex (i )) {
522+ continue ;
523+ }
524+
525+ ret = opal_ifindextoaddr (i , (struct sockaddr * )& addr ,
526+ sizeof (struct sockaddr_storage ));
527+ if (OPAL_SUCCESS != ret ) {
528+ return ret ;
529+ }
530+
531+ if (addr .ss_family == AF_INET &&
532+ 4 != mca_btl_tcp_component .tcp_disable_family ) {
533+ found = true;
534+ break ;
535+ } else if (addr .ss_family == AF_INET6 &&
536+ 6 != mca_btl_tcp_component .tcp_disable_family ) {
537+ found = true;
538+ break ;
539+ }
540+ }
541+ /* if we didn't find an address that works for us on this
542+ interface, just move on. */
543+ if (!found ) {
544+ return OPAL_SUCCESS ;
545+ }
498546
499547 for ( i = 0 ; i < (int )mca_btl_tcp_component .tcp_num_links ; i ++ ) {
500548 btl = (struct mca_btl_tcp_module_t * )malloc (sizeof (mca_btl_tcp_module_t ));
@@ -513,17 +561,8 @@ static int mca_btl_tcp_create(int if_kindex, const char* if_name)
513561 btl -> tcp_send_handler = 0 ;
514562#endif
515563
516- struct sockaddr_storage addr ;
517- opal_ifkindextoaddr (if_kindex , (struct sockaddr * ) & addr ,
518- sizeof (struct sockaddr_storage ));
519- #if OPAL_ENABLE_IPV6
520- if (addr .ss_family == AF_INET6 ) {
521- btl -> tcp_ifaddr_6 = addr ;
522- }
523- #endif
524- if (addr .ss_family == AF_INET ) {
525- btl -> tcp_ifaddr = addr ;
526- }
564+ memcpy (& btl -> tcp_ifaddr , & addr , sizeof (struct sockaddr_storage ));
565+
527566 /* allow user to specify interface bandwidth */
528567 sprintf (param , "bandwidth_%s" , if_name );
529568 mca_btl_tcp_param_register_uint (param , NULL , btl -> super .btl_bandwidth , OPAL_INFO_LVL_5 , & btl -> super .btl_bandwidth );
@@ -567,8 +606,8 @@ static int mca_btl_tcp_create(int if_kindex, const char* if_name)
567606 opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
568607 "btl:tcp: %p: if %s kidx %d cnt %i addr %s %s bw %d lt %d\n" ,
569608 (void * )btl , if_name , (int ) btl -> tcp_ifkindex , i ,
570- opal_net_get_hostname ((struct sockaddr * )& addr ),
571- (addr .ss_family == AF_INET ) ? "IPv4" : "IPv6" ,
609+ opal_net_get_hostname ((struct sockaddr * )& btl -> tcp_ifaddr ),
610+ (btl -> tcp_ifaddr .ss_family == AF_INET ) ? "IPv4" : "IPv6" ,
572611 btl -> super .btl_bandwidth , btl -> super .btl_latency );
573612 }
574613 return OPAL_SUCCESS ;
@@ -721,6 +760,7 @@ static int mca_btl_tcp_component_create_instances(void)
721760 */
722761 for (if_index = opal_ifbegin (); if_index >= 0 ; if_index = opal_ifnext (if_index )){
723762 int index = opal_ifindextokindex (if_index );
763+
724764 if (index > 0 ) {
725765 bool want_this_if = true;
726766
@@ -1121,97 +1161,64 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family)
11211161
11221162static int mca_btl_tcp_component_exchange (void )
11231163{
1124- int rc = 0 , index ;
1125- size_t i = 0 ;
1126- size_t size = mca_btl_tcp_component .tcp_addr_count *
1127- mca_btl_tcp_component .tcp_num_links * sizeof (mca_btl_tcp_addr_t );
1128- /* adi@2007-04-12:
1129- *
1130- * We'll need to explain things a bit here:
1131- * 1. We normally have as many BTLs as physical NICs.
1132- * 2. With num_links, we now have num_btl = num_links * #NICs
1133- * 3. we might have more than one address per NIC
1134- */
1135- size_t xfer_size = 0 ; /* real size to transfer (may differ from 'size') */
1136- size_t current_addr = 0 ;
1137-
1138- if (mca_btl_tcp_component .tcp_num_btls != 0 ) {
1139- char ifn [32 ];
1140- mca_btl_tcp_addr_t * addrs = (mca_btl_tcp_addr_t * )malloc (size );
1141- memset (addrs , 0 , size );
1142-
1143- /* here we start populating our addresses */
1144- for ( i = 0 ; i < mca_btl_tcp_component .tcp_num_btls ; i ++ ) {
1145- for (index = opal_ifbegin (); index >= 0 ;
1146- index = opal_ifnext (index )) {
1147- struct sockaddr_storage my_ss ;
1148-
1149- /* Look if the module's address belongs to this
1150- * kernel IP interface. If not, go to next address.
1151- */
1152- if (opal_ifindextokindex (index ) !=
1153- mca_btl_tcp_component .tcp_btls [i ]-> tcp_ifkindex ) {
1154- continue ;
1155- }
1156-
1157- opal_ifindextoname (index , ifn , sizeof (ifn ));
1158- opal_output_verbose (30 , opal_btl_base_framework .framework_output ,
1159- "btl: tcp: component_exchange: examining interface %s" ,
1160- ifn );
1161- if (OPAL_SUCCESS !=
1162- opal_ifindextoaddr (index , (struct sockaddr * ) & my_ss ,
1163- sizeof (my_ss ))) {
1164- opal_output (0 ,
1165- "btl_tcp_component: problems getting address for index %i (kernel index %i)\n" ,
1166- index , opal_ifindextokindex (index ));
1167- continue ;
1168- }
1164+ int rc ;
1165+ size_t i ;
1166+ size_t num_btls = mca_btl_tcp_component .tcp_num_btls ;
1167+ size_t size = num_btls * sizeof (mca_btl_tcp_modex_addr_t );
1168+ mca_btl_tcp_modex_addr_t * addrs ;
1169+
1170+ if (num_btls <= 0 ) {
1171+ return 0 ;
1172+ }
1173+
1174+ addrs = (mca_btl_tcp_modex_addr_t * )malloc (size );
1175+ if (NULL == addrs ) {
1176+ return OPAL_ERR_OUT_OF_RESOURCE ;
1177+ }
1178+ memset (addrs , 0 , size );
1179+
1180+ for (i = 0 ; i < num_btls ; i ++ ) {
1181+ struct mca_btl_tcp_module_t * btl = mca_btl_tcp_component .tcp_btls [i ];
1182+ struct sockaddr * addr = (struct sockaddr * )& (btl -> tcp_ifaddr );
11691183
11701184#if OPAL_ENABLE_IPV6
1171- if ((AF_INET6 == my_ss .ss_family ) &&
1172- (6 != mca_btl_tcp_component .tcp_disable_family )) {
1173- memcpy (& addrs [current_addr ].addr_inet ,
1174- & ((struct sockaddr_in6 * )& my_ss )-> sin6_addr ,
1175- sizeof (addrs [0 ].addr_inet ));
1176- addrs [current_addr ].addr_port =
1177- mca_btl_tcp_component .tcp6_listen_port ;
1178- addrs [current_addr ].addr_family = MCA_BTL_TCP_AF_INET6 ;
1179- xfer_size += sizeof (mca_btl_tcp_addr_t );
1180- addrs [current_addr ].addr_inuse = 0 ;
1181- addrs [current_addr ].addr_ifkindex =
1182- opal_ifindextokindex (index );
1183- current_addr ++ ;
1184- opal_output_verbose (30 , opal_btl_base_framework .framework_output ,
1185- "btl: tcp: component_exchange: "
1186- "%s IPv6 %s" , ifn ,
1187- opal_net_get_hostname ((struct sockaddr * )& my_ss ));
1188- } else
1185+ if (AF_INET6 == addr -> sa_family ) {
1186+ struct sockaddr_in6 * inaddr6 = (struct sockaddr_in6 * )addr ;
1187+
1188+ memcpy (& addrs [i ].addr , & (inaddr6 -> sin6_addr ),
1189+ sizeof (struct in6_addr ));
1190+ addrs [i ].addr_port = mca_btl_tcp_component .tcp6_listen_port ;
1191+ addrs [i ].addr_ifkindex = btl -> tcp_ifkindex ;
1192+ addrs [i ].addr_family = MCA_BTL_TCP_AF_INET6 ;
1193+ opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
1194+ "btl: tcp: exchange: %d %d IPv6 %s" ,
1195+ (int )i , btl -> tcp_ifkindex ,
1196+ opal_net_get_hostname (addr ));
1197+ } else
11891198#endif
1190- if ((AF_INET == my_ss .ss_family ) &&
1191- (4 != mca_btl_tcp_component .tcp_disable_family )) {
1192- memcpy (& addrs [current_addr ].addr_inet ,
1193- & ((struct sockaddr_in * )& my_ss )-> sin_addr ,
1194- sizeof (struct in_addr ));
1195- addrs [current_addr ].addr_port =
1196- mca_btl_tcp_component .tcp_listen_port ;
1197- addrs [current_addr ].addr_family = MCA_BTL_TCP_AF_INET ;
1198- xfer_size += sizeof (mca_btl_tcp_addr_t );
1199- addrs [current_addr ].addr_inuse = 0 ;
1200- addrs [current_addr ].addr_ifkindex =
1201- opal_ifindextokindex (index );
1202- current_addr ++ ;
1203- opal_output_verbose (30 , opal_btl_base_framework .framework_output ,
1204- "btl: tcp: component_exchange: "
1205- "%s IPv4 %s" , ifn ,
1206- opal_net_get_hostname ((struct sockaddr * )& my_ss ));
1207- }
1208- } /* end of for opal_ifbegin() */
1209- } /* end of for tcp_num_btls */
1210- OPAL_MODEX_SEND (rc , OPAL_PMIX_GLOBAL ,
1211- & mca_btl_tcp_component .super .btl_version ,
1212- addrs , xfer_size );
1213- free (addrs );
1214- } /* end if */
1199+ if (AF_INET == addr -> sa_family ) {
1200+ struct sockaddr_in * inaddr = (struct sockaddr_in * )addr ;
1201+
1202+ memcpy (& addrs [i ].addr , & (inaddr -> sin_addr ),
1203+ sizeof (struct in_addr ));
1204+ addrs [i ].addr_port = mca_btl_tcp_component .tcp_listen_port ;
1205+ addrs [i ].addr_ifkindex = btl -> tcp_ifkindex ;
1206+ addrs [i ].addr_family = MCA_BTL_TCP_AF_INET ;
1207+ opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
1208+ "btl: tcp: exchange: %d %d IPv4 %s" ,
1209+ (int )i , btl -> tcp_ifkindex ,
1210+ opal_net_get_hostname (addr ));
1211+ } else {
1212+ BTL_ERROR (("Unexpected address family: %d" , addr -> sa_family ));
1213+ return OPAL_ERR_BAD_PARAM ;
1214+ }
1215+ }
1216+
1217+ OPAL_MODEX_SEND (rc , OPAL_PMIX_GLOBAL ,
1218+ & mca_btl_tcp_component .super .btl_version ,
1219+ addrs , size );
1220+ free (addrs );
1221+
12151222 return rc ;
12161223}
12171224
0 commit comments