@@ -490,11 +490,59 @@ static int mca_btl_tcp_component_close(void)
490490 * Create a btl instance and add to modules list.
491491 */
492492
493- static int mca_btl_tcp_create (int if_kindex , const char * if_name )
493+ static int mca_btl_tcp_create (const int if_kindex , const char * if_name )
494494{
495495 struct mca_btl_tcp_module_t * btl ;
496496 char param [256 ];
497497 int i ;
498+ struct sockaddr_storage addr ;
499+ bool found = false;
500+
501+ /*
502+ * Look for an address on the given device (ie, kindex) which
503+ * isn't disabled by the disable_family option. If there isn't
504+ * one, skip creating the modules for this interface. We store
505+ * the address on the module both to publish in the modex and to
506+ * use as the source address of all packets sent by this module.
507+ *
508+ * This still isn't quite right. Better would be to pull apart
509+ * split_and_resolve and pass the address used to select the
510+ * device into mca_btl_tcp_create(). This is a cleanup of the
511+ * logic that's been in use for years, but the case it doesn't
512+ * cover is (say) only specifing mca_btl_if_include 10.0.0.0/16
513+ * when the interface has addresses of both 10.0.0.1 and 10.1.0.1;
514+ * there's absolutely nothing that keeps this code from picking
515+ * 10.1.0.1 as the one that is published in the modex and used for
516+ * connection.
517+ */
518+ for (i = opal_ifbegin () ; i >= 0 ; i = opal_ifnext (i )) {
519+ int ret ;
520+
521+ if (if_kindex != opal_ifindextokindex (i )) {
522+ continue ;
523+ }
524+
525+ ret = opal_ifindextoaddr (i , (struct sockaddr * )& addr ,
526+ sizeof (struct sockaddr_storage ));
527+ if (OPAL_SUCCESS != ret ) {
528+ return ret ;
529+ }
530+
531+ if (addr .ss_family == AF_INET &&
532+ 4 != mca_btl_tcp_component .tcp_disable_family ) {
533+ found = true;
534+ break ;
535+ } else if (addr .ss_family == AF_INET6 &&
536+ 6 != mca_btl_tcp_component .tcp_disable_family ) {
537+ found = true;
538+ break ;
539+ }
540+ }
541+ /* if we didn't find an address that works for us on this
542+ interface, just move on. */
543+ if (!found ) {
544+ return OPAL_SUCCESS ;
545+ }
498546
499547 for ( i = 0 ; i < (int )mca_btl_tcp_component .tcp_num_links ; i ++ ) {
500548 btl = (struct mca_btl_tcp_module_t * )malloc (sizeof (mca_btl_tcp_module_t ));
@@ -513,11 +561,7 @@ static int mca_btl_tcp_create(int if_kindex, const char* if_name)
513561 btl -> tcp_send_handler = 0 ;
514562#endif
515563
516- /* save the address associated with this kindex in the module
517- to publish in the modex and use as the source of all packets
518- from this module */
519- opal_ifkindextoaddr (if_kindex , (struct sockaddr * )& btl -> tcp_ifaddr ,
520- sizeof (struct sockaddr_storage ));
564+ memcpy (& btl -> tcp_ifaddr , & addr , sizeof (struct sockaddr_storage ));
521565
522566 /* allow user to specify interface bandwidth */
523567 sprintf (param , "bandwidth_%s" , if_name );
@@ -1117,97 +1161,64 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family)
11171161
11181162static int mca_btl_tcp_component_exchange (void )
11191163{
1120- int rc = 0 , index ;
1121- size_t i = 0 ;
1122- size_t size = mca_btl_tcp_component .tcp_addr_count *
1123- mca_btl_tcp_component .tcp_num_links * sizeof (mca_btl_tcp_addr_t );
1124- /* adi@2007-04-12:
1125- *
1126- * We'll need to explain things a bit here:
1127- * 1. We normally have as many BTLs as physical NICs.
1128- * 2. With num_links, we now have num_btl = num_links * #NICs
1129- * 3. we might have more than one address per NIC
1130- */
1131- size_t xfer_size = 0 ; /* real size to transfer (may differ from 'size') */
1132- size_t current_addr = 0 ;
1133-
1134- if (mca_btl_tcp_component .tcp_num_btls != 0 ) {
1135- char ifn [32 ];
1136- mca_btl_tcp_addr_t * addrs = (mca_btl_tcp_addr_t * )malloc (size );
1137- memset (addrs , 0 , size );
1138-
1139- /* here we start populating our addresses */
1140- for ( i = 0 ; i < mca_btl_tcp_component .tcp_num_btls ; i ++ ) {
1141- for (index = opal_ifbegin (); index >= 0 ;
1142- index = opal_ifnext (index )) {
1143- struct sockaddr_storage my_ss ;
1144-
1145- /* Look if the module's address belongs to this
1146- * kernel IP interface. If not, go to next address.
1147- */
1148- if (opal_ifindextokindex (index ) !=
1149- mca_btl_tcp_component .tcp_btls [i ]-> tcp_ifkindex ) {
1150- continue ;
1151- }
1152-
1153- opal_ifindextoname (index , ifn , sizeof (ifn ));
1154- opal_output_verbose (30 , opal_btl_base_framework .framework_output ,
1155- "btl: tcp: component_exchange: examining interface %s" ,
1156- ifn );
1157- if (OPAL_SUCCESS !=
1158- opal_ifindextoaddr (index , (struct sockaddr * ) & my_ss ,
1159- sizeof (my_ss ))) {
1160- opal_output (0 ,
1161- "btl_tcp_component: problems getting address for index %i (kernel index %i)\n" ,
1162- index , opal_ifindextokindex (index ));
1163- continue ;
1164- }
1164+ int rc ;
1165+ size_t i ;
1166+ size_t num_btls = mca_btl_tcp_component .tcp_num_btls ;
1167+ size_t size = num_btls * sizeof (mca_btl_tcp_modex_addr_t );
1168+ mca_btl_tcp_modex_addr_t * addrs ;
1169+
1170+ if (num_btls <= 0 ) {
1171+ return 0 ;
1172+ }
1173+
1174+ addrs = (mca_btl_tcp_modex_addr_t * )malloc (size );
1175+ if (NULL == addrs ) {
1176+ return OPAL_ERR_OUT_OF_RESOURCE ;
1177+ }
1178+ memset (addrs , 0 , size );
1179+
1180+ for (i = 0 ; i < num_btls ; i ++ ) {
1181+ struct mca_btl_tcp_module_t * btl = mca_btl_tcp_component .tcp_btls [i ];
1182+ struct sockaddr * addr = (struct sockaddr * )& (btl -> tcp_ifaddr );
11651183
11661184#if OPAL_ENABLE_IPV6
1167- if ((AF_INET6 == my_ss .ss_family ) &&
1168- (6 != mca_btl_tcp_component .tcp_disable_family )) {
1169- memcpy (& addrs [current_addr ].addr_inet ,
1170- & ((struct sockaddr_in6 * )& my_ss )-> sin6_addr ,
1171- sizeof (addrs [0 ].addr_inet ));
1172- addrs [current_addr ].addr_port =
1173- mca_btl_tcp_component .tcp6_listen_port ;
1174- addrs [current_addr ].addr_family = MCA_BTL_TCP_AF_INET6 ;
1175- xfer_size += sizeof (mca_btl_tcp_addr_t );
1176- addrs [current_addr ].addr_inuse = 0 ;
1177- addrs [current_addr ].addr_ifkindex =
1178- opal_ifindextokindex (index );
1179- current_addr ++ ;
1180- opal_output_verbose (30 , opal_btl_base_framework .framework_output ,
1181- "btl: tcp: component_exchange: "
1182- "%s IPv6 %s" , ifn ,
1183- opal_net_get_hostname ((struct sockaddr * )& my_ss ));
1184- } else
1185+ if (AF_INET6 == addr -> sa_family ) {
1186+ struct sockaddr_in6 * inaddr6 = (struct sockaddr_in6 * )addr ;
1187+
1188+ memcpy (& addrs [i ].addr , & (inaddr6 -> sin6_addr ),
1189+ sizeof (struct in6_addr ));
1190+ addrs [i ].addr_port = mca_btl_tcp_component .tcp6_listen_port ;
1191+ addrs [i ].addr_ifkindex = btl -> tcp_ifkindex ;
1192+ addrs [i ].addr_family = MCA_BTL_TCP_AF_INET6 ;
1193+ opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
1194+ "btl: tcp: exchange: %d %d IPv6 %s" ,
1195+ (int )i , btl -> tcp_ifkindex ,
1196+ opal_net_get_hostname (addr ));
1197+ } else
11851198#endif
1186- if ((AF_INET == my_ss .ss_family ) &&
1187- (4 != mca_btl_tcp_component .tcp_disable_family )) {
1188- memcpy (& addrs [current_addr ].addr_inet ,
1189- & ((struct sockaddr_in * )& my_ss )-> sin_addr ,
1190- sizeof (struct in_addr ));
1191- addrs [current_addr ].addr_port =
1192- mca_btl_tcp_component .tcp_listen_port ;
1193- addrs [current_addr ].addr_family = MCA_BTL_TCP_AF_INET ;
1194- xfer_size += sizeof (mca_btl_tcp_addr_t );
1195- addrs [current_addr ].addr_inuse = 0 ;
1196- addrs [current_addr ].addr_ifkindex =
1197- opal_ifindextokindex (index );
1198- current_addr ++ ;
1199- opal_output_verbose (30 , opal_btl_base_framework .framework_output ,
1200- "btl: tcp: component_exchange: "
1201- "%s IPv4 %s" , ifn ,
1202- opal_net_get_hostname ((struct sockaddr * )& my_ss ));
1203- }
1204- } /* end of for opal_ifbegin() */
1205- } /* end of for tcp_num_btls */
1206- OPAL_MODEX_SEND (rc , OPAL_PMIX_GLOBAL ,
1207- & mca_btl_tcp_component .super .btl_version ,
1208- addrs , xfer_size );
1209- free (addrs );
1210- } /* end if */
1199+ if (AF_INET == addr -> sa_family ) {
1200+ struct sockaddr_in * inaddr = (struct sockaddr_in * )addr ;
1201+
1202+ memcpy (& addrs [i ].addr , & (inaddr -> sin_addr ),
1203+ sizeof (struct in_addr ));
1204+ addrs [i ].addr_port = mca_btl_tcp_component .tcp_listen_port ;
1205+ addrs [i ].addr_ifkindex = btl -> tcp_ifkindex ;
1206+ addrs [i ].addr_family = MCA_BTL_TCP_AF_INET ;
1207+ opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
1208+ "btl: tcp: exchange: %d %d IPv4 %s" ,
1209+ (int )i , btl -> tcp_ifkindex ,
1210+ opal_net_get_hostname (addr ));
1211+ } else {
1212+ BTL_ERROR (("Unexpected address family: %d" , addr -> sa_family ));
1213+ return OPAL_ERR_BAD_PARAM ;
1214+ }
1215+ }
1216+
1217+ OPAL_MODEX_SEND (rc , OPAL_PMIX_GLOBAL ,
1218+ & mca_btl_tcp_component .super .btl_version ,
1219+ addrs , size );
1220+ free (addrs );
1221+
12111222 return rc ;
12121223}
12131224
0 commit comments