diff --git a/.gitignore b/.gitignore index ad498753498..2d91c9003b9 100644 --- a/.gitignore +++ b/.gitignore @@ -682,3 +682,6 @@ test/util/opal_path_nfs test/util/opal_path_nfs.out test/util/opal_bit_ops test/util/bipartite_graph + +opal/test/reachable/reachable_netlink +opal/test/reachable/reachable_weighted diff --git a/opal/mca/reachable/base/Makefile.am b/opal/mca/reachable/base/Makefile.am index 9214aae6814..fb72725e926 100644 --- a/opal/mca/reachable/base/Makefile.am +++ b/opal/mca/reachable/base/Makefile.am @@ -14,4 +14,5 @@ headers += \ libmca_reachable_la_SOURCES += \ base/reachable_base_frame.c \ - base/reachable_base_select.c + base/reachable_base_select.c \ + base/reachable_base_alloc.c diff --git a/opal/mca/reachable/base/base.h b/opal/mca/reachable/base/base.h index ed737e7841d..6ab36d5b62f 100644 --- a/opal/mca/reachable/base/base.h +++ b/opal/mca/reachable/base/base.h @@ -29,6 +29,10 @@ OPAL_DECLSPEC extern mca_base_framework_t opal_reachable_base_framework; */ OPAL_DECLSPEC int opal_reachable_base_select(void); +OPAL_DECLSPEC opal_reachable_t * opal_reachable_allocate(unsigned int num_local, + unsigned int num_remote); + + END_C_DECLS #endif diff --git a/opal/mca/reachable/base/owner.txt b/opal/mca/reachable/base/owner.txt index 5361011bda4..786349a14c0 100644 --- a/opal/mca/reachable/base/owner.txt +++ b/opal/mca/reachable/base/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: INTEL -status: unmaintained +owner: AMAZON +status: active diff --git a/opal/mca/reachable/base/reachable_base_alloc.c b/opal/mca/reachable/base/reachable_base_alloc.c new file mode 100644 index 00000000000..faec53ab20b --- /dev/null +++ b/opal/mca/reachable/base/reachable_base_alloc.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/class/opal_object.h" + +#include "opal/mca/reachable/reachable.h" +#include "opal/mca/reachable/base/base.h" + + +static void opal_reachable_construct(opal_reachable_t *reachable) +{ + reachable->weights = NULL; +} + + +static void opal_reachable_destruct(opal_reachable_t * reachable) +{ + if (NULL != reachable->memory) { + free(reachable->memory); + } +} + + +opal_reachable_t * opal_reachable_allocate(unsigned int num_local, + unsigned int num_remote) +{ + char *memory; + unsigned int i; + opal_reachable_t *reachable = OBJ_NEW(opal_reachable_t); + + reachable->num_local = num_local; + reachable->num_remote = num_remote; + + /* allocate all the pieces of the two dimensional array in one + malloc, rather than a bunch of little allocations */ + memory = malloc(sizeof(int*) * num_local + + num_local * (sizeof(int) * num_remote)); + if (memory == NULL) return NULL; + + reachable->memory = (void*)memory; + reachable->weights = (int**)reachable->memory; + memory += (sizeof(int*) * num_local); + + for (i = 0; i < num_local; i++) { + reachable->weights[i] = (int*)memory; + memory += (sizeof(int) * num_remote); + } + + return reachable; +} + +OBJ_CLASS_INSTANCE( + opal_reachable_t, + opal_object_t, + opal_reachable_construct, + opal_reachable_destruct +); diff --git a/opal/mca/reachable/netlink/Makefile.am b/opal/mca/reachable/netlink/Makefile.am index 02d7cb28003..77b96f729fe 100644 --- a/opal/mca/reachable/netlink/Makefile.am +++ b/opal/mca/reachable/netlink/Makefile.am @@ -14,7 +14,6 @@ sources = \ reachable_netlink.h \ reachable_netlink_component.c \ reachable_netlink_module.c \ - libnl1_utils.h \ libnl3_utils.h \ libnl_utils.h \ reachable_netlink_utils_common.c diff --git a/opal/mca/reachable/netlink/configure.m4 b/opal/mca/reachable/netlink/configure.m4 index 163095659e3..15a996a4e97 100644 --- a/opal/mca/reachable/netlink/configure.m4 +++ 
b/opal/mca/reachable/netlink/configure.m4 @@ -3,6 +3,8 @@ # Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015-2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2017 Amazon.com, Inc. or its affiliates. +# All Rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -10,87 +12,6 @@ # $HEADER$ # -dnl -dnl Portions of this software copied from libfabric -dnl (https://github.com/ofiwg/libfabric) -dnl - -dnl BSD license -dnl -dnl Redistribution and use in source and binary forms, with or without -dnl modification, are permitted provided that the following conditions -dnl are met: -dnl -dnl * Redistributions of source code must retain the above copyright -dnl notice, this list of conditions and the following disclaimer. -dnl -dnl * Redistributions in binary form must reproduce the above -dnl copyright notice, this list of conditions and the following -dnl disclaimer in the documentation and/or other materials provided -dnl with the distribution. -dnl -dnl THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -dnl "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -dnl LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -dnl FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -dnl COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -dnl INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -dnl BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -dnl LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -dnl CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -dnl LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -dnl ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -dnl POSSIBILITY OF SUCH DAMAGE. - -dnl Check for libnl; prefer version 3 instead of version 1. 
Abort (i.e., -dnl AC_MSG_ERROR) if neither libnl v1 or v3 can be found. -dnl -dnl Outputs: -dnl -dnl - Set $1 to the CPPFLAGS necessary to compile with libnl -dnl - Set $2 to the LIBS necessary to link with libnl -dnl - If $3 is 1, AC_MSG_ERROR (i.e., abort) if neither libnl or -dnl libnl3 can be found -dnl - Set OPAL_HAVE_LIBNL3 to 1 if libnl v3 will be used; 0 if libnl v1 will be used -dnl - AC_SUBST $OPAL_HAVE_LIBNL3 -dnl - AC_DEFINE OPAL_HAVE_LIBNL3 -dnl -dnl -------------------------------------------------------- -AC_DEFUN([OPAL_REACHABLE_NETLINK_CHECK_LIBNL_Vx],[ - - # Default to a numeric value (this value gets AC_DEFINEd) - OPAL_HAVE_LIBNL3=0 - - ################################################### - # NOTE: We *must* check for libnl3 before libnl. - ################################################### - - AS_IF([test $opal_libnl_version -ne 1], - [OPAL_CHECK_LIBNL_V3([$opal_libnl_location], [opal_reachable_netlink])]) - AS_IF([test $opal_libnl_version -ne 3 && - test -z "$opal_reachable_netlink_LIBS"], - [OPAL_CHECK_LIBNL_V1([$opal_libnl_location], [opal_reachable_netlink])]) - - AS_IF([test "$opal_want_libnl" = "yes" && - test "$opal_reachable_netlink_LIBS" = ""], - [AC_MSG_WARN([--with-libnl specified, but not found]) - AC_MSG_ERROR([Cannot continue])]) - - # Final result - AC_SUBST([OPAL_HAVE_LIBNL3]) - AC_DEFINE_UNQUOTED([OPAL_HAVE_LIBNL3], [$OPAL_HAVE_LIBNL3], - [Whether we have libl v1 or libnl v3]) - - AC_SUBST([opal_reachable_netlink_CPPFLAGS]) - AC_SUBST([opal_reachable_netlink_LDFLAGS]) - AC_SUBST([opal_reachable_netlink_LIBS]) - - AS_IF([test "$opal_reachable_netlink_LIBS" = ""], - [opal_reachable_netlink_happy=0]) -]) - -dnl ============================================================== - # MCA_opal_reachable_netlink_CONFIG([action-if-can-compile], # [action-if-cant-compile]) # ------------------------------------------------ @@ -106,12 +27,26 @@ AC_DEFUN([MCA_opal_reachable_netlink_CONFIG],[ #include ]) - AS_IF([test 
$opal_reachable_netlink_happy -eq 1], - [OPAL_REACHABLE_NETLINK_CHECK_LIBNL_Vx]) + # this is terrible, but libnl-1 and libnl-3 are incompatible in + # weird ways, and once there are libraries in LIBS for one, the + # other is hard to get right. So if someone has already decided + # we have libnl version 1, get out. Otherwise, see if we have + # libnl-3, which is the only version supported by the netlink + # component. + AS_IF([test $opal_libnl_version -eq 1], + [opal_reachable_netlink_happy=0], + [OPAL_CHECK_LIBNL_V3([$opal_libnl_location], + [opal_reachable_netlink]) + AS_IF([test "$OPAL_HAVE_LIBNL3" != "1"], + [opal_reachable_netlink_happy=0])]) AS_IF([test $opal_reachable_netlink_happy -eq 1], [$1], [$2]) + AC_SUBST([opal_reachable_netlink_CPPFLAGS]) + AC_SUBST([opal_reachable_netlink_LDFLAGS]) + AC_SUBST([opal_reachable_netlink_LIBS]) + OPAL_VAR_SCOPE_POP() ]) diff --git a/opal/mca/reachable/netlink/libnl1_utils.h b/opal/mca/reachable/netlink/libnl1_utils.h deleted file mode 100644 index 6665c587115..00000000000 --- a/opal/mca/reachable/netlink/libnl1_utils.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * Portions of this software copied from libfabric - * (https://github.com/ofiwg/libfabric) - * - * LICENSE_BEGIN - * - * BSD license: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef LIBNL1_UTILS_H -#define LIBNL1_UTILS_H - -#include -#include -#include -#include -#include - -typedef struct nl_handle NL_HANDLE; - -#define NLMSG_SIZE(size) nlmsg_msg_size(size) -#define NL_GETERROR(err) nl_geterror() -#define NL_HANDLE_ALLOC nl_handle_alloc -#define NL_HANDLE_FREE nl_handle_destroy -#define NL_DISABLE_SEQ_CHECK nl_disable_sequence_check -#define INC_CB_MSGCNT(arg) \ - do { \ - arg->msg_cnt++; \ - } while (0) - -/* - * the return value of nl_recvmsgs_default does not tell - * whether it returns because of successful read or socket - * timeout. This is a limitation in libnl1. So we compare - * message count before and after the call to decide if there - * is no new message arriving. In this case, this function - * needs to terminate to prevent the caller from - * blocking forever. 
- * NL_CB_MSG_IN traps every received message, so - * there should be no premature exit - */ -#define NL_RECVMSGS(nlh, cb_arg, rc, err, out) \ - do { \ - int msg_cnt = cb_arg.msg_cnt; \ - err = nl_recvmsgs_default(nlh); \ - if (err < 0) { \ - opal_output(0, "Failed to receive netlink reply message, error %s\n", \ - NL_GETERROR(err)); \ - goto out; \ - } \ - if (msg_cnt == cb_arg.msg_cnt) {\ - err = rc; \ - goto out; \ - } \ - } while (0) - -struct usnic_rt_cb_arg { - uint32_t nh_addr; - int oif; - int found; - int msg_cnt; - struct usnic_nl_sk *unlsk; -}; - -#endif /* LIBNL1_UTILS_H */ diff --git a/opal/mca/reachable/netlink/libnl3_utils.h b/opal/mca/reachable/netlink/libnl3_utils.h index ea99c88fc1c..3668685824f 100644 --- a/opal/mca/reachable/netlink/libnl3_utils.h +++ b/opal/mca/reachable/netlink/libnl3_utils.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * * Portions of this software copied from libfabric * (https://github.com/ofiwg/libfabric) @@ -69,12 +71,12 @@ typedef struct nl_sock NL_HANDLE; } \ } while (0) -struct usnic_rt_cb_arg { - uint32_t nh_addr; +struct opal_reachable_netlink_rt_cb_arg { int oif; int found; + int has_gateway; int replied; - struct usnic_nl_sk *unlsk; + struct opal_reachable_netlink_sk *unlsk; }; #endif /* LIBNL3_UTILS_H */ diff --git a/opal/mca/reachable/netlink/libnl_utils.h b/opal/mca/reachable/netlink/libnl_utils.h index 3e3abbcabff..6a7c7cc5538 100644 --- a/opal/mca/reachable/netlink/libnl_utils.h +++ b/opal/mca/reachable/netlink/libnl_utils.h @@ -2,6 +2,8 @@ * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* * Portions of this software copied from libfabric * (https://github.com/ofiwg/libfabric) @@ -44,21 +46,33 @@ #ifndef LIBNL_UTILS_H #define LIBNL_UTILS_H -#if !defined (OPAL_HAVE_LIBNL3) -#error You must define OPAL_HAVE_LIBNL3 to 0 or 1 before including libnl_utils.h -#elif OPAL_HAVE_LIBNL3 #include "libnl3_utils.h" -#else -#include "libnl1_utils.h" -#endif -struct usnic_nl_sk { +struct opal_reachable_netlink_sk { NL_HANDLE *nlh; uint32_t seq; }; -int opal_reachable_netlink_nl_rt_lookup(uint32_t src_addr, - uint32_t dst_addr, int oif, - uint32_t *nh_addr); +/* returns 0 if host is reachable, EHOSTUNREACH if the host + * is not reachable, non-zero in other errors. + * + * If the route to the destination is through a gateway, *has_gateway + * is set to 1. Otherwise, it is set to 0. + */ +int opal_reachable_netlink_rt_lookup(uint32_t src_addr, + uint32_t dst_addr, int oif, + int *has_gateway); + +#if OPAL_ENABLE_IPV6 +/* returns 0 if host is reachable, EHOSTUNREACH if the host + * is not reachable, non-zero in other errors. + * + * If the route to the destination is through a gateway, *has_gateway + * is set to 1. Otherwise, it is set to 0. + */ +int opal_reachable_netlink_rt_lookup6(struct in6_addr *src_addr, + struct in6_addr *dst_addr, int oif, + int *has_gateway); +#endif #endif /* LIBNL_UTILS_H */ diff --git a/opal/mca/reachable/netlink/owner.txt b/opal/mca/reachable/netlink/owner.txt index 5361011bda4..786349a14c0 100644 --- a/opal/mca/reachable/netlink/owner.txt +++ b/opal/mca/reachable/netlink/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. 
active, maintenance, unmaintained # -owner: INTEL -status: unmaintained +owner: AMAZON +status: active diff --git a/opal/mca/reachable/netlink/reachable_netlink_module.c b/opal/mca/reachable/netlink/reachable_netlink_module.c index 60c8e075aee..3bb82049a8c 100644 --- a/opal/mca/reachable/netlink/reachable_netlink_module.c +++ b/opal/mca/reachable/netlink/reachable_netlink_module.c @@ -2,6 +2,8 @@ /* * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Cisco Systems. All rights reserved. + * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -13,12 +15,27 @@ #include "opal/constants.h" #include "opal/types.h" +#ifdef HAVE_MATH_H +#include +#endif + +#include "opal/util/net.h" #include "opal/mca/reachable/base/base.h" #include "reachable_netlink.h" +#include "libnl_utils.h" + +enum connection_quality { + CQ_NO_CONNECTION = 0, + CQ_DIFFERENT_NETWORK = 50, + CQ_SAME_NETWORK = 100 +}; /* Local variables */ static int init_counter = 0; +static int get_weights(opal_if_t *local_if, opal_if_t *remote_if); +static int calculate_weight(int bandwidth_local, int bandwidth_remote, + int connection_quality); static int netlink_init(void) { @@ -34,15 +51,168 @@ static int netlink_fini(void) return OPAL_SUCCESS; } -static opal_if_t* netlink_reachable(opal_list_t *local_if, - opal_list_t *remote_if) +/* + * Determines whether a connection is possible between + * pairs of local and remote interfaces. To determine + * reachability, the kernel's routing table is queried. + * Higher weightings are given to connections on the same + * network. 
+ */ +static opal_reachable_t* netlink_reachable(opal_list_t *local_if, + opal_list_t *remote_if) { - /* JMS Fill me in */ - return NULL; + opal_reachable_t *reachable_results = NULL; + int i, j; + opal_if_t *local_iter, *remote_iter; + + reachable_results = opal_reachable_allocate(local_if->opal_list_length, + remote_if->opal_list_length); + if (NULL == reachable_results) { + return NULL; + } + + i = 0; + OPAL_LIST_FOREACH(local_iter, local_if, opal_if_t) { + j = 0; + OPAL_LIST_FOREACH(remote_iter, remote_if, opal_if_t) { + reachable_results->weights[i][j] = get_weights(local_iter, remote_iter); + j++; + } + i++; + } + + return reachable_results; +} + + +static int get_weights(opal_if_t *local_if, opal_if_t *remote_if) +{ + char str_local[128], str_remote[128], *conn_type; + int outgoing_interface, ret, weight, has_gateway; + + /* opal_net_get_hostname returns a static buffer. Great for + single address printfs, need to copy in this case */ + strncpy(str_local, + opal_net_get_hostname((struct sockaddr *)&local_if->if_addr), + sizeof(str_local)); + strncpy(str_remote, + opal_net_get_hostname((struct sockaddr *)&remote_if->if_addr), + sizeof(str_remote)); + + /* initially, assume no connection is possible */ + weight = calculate_weight(0, 0, CQ_NO_CONNECTION); + + if (AF_INET == local_if->af_family && AF_INET == remote_if->af_family) { + uint32_t local_ip, remote_ip; + + local_ip = (uint32_t)((struct sockaddr_in *)&(local_if->if_addr))->sin_addr.s_addr; + remote_ip = (uint32_t)((struct sockaddr_in *)&(remote_if->if_addr))->sin_addr.s_addr; + outgoing_interface = local_if->if_kernel_index; + + ret = opal_reachable_netlink_rt_lookup(local_ip, + remote_ip, + outgoing_interface, + &has_gateway); + if (0 == ret) { + if (0 == has_gateway) { + conn_type = "IPv4 SAME NETWORK"; + weight = calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_SAME_NETWORK); + } else { + conn_type = "IPv4 DIFFERENT NETWORK"; + weight = 
calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_DIFFERENT_NETWORK); + } + } else { + conn_type = "IPv4 NO CONNECTION"; + weight = calculate_weight(0, 0, CQ_NO_CONNECTION); + } + +#if OPAL_ENABLE_IPV6 + } else if (AF_INET6 == local_if->af_family && AF_INET6 == remote_if->af_family) { + struct in6_addr *local_ip, *remote_ip; + + local_ip = &((struct sockaddr_in6 *)&(local_if->if_addr))->sin6_addr; + remote_ip = &((struct sockaddr_in6 *)&(remote_if->if_addr))->sin6_addr; + outgoing_interface = local_if->if_kernel_index; + + ret = opal_reachable_netlink_rt_lookup6(local_ip, + remote_ip, + outgoing_interface, + &has_gateway); + + if (0 == ret) { + if (0 == has_gateway) { + conn_type = "IPv6 SAME NETWORK"; + weight = calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_SAME_NETWORK); + } else { + conn_type = "IPv6 DIFFERENT NETWORK"; + weight = calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_DIFFERENT_NETWORK); + } + } else { + conn_type = "IPv6 NO CONNECTION"; + weight = calculate_weight(0, 0, CQ_NO_CONNECTION); + } +#endif /* #if OPAL_ENABLE_IPV6 */ + + } else { + /* we don't have an address family match, so assume no + connection */ + conn_type = "Address type mismatch"; + weight = calculate_weight(0, 0, CQ_NO_CONNECTION); + } + + opal_output_verbose(20, opal_reachable_base_framework.framework_output, + "reachable:netlink: path from %s to %s: %s", + str_local, str_remote, conn_type); + + return weight; } + const opal_reachable_base_module_t opal_reachable_netlink_module = { netlink_init, netlink_fini, netlink_reachable }; + + +/* + * Weights determined by bandwidth between + * interfaces (limited by lower bandwidth + * interface). A penalty is added to minimize + * the discrepancy in bandwidth. 
/*
 * Turn a pair of interface bandwidths and a connection quality into
 * a single integer weight.
 *
 * The dominant term is the slower of the two bandwidths, since that
 * is what limits the pair.  A second term, 1/(1 + |a-b|), is largest
 * when both bandwidths are equal and shrinks as they diverge, which
 * discourages pairing a fast interface with a slow one.
 *
 * Formula: connection_quality * (min(a,b) + 1/(1 + |a-b|))
 *
 * Examples (connection_quality taken as 1):
 *
 *        a     b     f(a,b)
 *        0     0     1
 *        0     1     0.5
 *        1     1     2
 *        1     2     1.5
 *        1     3     1.33
 *        1     10    1.1
 *        10    10    11
 *        10    14    10.2
 *        11    14    11.25
 *        11    15    11.2
 *
 * The result is converted to int, so connection_quality has to be
 * large enough for the fractional term to survive that conversion.
 */
static int calculate_weight(int bandwidth_local, int bandwidth_remote,
                            int connection_quality)
{
    const int slower = MIN(bandwidth_local, bandwidth_remote);
    const double mismatch = (double)abs(bandwidth_local - bandwidth_remote);
    const double closeness = 1.0 / (1.0 + mismatch);
    int weight;

    /* int * double; the assignment drops the fractional part */
    weight = connection_quality * (slower + closeness);

    return weight;
}
* Portions of this software copied from libfabric * (https://github.com/ofiwg/libfabric) * @@ -44,6 +45,9 @@ #include #include #include +#ifdef HAVE_NETINET_IN_H +#include +#endif #include "libnl_utils.h" @@ -63,8 +67,8 @@ static struct nla_policy route_policy[RTA_MAX+1] = { [RTA_MULTIPATH] = { .type = NLA_NESTED }, }; -static int usnic_is_nlreply_expected(struct usnic_nl_sk *unlsk, - struct nlmsghdr *nlm_hdr) +static int opal_reachable_netlink_is_nlreply_expected(struct opal_reachable_netlink_sk *unlsk, + struct nlmsghdr *nlm_hdr) { #if OPAL_ENABLE_DEBUG if (nlm_hdr->nlmsg_pid != nl_socket_get_local_port(unlsk->nlh) @@ -80,25 +84,25 @@ static int usnic_is_nlreply_expected(struct usnic_nl_sk *unlsk, return 1; } -static int usnic_is_nlreply_err(struct nlmsghdr *nlm_hdr) +static int opal_reachable_netlink_is_nlreply_err(struct nlmsghdr *nlm_hdr) { if (nlm_hdr->nlmsg_type == NLMSG_ERROR) { struct nlmsgerr *e = (struct nlmsgerr *)nlmsg_data(nlm_hdr); if (nlm_hdr->nlmsg_len >= (__u32)NLMSG_SIZE(sizeof(*e))) - opal_output(0, - "Received a netlink error message"); + opal_output_verbose(20, 0, + "Received a netlink error message"); else - opal_output(0, - "Received a truncated netlink error message\n"); + opal_output_verbose(20, 0, + "Received a truncated netlink error message\n"); return 1; } return 0; } -static int usnic_nl_send_query(struct usnic_nl_sk *unlsk, - struct nl_msg *msg, - int protocol, int flag) +static int opal_reachable_netlink_send_query(struct opal_reachable_netlink_sk *unlsk, + struct nl_msg *msg, + int protocol, int flag) { struct nlmsghdr *nlhdr; @@ -111,7 +115,7 @@ static int usnic_nl_send_query(struct usnic_nl_sk *unlsk, return nl_send(unlsk->nlh, msg); } -static int usnic_nl_set_rcvsk_timer(NL_HANDLE *nlh) +static int opal_reachable_netlink_set_rcvsk_timer(NL_HANDLE *nlh) { int err = 0; struct timeval timeout; @@ -129,15 +133,15 @@ static int usnic_nl_set_rcvsk_timer(NL_HANDLE *nlh) return err; } -static int usnic_nl_sk_alloc(struct usnic_nl_sk 
**p_sk, int protocol) +static int opal_reachable_netlink_sk_alloc(struct opal_reachable_netlink_sk **p_sk, int protocol) { - struct usnic_nl_sk *unlsk; + struct opal_reachable_netlink_sk *unlsk; NL_HANDLE *nlh; int err; unlsk = calloc(1, sizeof(*unlsk)); if (!unlsk) { - opal_output(0, "Failed to allocate usnic_nl_sk struct\n"); + opal_output(0, "Failed to allocate opal_reachable_netlink_sk struct\n"); return ENOMEM; } @@ -157,7 +161,7 @@ static int usnic_nl_sk_alloc(struct usnic_nl_sk **p_sk, int protocol) } NL_DISABLE_SEQ_CHECK(nlh); - err = usnic_nl_set_rcvsk_timer(nlh); + err = opal_reachable_netlink_set_rcvsk_timer(nlh); if (err < 0) goto err_close_nlh; @@ -175,17 +179,17 @@ static int usnic_nl_sk_alloc(struct usnic_nl_sk **p_sk, int protocol) return err; } -static void usnic_nl_sk_free(struct usnic_nl_sk *unlsk) +static void opal_reachable_netlink_sk_free(struct opal_reachable_netlink_sk *unlsk) { nl_close(unlsk->nlh); NL_HANDLE_FREE(unlsk->nlh); free(unlsk); } -static int usnic_rt_raw_parse_cb(struct nl_msg *msg, void *arg) +static int opal_reachable_netlink_rt_raw_parse_cb(struct nl_msg *msg, void *arg) { - struct usnic_rt_cb_arg *lookup_arg = (struct usnic_rt_cb_arg *)arg; - struct usnic_nl_sk *unlsk = lookup_arg->unlsk; + struct opal_reachable_netlink_rt_cb_arg *lookup_arg = (struct opal_reachable_netlink_rt_cb_arg *)arg; + struct opal_reachable_netlink_sk *unlsk = lookup_arg->unlsk; struct nlmsghdr *nlm_hdr = nlmsg_hdr(msg); struct rtmsg *rtm; struct nlattr *tb[RTA_MAX + 1]; @@ -194,14 +198,14 @@ static int usnic_rt_raw_parse_cb(struct nl_msg *msg, void *arg) INC_CB_MSGCNT(lookup_arg); - if (!usnic_is_nlreply_expected(unlsk, nlm_hdr)) { + if (!opal_reachable_netlink_is_nlreply_expected(unlsk, nlm_hdr)) { #if OPAL_ENABLE_DEBUG nl_msg_dump(msg, stderr); #endif return NL_SKIP; } - if (usnic_is_nlreply_err(nlm_hdr)) { + if (opal_reachable_netlink_is_nlreply_err(nlm_hdr)) { #if OPAL_ENABLE_DEBUG nl_msg_dump(msg, stderr); #endif @@ -220,7 +224,11 @@ static int 
usnic_rt_raw_parse_cb(struct nl_msg *msg, void *arg) } rtm = nlmsg_data(nlm_hdr); - if (rtm->rtm_family != AF_INET) { + if (rtm->rtm_family != AF_INET +#if OPAL_ENABLE_IPV6 + && rtm->rtm_family != AF_INET6 +#endif + ) { #if OPAL_ENABLE_DEBUG opal_output(0, "RTM message contains invalid AF family: %u\n", rtm->rtm_family); @@ -243,38 +251,45 @@ static int usnic_rt_raw_parse_cb(struct nl_msg *msg, void *arg) if (nla_get_u32(tb[RTA_OIF]) == (uint32_t)lookup_arg->oif) found = 1; else - opal_output(0, "Retrieved route has a different outgoing interface %d (expected %d)\n", - nla_get_u32(tb[RTA_OIF]), - lookup_arg->oif); + /* usually, this means that there is a route to the remote + host, but that it's not through the given interface. For + our purposes, that means it's not reachable. */ + opal_output_verbose(20, 0, "Retrieved route has a different outgoing interface %d (expected %d)\n", + nla_get_u32(tb[RTA_OIF]), + lookup_arg->oif); } - if (found && tb[RTA_GATEWAY]) - lookup_arg->nh_addr = nla_get_u32(tb[RTA_GATEWAY]); - + if (found && tb[RTA_GATEWAY]) { + lookup_arg->has_gateway = 1; + } lookup_arg->found = found; return NL_STOP; } -int opal_reachable_netlink_nl_rt_lookup(uint32_t src_addr, - uint32_t dst_addr, int oif, - uint32_t *nh_addr) +int opal_reachable_netlink_rt_lookup(uint32_t src_addr, + uint32_t dst_addr, + int outgoing_interface, + int *has_gateway) { - struct usnic_nl_sk *unlsk; - struct nl_msg *nlm; - struct rtmsg rmsg; - struct usnic_rt_cb_arg arg; - int err; + struct opal_reachable_netlink_sk *unlsk; /* netlink socket */ + struct nl_msg *nlm; /* netlink message */ + struct rtmsg rmsg; /* route message */ + struct opal_reachable_netlink_rt_cb_arg arg; /* callback argument */ + int err; + /* allocate netlink socket */ unlsk = NULL; - err = usnic_nl_sk_alloc(&unlsk, NETLINK_ROUTE); + err = opal_reachable_netlink_sk_alloc(&unlsk, NETLINK_ROUTE); if (err) return err; + /* allocate route message */ memset(&rmsg, 0, sizeof(rmsg)); rmsg.rtm_family = AF_INET; 
#if OPAL_ENABLE_IPV6
/*
 * Ask the kernel routing table whether dst_addr can be reached from
 * src_addr through the interface with kernel index
 * outgoing_interface (IPv6 variant).
 *
 * src_addr / dst_addr:   IPv6 addresses placed in the RTM_GETROUTE
 *                        query as RTA_SRC / RTA_DST
 * outgoing_interface:    interface index the returned route must use
 * has_gateway:           out; set to 1 if the matching route goes
 *                        through a gateway, 0 otherwise (including on
 *                        every error path)
 *
 * Returns 0 if a matching route was found, EHOSTUNREACH if not,
 * ENOMEM / EINVAL or the socket-allocation error on other failures.
 */
int opal_reachable_netlink_rt_lookup6(struct in6_addr *src_addr,
                                      struct in6_addr *dst_addr,
                                      int outgoing_interface,
                                      int *has_gateway)
{
    struct opal_reachable_netlink_sk *unlsk;     /* netlink socket */
    struct nl_msg *nlm;                          /* netlink message */
    struct rtmsg rmsg;                           /* route message */
    struct opal_reachable_netlink_rt_cb_arg arg; /* callback argument */
    int err;

    /* give the caller a defined value on every return path */
    *has_gateway = 0;

    /* allocate netlink socket */
    unlsk = NULL;
    err = opal_reachable_netlink_sk_alloc(&unlsk, NETLINK_ROUTE);
    if (err)
        return err;

    /* allocate route message */
    memset(&rmsg, 0, sizeof(rmsg));
    rmsg.rtm_family = AF_INET6;
    rmsg.rtm_dst_len = sizeof(*dst_addr) * CHAR_BIT;
    rmsg.rtm_src_len = sizeof(*src_addr) * CHAR_BIT;

    /* allocate netlink message of type RTM_GETROUTE */
    nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0);
    if (!nlm) {
        opal_output(0, "Failed to alloc nl message, %s\n",
                    NL_GETERROR(err));
        err = ENOMEM;
        goto out;
    }

    /* append route message and addresses to netlink message; a
       failure here would otherwise send a truncated query */
    if (nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO) < 0 ||
        nla_put(nlm, RTA_DST, sizeof(dst_addr->s6_addr), &(dst_addr->s6_addr)) < 0 ||
        nla_put(nlm, RTA_SRC, sizeof(src_addr->s6_addr), &(src_addr->s6_addr)) < 0) {
        opal_output(0, "Failed to build RTM_GETROUTE query message\n");
        nlmsg_free(nlm);
        err = ENOMEM;
        goto out;
    }

    /* query kernel */
    err = opal_reachable_netlink_send_query(unlsk, nlm, NETLINK_ROUTE, NLM_F_REQUEST);
    nlmsg_free(nlm);
    if (err < 0) {
        opal_output(0, "Failed to send RTM_GETROUTE query message, error %s\n",
                    NL_GETERROR(err));
        err = EINVAL;
        goto out;
    }

    /* Setup callback function */
    memset(&arg, 0, sizeof(arg));
    arg.oif = outgoing_interface;
    arg.unlsk = unlsk;
    err = nl_socket_modify_cb(unlsk->nlh, NL_CB_MSG_IN, NL_CB_CUSTOM,
                              opal_reachable_netlink_rt_raw_parse_cb, &arg);
    if (err != 0) {
        opal_output(0, "Failed to setup callback function, error %s\n",
                    NL_GETERROR(err));
        err = EINVAL;
        goto out;
    }

    /* receive results; the macro jumps to "out" on error or when no
       reply arrives */
    NL_RECVMSGS(unlsk->nlh, arg, EHOSTUNREACH, err, out);

    /* check whether a route was found */
    if (arg.found) {
        *has_gateway = arg.has_gateway;
        err = 0;
    } else {
        *has_gateway = 0;
        err = EHOSTUNREACH;
    }

 out:
    opal_reachable_netlink_sk_free(unlsk);
    return err;
}
#endif /* #if OPAL_ENABLE_IPV6 */
/**
 * Reachability matrix between endpoints of a given pair of hosts
 *
 * The output of the reachable() call is an opal_reachable_t, which
 * gives a matrix of the connectivity between local and remote
 * ethernet endpoints.  Any given value in weights is the connectivity
 * between the local endpoint index (first index) and the remote
 * endpoint index (second index), and is a value between 0 and INT_MAX
 * representing a relative connectivity.
 *
 * This is an OPAL object (it embeds opal_object_t), so instances are
 * reference counted through the usual OBJ_* macros.
 */
struct opal_reachable_t {
    opal_object_t super;
    /** number of local interfaces passed to reachable() */
    int num_local;
    /** number of remote interfaces passed to reachable() */
    int num_remote;
    /** matrix of connectivity weights, indexed as
        weights[local index][remote index] */
    int **weights;
    /** \internal backing storage that weights points into; owned by
        the object and not meant to be touched by callers */
    void *memory;
};
typedef struct opal_reachable_t opal_reachable_t;
OBJ_CLASS_DECLARATION(opal_reachable_t);
+/* Build reachability matrix between local and remote ethernet + * interfaces * - * local_if: list of local opal_if_t interfaces - * remote_if: list of opal_if_t interfaces for the remote - * process + * Given a list of local interfaces and remote interfaces from a + * single peer, build a reachability matrix between the two peers. + * This function does not select the best pairing of local and remote + * interfaces, but only a (comparable) reachability between any pair + * of local/remote interfaces. * - * return value: pointer to opal_if_t on local_if that is - * the "best" option for connecting. NULL - * indicates that the remote process cannot - * be reached on any interface + * @returns a reachable object containing the reachability matrix on + * success, NULL on failure. */ -typedef opal_if_t* +typedef opal_reachable_t* (*opal_reachable_base_module_reachable_fn_t)(opal_list_t *local_if, opal_list_t *remote_if); @@ -65,7 +91,7 @@ typedef struct { /* * Macro for use in components that are of type reachable */ -#define OPAL_REACHABLE_BASE_VERSION_2_0_0 \ +#define OPAL_REACHABLE_BASE_VERSION_2_0_0 \ OPAL_MCA_BASE_VERSION_2_1_0("reachable", 2, 0, 0) /* Global structure for accessing reachability functions */ diff --git a/opal/mca/reachable/weighted/.opal_ignore b/opal/mca/reachable/weighted/.opal_ignore deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/opal/mca/reachable/weighted/owner.txt b/opal/mca/reachable/weighted/owner.txt index 5361011bda4..786349a14c0 100644 --- a/opal/mca/reachable/weighted/owner.txt +++ b/opal/mca/reachable/weighted/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. 
active, maintenance, unmaintained # -owner: INTEL -status: unmaintained +owner: AMAZON +status: active diff --git a/opal/mca/reachable/weighted/reachable_weighted.c b/opal/mca/reachable/weighted/reachable_weighted.c index 6996a61981d..ecd68ac03ec 100644 --- a/opal/mca/reachable/weighted/reachable_weighted.c +++ b/opal/mca/reachable/weighted/reachable_weighted.c @@ -5,6 +5,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,30 +22,41 @@ #ifdef HAVE_UNISTD_H #include #endif +#ifdef HAVE_MATH_H +#include +#endif #include "opal/mca/if/if.h" #include "opal/mca/reachable/base/base.h" #include "reachable_weighted.h" +#include "opal/util/net.h" static int weighted_init(void); static int weighted_fini(void); -static opal_if_t* weighted_reachable(opal_list_t *local_if, - opal_list_t *remote_if); +static opal_reachable_t* weighted_reachable(opal_list_t *local_if, + opal_list_t *remote_if); + +static int get_weights(opal_if_t *local_if, opal_if_t *remote_if); +static int calculate_weight(int bandwidth_local, int bandwidth_remote, + int connection_quality); /* - * describes the quality of a possible connection between a local and - * a remote network interface + * Describes the quality of a possible connection between a local and + * a remote network interface. Highest connection quality is assigned + * to connections between interfaces on same network. This is because + * same network implies a single hop to destination. Public addresses + * are preferred over private addresses. This is all guessing, + * because we don't know actual network topology. 
*/ enum connection_quality { - CQ_NO_CONNECTION, - CQ_PRIVATE_DIFFERENT_NETWORK, - CQ_PRIVATE_SAME_NETWORK, - CQ_PUBLIC_DIFFERENT_NETWORK, - CQ_PUBLIC_SAME_NETWORK + CQ_NO_CONNECTION = 0, + CQ_PRIVATE_DIFFERENT_NETWORK = 50, + CQ_PRIVATE_SAME_NETWORK = 80, + CQ_PUBLIC_DIFFERENT_NETWORK = 90, + CQ_PUBLIC_SAME_NETWORK = 100 }; - const opal_reachable_base_module_t opal_reachable_weighted_module = { weighted_init, weighted_fini, @@ -53,6 +66,7 @@ const opal_reachable_base_module_t opal_reachable_weighted_module = { // local variables static int init_cntr = 0; + static int weighted_init(void) { ++init_cntr; @@ -67,207 +81,183 @@ static int weighted_fini(void) return OPAL_SUCCESS; } -static opal_if_t* weighted_reachable(opal_list_t *local_if, - opal_list_t *remote_if) + +static opal_reachable_t* weighted_reachable(opal_list_t *local_if, + opal_list_t *remote_if) { - size_t perm_size, num_local_interfaces, num_peer_interfaces; - enum connection_quality **weights; - - /* - * assign weights to each possible pair of interfaces - */ - num_local_interfaces = opal_list_get_size(local_if); - num_peer_interfaces = opal_list_get_size(remote_if); - - perm_size = num_local_interfaces; - if (num_peer_interfaces > perm_size) { - perm_size = num_peer_interfaces; + opal_reachable_t *reachable_results = NULL; + int i, j; + opal_if_t *local_iter, *remote_iter; + + reachable_results = opal_reachable_allocate(opal_list_get_size(local_if), + opal_list_get_size(remote_if)); + if (NULL == reachable_results) { + return NULL; } - weights = (enum connection_quality**)malloc(perm_size * sizeof(enum connection_quality*)); + i = 0; + OPAL_LIST_FOREACH(local_iter, local_if, opal_if_t) { + j = 0; + OPAL_LIST_FOREACH(remote_iter, remote_if, opal_if_t) { + reachable_results->weights[i][j] = get_weights(local_iter, remote_iter); + j++; + } + i++; + } - best_addr = (mca_btl_tcp_addr_t ***) malloc(perm_size - * sizeof(mca_btl_tcp_addr_t **)); - for(i = 0; i < perm_size; ++i) { - weights[i] = (enum 
connection_quality*) malloc(perm_size * sizeof(enum connection_quality)); - memset(weights[i], 0, perm_size * sizeof(enum connection_quality)); + return reachable_results; +} - best_addr[i] = (mca_btl_tcp_addr_t **) malloc(perm_size * sizeof(mca_btl_tcp_addr_t *)); - memset(best_addr[i], 0, perm_size * sizeof(mca_btl_tcp_addr_t *)); - } - for(i=0; iipv4_address && - NULL != peer_interfaces[j]->ipv4_address) { - - /* check for loopback */ - if ((opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address) - && !opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv4_address)) - || (opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv4_address) - && !opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address)) - || (opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address) - && !opal_ifislocal(proc_hostname))) { - - /* No connection is possible on these interfaces */ - - /* check for RFC1918 */ - } else if(opal_net_addr_isipv4public((struct sockaddr*) local_interfaces[i]->ipv4_address) - && opal_net_addr_isipv4public((struct sockaddr*) - peer_interfaces[j]->ipv4_address)) { - if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address, - (struct sockaddr*) peer_interfaces[j]->ipv4_address, - local_interfaces[i]->ipv4_netmask)) { - weights[i][j] = CQ_PUBLIC_SAME_NETWORK; - } else { - weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK; - } - best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr; - continue; - } else { - if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address, - (struct sockaddr*) peer_interfaces[j]->ipv4_address, - local_interfaces[i]->ipv4_netmask)) { - weights[i][j] = CQ_PRIVATE_SAME_NETWORK; - } else { - weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK; - } - best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr; - } +static int get_weights(opal_if_t *local_if, opal_if_t *remote_if) +{ + char str_local[128], str_remote[128], *conn_type; + struct 
sockaddr *local_sockaddr, *remote_sockaddr; + int weight; + + local_sockaddr = (struct sockaddr *)&local_if->if_addr; + remote_sockaddr = (struct sockaddr *)&remote_if->if_addr; + + /* opal_net_get_hostname returns a static buffer. Great for + single address printfs, need to copy in this case */ + strncpy(str_local, opal_net_get_hostname(local_sockaddr), sizeof(str_local)); + strncpy(str_remote, opal_net_get_hostname(remote_sockaddr), sizeof(str_remote)); + + /* initially, assume no connection is possible */ + weight = calculate_weight(0, 0, CQ_NO_CONNECTION); + + if (AF_INET == local_sockaddr->sa_family && + AF_INET == remote_sockaddr->sa_family) { + + if (opal_net_addr_isipv4public(local_sockaddr) && + opal_net_addr_isipv4public(remote_sockaddr)) { + if (opal_net_samenetwork(local_sockaddr, + remote_sockaddr, + local_if->if_mask)) { + conn_type = "IPv4 PUBLIC SAME NETWORK"; + weight = calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_PUBLIC_SAME_NETWORK); + } else { + conn_type = "IPv4 PUBLIC DIFFERENT NETWORK"; + weight = calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_PUBLIC_DIFFERENT_NETWORK); } - - /* check state of ipv6 address pair - ipv6 is always public, - * since link-local addresses are skipped in opal_ifinit() - */ - if(NULL != local_interfaces[i]->ipv6_address && - NULL != peer_interfaces[j]->ipv6_address) { - - /* check for loopback */ - if ((opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address) - && !opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv6_address)) - || (opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv6_address) - && !opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address)) - || (opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address) - && !opal_ifislocal(proc_hostname))) { - - /* No connection is possible on these interfaces */ - - } else if(opal_net_samenetwork((struct sockaddr*) 
local_interfaces[i]->ipv6_address, - (struct sockaddr*) peer_interfaces[j]->ipv6_address, - local_interfaces[i]->ipv6_netmask)) { - weights[i][j] = CQ_PUBLIC_SAME_NETWORK; - } else { - weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK; - } - best_addr[i][j] = peer_interfaces[j]->ipv6_endpoint_addr; + } else if (!opal_net_addr_isipv4public(local_sockaddr) && + !opal_net_addr_isipv4public(remote_sockaddr)) { + if (opal_net_samenetwork(local_sockaddr, + remote_sockaddr, + local_if->if_mask)) { + conn_type = "IPv4 PRIVATE SAME NETWORK"; + weight = calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_PRIVATE_SAME_NETWORK); + } else { + conn_type = "IPv4 PRIVATE DIFFERENT NETWORK"; + weight = calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_PRIVATE_DIFFERENT_NETWORK); } + } else { + /* one private, one public address. likely not a match. */ + conn_type = "IPv4 NO CONNECTION"; + weight = calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_NO_CONNECTION); + } - } /* for each peer interface */ - } /* for each local interface */ - - /* - * determine the size of the set to permute (max number of - * interfaces - */ - - best_assignment = (unsigned int *) malloc (perm_size * sizeof(int)); - - a = (int *) malloc(perm_size * sizeof(int)); - if (NULL == a) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* Can only find the best set of connections when the number of - * interfaces is not too big. When it gets larger, we fall back - * to a simpler and faster (and not as optimal) algorithm. - * See ticket https://svn.open-mpi.org/trac/ompi/ticket/2031 - * for more details about this issue. 
*/ - if (perm_size <= MAX_PERMUTATION_INTERFACES) { - memset(a, 0, perm_size * sizeof(int)); - max_assignment_cardinality = -1; - max_assignment_weight = -1; - visit(0, -1, perm_size, a); - - rc = OPAL_ERR_UNREACH; - for(i = 0; i < perm_size; ++i) { - if(best_assignment[i] > num_peer_interfaces - || weights[i][best_assignment[i]] == CQ_NO_CONNECTION - || peer_interfaces[best_assignment[i]]->inuse - || NULL == peer_interfaces[best_assignment[i]]) { - continue; +#if OPAL_ENABLE_IPV6 + } else if (AF_INET6 == local_sockaddr->sa_family && + AF_INET6 == remote_sockaddr->sa_family) { + if (opal_net_addr_isipv6linklocal(local_sockaddr) && + opal_net_addr_isipv6linklocal(remote_sockaddr)) { + /* we can't actually tell if link local addresses are on + * the same network or not with the weighted component. + * Assume they are on the same network, so that they'll be + * most likely to be paired together, breaking the fewest + * number of connections. + * + * There used to be a comment in this code (and one in the + * BTL TCP code as well) that the opal_if code doesn't + * pass link-local addresses through. However, this is + * demonstrably not true on Linux, where link-local + * interfaces are created. Since it's easy to handle + * either case, do so. 
+ */ + conn_type = "IPv6 LINK-LOCAL SAME NETWORK"; + weight = calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_PRIVATE_SAME_NETWORK); + } else if (!opal_net_addr_isipv6linklocal(local_sockaddr) && + !opal_net_addr_isipv6linklocal(remote_sockaddr)) { + if (opal_net_samenetwork(local_sockaddr, + remote_sockaddr, + local_if->if_mask)) { + conn_type = "IPv6 PUBLIC SAME NETWORK"; + weight = calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_PUBLIC_SAME_NETWORK); + } else { + conn_type = "IPv6 PUBLIC DIFFERENT NETWORK"; + weight = calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_PUBLIC_DIFFERENT_NETWORK); } - peer_interfaces[best_assignment[i]]->inuse++; - btl_endpoint->endpoint_addr = best_addr[i][best_assignment[i]]; - btl_endpoint->endpoint_addr->addr_inuse++; - rc = OPAL_SUCCESS; - break; + } else { + /* one link-local, one public address. likely not a match. */ + conn_type = "IPv6 NO CONNECTION"; + weight = calculate_weight(local_if->if_bandwidth, + remote_if->if_bandwidth, + CQ_NO_CONNECTION); } +#endif /* #if OPAL_ENABLE_IPV6 */ + } else { - enum mca_btl_tcp_connection_quality max; - int i_max = 0, j_max = 0; - /* Find the best connection that is not in use. Save away - * the indices of the best location. */ - max = CQ_NO_CONNECTION; - for(i=0; iinuse) { - if (weights[i][j] > max) { - max = weights[i][j]; - i_max = i; - j_max = j; - } - } - } - } - /* Now see if there is a some type of connection available. 
*/ - rc = OPAL_ERR_UNREACH; - if (CQ_NO_CONNECTION != max) { - peer_interfaces[j_max]->inuse++; - btl_endpoint->endpoint_addr = best_addr[i_max][j_max]; - btl_endpoint->endpoint_addr->addr_inuse++; - rc = OPAL_SUCCESS; - } + /* we don't have an address family match, so assume no + connection */ + conn_type = "Address type mismatch"; + weight = calculate_weight(0, 0, CQ_NO_CONNECTION); } - for(i = 0; i < perm_size; ++i) { - free(weights[i]); - free(best_addr[i]); - } + opal_output_verbose(20, opal_reachable_base_framework.framework_output, + "reachable:weighted: path from %s to %s: %s", + str_local, str_remote, conn_type); - for(i = 0; i < num_peer_interfaces; ++i) { - if(NULL != peer_interfaces[i]->ipv4_address) { - free(peer_interfaces[i]->ipv4_address); - } - if(NULL != peer_interfaces[i]->ipv6_address) { - free(peer_interfaces[i]->ipv6_address); - } - free(peer_interfaces[i]); - } - free(peer_interfaces); - peer_interfaces = NULL; - max_peer_interfaces = 0; + return weight; +} - for(i = 0; i < num_local_interfaces; ++i) { - if(NULL != local_interfaces[i]->ipv4_address) { - free(local_interfaces[i]->ipv4_address); - } - if(NULL != local_interfaces[i]->ipv6_address) { - free(local_interfaces[i]->ipv6_address); - } - free(local_interfaces[i]); - } - free(local_interfaces); - local_interfaces = NULL; - max_local_interfaces = 0; - - free(weights); - free(best_addr); - free(best_assignment); - free(a); - return false; + +/* + * Weights determined by bandwidth between + * interfaces (limited by lower bandwidth + * interface). A penalty is added to minimize + * the discrepancy in bandwidth. This helps + * prevent pairing of fast and slow interfaces + * + * Formula: connection_quality * (min(a,b) + 1/(1 + |a-b|)) + * + * Examples: a b f(a,b) + * 0 0 1 + * 0 1 0.5 + * 1 1 2 + * 1 2 1.5 + * 1 3 1.33 + * 1 10 1.1 + * 10 10 11 + * 10 14 10.2 + * 11 14 11.25 + * 11 15 11.2 + * + * NOTE: connection_quality of 1 is assumed for examples. 
+ * In reality, since we're using integers, we need + * connection_quality to be large enough + * to capture decimals + */ +static int calculate_weight(int bandwidth_local, int bandwidth_remote, + int connection_quality) +{ + int weight = connection_quality * (MIN(bandwidth_local, bandwidth_remote) + + 1.0 / (1.0 + (double)abs(bandwidth_local - bandwidth_remote))); + return weight; } diff --git a/opal/mca/reachable/weighted/reachable_weighted.h b/opal/mca/reachable/weighted/reachable_weighted.h index 04113b10468..6a0220c2ca0 100644 --- a/opal/mca/reachable/weighted/reachable_weighted.h +++ b/opal/mca/reachable/weighted/reachable_weighted.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -19,6 +21,7 @@ #include #endif +#include "opal/mca/reachable/reachable.h" #include "opal/mca/mca.h" #include "opal/mca/event/event.h" #include "opal/util/proc.h" diff --git a/opal/mca/reachable/weighted/reachable_weighted_component.c b/opal/mca/reachable/weighted/reachable_weighted_component.c index fbbd27308e8..6e8098b7698 100644 --- a/opal/mca/reachable/weighted/reachable_weighted_component.c +++ b/opal/mca/reachable/weighted/reachable_weighted_component.c @@ -5,6 +5,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -71,7 +73,7 @@ opal_reachable_weighted_component_t mca_reachable_weighted_component = { .mca_register_component_params = component_register, }, /* Next the MCA v1.0.0 component meta data */ - .base_version = { + .base_data = { /* The component is checkpoint ready */ MCA_BASE_METADATA_PARAM_CHECKPOINT }, diff --git a/opal/runtime/opal_finalize.c b/opal/runtime/opal_finalize.c index a029d6b2932..05c06e0f9c3 100644 --- a/opal/runtime/opal_finalize.c +++ b/opal/runtime/opal_finalize.c @@ -15,6 +15,8 @@ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -44,6 +46,7 @@ #include "opal/mca/memchecker/base/base.h" #include "opal/mca/memcpy/base/base.h" #include "opal/mca/backtrace/base/base.h" +#include "opal/mca/reachable/base/base.h" #include "opal/mca/timer/base/base.h" #include "opal/mca/hwloc/base/base.h" #include "opal/mca/event/base/base.h" @@ -135,6 +138,8 @@ opal_finalize(void) (void) mca_base_framework_close(&opal_compress_base_framework); #endif + (void) mca_base_framework_close(&opal_reachable_base_framework); + (void) mca_base_framework_close(&opal_event_base_framework); /* close high resolution timers */ diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 03ffa7118d5..67a7ef3ad60 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -18,6 +18,8 @@ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -49,6 +51,7 @@ #include "opal/mca/patcher/base/base.h" #include "opal/mca/memcpy/base/base.h" #include "opal/mca/hwloc/base/base.h" +#include "opal/mca/reachable/base/base.h" #include "opal/mca/timer/base/base.h" #include "opal/mca/memchecker/base/base.h" #include "opal/mca/if/base/base.h" @@ -596,6 +599,16 @@ opal_init(int* pargc, char*** pargv) goto return_error; } + /* Load reachable framework */ + if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_reachable_base_framework, 0))){ + error = "opal_reachable_base_framework"; + goto return_error; + } + if (OPAL_SUCCESS != (ret = opal_reachable_base_select())) { + error = "opal_reachable_base_select"; + goto return_error; + } + #if OPAL_ENABLE_FT_CR == 1 /* * Initialize the compression framework diff --git a/opal/test/reachable/Makefile b/opal/test/reachable/Makefile new file mode 100644 index 00000000000..028cb93e68d --- /dev/null +++ b/opal/test/reachable/Makefile @@ -0,0 +1,19 @@ +# Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights +# reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +PROGS = reachable_weighted reachable_netlink + +all: $(PROGS) + +CC = ortecc +CFLAGS = -g + +clean: + rm -f $(PROGS) *~ diff --git a/opal/test/reachable/reachable_netlink.c b/opal/test/reachable/reachable_netlink.c new file mode 100644 index 00000000000..3fdedbf84ef --- /dev/null +++ b/opal/test/reachable/reachable_netlink.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "opal_config.h" + +#include "reachable_shared.h" + +#include "opal/runtime/opal.h" +#include "opal/mca/reachable/reachable.h" +#include "opal/util/if.h" +#include "opal/class/opal_list.h" +#include "opal/util/if.h" + +/* + * Creates list of remote interfaces for testing reachability. 
+ * Only minimum information is filled out. + */ +opal_list_t* build_if_list(void) +{ + /* Allocate memory for and create interface list */ + opal_list_t *if_list = OBJ_NEW(opal_list_t); + opal_if_t *intf; + + /* + * Add localhost to list + */ + intf = create_if(AF_INET, "127.0.0.1", 8, 0); + opal_list_append(if_list, &(intf->super)); + + /* + * Add localhost with non-standard address + */ + intf = create_if(AF_INET, "127.31.41.59", 8, 0); + opal_list_append(if_list, &(intf->super)); + + /* + * Add another localhost with non-standard address + */ + intf = create_if(AF_INET, "127.26.53.58", 8, 0); + opal_list_append(if_list, &(intf->super)); + + /* + * Google's public DNS + */ + intf = create_if(AF_INET, "8.8.8.8", 16, 0); + opal_list_append(if_list, &(intf->super)); + + /* + * Google's public DNS (2) + */ + intf = create_if(AF_INET, "8.8.4.4", 16, 0); + opal_list_append(if_list, &(intf->super)); + + /* + * IPv6: Google's public DNS (IPv6) + */ + intf = create_if(AF_INET6, "2001:4860:4860::8888", 64, 0); + opal_list_append(if_list, &(intf->super)); + + /* + * IPv6: Google's public DNS 2 (IPv6) + */ + intf = create_if(AF_INET6, "2001:4860:4860::8844", 128, 0); + opal_list_append(if_list, &(intf->super)); + + /* + * IPv6: Google's public DNS 1 (IPv6) EXPLICIT ADDRESS + */ + intf = create_if(AF_INET6, "2001:4860:4860:0:0:0:0:8888", 64, 0); + opal_list_append(if_list, &(intf->super)); + + /* + * IPv6: Google's public DNS 2 (IPv6) EXPLICIT ADDRESS + */ + intf = create_if(AF_INET6, "2001:4860:4860:0:0:0:0:8844", 64, 0); + opal_list_append(if_list, &(intf->super)); + + /* + * IPv6: something that should be on the same link local... 
+ */ + intf = create_if(AF_INET6, "fe80::0001", 64, 0); + opal_list_append(if_list, &(intf->super)); + + return if_list; +} + + +int main(int argc, char **argv) +{ + opal_list_t *local_list, *remote_list; + opal_reachable_t *results; + uint32_t i, j; + int successful_connections = 0; + int local_ifs; + int remote_ifs; + opal_if_t *local_if; + + opal_init(&argc, &argv); + + /* List of interfaces generated by opal */ + local_list = &opal_if_list; + /* Create test interfaces */ + remote_list = build_if_list(); + + local_ifs = opal_list_get_size(local_list); + remote_ifs = opal_list_get_size(remote_list); + + /* Tests reachability by looking up entries in routing table. + * Tests routes to localhost and google's nameservers. + */ + results = opal_reachable.reachable(local_list, remote_list); + + printf("Local interfaces:\n"); + i = 0; + OPAL_LIST_FOREACH(local_if, local_list, opal_if_t) { + char addr[128]; + char *family; + + switch (local_if->af_family) { + case AF_INET: + family = "IPv4"; + inet_ntop(AF_INET, &(((struct sockaddr_in*) &local_if->if_addr))->sin_addr, + addr, sizeof(addr)); + break; + case AF_INET6: + family = "IPv6"; + inet_ntop(AF_INET6, &(((struct sockaddr_in6*) &local_if->if_addr))->sin6_addr, + addr, sizeof(addr)); + break; + default: + family = "Unknown"; + strcpy(addr, "Unknown"); + break; + } + + printf(" %3d: %s\t%s\t%s/%d\n", i, local_if->if_name, + family, addr, local_if->if_mask); + i++; + } + + printf("\nRemote interfaces:\n"); + i = 0; + OPAL_LIST_FOREACH(local_if, remote_list, opal_if_t) { + char addr[128]; + char *family; + + switch (local_if->af_family) { + case AF_INET: + family = "IPv4"; + inet_ntop(AF_INET, &(((struct sockaddr_in*) &local_if->if_addr))->sin_addr, + addr, sizeof(addr)); + break; + case AF_INET6: + family = "IPv6"; + inet_ntop(AF_INET6, &(((struct sockaddr_in6*) &local_if->if_addr))->sin6_addr, + addr, sizeof(addr)); + break; + default: + family = "Unknown"; + strcpy(addr, "Unknown"); + break; + } + + printf(" %3d: 
%s\t%s\t%s/%d\n", i, local_if->if_name,
+               family, addr, local_if->if_mask);
+        i++;
+    }
+
+    printf("\nConnectivity Table:\n ");
+    for (j = 0 ; j < remote_ifs ; j++) {
+        printf("%3d ", j);
+    }
+    printf("\n");
+
+    for (i = 0; i < local_ifs ; i++) {
+        printf(" %3d: ", i);
+        for (j = 0 ; j < remote_ifs ; j++) {
+            printf("%3d ", results->weights[i][j]);
+        }
+        printf("\n");
+    }
+    printf("\n");
+
+    OBJ_RELEASE(remote_list);
+
+    opal_output(0, "Passed all tests!\n");
+    return 0;
+}
diff --git a/opal/test/reachable/reachable_shared.h b/opal/test/reachable/reachable_shared.h
new file mode 100644
index 00000000000..4b9941a03da
--- /dev/null
+++ b/opal/test/reachable/reachable_shared.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017-XXXX Amazon.com, Inc. or its affiliates.
+ * All Rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef TEST_REACHABLE_SHARED
+#define TEST_REACHABLE_SHARED 1
+
+#include <assert.h>
+
+#include "opal/runtime/opal.h"
+#include "opal/mca/reachable/reachable.h"
+#include "opal/util/if.h"
+
+BEGIN_C_DECLS
+
+/* Create and populate opal_if_t with information required by opal_reachable.
+ *
+ * af_family selects how address is parsed (AF_INET or AF_INET6; any
+ * other family leaves if_addr without an address).  mask and bandwidth
+ * are stored as given.  The returned object is created with OBJ_NEW.
+ */
+opal_if_t* create_if(int af_family, char *address, int mask, int bandwidth)
+{
+    int rc = 1;
+    opal_if_t *interface = OBJ_NEW(opal_if_t);
+    strncpy(interface->if_name, "interface0", IF_NAMESIZE);
+    interface->af_family = af_family;
+    ((struct sockaddr *)&(interface->if_addr))->sa_family = af_family;
+
+    /* Parse outside of assert(): when NDEBUG is defined the assert
+     * expression is not evaluated, so a call placed inside it would
+     * be skipped and the address would never be filled in. */
+    if (AF_INET == af_family){
+        rc = inet_pton(af_family, address, &((struct sockaddr_in *)&(interface->if_addr))->sin_addr);
+    } else if (AF_INET6 == af_family){
+        rc = inet_pton(af_family, address, &((struct sockaddr_in6 *)&(interface->if_addr))->sin6_addr);
+    }
+    assert(1 == rc);
+    (void) rc; /* only read by assert(); avoid unused warning with NDEBUG */
+
+    interface->if_mask = mask;
+    interface->if_bandwidth = bandwidth;
+
+    return interface;
+}
+
+
+/* Run a test between a pair of interfaces
+ * and clean up the memory afterwards.
+ * Return the weight between the pair of
+ * interfaces, or -1 if the reachable
+ * component returned no result.
+ */
+int run_single_test(opal_if_t *local_if, opal_if_t *remote_if)
+{
+
+    opal_list_t *local_list = OBJ_NEW(opal_list_t);
+    opal_list_t *remote_list = OBJ_NEW(opal_list_t);
+
+    opal_list_append(local_list, &(local_if->super));
+    opal_list_append(remote_list, &(remote_if->super));
+
+    opal_reachable_t *results;
+    results = opal_reachable.reachable(local_list, remote_list);
+    OBJ_RELEASE(local_list);
+    OBJ_RELEASE(remote_list);
+    if (NULL == results) {
+        /* reachable() reports failure as NULL; -1 is outside the
+         * 0..INT_MAX weight range, so the caller's comparison fails */
+        return -1;
+    }
+    int result = results->weights[0][0];
+
+    /* release results */
+    OBJ_RELEASE(results);
+    return result;
+}
+
+END_C_DECLS
+
+#endif
diff --git a/opal/test/reachable/reachable_weighted.c b/opal/test/reachable/reachable_weighted.c
new file mode 100644
index 00000000000..5a6db2fc091
--- /dev/null
+++ b/opal/test/reachable/reachable_weighted.c
@@ -0,0 +1,1015 @@
+/*
+ * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights
+ * reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+
+#include "opal_config.h"
+
+#include "reachable_shared.h"
+
+/* sigh; needs to match with code in reachable_weighted, but those
+   headers aren't installed. */
+enum connection_quality {
+    CQ_NO_CONNECTION = 0,
+    CQ_PRIVATE_DIFFERENT_NETWORK = 50,
+    CQ_PRIVATE_SAME_NETWORK = 80,
+    CQ_PUBLIC_DIFFERENT_NETWORK = 90,
+    CQ_PUBLIC_SAME_NETWORK = 100
+};
+
+
+/* SUITE 1:
+ * Tests IPv4 connections by
+ * modifying ip addresses and
+ * subnet masks. Also tests
+ * IPv4->IPv6 and the other way
+ * around, to assure no connection
+ * is returned in that case.
+ */
+int ipv4_test()
+{
+    opal_if_t *int1;
+    opal_if_t *int2;
+    int expected_result;
+    int result;
+    int test_no = 0;
+    int failed_no = 0;
+
+    /* TEST1
+     * Localhost to localhost. Since localhost range is not a
+     * private network (RFC1918), expected result is public
+     * same network.
+ */ + test_no++; + expected_result = CQ_PUBLIC_SAME_NETWORK; + int1 = create_if(AF_INET, "127.0.0.1", 24, 0); + int2 = create_if(AF_INET, "127.0.0.2", 0, 0); + result = run_single_test(int1, int2); + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST2 + * Testing public same network with subnet mask + * 255.255.255.0 + */ + test_no++; + expected_result = CQ_PUBLIC_SAME_NETWORK; + int1 = create_if(AF_INET, "31.14.15.92", 24, 0); + int2 = create_if(AF_INET, "31.14.15.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST3 + * Testing public same network with subnet mask + * 255.255.0.0 + */ + test_no++; + expected_result = CQ_PUBLIC_SAME_NETWORK; + int1 = create_if(AF_INET, "65.35.89.79", 16, 0); + int2 = create_if(AF_INET, "65.35.27.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST4 + * Testing public same network with subnet mask + * 255.0.0.0 + */ + test_no++; + expected_result = CQ_PUBLIC_SAME_NETWORK; + int1 = create_if(AF_INET, "3.23.84.62", 8, 0); + int2 = create_if(AF_INET, "3.27.27.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST5 + * Testing public same network with subnet mask + * 0.0.0.0 + */ + test_no++; + expected_result = CQ_PUBLIC_SAME_NETWORK; + int1 = create_if(AF_INET, "64.33.83.27", 0, 0); + int2 = create_if(AF_INET, "27.27.27.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + 
OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST6 + * Testing public different network with subnet mask + * 255.255.255.0 + */ + test_no++; + expected_result = CQ_PUBLIC_DIFFERENT_NETWORK; + int1 = create_if(AF_INET, "95.2.88.41", 24, 0); + int2 = create_if(AF_INET, "95.2.27.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST7 + * Testing public different network with subnet mask + * 255.255.0.0 + */ + test_no++; + expected_result = CQ_PUBLIC_DIFFERENT_NETWORK; + int1 = create_if(AF_INET, "97.16.93.99", 16, 0); + int2 = create_if(AF_INET, "97.27.27.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST8 + * Testing public different network with subnet mask + * 255.0.0.0 + */ + test_no++; + expected_result = CQ_PUBLIC_DIFFERENT_NETWORK; + int1 = create_if(AF_INET, "37.51.5.82", 8, 0); + int2 = create_if(AF_INET, "27.27.27.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST9 + * Testing private same network with subnet mask + * 255.255.255.0 + */ + test_no++; + expected_result = CQ_PRIVATE_SAME_NETWORK; + int1 = create_if(AF_INET, "192.168.0.1", 24, 0); + int2 = create_if(AF_INET, "192.168.0.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST10 + * Testing private same network with subnet mask + * 255.255.0.0 + */ + test_no++; + expected_result = CQ_PRIVATE_SAME_NETWORK; + int1 = create_if(AF_INET, "192.168.0.1", 16, 0); + int2 = create_if(AF_INET, 
"192.168.27.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed this test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST11 + * Testing private same network with subnet mask + * 255.0.0.0 + */ + test_no++; + expected_result = CQ_PRIVATE_SAME_NETWORK; + int1 = create_if(AF_INET, "172.16.0.1", 8, 0); + int2 = create_if(AF_INET, "172.27.27.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed this test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST12 + * Testing private same network with subnet mask + * 0.0.0.0 + */ + test_no++; + expected_result = CQ_PRIVATE_SAME_NETWORK; + int1 = create_if(AF_INET, "192.168.0.1", 0, 0); + int2 = create_if(AF_INET, "10.27.27.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST13 + * Testing private different network with subnet mask + * 255.255.255.0 + */ + test_no++; + expected_result = CQ_PRIVATE_DIFFERENT_NETWORK; + int1 = create_if(AF_INET, "192.168.0.1", 24, 0); + int2 = create_if(AF_INET, "192.168.27.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST14 + * Testing private different network with subnet mask + * 255.255.0.0 + */ + test_no++; + expected_result = CQ_PRIVATE_DIFFERENT_NETWORK; + int1 = create_if(AF_INET, "192.168.0.1", 16, 0); + int2 = create_if(AF_INET, "10.1.0.1", 16, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST15 + * Testing private different network 
with subnet mask + * 255.0.0.0 + */ + test_no++; + expected_result = CQ_PRIVATE_DIFFERENT_NETWORK; + int1 = create_if(AF_INET, "192.168.0.1", 8, 0); + int2 = create_if(AF_INET, "10.27.27.27", 0, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST16 + * Testing public to private with subnet mask + * 255.255.255.0 + */ + test_no++; + expected_result = CQ_NO_CONNECTION; + int1 = create_if(AF_INET, "27.27.27.27", 24 , 0); + int2 = create_if(AF_INET, "192.168.0.1", 16, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST17 + * Testing private to public with subnet mask + * 255.255.255.0 + */ + test_no++; + expected_result = CQ_NO_CONNECTION; + int1 = create_if(AF_INET, "192.168.0.1", 24, 0); + int2 = create_if(AF_INET, "27.27.27.27", 8, 0); + result = run_single_test(int1, int2); + + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST18 + * IPv4->IPv6 + */ + expected_result = CQ_NO_CONNECTION; + int1 = create_if(AF_INET, "8.8.8.8", 24, 0); + int2 = create_if(AF_INET6, "2001:4860:4860:0:0:0:0:8888", 0, 0); + result = run_single_test(int1, int2); + test_no++; + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST19 + * IPv6->IPv4 + */ + expected_result = CQ_NO_CONNECTION; + int1 = create_if(AF_INET6, "2001:4860:4860:0:0:0:0:8888", 64, 0); + int2 = create_if(AF_INET, "8.8.8.8", 0, 0); + result = run_single_test(int1, int2); + test_no++; + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + 
opal_output(0, "Finished Reachable IPv4 Tests. %d/%d successful", test_no-failed_no, test_no); + + if (0 == failed_no) { + return 0; + } else { + return 1; + } +} + + +/* SUITE 2: + * Compares connections with different + * bandwidths to ensure the + * relative ranking is as expected + */ +int ranking_test() +{ + opal_if_t *int1; + opal_if_t *int2; + int result1; + int result2; + int test_no = 0; + int failed_no = 0; + + /* TEST1 + * Compares pairs with bandwidths 0->0 and 1->0. + * The former connection should be better, as + * there is a smaller difference in bandwidth + * (This is an edge case, but this behavior makes + * sense. We want 0->0 to still work, in case + * bandwidth was never set. Thus, the behavior + * for a->b where a = 0 and a != b should + * act the same as any other case, where + * a greater difference leads to a greater + * penalty in bandwidth) + */ + test_no++; + + int1 = create_if(AF_INET, "31.14.15.92", 24, 0); + int2 = create_if(AF_INET, "31.14.15.27", 0, 0); + result1 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + int1 = create_if(AF_INET, "31.14.15.92", 24, 1); + int2 = create_if(AF_INET, "31.14.15.27", 0, 0); + result2 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + if (!(result1 > result2)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + + /* TEST2 + * Compares interface pairs with bandwidth 0->0 and 1->2.
+ * The latter should be better as it has greater bandwidth + */ + test_no++; + + int1 = create_if(AF_INET, "31.14.15.92", 24, 0); + int2 = create_if(AF_INET, "31.14.15.27", 0, 0); + result1 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + int1 = create_if(AF_INET, "31.14.15.92", 24, 1); + int2 = create_if(AF_INET, "31.14.15.27", 0, 2); + result2 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + if (!(result1 < result2)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + + /* TEST3 + * Compares interface pairs with bandwidth 1->2 and 1->1. + * The latter should be better as there is a smaller + * difference in bandwidth + */ + test_no++; + + int1 = create_if(AF_INET, "31.14.15.92", 24, 1); + int2 = create_if(AF_INET, "31.14.15.27", 0, 2); + result1 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + int1 = create_if(AF_INET, "31.14.15.92", 24, 1); + int2 = create_if(AF_INET, "31.14.15.27", 0, 1); + result2 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + if (!(result1 < result2)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + + /* TEST4 + * Compares interface pairs with bandwidth 1->3 and 1->2. + * The latter should be better as there is a smaller + * difference in bandwidth + */ + test_no++; + + int1 = create_if(AF_INET, "31.14.15.92", 24, 1); + int2 = create_if(AF_INET, "31.14.15.27", 0, 3); + result1 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + int1 = create_if(AF_INET, "31.14.15.92", 24, 1); + int2 = create_if(AF_INET, "31.14.15.27", 0, 2); + result2 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + if (!(result1 < result2)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + + /* TEST5 + * Compares interface pairs with bandwidth 1->10 and 1->3.
+ * The latter should be better as there is less discrepancy + * in bandwidth + */ + test_no++; + + int1 = create_if(AF_INET, "31.14.15.92", 24, 1); + int2 = create_if(AF_INET, "31.14.15.27", 0, 10); + result1 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + int1 = create_if(AF_INET, "31.14.15.92", 24, 1); + int2 = create_if(AF_INET, "31.14.15.27", 0, 3); + result2 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + if (!(result1 < result2)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + + /* TEST6 + * Compares interface pairs with bandwidth 5->5 and 10->10. + * The latter should be better as it has higher bandwidth + */ + test_no++; + + int1 = create_if(AF_INET, "31.14.15.92", 24, 5); + int2 = create_if(AF_INET, "31.14.15.27", 0, 5); + result1 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + int1 = create_if(AF_INET, "31.14.15.92", 24, 10); + int2 = create_if(AF_INET, "31.14.15.27", 0, 10); + result2 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + if (!(result1 < result2)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + + /* TEST7 + * Compares interface pairs with bandwidth 10->11 and 10->10. + * The latter should be better as there is no discrepancy in + * bandwidth. + */ + test_no++; + + int1 = create_if(AF_INET, "31.14.15.92", 24, 10); + int2 = create_if(AF_INET, "31.14.15.27", 0, 11); + result1 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + int1 = create_if(AF_INET, "31.14.15.92", 24, 10); + int2 = create_if(AF_INET, "31.14.15.27", 0, 10); + result2 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + if (!(result1 < result2)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + + /* TEST8 + * Compares interface pairs with bandwidth 10->11 and 11->10.
+ * These connections should be equivalent, as they have the same + * bandwidth and same discrepancy. + */ + test_no++; + + int1 = create_if(AF_INET, "31.14.15.92", 24, 10); + int2 = create_if(AF_INET, "31.14.15.27", 0, 11); + result1 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + int1 = create_if(AF_INET, "31.14.15.92", 24, 11); + int2 = create_if(AF_INET, "31.14.15.27", 0, 10); + result2 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + if (!(result1 == result2)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + + /* TEST9 + * Compares interface pairs with bandwidth 10->14 and 11->15. + * The latter should be better as it has higher bandwidth. + */ + test_no++; + + int1 = create_if(AF_INET, "31.14.15.92", 24, 10); + int2 = create_if(AF_INET, "31.14.15.27", 0, 14); + result1 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + int1 = create_if(AF_INET, "31.14.15.92", 24, 11); + int2 = create_if(AF_INET, "31.14.15.27", 0, 15); + result2 = run_single_test(int1, int2); + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + if (!(result1 < result2)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + + opal_output(0, "Finished Reachable Weighted Ranking Tests.
%d/%d successful", test_no-failed_no, test_no); + if (0 == failed_no) { + return 0; + } else { + return 1; + } +} + + +/* SUITE 3: + * Tests interface lists of various sizes + * to ensure no crashes occur and results + * are output in the proper order + */ +int loop_test() +{ + int test_no = 0; + int failed_no = 0; + + opal_list_t *if_list1, *if_list2; + opal_if_t *intf; + opal_reachable_t *results; + + int i; + + /* TEST1: + * Ensure opal_reachable doesn't crash + * when called with empty lists + */ + test_no++; + + if_list1 = OBJ_NEW(opal_list_t); + if_list2 = OBJ_NEW(opal_list_t); + + results = opal_reachable.reachable(if_list1, if_list2); + + OBJ_RELEASE(if_list1); + OBJ_RELEASE(if_list2); + + if (!(0 == results->num_local && 0 == results->num_remote)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(results); + + /* TEST2: + * Ensure opal_reachable doesn't crash + * when called with empty local list + */ + test_no++; + + if_list1 = OBJ_NEW(opal_list_t); + if_list2 = OBJ_NEW(opal_list_t); + + intf = create_if(AF_INET, "31.14.19.92", 24, 0); + opal_list_append(if_list2, &(intf->super)); + results = opal_reachable.reachable(if_list1, if_list2); + + OBJ_RELEASE(if_list1); + OBJ_RELEASE(if_list2); + + if (!(0 == results->num_local && 1 == results->num_remote)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(results); + + /* TEST3: + * Ensure opal_reachable doesn't crash + * when called with empty remote list + */ + test_no++; + + if_list1 = OBJ_NEW(opal_list_t); + if_list2 = OBJ_NEW(opal_list_t); + intf = create_if(AF_INET, "31.14.19.92", 24, 0); + opal_list_append(if_list1, &(intf->super)); + results = opal_reachable.reachable(if_list1, if_list2); + + OBJ_RELEASE(if_list1); + OBJ_RELEASE(if_list2); + + if (!(1 == results->num_local && 0 == results->num_remote)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(results); + + /* TEST4: + * Ensure opal_reachable doesn't
crash + * when the remote list has more elements + * than the local list + */ + test_no++; + + if_list1 = OBJ_NEW(opal_list_t); + if_list2 = OBJ_NEW(opal_list_t); + for (i = 0; i < 3; i++) { + intf = create_if(AF_INET, "31.14.19.92", 24, 0); + opal_list_append(if_list1, &(intf->super)); + } + for (i = 0; i < 14; i++) { + intf = create_if(AF_INET, "31.14.19.92", 24, 0); + opal_list_append(if_list2, &(intf->super)); + } + results = opal_reachable.reachable(if_list1, if_list2); + + OBJ_RELEASE(if_list1); + OBJ_RELEASE(if_list2); + + if (!(3 == results->num_local && 14 == results->num_remote)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(results); + + /* TEST5: + * Ensure opal_reachable doesn't crash + * when the local list has more elements + * than the remote list + */ + test_no++; + + if_list1 = OBJ_NEW(opal_list_t); + if_list2 = OBJ_NEW(opal_list_t); + for (i = 0; i < 14; i++) { + intf = create_if(AF_INET, "31.14.19.92", 24, 0); + opal_list_append(if_list1, &(intf->super)); + } + for (i = 0; i < 3; i++) { + intf = create_if(AF_INET, "31.14.19.92", 24, 0); + opal_list_append(if_list2, &(intf->super)); + } + results = opal_reachable.reachable(if_list1, if_list2); + + OBJ_RELEASE(if_list1); + OBJ_RELEASE(if_list2); + + if (!(14 == results->num_local && 3 == results->num_remote)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(results); + + /* TEST6: + * Ensure opal_reachable doesn't crash + * when the local list has the same number + * of elements as the remote list + */ + test_no++; + + if_list1 = OBJ_NEW(opal_list_t); + if_list2 = OBJ_NEW(opal_list_t); + for (i = 0; i < 27; i++) { + intf = create_if(AF_INET, "31.14.19.92", 24, 0); + opal_list_append(if_list1, &(intf->super)); + } + for (i = 0; i < 27; i++) { + intf = create_if(AF_INET, "31.14.19.92", 24, 0); + opal_list_append(if_list2, &(intf->super)); + } + results = opal_reachable.reachable(if_list1, if_list2); + + OBJ_RELEASE(if_list1); +
OBJ_RELEASE(if_list2); + + if (!(27 == results->num_local && 27 == results->num_remote)) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(results); + + /* TEST7: + * Tests proper ordering of results + * when same number of local interfaces and + * remote interfaces + */ + test_no++; + if_list1 = OBJ_NEW(opal_list_t); + if_list2 = OBJ_NEW(opal_list_t); + + intf = create_if(AF_INET, "31.14.19.92", 24, 0); + opal_list_append(if_list1, &(intf->super)); + intf = create_if(AF_INET, "31.14.20.92", 24, 0); + opal_list_append(if_list1, &(intf->super)); + + intf = create_if(AF_INET, "31.14.19.93", 24, 0); + opal_list_append(if_list2, &(intf->super)); + intf = create_if(AF_INET, "31.14.20.93", 24, 0); + opal_list_append(if_list2, &(intf->super)); + + results = opal_reachable.reachable(if_list1, if_list2); + + OBJ_RELEASE(if_list1); + OBJ_RELEASE(if_list2); + + if (!(CQ_PUBLIC_SAME_NETWORK == results->weights[0][0] && + CQ_PUBLIC_DIFFERENT_NETWORK == results->weights[0][1] && + CQ_PUBLIC_DIFFERENT_NETWORK == results->weights[1][0] && + CQ_PUBLIC_SAME_NETWORK == results->weights[1][1])) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(results); + + /* TEST8: + * Tests proper ordering of results + * when greater number of remote interfaces + * than local interfaces + */ + test_no++; + if_list1 = OBJ_NEW(opal_list_t); + if_list2 = OBJ_NEW(opal_list_t); + + intf = create_if(AF_INET, "31.14.19.92", 24, 0); + opal_list_append(if_list1, &(intf->super)); + intf = create_if(AF_INET, "31.14.20.92", 24, 0); + opal_list_append(if_list1, &(intf->super)); + + intf = create_if(AF_INET, "31.14.19.93", 24, 0); + opal_list_append(if_list2, &(intf->super)); + intf = create_if(AF_INET, "31.14.20.93", 24, 0); + opal_list_append(if_list2, &(intf->super)); + intf = create_if(AF_INET, "31.14.21.93", 24, 0); + opal_list_append(if_list2, &(intf->super)); + + results = opal_reachable.reachable(if_list1, if_list2); + + OBJ_RELEASE(if_list1);
+ OBJ_RELEASE(if_list2); + + if (!(CQ_PUBLIC_SAME_NETWORK == results->weights[0][0] && + CQ_PUBLIC_DIFFERENT_NETWORK == results->weights[0][1] && + CQ_PUBLIC_DIFFERENT_NETWORK == results->weights[0][2] && + CQ_PUBLIC_DIFFERENT_NETWORK == results->weights[1][0] && + CQ_PUBLIC_SAME_NETWORK == results->weights[1][1] && + CQ_PUBLIC_DIFFERENT_NETWORK == results->weights[1][2])) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(results); + + /* TEST9: + * Tests proper ordering of results + * when greater number of local interfaces + * than remote interfaces + */ + test_no++; + if_list1 = OBJ_NEW(opal_list_t); + if_list2 = OBJ_NEW(opal_list_t); + + intf = create_if(AF_INET, "31.14.19.92", 24, 0); + opal_list_append(if_list1, &(intf->super)); + intf = create_if(AF_INET, "31.14.20.92", 24, 0); + opal_list_append(if_list1, &(intf->super)); + intf = create_if(AF_INET, "31.14.21.93", 24, 0); + opal_list_append(if_list1, &(intf->super)); + + intf = create_if(AF_INET, "31.14.19.93", 24, 0); + opal_list_append(if_list2, &(intf->super)); + intf = create_if(AF_INET, "31.14.20.93", 24, 0); + opal_list_append(if_list2, &(intf->super)); + + results = opal_reachable.reachable(if_list1, if_list2); + + OBJ_RELEASE(if_list1); + OBJ_RELEASE(if_list2); + + if (!(CQ_PUBLIC_SAME_NETWORK == results->weights[0][0] && + CQ_PUBLIC_DIFFERENT_NETWORK == results->weights[0][1] && + CQ_PUBLIC_DIFFERENT_NETWORK == results->weights[1][0] && + CQ_PUBLIC_SAME_NETWORK == results->weights[1][1] && + CQ_PUBLIC_DIFFERENT_NETWORK == results->weights[2][0] && + CQ_PUBLIC_DIFFERENT_NETWORK == results->weights[2][1])) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(results); + + opal_output(0, "Finished Reachable Weighted Loop Tests.
%d/%d successful", test_no-failed_no, test_no); + + if (0 == failed_no) { + return 0; + } else { + return 1; + } + +} + + +/* SUITE 4: + * Test IPv6 + */ +int test_ipv6() +{ + int failed_no = 0; + +#if OPAL_ENABLE_IPV6 + opal_if_t *int1; + opal_if_t *int2; + int expected_result; + int result; + int test_no = 0; + + /* TEST1 + * Testing ipv6 same network with subnet mask + * /64 + */ + expected_result = CQ_PUBLIC_SAME_NETWORK; + int1 = create_if(AF_INET6, "2001:4860:4860:0:0:0:0:8888", 64, 0); + int2 = create_if(AF_INET6, "2001:4860:4860:0:0:0:0:8889", 8, 0); + result = run_single_test(int1, int2); + test_no++; + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + /* TEST2 + * Testing ipv6 different network with subnet mask + * /64 + */ + expected_result = CQ_PUBLIC_DIFFERENT_NETWORK; + int1 = create_if(AF_INET6, "2001:4860:4860:0:0:0:0:8888", 64, 0); + int2 = create_if(AF_INET6, "2001:4860:4860:1:0:0:0:8888", 0, 0); + result = run_single_test(int1, int2); + test_no++; + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + expected_result = CQ_PRIVATE_SAME_NETWORK; /* TEST3: two fe80:: addresses, both created with a /64 prefix */ + int1 = create_if(AF_INET6, "fe80::8888", 64, 0); + int2 = create_if(AF_INET6, "fe80::8889", 64, 0); + result = run_single_test(int1, int2); + test_no++; + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + expected_result = CQ_NO_CONNECTION; /* TEST4: a 2001:: address paired with an fe80:: address */ + int1 = create_if(AF_INET6, "2001::8888", 64, 0); + int2 = create_if(AF_INET6, "fe80::8889", 64, 0); + result = run_single_test(int1, int2); + test_no++; + if (result != expected_result) { + ++failed_no; + opal_output(0, "Failed test #%d", test_no); + } + OBJ_RELEASE(int1); + OBJ_RELEASE(int2); + + opal_output(0, "Finished Reachable Weighted IPv6 Tests.
%d/%d successful", test_no-failed_no, test_no); + + if (0 == failed_no) { + return 0; + } else { + return 1; + } +#else + opal_output(0, "No IPv6 support; skipped tests"); + return 0; +#endif +} + +int main(int argc, char **argv) +{ + int failed = 0; + int total = 0; + + opal_init(&argc, &argv); + opal_output(0, "\n\nBeginning Reachable Weighted tests\n\n"); + + total++; + if (ipv4_test()) { + failed++; + } + + total++; + if (ranking_test()) { + failed++; + } + + total++; + if (loop_test()) { + failed++; + } + + total++; + if (test_ipv6()) { + failed++; + } + + if (0 != failed) { + opal_output(0, "\n\nFailed %d/%d Reachable Weighted Test Suites :(\n\n", failed, total); + } else { + opal_output(0, "\n\nPassed %d/%d Reachable Weighted Test Suites :)\n\n", total, total); + } + + return failed; /* exit status is the number of failed suites; 0 when all passed */ +} diff --git a/opal/test/reachable/tests b/opal/test/reachable/tests new file mode 100755 index 00000000000..344bdf24a16 --- /dev/null +++ b/opal/test/reachable/tests @@ -0,0 +1,12 @@ +#!/bin/bash +# Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights +# reserved.
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +mpirun -np 1 --mca reachable netlink reachable_netlink +mpirun -np 1 --mca reachable weighted reachable_weighted diff --git a/opal/util/net.c b/opal/util/net.c index f8c71e1e4a8..06178e0c89b 100644 --- a/opal/util/net.c +++ b/opal/util/net.c @@ -358,6 +358,30 @@ opal_net_addr_isipv4public(const struct sockaddr *addr) return false; } +bool +opal_net_addr_isipv6linklocal(const struct sockaddr *addr) +{ + switch (addr->sa_family) { +#if OPAL_ENABLE_IPV6 + case AF_INET6: { + struct sockaddr_in6 if_addr; /* scoped here so non-IPv6 builds have no unused local */ + if_addr.sin6_family = AF_INET6; + if (1 != inet_pton(AF_INET6, "fe80::0000", &if_addr.sin6_addr)) { + return false; + } + return opal_net_samenetwork(addr, (struct sockaddr*)&if_addr, 64); + } +#endif + case AF_INET: + return false; + default: + opal_output (0, + "unhandled sa_family %d passed to opal_net_addr_isipv6linklocal\n", + addr->sa_family); + } + + return false; +} char* opal_net_get_hostname(const struct sockaddr *addr) diff --git a/opal/util/net.h b/opal/util/net.h index 27dad966625..aff84125730 100644 --- a/opal/util/net.h +++ b/opal/util/net.h @@ -112,6 +112,14 @@ OPAL_DECLSPEC bool opal_net_samenetwork(const struct sockaddr *addr1, */ OPAL_DECLSPEC bool opal_net_addr_isipv4public(const struct sockaddr *addr); +/** + * Is the given address a link-local IPv6 address? Returns false for IPv4 + * addresses. + * + * @param addr address as struct sockaddr + * @return true, if \c addr is IPv6 link-local, false otherwise + */ +OPAL_DECLSPEC bool opal_net_addr_isipv6linklocal(const struct sockaddr *addr); /** * Get string version of address