Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 30 additions & 12 deletions src/ucs/sys/netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ typedef struct {
const struct sockaddr *sa_remote;
int if_index;
int found;
int allow_default_gw; /* Allow matching default
gateway routes */
} ucs_netlink_route_info_t;


Expand Down Expand Up @@ -174,7 +176,7 @@ ucs_netlink_send_request(int protocol, unsigned short nlmsg_type,

static ucs_status_t
ucs_netlink_get_route_info(const struct rtattr *rta, int len, int *if_index_p,
const void **dst_in_addr)
const void **dst_in_addr, size_t rtm_dst_len)
{
*if_index_p = -1;
*dst_in_addr = NULL;
Expand All @@ -187,7 +189,10 @@ ucs_netlink_get_route_info(const struct rtattr *rta, int len, int *if_index_p,
}
}

if ((*if_index_p == -1) || (*dst_in_addr == NULL)) {
if (/* Network interface index is not valid */
(*if_index_p == -1) ||
/* dst_in_addr required but not present */
((rtm_dst_len != 0) && (*dst_in_addr == NULL))) {
return UCS_ERR_INVALID_PARAM;
}

Expand All @@ -206,7 +211,8 @@ ucs_netlink_parse_rt_entry_cb(const struct nlmsghdr *nlh, void *arg)
int khret;

if (ucs_netlink_get_route_info(RTM_RTA(rt_msg), RTM_PAYLOAD(nlh),
&iface_index, &dst_in_addr) != UCS_OK) {
&iface_index, &dst_in_addr,
rt_msg->rtm_dst_len) != UCS_OK) {
return UCS_INPROGRESS;
}

Expand All @@ -228,12 +234,14 @@ ucs_netlink_parse_rt_entry_cb(const struct nlmsghdr *nlh, void *arg)
ucs_error("could not allocate route entry");
return UCS_ERR_NO_MEMORY);

memset(&new_rule->dest, 0, sizeof(sizeof(new_rule->dest)));
memset(&new_rule->dest, 0, sizeof(new_rule->dest));
new_rule->dest.ss_family = rt_msg->rtm_family;
if (UCS_OK != ucs_sockaddr_set_inet_addr((struct sockaddr *)&new_rule->dest,
dst_in_addr)) {
ucs_array_pop_back(iface_rules);
return UCS_ERR_IO_ERROR;
if (dst_in_addr != NULL) {
if (ucs_sockaddr_set_inet_addr((struct sockaddr *)&new_rule->dest,
dst_in_addr) != UCS_OK) {
ucs_array_pop_back(iface_rules);
return UCS_ERR_IO_ERROR;
}
}

new_rule->subnet_prefix_len = rt_msg->rtm_dst_len;
Expand All @@ -256,6 +264,13 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info)

iface_rules = &kh_val(&ucs_netlink_routing_table_cache, iter);
ucs_array_for_each(curr_entry, iface_rules) {

if ((curr_entry->subnet_prefix_len == 0) && !info->allow_default_gw) {
ucs_trace("iface_index=%d: skipping default gateway route",
info->if_index);
continue;
}

if (ucs_sockaddr_is_same_subnet(
info->sa_remote,
(const struct sockaddr *)&curr_entry->dest,
Expand All @@ -266,7 +281,8 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info)
}
}

int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote)
int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote,
int allow_default_gw)
{
static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
struct rtmsg rtm = {0};
Expand All @@ -285,9 +301,11 @@ int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote)
NULL);
}

info.if_index = if_index;
info.sa_remote = sa_remote;
info.found = 0;
info.if_index = if_index;
info.sa_remote = sa_remote;
info.found = 0;
info.allow_default_gw = allow_default_gw;

ucs_netlink_lookup_route(&info);

return info.found;
Expand Down
13 changes: 8 additions & 5 deletions src/ucs/sys/netlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,17 @@ ucs_netlink_send_request(int protocol, unsigned short nlmsg_type,
* Check whether a routing table rule exists for a given network
* interface index and a destination address.
*
* @param [in] if_index A global index representing the network interface,
as assigned by the system (e.g., obtained via
if_nametoindex()).
* @param [in] sa_remote Pointer to the destination address.
* @param [in] if_index A global index representing the network
interface, as assigned by the system
(e.g., obtained via if_nametoindex()).
* @param [in] sa_remote Pointer to the destination address.
* @param [in] allow_default_gw Allow matching default gateway routes (1) or
* only specific subnet routes (0).
*
* @return 1 if rule exists, or 0 otherwise.
*/
int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote);
int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote,
int allow_default_gw);

END_C_DECLS

Expand Down
16 changes: 16 additions & 0 deletions src/ucs/sys/sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <ucs/sys/checker.h>
#include <ucs/sys/ptr_arith.h>
#include <ucs/sys/string.h>
#include <ucs/sys/sock.h>
#include <ucs/sys/sys.h>
#include <ucs/debug/log.h>
#include <ucs/time/time.h>
Expand Down Expand Up @@ -177,6 +178,21 @@ ucs_status_t ucs_ifname_to_index(const char *ndev_name, unsigned *ndev_index_p)
return UCS_OK;
}

int ucs_netif_is_ipoib(const char *if_name)
{
struct ifreq ifr;
ucs_status_t status;

status = ucs_netif_ioctl(if_name, SIOCGIFHWADDR, &ifr);
if (status != UCS_OK) {
/* If we can't determine the hardware type, assume it's not IPoIB */
ucs_debug("failed to get hardware address for %s", if_name);
return 0;
}

return ifr.ifr_hwaddr.sa_family == ARPHRD_INFINIBAND;
}

static uint64_t ucs_get_mac_address()
{
static uint64_t mac_address = 0;
Expand Down
10 changes: 10 additions & 0 deletions src/ucs/sys/sys.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,16 @@ uint32_t ucs_file_checksum(const char *filename);
ucs_status_t ucs_ifname_to_index(const char *ndev_name, unsigned *ndev_index_p);


/**
* Check if a network interface is an IPoIB (IP over InfiniBand) device.
*
* @param [in] if_name Network interface name to check.
*
* @return 1 if the interface is IPoIB, 0 otherwise.
*/
int ucs_netif_is_ipoib(const char *if_name);


/**
* Get a globally unique identifier of the machine running the current process.
*/
Expand Down
4 changes: 2 additions & 2 deletions src/uct/ib/base/ib_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index,
return 0;
}

if (!ucs_netlink_route_exists(ndev_index, sa_remote)) {
if (!ucs_netlink_route_exists(ndev_index, sa_remote, 1)) {
/* try to use loopback interface for reachability check, because it may
* be used for routing in case of an interface with VRF is configured
* and a RoCE IP interface uses this VRF table for routing.
Expand All @@ -721,7 +721,7 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index,
return 0;
}

if (!ucs_netlink_route_exists(lo_ndev_index, sa_remote)) {
if (!ucs_netlink_route_exists(lo_ndev_index, sa_remote, 1)) {
uct_iface_fill_info_str_buf(params,
"remote address %s is not routable "
"neither by interface "UCT_IB_IFACE_FMT
Expand Down
7 changes: 6 additions & 1 deletion src/uct/tcp/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,12 @@ typedef enum uct_tcp_device_addr_flags {
* Device address is extended by additional information:
* @ref uct_iface_local_addr_ns_t for loopback reachability
*/
UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK = UCS_BIT(0)
UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK = UCS_BIT(0),

/**
* Allow communication with default gateway
*/
UCT_TCP_DEVICE_ADDR_FLAG_ALLOW_DEFAULT_GW = UCS_BIT(1)
} uct_tcp_device_addr_flags_t;


Expand Down
13 changes: 12 additions & 1 deletion src/uct/tcp/tcp_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <ucs/async/async.h>
#include <ucs/sys/netlink.h>
#include <ucs/sys/string.h>
#include <ucs/sys/sys.h>
#include <ucs/config/types.h>
#include <sys/socket.h>
#include <sys/poll.h>
Expand Down Expand Up @@ -138,6 +139,11 @@ static ucs_status_t uct_tcp_iface_get_device_address(uct_iface_h tl_iface,
dev_addr->flags = 0;
dev_addr->sa_family = saddr->sa_family;

/* Default gateway is not relevant for IPoIB interfaces */
if (!ucs_netif_is_ipoib(iface->if_name)) {
dev_addr->flags |= UCT_TCP_DEVICE_ADDR_FLAG_ALLOW_DEFAULT_GW;
}

if (ucs_sockaddr_is_inaddr_loopback(saddr)) {
dev_addr->flags |= UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK;
memset(pack_ptr, 0, sizeof(uct_iface_local_addr_ns_t));
Expand Down Expand Up @@ -205,6 +211,7 @@ uct_tcp_iface_is_reachable_v2(const uct_iface_h tl_iface,
struct sockaddr_storage remote_addr;
char remote_addr_str[UCS_SOCKADDR_STRING_LEN];
unsigned ndev_index;
int allow_default_gw;
ucs_status_t status;

if (!uct_iface_is_reachable_params_valid(
Expand Down Expand Up @@ -263,8 +270,12 @@ uct_tcp_iface_is_reachable_v2(const uct_iface_h tl_iface,
return 0;
}

allow_default_gw = !!(tcp_dev_addr->flags &
UCT_TCP_DEVICE_ADDR_FLAG_ALLOW_DEFAULT_GW);

if (!ucs_netlink_route_exists(ndev_index,
(const struct sockaddr *)&remote_addr)) {
(const struct sockaddr *)&remote_addr,
allow_default_gw)) {
uct_iface_fill_info_str_buf(
params, "no route to %s",
ucs_sockaddr_str((const struct sockaddr *)&remote_addr,
Expand Down
Loading