From 71e990c5ace71d3a520f68c20203e8bae46e3034 Mon Sep 17 00:00:00 2001 From: Alma Mastbaum Date: Thu, 30 Oct 2025 18:59:17 +0200 Subject: [PATCH 1/9] UCS/NETLINK: Support default gateway rules --- src/ucs/sys/netlink.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/ucs/sys/netlink.c b/src/ucs/sys/netlink.c index 23885742425..6c62ad62341 100644 --- a/src/ucs/sys/netlink.c +++ b/src/ucs/sys/netlink.c @@ -174,7 +174,7 @@ ucs_netlink_send_request(int protocol, unsigned short nlmsg_type, static ucs_status_t ucs_netlink_get_route_info(const struct rtattr *rta, int len, int *if_index_p, - const void **dst_in_addr) + const void **dst_in_addr, unsigned char rtm_dst_len) { *if_index_p = -1; *dst_in_addr = NULL; @@ -182,7 +182,7 @@ ucs_netlink_get_route_info(const struct rtattr *rta, int len, int *if_index_p, for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) { if (rta->rta_type == RTA_OIF) { *if_index_p = *((const int *)RTA_DATA(rta)); - } else if (rta->rta_type == RTA_DST) { + } else if ((rta->rta_type == RTA_DST) || (rta->rta_type == RTA_GATEWAY)) { *dst_in_addr = RTA_DATA(rta); } } @@ -206,7 +206,8 @@ ucs_netlink_parse_rt_entry_cb(const struct nlmsghdr *nlh, void *arg) int khret; if (ucs_netlink_get_route_info(RTM_RTA(rt_msg), RTM_PAYLOAD(nlh), - &iface_index, &dst_in_addr) != UCS_OK) { + &iface_index, &dst_in_addr, + rt_msg->rtm_dst_len) != UCS_OK) { return UCS_INPROGRESS; } @@ -228,15 +229,17 @@ ucs_netlink_parse_rt_entry_cb(const struct nlmsghdr *nlh, void *arg) ucs_error("could not allocate route entry"); return UCS_ERR_NO_MEMORY); - memset(&new_rule->dest, 0, sizeof(sizeof(new_rule->dest))); + memset(&new_rule->dest, 0, sizeof(new_rule->dest)); new_rule->dest.ss_family = rt_msg->rtm_family; - if (UCS_OK != ucs_sockaddr_set_inet_addr((struct sockaddr *)&new_rule->dest, - dst_in_addr)) { - ucs_array_pop_back(iface_rules); - return UCS_ERR_IO_ERROR; - } + if (rt_msg->rtm_dst_len != 0) { + if (ucs_sockaddr_set_inet_addr((struct sockaddr *)&new_rule->dest, + dst_in_addr) != UCS_OK) { + ucs_array_pop_back(iface_rules); + return UCS_ERR_IO_ERROR; + } - new_rule->subnet_prefix_len = rt_msg->rtm_dst_len; + new_rule->subnet_prefix_len = rt_msg->rtm_dst_len; + } return UCS_INPROGRESS; } @@ -256,7 +259,8 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info) iface_rules = &kh_val(&ucs_netlink_routing_table_cache, iter); ucs_array_for_each(curr_entry, iface_rules) { - if (ucs_sockaddr_is_same_subnet( + if ((curr_entry->subnet_prefix_len == 0) || + ucs_sockaddr_is_same_subnet( info->sa_remote, (const struct sockaddr *)&curr_entry->dest, curr_entry->subnet_prefix_len)) { From aa29ae9f8f087940af362772aacb54b5b2170c80 Mon Sep 17 00:00:00 2001 From: Alma Mastbaum Date: Mon, 10 Nov 2025 12:55:17 +0200 Subject: [PATCH 2/9] UCS/NETLINK: don't rely on the default GW if the remote device's link layer is IB --- src/ucs/sys/netlink.c | 33 +++++++++++++++++++++++++++------ src/ucs/sys/netlink.h | 19 ++++++++++++++++++- src/ucs/sys/sys.c | 16 ++++++++++++++++ src/ucs/sys/sys.h | 10 ++++++++++ src/uct/ib/base/ib_iface.c | 5 +++-- src/uct/tcp/tcp.h | 6 +++++- src/uct/tcp/tcp_iface.c | 13 ++++++++++++- 7 files changed, 91 insertions(+), 11 deletions(-) diff --git a/src/ucs/sys/netlink.c b/src/ucs/sys/netlink.c index 6c62ad62341..44bd5eace93 100644 --- a/src/ucs/sys/netlink.c +++ b/src/ucs/sys/netlink.c @@ -29,6 +29,8 @@ typedef struct { const struct sockaddr *sa_remote; int if_index; int found; + int allow_default_gw; /* Allow matching default + gateway routes */ } ucs_netlink_route_info_t; @@ -248,6 +250,7 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info) { ucs_netlink_rt_rules_t *iface_rules; ucs_netlink_route_entry_t *curr_entry; + int is_default_gw; khiter_t iter; iter = kh_get(ucs_netlink_rt_cache, &ucs_netlink_routing_table_cache, @@ -259,8 +262,17 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info) iface_rules = &kh_val(&ucs_netlink_routing_table_cache, iter); ucs_array_for_each(curr_entry, iface_rules) { - if ((curr_entry->subnet_prefix_len == 0) || - ucs_sockaddr_is_same_subnet( + is_default_gw = (curr_entry->subnet_prefix_len == 0); + + /* Skip default gateway routes if not allowed (e.g., for + IPoIB remote devices) */ + if (is_default_gw && !info->allow_default_gw) { + ucs_trace("iface_index=%d: skipping default gateway route", + info->if_index); + continue; + } + + if (is_default_gw || ucs_sockaddr_is_same_subnet( info->sa_remote, (const struct sockaddr *)&curr_entry->dest, curr_entry->subnet_prefix_len)) { @@ -270,7 +282,8 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info) } } -int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote) +int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote, + int allow_default_gw) { static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER; struct rtmsg rtm = {0}; @@ -289,10 +302,18 @@ int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote) NULL); } - info.if_index = if_index; - info.sa_remote = sa_remote; - info.found = 0; + info.if_index = if_index; + info.sa_remote = sa_remote; + info.found = 0; + info.allow_default_gw = allow_default_gw; + ucs_netlink_lookup_route(&info); return info.found; } + +int ucs_netlink_ethernet_device_route_exists(int if_index, + const struct sockaddr *sa_remote) +{ + return ucs_netlink_route_exists(if_index, sa_remote, 1); +} diff --git a/src/ucs/sys/netlink.h b/src/ucs/sys/netlink.h index eabcc265b1d..bf458e2e38a 100644 --- a/src/ucs/sys/netlink.h +++ b/src/ucs/sys/netlink.h @@ -48,6 +48,22 @@ ucs_netlink_send_request(int protocol, unsigned short nlmsg_type, * Check whether a routing table rule exists for a given network * interface name and a destination address. * + * @param [in] if_index A global index representing the network + interface, as assigned by the system + (e.g., obtained via if_nametoindex()). + * @param [in] sa_remote Pointer to the destination address. + * @param [in] allow_default_gw Allow matching default gateway routes (1) or + * only specific subnet routes (0). + * + * @return 1 if rule exists, or 0 otherwise. + */ +int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote, + int allow_default_gw); + +/** + * Check whether a routing table rule exists for a given network + * interface name and a destination address (allows default gateway). + * * @param [in] if_index A global index representing the network interface, as assigned by the system (e.g., obtained via if_nametoindex()). @@ -55,7 +71,8 @@ ucs_netlink_send_request(int protocol, unsigned short nlmsg_type, * * @return 1 if rule exists, or 0 otherwise. */ -int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote); +int ucs_netlink_ethernet_device_route_exists(int if_index, + const struct sockaddr *sa_remote); END_C_DECLS diff --git a/src/ucs/sys/sys.c b/src/ucs/sys/sys.c index aa042425909..9903f845e03 100644 --- a/src/ucs/sys/sys.c +++ b/src/ucs/sys/sys.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -177,6 +178,21 @@ ucs_status_t ucs_ifname_to_index(const char *ndev_name, unsigned *ndev_index_p) return UCS_OK; } +int ucs_netif_is_ipoib(const char *if_name) +{ + struct ifreq ifr; + ucs_status_t status; + + status = ucs_netif_ioctl(if_name, SIOCGIFHWADDR, &ifr); + if (status != UCS_OK) { + /* If we can't determine the hardware type, assume it's not IPoIB */ + ucs_debug("failed to get hardware address for %s", if_name); + return 0; + } + + return (ifr.ifr_hwaddr.sa_family == ARPHRD_INFINIBAND); +} + static uint64_t ucs_get_mac_address() { static uint64_t mac_address = 0; diff --git a/src/ucs/sys/sys.h b/src/ucs/sys/sys.h index 986826660a7..9ad9b8991e4 100644 --- a/src/ucs/sys/sys.h +++ b/src/ucs/sys/sys.h @@ -192,6 +192,16 @@ uint32_t ucs_file_checksum(const char *filename); ucs_status_t ucs_ifname_to_index(const char *ndev_name, unsigned *ndev_index_p); +/** + * Check if a network interface is an IPoIB (IP over InfiniBand) device. + * + * @param [in] if_name Network interface name to check. + * + * @return 1 if the interface is IPoIB, 0 otherwise. + */ +int ucs_netif_is_ipoib(const char *if_name); + + /** * Get a globally unique identifier of the machine running the current process. */ diff --git a/src/uct/ib/base/ib_iface.c b/src/uct/ib/base/ib_iface.c index 3fb49df0964..27707cb74d9 100644 --- a/src/uct/ib/base/ib_iface.c +++ b/src/uct/ib/base/ib_iface.c @@ -710,7 +710,7 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index, return 0; } - if (!ucs_netlink_route_exists(ndev_index, sa_remote)) { + if (!ucs_netlink_ethernet_device_route_exists(ndev_index, sa_remote)) { /* try to use loopback interface for reachability check, because it may * be used for routing in case of an interface with VRF is configured * and a RoCE IP interface uses this VRF table for routing. @@ -721,7 +721,8 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index, return 0; } - if (!ucs_netlink_route_exists(lo_ndev_index, sa_remote)) { + if (!ucs_netlink_ethernet_device_route_exists(lo_ndev_index, + sa_remote)) { uct_iface_fill_info_str_buf(params, "remote address %s is not routable " "neither by interface "UCT_IB_IFACE_FMT diff --git a/src/uct/tcp/tcp.h b/src/uct/tcp/tcp.h index e96591b7cee..7d53d33f38b 100644 --- a/src/uct/tcp/tcp.h +++ b/src/uct/tcp/tcp.h @@ -295,7 +295,11 @@ typedef enum uct_tcp_device_addr_flags { * Device address is extended by additional information: * @ref uct_iface_local_addr_ns_t for loopback reachability */ - UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK = UCS_BIT(0) + UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK = UCS_BIT(0), + /** + * Interface is IPoIB (IP over InfiniBand) + */ + UCT_TCP_DEVICE_ADDR_FLAG_LINK_LAYER_IB = UCS_BIT(1) } uct_tcp_device_addr_flags_t; diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c index 2aff8d7895d..33e5f8990a9 100644 --- a/src/uct/tcp/tcp_iface.c +++ b/src/uct/tcp/tcp_iface.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -138,6 +139,10 @@ static ucs_status_t uct_tcp_iface_get_device_address(uct_iface_h tl_iface, dev_addr->flags = 0; dev_addr->sa_family = saddr->sa_family; + if (ucs_netif_is_ipoib(iface->if_name)) { + dev_addr->flags |= UCT_TCP_DEVICE_ADDR_FLAG_LINK_LAYER_IB; + } + if (ucs_sockaddr_is_inaddr_loopback(saddr)) { dev_addr->flags |= UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK; memset(pack_ptr, 0, sizeof(uct_iface_local_addr_ns_t)); @@ -205,6 +210,7 @@ uct_tcp_iface_is_reachable_v2(const uct_iface_h tl_iface, struct sockaddr_storage remote_addr; char remote_addr_str[UCS_SOCKADDR_STRING_LEN]; unsigned ndev_index; + int allow_default_gw; ucs_status_t status; if (!uct_iface_is_reachable_params_valid( @@ -263,8 +269,13 @@ uct_tcp_iface_is_reachable_v2(const uct_iface_h tl_iface, return 0; } + /* Default gateway is not relevant for IPoIB interfaces */ + allow_default_gw = !(tcp_dev_addr->flags & + UCT_TCP_DEVICE_ADDR_FLAG_LINK_LAYER_IB); + if (!ucs_netlink_route_exists(ndev_index, - (const struct sockaddr *)&remote_addr)) { + (const struct sockaddr *)&remote_addr, + allow_default_gw)) { uct_iface_fill_info_str_buf( params, "no route to %s", ucs_sockaddr_str((const struct sockaddr *)&remote_addr, From 37f31280ff76ebfc02a8b6109febab8a285c1638 Mon Sep 17 00:00:00 2001 From: Alma Mastbaum Date: Mon, 10 Nov 2025 15:17:01 +0200 Subject: [PATCH 3/9] UCS/NETLINK: coderabbitai fix --- src/ucs/sys/netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ucs/sys/netlink.c b/src/ucs/sys/netlink.c index 44bd5eace93..418920c3640 100644 --- a/src/ucs/sys/netlink.c +++ b/src/ucs/sys/netlink.c @@ -239,10 +239,10 @@ ucs_netlink_parse_rt_entry_cb(const struct nlmsghdr *nlh, void *arg) ucs_array_pop_back(iface_rules); return UCS_ERR_IO_ERROR; } - - new_rule->subnet_prefix_len = rt_msg->rtm_dst_len; } + new_rule->subnet_prefix_len = rt_msg->rtm_dst_len; + return UCS_INPROGRESS; } From 41c9c1fd3c804b0bfb5a0f9236748e826d55197b Mon Sep 17 00:00:00 2001 From: Alma Mastbaum Date: Tue, 11 Nov 2025 14:41:43 +0200 Subject: [PATCH 4/9] UCS/NETLINK: code review fixes --- src/ucs/sys/netlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ucs/sys/netlink.c b/src/ucs/sys/netlink.c index 418920c3640..f14d1bcef5a 100644 --- a/src/ucs/sys/netlink.c +++ b/src/ucs/sys/netlink.c @@ -184,12 +184,12 @@ ucs_netlink_get_route_info(const struct rtattr *rta, int len, int *if_index_p, for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) { if (rta->rta_type == RTA_OIF) { *if_index_p = *((const int *)RTA_DATA(rta)); - } else if ((rta->rta_type == RTA_DST) || (rta->rta_type == RTA_GATEWAY)) { + } else if (rta->rta_type == RTA_DST) { *dst_in_addr = RTA_DATA(rta); } } - if ((*if_index_p == -1) || (*dst_in_addr == NULL)) { + if ((*if_index_p == -1) || ((*dst_in_addr == NULL) && (rtm_dst_len != 0))) { return UCS_ERR_INVALID_PARAM; } @@ -272,7 +272,7 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info) continue; } - if (is_default_gw || ucs_sockaddr_is_same_subnet( + if (ucs_sockaddr_is_same_subnet( info->sa_remote, (const struct sockaddr *)&curr_entry->dest, curr_entry->subnet_prefix_len)) { From d2f227e9aee8399409393e6bb0466debaa2cf33f Mon Sep 17 00:00:00 2001 From: Alma Mastbaum Date: Mon, 17 Nov 2025 13:19:22 +0200 Subject: [PATCH 5/9] UCS/NETLINK: code review fixes --- src/ucs/sys/netlink.c | 10 ++++------ src/ucs/sys/netlink.h | 7 ++++--- src/ucs/sys/sys.c | 2 +- src/uct/ib/base/ib_iface.c | 6 +++--- src/uct/tcp/tcp.h | 1 + 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/ucs/sys/netlink.c b/src/ucs/sys/netlink.c index f14d1bcef5a..1886b9213b2 100644 --- a/src/ucs/sys/netlink.c +++ b/src/ucs/sys/netlink.c @@ -233,7 +233,7 @@ ucs_netlink_parse_rt_entry_cb(const struct nlmsghdr *nlh, void *arg) memset(&new_rule->dest, 0, sizeof(new_rule->dest)); new_rule->dest.ss_family = rt_msg->rtm_family; - if (rt_msg->rtm_dst_len != 0) { + if (dst_in_addr != NULL) { if (ucs_sockaddr_set_inet_addr((struct sockaddr *)&new_rule->dest, dst_in_addr) != UCS_OK) { ucs_array_pop_back(iface_rules); @@ -250,7 +250,6 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info) { ucs_netlink_rt_rules_t *iface_rules; ucs_netlink_route_entry_t *curr_entry; - int is_default_gw; khiter_t iter; iter = kh_get(ucs_netlink_rt_cache, &ucs_netlink_routing_table_cache, @@ -262,11 +261,10 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info) iface_rules = &kh_val(&ucs_netlink_routing_table_cache, iter); ucs_array_for_each(curr_entry, iface_rules) { - is_default_gw = (curr_entry->subnet_prefix_len == 0); /* Skip default gateway routes if not allowed (e.g., for IPoIB remote devices) */ - if (is_default_gw && !info->allow_default_gw) { + if ((curr_entry->subnet_prefix_len == 0) && !info->allow_default_gw) { ucs_trace("iface_index=%d: skipping default gateway route", info->if_index); continue; @@ -312,8 +310,8 @@ int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote, return info.found; } -int ucs_netlink_ethernet_device_route_exists(int if_index, - const struct sockaddr *sa_remote) +int ucs_netlink_route_exists_allow_default(int if_index, + const struct sockaddr *sa_remote) { return ucs_netlink_route_exists(if_index, sa_remote, 1); } diff --git a/src/ucs/sys/netlink.h b/src/ucs/sys/netlink.h index bf458e2e38a..510da96f969 100644 --- a/src/ucs/sys/netlink.h +++ b/src/ucs/sys/netlink.h @@ -62,7 +62,8 @@ int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote, /** * Check whether a routing table rule exists for a given network - * interface name and a destination address (allows default gateway). + * interface name and a destination address, while allowing default gateway + * routes. * * @param [in] if_index A global index representing the network interface, as assigned by the system (e.g., obtained via @@ -71,8 +72,8 @@ int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote, * * @return 1 if rule exists, or 0 otherwise. */ -int ucs_netlink_ethernet_device_route_exists(int if_index, - const struct sockaddr *sa_remote); +int ucs_netlink_route_exists_allow_default(int if_index, + const struct sockaddr *sa_remote); END_C_DECLS diff --git a/src/ucs/sys/sys.c b/src/ucs/sys/sys.c index 9903f845e03..7c8b976b095 100644 --- a/src/ucs/sys/sys.c +++ b/src/ucs/sys/sys.c @@ -190,7 +190,7 @@ int ucs_netif_is_ipoib(const char *if_name) return 0; } - return (ifr.ifr_hwaddr.sa_family == ARPHRD_INFINIBAND); + return ifr.ifr_hwaddr.sa_family == ARPHRD_INFINIBAND; } static uint64_t ucs_get_mac_address() diff --git a/src/uct/ib/base/ib_iface.c b/src/uct/ib/base/ib_iface.c index 27707cb74d9..29f1052319f 100644 --- a/src/uct/ib/base/ib_iface.c +++ b/src/uct/ib/base/ib_iface.c @@ -710,7 +710,7 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index, return 0; } - if (!ucs_netlink_ethernet_device_route_exists(ndev_index, sa_remote)) { + if (!ucs_netlink_route_exists_allow_default(ndev_index, sa_remote)) { /* try to use loopback interface for reachability check, because it may * be used for routing in case of an interface with VRF is configured * and a RoCE IP interface uses this VRF table for routing. @@ -721,8 +721,8 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index, return 0; } - if (!ucs_netlink_ethernet_device_route_exists(lo_ndev_index, - sa_remote)) { + if (!ucs_netlink_route_exists_allow_default(lo_ndev_index, + sa_remote)) { uct_iface_fill_info_str_buf(params, "remote address %s is not routable " "neither by interface "UCT_IB_IFACE_FMT diff --git a/src/uct/tcp/tcp.h b/src/uct/tcp/tcp.h index 7d53d33f38b..ee71cef9d95 100644 --- a/src/uct/tcp/tcp.h +++ b/src/uct/tcp/tcp.h @@ -296,6 +296,7 @@ typedef enum uct_tcp_device_addr_flags { * @ref uct_iface_local_addr_ns_t for loopback reachability */ UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK = UCS_BIT(0), + /** * Interface is IPoIB (IP over InfiniBand) */ From a806b274a537a4c02c1fe05e9806518e55c2b40f Mon Sep 17 00:00:00 2001 From: Alma Mastbaum Date: Mon, 17 Nov 2025 23:22:50 +0200 Subject: [PATCH 6/9] UCS/NETLINK: code review fixes --- src/ucs/sys/netlink.c | 6 ------ src/ucs/sys/netlink.h | 15 --------------- src/uct/ib/base/ib_iface.c | 5 ++--- 3 files changed, 2 insertions(+), 24 deletions(-) diff --git a/src/ucs/sys/netlink.c b/src/ucs/sys/netlink.c index 1886b9213b2..c760456c948 100644 --- a/src/ucs/sys/netlink.c +++ b/src/ucs/sys/netlink.c @@ -309,9 +309,3 @@ int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote, return info.found; } - -int ucs_netlink_route_exists_allow_default(int if_index, - const struct sockaddr *sa_remote) -{ - return ucs_netlink_route_exists(if_index, sa_remote, 1); -} diff --git a/src/ucs/sys/netlink.h b/src/ucs/sys/netlink.h index 510da96f969..5d1103899aa 100644 --- a/src/ucs/sys/netlink.h +++ b/src/ucs/sys/netlink.h @@ -60,21 +60,6 @@ ucs_netlink_send_request(int protocol, unsigned short nlmsg_type, int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote, int allow_default_gw); -/** - * Check whether a routing table rule exists for a given network - * interface name and a destination address, while allowing default gateway - * routes. - * - * @param [in] if_index A global index representing the network interface, - as assigned by the system (e.g., obtained via - if_nametoindex()). - * @param [in] sa_remote Pointer to the destination address. - * - * @return 1 if rule exists, or 0 otherwise. - */ -int ucs_netlink_route_exists_allow_default(int if_index, - const struct sockaddr *sa_remote); - END_C_DECLS #endif /* UCS_NETLINK_H */ diff --git a/src/uct/ib/base/ib_iface.c b/src/uct/ib/base/ib_iface.c index 29f1052319f..55c847c8eda 100644 --- a/src/uct/ib/base/ib_iface.c +++ b/src/uct/ib/base/ib_iface.c @@ -710,7 +710,7 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index, return 0; } - if (!ucs_netlink_route_exists_allow_default(ndev_index, sa_remote)) { + if (!ucs_netlink_route_exists(ndev_index, sa_remote, 1)) { /* try to use loopback interface for reachability check, because it may * be used for routing in case of an interface with VRF is configured * and a RoCE IP interface uses this VRF table for routing. @@ -721,8 +721,7 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index, return 0; } - if (!ucs_netlink_route_exists_allow_default(lo_ndev_index, - sa_remote)) { + if (!ucs_netlink_route_exists(lo_ndev_index, sa_remote, 1)) { uct_iface_fill_info_str_buf(params, "remote address %s is not routable " "neither by interface "UCT_IB_IFACE_FMT From 51056339965033903401f4c089a9c817025a9068 Mon Sep 17 00:00:00 2001 From: Alma Mastbaum Date: Tue, 18 Nov 2025 13:23:02 +0200 Subject: [PATCH 7/9] UCS/NETLINK: use a designated flag to allow using the default GW --- src/ucs/sys/netlink.c | 2 ++ src/uct/tcp/tcp.h | 6 +++--- src/uct/tcp/tcp_iface.c | 8 ++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/ucs/sys/netlink.c b/src/ucs/sys/netlink.c index c760456c948..d7940230a15 100644 --- a/src/ucs/sys/netlink.c +++ b/src/ucs/sys/netlink.c @@ -189,6 +189,8 @@ ucs_netlink_get_route_info(const struct rtattr *rta, int len, int *if_index_p, } } + /* In some kernel versions, RTA_DST attribute is not set for default + * gateway routes */ if ((*if_index_p == -1) || ((*dst_in_addr == NULL) && (rtm_dst_len != 0))) { return UCS_ERR_INVALID_PARAM; } diff --git a/src/uct/tcp/tcp.h b/src/uct/tcp/tcp.h index ee71cef9d95..73df0299862 100644 --- a/src/uct/tcp/tcp.h +++ b/src/uct/tcp/tcp.h @@ -295,12 +295,12 @@ typedef enum uct_tcp_device_addr_flags { * Device address is extended by additional information: * @ref uct_iface_local_addr_ns_t for loopback reachability */ - UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK = UCS_BIT(0), + UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK = UCS_BIT(0), /** - * Interface is IPoIB (IP over InfiniBand) + * Allow communication with default gateway */ - UCT_TCP_DEVICE_ADDR_FLAG_LINK_LAYER_IB = UCS_BIT(1) + UCT_TCP_DEVICE_ADDR_FLAG_ALLOW_DEFAULT_GW = UCS_BIT(1) } uct_tcp_device_addr_flags_t; diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c index 33e5f8990a9..6b9e491ec44 100644 --- a/src/uct/tcp/tcp_iface.c +++ b/src/uct/tcp/tcp_iface.c @@ -139,8 +139,8 @@ static ucs_status_t uct_tcp_iface_get_device_address(uct_iface_h tl_iface, dev_addr->flags = 0; dev_addr->sa_family = saddr->sa_family; - if (ucs_netif_is_ipoib(iface->if_name)) { - dev_addr->flags |= UCT_TCP_DEVICE_ADDR_FLAG_LINK_LAYER_IB; + if (!ucs_netif_is_ipoib(iface->if_name)) { + dev_addr->flags |= UCT_TCP_DEVICE_ADDR_FLAG_ALLOW_DEFAULT_GW; } if (ucs_sockaddr_is_inaddr_loopback(saddr)) { @@ -270,8 +270,8 @@ uct_tcp_iface_is_reachable_v2(const uct_iface_h tl_iface, } /* Default gateway is not relevant for IPoIB interfaces */ - allow_default_gw = !(tcp_dev_addr->flags & - UCT_TCP_DEVICE_ADDR_FLAG_LINK_LAYER_IB); + allow_default_gw = !!(tcp_dev_addr->flags & + UCT_TCP_DEVICE_ADDR_FLAG_ALLOW_DEFAULT_GW); if (!ucs_netlink_route_exists(ndev_index, (const struct sockaddr *)&remote_addr, From eb97742ce9ee7c7a403d8e2b036cc1dd12b151db Mon Sep 17 00:00:00 2001 From: Alma Mastbaum Date: Tue, 18 Nov 2025 17:15:37 +0200 Subject: [PATCH 8/9] UCS/NETLINK: minor fixes --- src/ucs/sys/netlink.c | 3 +-- src/uct/tcp/tcp_iface.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/ucs/sys/netlink.c b/src/ucs/sys/netlink.c index d7940230a15..664a6089f8e 100644 --- a/src/ucs/sys/netlink.c +++ b/src/ucs/sys/netlink.c @@ -189,8 +189,7 @@ ucs_netlink_get_route_info(const struct rtattr *rta, int len, int *if_index_p, } } - /* In some kernel versions, RTA_DST attribute is not set for default - * gateway routes */ + /* dst_in_addr is required only for non-default gateway routes */ if ((*if_index_p == -1) || ((*dst_in_addr == NULL) && (rtm_dst_len != 0))) { return UCS_ERR_INVALID_PARAM; } diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c index 6b9e491ec44..cc2cf010c32 100644 --- a/src/uct/tcp/tcp_iface.c +++ b/src/uct/tcp/tcp_iface.c @@ -139,6 +139,7 @@ static ucs_status_t uct_tcp_iface_get_device_address(uct_iface_h tl_iface, dev_addr->flags = 0; dev_addr->sa_family = saddr->sa_family; + /* Default gateway is not relevant for IPoIB interfaces */ if (!ucs_netif_is_ipoib(iface->if_name)) { dev_addr->flags |= UCT_TCP_DEVICE_ADDR_FLAG_ALLOW_DEFAULT_GW; } @@ -269,7 +270,6 @@ uct_tcp_iface_is_reachable_v2(const uct_iface_h tl_iface, return 0; } - /* Default gateway is not relevant for IPoIB interfaces */ allow_default_gw = !!(tcp_dev_addr->flags & UCT_TCP_DEVICE_ADDR_FLAG_ALLOW_DEFAULT_GW); From fbcadb48ad87e7d356d15ff322650fb57fa31b12 Mon Sep 17 00:00:00 2001 From: Alma Mastbaum Date: Tue, 18 Nov 2025 21:45:38 +0200 Subject: [PATCH 9/9] UCS/NETLINK: some fixes --- src/ucs/sys/netlink.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ucs/sys/netlink.c b/src/ucs/sys/netlink.c index 664a6089f8e..c3bc40e516d 100644 --- a/src/ucs/sys/netlink.c +++ b/src/ucs/sys/netlink.c @@ -176,7 +176,7 @@ ucs_netlink_send_request(int protocol, unsigned short nlmsg_type, static ucs_status_t ucs_netlink_get_route_info(const struct rtattr *rta, int len, int *if_index_p, - const void **dst_in_addr, unsigned char rtm_dst_len) + const void **dst_in_addr, size_t rtm_dst_len) { *if_index_p = -1; *dst_in_addr = NULL; @@ -189,8 +189,10 @@ ucs_netlink_get_route_info(const struct rtattr *rta, int len, int *if_index_p, } } - /* dst_in_addr is required only for non-default gateway routes */ - if ((*if_index_p == -1) || ((*dst_in_addr == NULL) && (rtm_dst_len != 0))) { + if (/* Network interface index is not valid */ + (*if_index_p == -1) || + /* dst_in_addr required but not present */ + ((rtm_dst_len != 0) && (*dst_in_addr == NULL))) { return UCS_ERR_INVALID_PARAM; } @@ -263,8 +265,6 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info) iface_rules = &kh_val(&ucs_netlink_routing_table_cache, iter); ucs_array_for_each(curr_entry, iface_rules) { - /* Skip default gateway routes if not allowed (e.g., for - IPoIB remote devices) */ if ((curr_entry->subnet_prefix_len == 0) && !info->allow_default_gw) { ucs_trace("iface_index=%d: skipping default gateway route", info->if_index);