Skip to content

Commit 83ce439

Browse files
amastbaumgleon99
authored and committed
Added default gateway support to the routing table reachability check (openucx#11000)
1 parent 48e9e47 commit 83ce439

File tree

7 files changed

+84
-21
lines changed

7 files changed

+84
-21
lines changed

src/ucs/sys/netlink.c

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ typedef struct {
2929
const struct sockaddr *sa_remote;
3030
int if_index;
3131
int found;
32+
int allow_default_gw; /* Allow matching default
33+
gateway routes */
3234
} ucs_netlink_route_info_t;
3335

3436

@@ -174,7 +176,7 @@ ucs_netlink_send_request(int protocol, unsigned short nlmsg_type,
174176

175177
static ucs_status_t
176178
ucs_netlink_get_route_info(const struct rtattr *rta, int len, int *if_index_p,
177-
const void **dst_in_addr)
179+
const void **dst_in_addr, size_t rtm_dst_len)
178180
{
179181
*if_index_p = -1;
180182
*dst_in_addr = NULL;
@@ -187,7 +189,10 @@ ucs_netlink_get_route_info(const struct rtattr *rta, int len, int *if_index_p,
187189
}
188190
}
189191

190-
if ((*if_index_p == -1) || (*dst_in_addr == NULL)) {
192+
if (/* Network interface index is not valid */
193+
(*if_index_p == -1) ||
194+
/* dst_in_addr required but not present */
195+
((rtm_dst_len != 0) && (*dst_in_addr == NULL))) {
191196
return UCS_ERR_INVALID_PARAM;
192197
}
193198

@@ -206,7 +211,8 @@ ucs_netlink_parse_rt_entry_cb(const struct nlmsghdr *nlh, void *arg)
206211
int khret;
207212

208213
if (ucs_netlink_get_route_info(RTM_RTA(rt_msg), RTM_PAYLOAD(nlh),
209-
&iface_index, &dst_in_addr) != UCS_OK) {
214+
&iface_index, &dst_in_addr,
215+
rt_msg->rtm_dst_len) != UCS_OK) {
210216
return UCS_INPROGRESS;
211217
}
212218

@@ -228,12 +234,14 @@ ucs_netlink_parse_rt_entry_cb(const struct nlmsghdr *nlh, void *arg)
228234
ucs_error("could not allocate route entry");
229235
return UCS_ERR_NO_MEMORY);
230236

231-
memset(&new_rule->dest, 0, sizeof(sizeof(new_rule->dest)));
237+
memset(&new_rule->dest, 0, sizeof(new_rule->dest));
232238
new_rule->dest.ss_family = rt_msg->rtm_family;
233-
if (UCS_OK != ucs_sockaddr_set_inet_addr((struct sockaddr *)&new_rule->dest,
234-
dst_in_addr)) {
235-
ucs_array_pop_back(iface_rules);
236-
return UCS_ERR_IO_ERROR;
239+
if (dst_in_addr != NULL) {
240+
if (ucs_sockaddr_set_inet_addr((struct sockaddr *)&new_rule->dest,
241+
dst_in_addr) != UCS_OK) {
242+
ucs_array_pop_back(iface_rules);
243+
return UCS_ERR_IO_ERROR;
244+
}
237245
}
238246

239247
new_rule->subnet_prefix_len = rt_msg->rtm_dst_len;
@@ -256,6 +264,13 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info)
256264

257265
iface_rules = &kh_val(&ucs_netlink_routing_table_cache, iter);
258266
ucs_array_for_each(curr_entry, iface_rules) {
267+
268+
if ((curr_entry->subnet_prefix_len == 0) && !info->allow_default_gw) {
269+
ucs_trace("iface_index=%d: skipping default gateway route",
270+
info->if_index);
271+
continue;
272+
}
273+
259274
if (ucs_sockaddr_is_same_subnet(
260275
info->sa_remote,
261276
(const struct sockaddr *)&curr_entry->dest,
@@ -266,7 +281,8 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info)
266281
}
267282
}
268283

269-
int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote)
284+
int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote,
285+
int allow_default_gw)
270286
{
271287
static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
272288
struct rtmsg rtm = {0};
@@ -285,9 +301,11 @@ int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote)
285301
NULL);
286302
}
287303

288-
info.if_index = if_index;
289-
info.sa_remote = sa_remote;
290-
info.found = 0;
304+
info.if_index = if_index;
305+
info.sa_remote = sa_remote;
306+
info.found = 0;
307+
info.allow_default_gw = allow_default_gw;
308+
291309
ucs_netlink_lookup_route(&info);
292310

293311
return info.found;

src/ucs/sys/netlink.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,17 @@ ucs_netlink_send_request(int protocol, unsigned short nlmsg_type,
4848
* Check whether a routing table rule exists for a given network
4949
* interface index and a destination address.
5050
*
51-
* @param [in] if_index A global index representing the network interface,
52-
as assigned by the system (e.g., obtained via
53-
if_nametoindex()).
54-
* @param [in] sa_remote Pointer to the destination address.
51+
* @param [in] if_index A global index representing the network
52+
interface, as assigned by the system
53+
(e.g., obtained via if_nametoindex()).
54+
* @param [in] sa_remote Pointer to the destination address.
55+
* @param [in] allow_default_gw Allow matching default gateway routes (1) or
56+
* only specific subnet routes (0).
5557
*
5658
* @return 1 if rule exists, or 0 otherwise.
5759
*/
58-
int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote);
60+
int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote,
61+
int allow_default_gw);
5962

6063
END_C_DECLS
6164

src/ucs/sys/sys.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <ucs/sys/checker.h>
1616
#include <ucs/sys/ptr_arith.h>
1717
#include <ucs/sys/string.h>
18+
#include <ucs/sys/sock.h>
1819
#include <ucs/sys/sys.h>
1920
#include <ucs/debug/log.h>
2021
#include <ucs/time/time.h>
@@ -177,6 +178,21 @@ ucs_status_t ucs_ifname_to_index(const char *ndev_name, unsigned *ndev_index_p)
177178
return UCS_OK;
178179
}
179180

181+
int ucs_netif_is_ipoib(const char *if_name)
182+
{
183+
struct ifreq ifr;
184+
ucs_status_t status;
185+
186+
status = ucs_netif_ioctl(if_name, SIOCGIFHWADDR, &ifr);
187+
if (status != UCS_OK) {
188+
/* If we can't determine the hardware type, assume it's not IPoIB */
189+
ucs_debug("failed to get hardware address for %s", if_name);
190+
return 0;
191+
}
192+
193+
return ifr.ifr_hwaddr.sa_family == ARPHRD_INFINIBAND;
194+
}
195+
180196
static uint64_t ucs_get_mac_address()
181197
{
182198
static uint64_t mac_address = 0;

src/ucs/sys/sys.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,16 @@ uint32_t ucs_file_checksum(const char *filename);
192192
ucs_status_t ucs_ifname_to_index(const char *ndev_name, unsigned *ndev_index_p);
193193

194194

195+
/**
196+
* Check if a network interface is an IPoIB (IP over InfiniBand) device.
197+
*
198+
* @param [in] if_name Network interface name to check.
199+
*
200+
* @return 1 if the interface is IPoIB, 0 otherwise.
201+
*/
202+
int ucs_netif_is_ipoib(const char *if_name);
203+
204+
195205
/**
196206
* Get a globally unique identifier of the machine running the current process.
197207
*/

src/uct/ib/base/ib_iface.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,7 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index,
710710
return 0;
711711
}
712712

713-
if (!ucs_netlink_route_exists(ndev_index, sa_remote)) {
713+
if (!ucs_netlink_route_exists(ndev_index, sa_remote, 1)) {
714714
/* try to use loopback interface for reachability check, because it may
715715
* be used for routing in case of an interface with VRF is configured
716716
* and a RoCE IP interface uses this VRF table for routing.
@@ -721,7 +721,7 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index,
721721
return 0;
722722
}
723723

724-
if (!ucs_netlink_route_exists(lo_ndev_index, sa_remote)) {
724+
if (!ucs_netlink_route_exists(lo_ndev_index, sa_remote, 1)) {
725725
uct_iface_fill_info_str_buf(params,
726726
"remote address %s is not routable "
727727
"neither by interface "UCT_IB_IFACE_FMT

src/uct/tcp/tcp.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,12 @@ typedef enum uct_tcp_device_addr_flags {
295295
* Device address is extended by additional information:
296296
* @ref uct_iface_local_addr_ns_t for loopback reachability
297297
*/
298-
UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK = UCS_BIT(0)
298+
UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK = UCS_BIT(0),
299+
300+
/**
301+
* Allow default gateway routes to satisfy the routing table reachability check
302+
*/
303+
UCT_TCP_DEVICE_ADDR_FLAG_ALLOW_DEFAULT_GW = UCS_BIT(1)
299304
} uct_tcp_device_addr_flags_t;
300305

301306

src/uct/tcp/tcp_iface.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <ucs/async/async.h>
1414
#include <ucs/sys/netlink.h>
1515
#include <ucs/sys/string.h>
16+
#include <ucs/sys/sys.h>
1617
#include <ucs/config/types.h>
1718
#include <sys/socket.h>
1819
#include <sys/poll.h>
@@ -138,6 +139,11 @@ static ucs_status_t uct_tcp_iface_get_device_address(uct_iface_h tl_iface,
138139
dev_addr->flags = 0;
139140
dev_addr->sa_family = saddr->sa_family;
140141

142+
/* Default gateway is not relevant for IPoIB interfaces */
143+
if (!ucs_netif_is_ipoib(iface->if_name)) {
144+
dev_addr->flags |= UCT_TCP_DEVICE_ADDR_FLAG_ALLOW_DEFAULT_GW;
145+
}
146+
141147
if (ucs_sockaddr_is_inaddr_loopback(saddr)) {
142148
dev_addr->flags |= UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK;
143149
memset(pack_ptr, 0, sizeof(uct_iface_local_addr_ns_t));
@@ -205,6 +211,7 @@ uct_tcp_iface_is_reachable_v2(const uct_iface_h tl_iface,
205211
struct sockaddr_storage remote_addr;
206212
char remote_addr_str[UCS_SOCKADDR_STRING_LEN];
207213
unsigned ndev_index;
214+
int allow_default_gw;
208215
ucs_status_t status;
209216

210217
if (!uct_iface_is_reachable_params_valid(
@@ -263,8 +270,12 @@ uct_tcp_iface_is_reachable_v2(const uct_iface_h tl_iface,
263270
return 0;
264271
}
265272

273+
allow_default_gw = !!(tcp_dev_addr->flags &
274+
UCT_TCP_DEVICE_ADDR_FLAG_ALLOW_DEFAULT_GW);
275+
266276
if (!ucs_netlink_route_exists(ndev_index,
267-
(const struct sockaddr *)&remote_addr)) {
277+
(const struct sockaddr *)&remote_addr,
278+
allow_default_gw)) {
268279
uct_iface_fill_info_str_buf(
269280
params, "no route to %s",
270281
ucs_sockaddr_str((const struct sockaddr *)&remote_addr,

0 commit comments

Comments
 (0)