Skip to content

Commit aa29ae9

Browse files
committed
UCS/NETLINK: don't rely on the default GW if the remote device's link layer is IB
1 parent 71e990c commit aa29ae9

File tree

7 files changed

+91
-11
lines changed

7 files changed

+91
-11
lines changed

src/ucs/sys/netlink.c

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ typedef struct {
2929
const struct sockaddr *sa_remote;
3030
int if_index;
3131
int found;
32+
int allow_default_gw; /* Allow matching default
33+
gateway routes */
3234
} ucs_netlink_route_info_t;
3335

3436

@@ -248,6 +250,7 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info)
248250
{
249251
ucs_netlink_rt_rules_t *iface_rules;
250252
ucs_netlink_route_entry_t *curr_entry;
253+
int is_default_gw;
251254
khiter_t iter;
252255

253256
iter = kh_get(ucs_netlink_rt_cache, &ucs_netlink_routing_table_cache,
@@ -259,8 +262,17 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info)
259262

260263
iface_rules = &kh_val(&ucs_netlink_routing_table_cache, iter);
261264
ucs_array_for_each(curr_entry, iface_rules) {
262-
if ((curr_entry->subnet_prefix_len == 0) ||
263-
ucs_sockaddr_is_same_subnet(
265+
is_default_gw = (curr_entry->subnet_prefix_len == 0);
266+
267+
/* Skip default gateway routes if not allowed (e.g., for
268+
IPoIB remote devices) */
269+
if (is_default_gw && !info->allow_default_gw) {
270+
ucs_trace("iface_index=%d: skipping default gateway route",
271+
info->if_index);
272+
continue;
273+
}
274+
275+
if (is_default_gw || ucs_sockaddr_is_same_subnet(
264276
info->sa_remote,
265277
(const struct sockaddr *)&curr_entry->dest,
266278
curr_entry->subnet_prefix_len)) {
@@ -270,7 +282,8 @@ static void ucs_netlink_lookup_route(ucs_netlink_route_info_t *info)
270282
}
271283
}
272284

273-
int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote)
285+
int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote,
286+
int allow_default_gw)
274287
{
275288
static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
276289
struct rtmsg rtm = {0};
@@ -289,10 +302,18 @@ int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote)
289302
NULL);
290303
}
291304

292-
info.if_index = if_index;
293-
info.sa_remote = sa_remote;
294-
info.found = 0;
305+
info.if_index = if_index;
306+
info.sa_remote = sa_remote;
307+
info.found = 0;
308+
info.allow_default_gw = allow_default_gw;
309+
295310
ucs_netlink_lookup_route(&info);
296311

297312
return info.found;
298313
}
314+
315+
int ucs_netlink_ethernet_device_route_exists(int if_index,
316+
const struct sockaddr *sa_remote)
317+
{
318+
return ucs_netlink_route_exists(if_index, sa_remote, 1);
319+
}

src/ucs/sys/netlink.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,31 @@ ucs_netlink_send_request(int protocol, unsigned short nlmsg_type,
4848
* Check whether a routing table rule exists for a given network
4949
* interface index and a destination address.
5050
*
51+
* @param [in] if_index A global index representing the network
52+
interface, as assigned by the system
53+
(e.g., obtained via if_nametoindex()).
54+
* @param [in] sa_remote Pointer to the destination address.
55+
* @param [in] allow_default_gw Allow matching default gateway routes (1) or
56+
* only specific subnet routes (0).
57+
*
58+
* @return 1 if rule exists, or 0 otherwise.
59+
*/
60+
int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote,
61+
int allow_default_gw);
62+
63+
/**
64+
* Check whether a routing table rule exists for a given network
65+
* interface index and a destination address (default gateway routes match).
66+
*
5167
* @param [in] if_index A global index representing the network interface,
5268
as assigned by the system (e.g., obtained via
5369
if_nametoindex()).
5470
* @param [in] sa_remote Pointer to the destination address.
5571
*
5672
* @return 1 if rule exists, or 0 otherwise.
5773
*/
58-
int ucs_netlink_route_exists(int if_index, const struct sockaddr *sa_remote);
74+
int ucs_netlink_ethernet_device_route_exists(int if_index,
75+
const struct sockaddr *sa_remote);
5976

6077
END_C_DECLS
6178

src/ucs/sys/sys.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <ucs/sys/checker.h>
1616
#include <ucs/sys/ptr_arith.h>
1717
#include <ucs/sys/string.h>
18+
#include <ucs/sys/sock.h>
1819
#include <ucs/sys/sys.h>
1920
#include <ucs/debug/log.h>
2021
#include <ucs/time/time.h>
@@ -177,6 +178,21 @@ ucs_status_t ucs_ifname_to_index(const char *ndev_name, unsigned *ndev_index_p)
177178
return UCS_OK;
178179
}
179180

181+
int ucs_netif_is_ipoib(const char *if_name)
182+
{
183+
struct ifreq ifr;
184+
ucs_status_t status;
185+
186+
status = ucs_netif_ioctl(if_name, SIOCGIFHWADDR, &ifr);
187+
if (status != UCS_OK) {
188+
/* If we can't determine the hardware type, assume it's not IPoIB */
189+
ucs_debug("failed to get hardware address for %s", if_name);
190+
return 0;
191+
}
192+
193+
return (ifr.ifr_hwaddr.sa_family == ARPHRD_INFINIBAND);
194+
}
195+
180196
static uint64_t ucs_get_mac_address()
181197
{
182198
static uint64_t mac_address = 0;

src/ucs/sys/sys.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,16 @@ uint32_t ucs_file_checksum(const char *filename);
192192
ucs_status_t ucs_ifname_to_index(const char *ndev_name, unsigned *ndev_index_p);
193193

194194

195+
/**
196+
* Check if a network interface is an IPoIB (IP over InfiniBand) device.
197+
*
198+
* @param [in] if_name Network interface name to check.
199+
*
200+
* @return 1 if the interface is IPoIB, 0 otherwise (including when the hardware type cannot be queried).
201+
*/
202+
int ucs_netif_is_ipoib(const char *if_name);
203+
204+
195205
/**
196206
* Get a globally unique identifier of the machine running the current process.
197207
*/

src/uct/ib/base/ib_iface.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,7 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index,
710710
return 0;
711711
}
712712

713-
if (!ucs_netlink_route_exists(ndev_index, sa_remote)) {
713+
if (!ucs_netlink_ethernet_device_route_exists(ndev_index, sa_remote)) {
714714
/* try to use loopback interface for reachability check, because it may
715715
* be used for routing in case of an interface with VRF is configured
716716
* and a RoCE IP interface uses this VRF table for routing.
@@ -721,7 +721,8 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index,
721721
return 0;
722722
}
723723

724-
if (!ucs_netlink_route_exists(lo_ndev_index, sa_remote)) {
724+
if (!ucs_netlink_ethernet_device_route_exists(lo_ndev_index,
725+
sa_remote)) {
725726
uct_iface_fill_info_str_buf(params,
726727
"remote address %s is not routable "
727728
"neither by interface "UCT_IB_IFACE_FMT

src/uct/tcp/tcp.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,11 @@ typedef enum uct_tcp_device_addr_flags {
295295
* Device address is extended by additional information:
296296
* @ref uct_iface_local_addr_ns_t for loopback reachability
297297
*/
298-
UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK = UCS_BIT(0)
298+
UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK = UCS_BIT(0),
299+
/**
300+
* Interface is IPoIB (IP over InfiniBand)
301+
*/
302+
UCT_TCP_DEVICE_ADDR_FLAG_LINK_LAYER_IB = UCS_BIT(1)
299303
} uct_tcp_device_addr_flags_t;
300304

301305

src/uct/tcp/tcp_iface.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <ucs/async/async.h>
1414
#include <ucs/sys/netlink.h>
1515
#include <ucs/sys/string.h>
16+
#include <ucs/sys/sys.h>
1617
#include <ucs/config/types.h>
1718
#include <sys/socket.h>
1819
#include <sys/poll.h>
@@ -138,6 +139,10 @@ static ucs_status_t uct_tcp_iface_get_device_address(uct_iface_h tl_iface,
138139
dev_addr->flags = 0;
139140
dev_addr->sa_family = saddr->sa_family;
140141

142+
if (ucs_netif_is_ipoib(iface->if_name)) {
143+
dev_addr->flags |= UCT_TCP_DEVICE_ADDR_FLAG_LINK_LAYER_IB;
144+
}
145+
141146
if (ucs_sockaddr_is_inaddr_loopback(saddr)) {
142147
dev_addr->flags |= UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK;
143148
memset(pack_ptr, 0, sizeof(uct_iface_local_addr_ns_t));
@@ -205,6 +210,7 @@ uct_tcp_iface_is_reachable_v2(const uct_iface_h tl_iface,
205210
struct sockaddr_storage remote_addr;
206211
char remote_addr_str[UCS_SOCKADDR_STRING_LEN];
207212
unsigned ndev_index;
213+
int allow_default_gw;
208214
ucs_status_t status;
209215

210216
if (!uct_iface_is_reachable_params_valid(
@@ -263,8 +269,13 @@ uct_tcp_iface_is_reachable_v2(const uct_iface_h tl_iface,
263269
return 0;
264270
}
265271

272+
/* Do not match default gateway routes when the remote device is IPoIB */
273+
allow_default_gw = !(tcp_dev_addr->flags &
274+
UCT_TCP_DEVICE_ADDR_FLAG_LINK_LAYER_IB);
275+
266276
if (!ucs_netlink_route_exists(ndev_index,
267-
(const struct sockaddr *)&remote_addr)) {
277+
(const struct sockaddr *)&remote_addr,
278+
allow_default_gw)) {
268279
uct_iface_fill_info_str_buf(
269280
params, "no route to %s",
270281
ucs_sockaddr_str((const struct sockaddr *)&remote_addr,

0 commit comments

Comments
 (0)