Skip to content

Commit 91a78ce

Browse files
author
Martin KaFai Lau
committed
Merge branch 'mptcp-fix-conflicts-between-mptcp-and-sockmap'
Jiayuan Chen says: ==================== mptcp: Fix conflicts between MPTCP and sockmap Overall, we encountered a warning [1] that can be triggered by running the selftest I provided. sockmap works by replacing sk_data_ready, recvmsg, sendmsg operations and implementing fast socket-level forwarding logic: 1. Users can obtain file descriptors through userspace socket()/accept() interfaces, then call BPF syscall to perform these replacements. 2. Users can also use the bpf_sock_hash_update helper (in sockops programs) to replace handlers when TCP connections enter ESTABLISHED state (BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB/BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) However, when combined with MPTCP, an issue arises: MPTCP creates subflow sk's and performs TCP handshakes, so the BPF program obtains subflow sk's and may incorrectly replace their sk_prot. We need to reject such operations. In patch 1, we set psock_update_sk_prot to NULL in the subflow's custom sk_prot. Additionally, if the server's listening socket has MPTCP enabled and the client's TCP also uses MPTCP, we should allow the combination of subflow and sockmap. This is because the latest Golang programs have enabled MPTCP for listening sockets by default [2]. For programs already using sockmap, upgrading Golang should not cause sockmap functionality to fail. Patch 2 prevents the WARNING from occurring. Despite these patches fixing stream corruption, users of sockmap must set GODEBUG=multipathtcp=0 to disable MPTCP until sockmap fully supports it. [1] truncated warning: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 388 at net/mptcp/protocol.c:68 mptcp_stream_accept+0x34c/0x380 Modules linked in: RIP: 0010:mptcp_stream_accept+0x34c/0x380 RSP: 0018:ffffc90000cf3cf8 EFLAGS: 00010202 PKRU: 55555554 Call Trace: <TASK> do_accept+0xeb/0x190 ? __x64_sys_pselect6+0x61/0x80 ? _raw_spin_unlock+0x12/0x30 ? alloc_fd+0x11e/0x190 __sys_accept4+0x8c/0x100 __x64_sys_accept+0x1f/0x30 x64_sys_call+0x202f/0x20f0 do_syscall_64+0x72/0x9a0 ? switch_fpu_return+0x60/0xf0 ? irqentry_exit_to_user_mode+0xdb/0x1e0 ? irqentry_exit+0x3f/0x50 ? clear_bhb_loop+0x50/0xa0 ? clear_bhb_loop+0x50/0xa0 ? clear_bhb_loop+0x50/0xa0 entry_SYSCALL_64_after_hwframe+0x76/0x7e </TASK> ---[ end trace 0000000000000000 ]--- [2]: https://go-review.googlesource.com/c/go/+/607715 ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Martin KaFai Lau <[email protected]>
2 parents e427054 + cb730e4 commit 91a78ce

File tree

4 files changed

+195
-2
lines changed

4 files changed

+195
-2
lines changed

net/mptcp/protocol.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,13 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
6161

6262
static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
6363
{
64+
unsigned short family = READ_ONCE(sk->sk_family);
65+
6466
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
65-
if (sk->sk_prot == &tcpv6_prot)
67+
if (family == AF_INET6)
6668
return &inet6_stream_ops;
6769
#endif
68-
WARN_ON_ONCE(sk->sk_prot != &tcp_prot);
70+
WARN_ON_ONCE(family != AF_INET);
6971
return &inet_stream_ops;
7072
}
7173

net/mptcp/subflow.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2144,6 +2144,10 @@ void __init mptcp_subflow_init(void)
21442144
tcp_prot_override = tcp_prot;
21452145
tcp_prot_override.release_cb = tcp_release_cb_override;
21462146
tcp_prot_override.diag_destroy = tcp_abort_override;
2147+
#ifdef CONFIG_BPF_SYSCALL
2148+
/* Disable sockmap processing for subflows */
2149+
tcp_prot_override.psock_update_sk_prot = NULL;
2150+
#endif
21472151

21482152
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
21492153
/* In struct mptcp_subflow_request_sock, we assume the TCP request sock
@@ -2180,6 +2184,10 @@ void __init mptcp_subflow_init(void)
21802184
tcpv6_prot_override = tcpv6_prot;
21812185
tcpv6_prot_override.release_cb = tcp_release_cb_override;
21822186
tcpv6_prot_override.diag_destroy = tcp_abort_override;
2187+
#ifdef CONFIG_BPF_SYSCALL
2188+
/* Disable sockmap processing for subflows */
2189+
tcpv6_prot_override.psock_update_sk_prot = NULL;
2190+
#endif
21832191
#endif
21842192

21852193
mptcp_diag_subflow_init(&subflow_ulp_ops);

tools/testing/selftests/bpf/prog_tests/mptcp.c

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@
66
#include <netinet/in.h>
77
#include <test_progs.h>
88
#include <unistd.h>
9+
#include <errno.h>
910
#include "cgroup_helpers.h"
1011
#include "network_helpers.h"
1112
#include "mptcp_sock.skel.h"
1213
#include "mptcpify.skel.h"
1314
#include "mptcp_subflow.skel.h"
15+
#include "mptcp_sockmap.skel.h"
1416

1517
#define NS_TEST "mptcp_ns"
1618
#define ADDR_1 "10.0.1.1"
@@ -436,6 +438,142 @@ static void test_subflow(void)
436438
close(cgroup_fd);
437439
}
438440

441+
/* Test sockmap on MPTCP server handling non-mp-capable clients. */
442+
static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel)
443+
{
444+
int listen_fd = -1, client_fd1 = -1, client_fd2 = -1;
445+
int server_fd1 = -1, server_fd2 = -1, sent, recvd;
446+
char snd[9] = "123456789";
447+
char rcv[10];
448+
449+
/* start server with MPTCP enabled */
450+
listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
451+
if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server"))
452+
return;
453+
454+
skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
455+
skel->bss->sk_index = 0;
456+
/* create client without MPTCP enabled */
457+
client_fd1 = connect_to_fd_opts(listen_fd, NULL);
458+
if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd"))
459+
goto end;
460+
461+
server_fd1 = accept(listen_fd, NULL, 0);
462+
skel->bss->sk_index = 1;
463+
client_fd2 = connect_to_fd_opts(listen_fd, NULL);
464+
if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd"))
465+
goto end;
466+
467+
server_fd2 = accept(listen_fd, NULL, 0);
468+
/* test normal redirect behavior: data sent by client_fd1 can be
469+
* received by client_fd2
470+
*/
471+
skel->bss->redirect_idx = 1;
472+
sent = send(client_fd1, snd, sizeof(snd), 0);
473+
if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:send(client_fd1)"))
474+
goto end;
475+
476+
/* try to recv more bytes to avoid truncation check */
477+
recvd = recv(client_fd2, rcv, sizeof(rcv), 0);
478+
if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)"))
479+
goto end;
480+
481+
end:
482+
if (client_fd1 >= 0)
483+
close(client_fd1);
484+
if (client_fd2 >= 0)
485+
close(client_fd2);
486+
if (server_fd1 >= 0)
487+
close(server_fd1);
488+
if (server_fd2 >= 0)
489+
close(server_fd2);
490+
close(listen_fd);
491+
}
492+
493+
/* Test sockmap rejection of MPTCP sockets - both server and client sides. */
494+
static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel)
495+
{
496+
int listen_fd = -1, server_fd = -1, client_fd1 = -1;
497+
int err, zero = 0;
498+
499+
/* start server with MPTCP enabled */
500+
listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
501+
if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server"))
502+
return;
503+
504+
skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
505+
skel->bss->sk_index = 0;
506+
/* create client with MPTCP enabled */
507+
client_fd1 = connect_to_fd(listen_fd, 0);
508+
if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1"))
509+
goto end;
510+
511+
/* bpf_sock_map_update() called from sockops should reject MPTCP sk */
512+
if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject"))
513+
goto end;
514+
515+
server_fd = accept(listen_fd, NULL, 0);
516+
err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
517+
&zero, &server_fd, BPF_NOEXIST);
518+
if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed"))
519+
goto end;
520+
521+
/* MPTCP client should also be disallowed */
522+
err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
523+
&zero, &client_fd1, BPF_NOEXIST);
524+
if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed"))
525+
goto end;
526+
end:
527+
if (client_fd1 >= 0)
528+
close(client_fd1);
529+
if (server_fd >= 0)
530+
close(server_fd);
531+
close(listen_fd);
532+
}
533+
534+
static void test_mptcp_sockmap(void)
535+
{
536+
struct mptcp_sockmap *skel;
537+
struct netns_obj *netns;
538+
int cgroup_fd, err;
539+
540+
cgroup_fd = test__join_cgroup("/mptcp_sockmap");
541+
if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap"))
542+
return;
543+
544+
skel = mptcp_sockmap__open_and_load();
545+
if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap"))
546+
goto close_cgroup;
547+
548+
skel->links.mptcp_sockmap_inject =
549+
bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd);
550+
if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap"))
551+
goto skel_destroy;
552+
553+
err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect),
554+
bpf_map__fd(skel->maps.sock_map),
555+
BPF_SK_SKB_STREAM_VERDICT, 0);
556+
if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
557+
goto skel_destroy;
558+
559+
netns = netns_new(NS_TEST, true);
560+
if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap"))
561+
goto skel_destroy;
562+
563+
if (endpoint_init("subflow") < 0)
564+
goto close_netns;
565+
566+
test_sockmap_with_mptcp_fallback(skel);
567+
test_sockmap_reject_mptcp(skel);
568+
569+
close_netns:
570+
netns_free(netns);
571+
skel_destroy:
572+
mptcp_sockmap__destroy(skel);
573+
close_cgroup:
574+
close(cgroup_fd);
575+
}
576+
439577
void test_mptcp(void)
440578
{
441579
if (test__start_subtest("base"))
@@ -444,4 +582,6 @@ void test_mptcp(void)
444582
test_mptcpify();
445583
if (test__start_subtest("subflow"))
446584
test_subflow();
585+
if (test__start_subtest("sockmap"))
586+
test_mptcp_sockmap();
447587
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include "bpf_tracing_net.h"
4+
5+
char _license[] SEC("license") = "GPL";
6+
7+
int sk_index;
8+
int redirect_idx;
9+
int trace_port;
10+
int helper_ret;
11+
struct {
12+
__uint(type, BPF_MAP_TYPE_SOCKMAP);
13+
__uint(key_size, sizeof(__u32));
14+
__uint(value_size, sizeof(__u32));
15+
__uint(max_entries, 100);
16+
} sock_map SEC(".maps");
17+
18+
SEC("sockops")
19+
int mptcp_sockmap_inject(struct bpf_sock_ops *skops)
20+
{
21+
struct bpf_sock *sk;
22+
23+
/* only accept specified connection */
24+
if (skops->local_port != trace_port ||
25+
skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
26+
return 1;
27+
28+
sk = skops->sk;
29+
if (!sk)
30+
return 1;
31+
32+
/* update sk handler */
33+
helper_ret = bpf_sock_map_update(skops, &sock_map, &sk_index, BPF_NOEXIST);
34+
35+
return 1;
36+
}
37+
38+
SEC("sk_skb/stream_verdict")
39+
int mptcp_sockmap_redirect(struct __sk_buff *skb)
40+
{
41+
/* redirect skb to the sk under sock_map[redirect_idx] */
42+
return bpf_sk_redirect_map(skb, &sock_map, redirect_idx, 0);
43+
}

0 commit comments

Comments
 (0)