Skip to content

Commit 45926a9

Browse files
jrfastab authored and smb49 committed
bpf, sockmap: Pass skb ownership through read_skb
BugLink: https://bugs.launchpad.net/bugs/2028979 [ Upstream commit 78fa0d6 ] The read_skb hook calls consume_skb() now, but this means that if the recv_actor program wants to use the skb, it needs to increment the refcount so that the consume_skb() doesn't kfree the sk_buff. This is problematic because in some error cases under memory pressure we may need to linearize the sk_buff from sk_psock_skb_ingress_enqueue(). Then we get this call chain: skb_linearize() -> __pskb_pull_tail() -> pskb_expand_head() -> BUG_ON(skb_shared(skb)). Because we incremented the users refcnt from sk_psock_verdict_recv(), we reach the BUG_ON with refcnt > 1 and trip it. To fix this, let's simply pass ownership of the sk_buff through the read_skb call. Then we can drop the consume from the read_skb handlers and assume the verdict recv does any required kfree. Bug found while testing in our CI, which runs in VMs that hit memory constraints rather regularly. William tested the TCP read_skb handlers. [ 106.536188] ------------[ cut here ]------------ [ 106.536197] kernel BUG at net/core/skbuff.c:1693! 
[ 106.536479] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 106.536726] CPU: 3 PID: 1495 Comm: curl Not tainted 5.19.0-rc5 #1 [ 106.537023] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.16.0-1 04/01/2014 [ 106.537467] RIP: 0010:pskb_expand_head+0x269/0x330 [ 106.538585] RSP: 0018:ffffc90000138b68 EFLAGS: 00010202 [ 106.538839] RAX: 000000000000003f RBX: ffff8881048940e8 RCX: 0000000000000a20 [ 106.539186] RDX: 0000000000000002 RSI: 0000000000000000 RDI: ffff8881048940e8 [ 106.539529] RBP: ffffc90000138be8 R08: 00000000e161fd1a R09: 0000000000000000 [ 106.539877] R10: 0000000000000018 R11: 0000000000000000 R12: ffff8881048940e8 [ 106.540222] R13: 0000000000000003 R14: 0000000000000000 R15: ffff8881048940e8 [ 106.540568] FS: 00007f277dde9f00(0000) GS:ffff88813bd80000(0000) knlGS:0000000000000000 [ 106.540954] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 106.541227] CR2: 00007f277eeede64 CR3: 000000000ad3e000 CR4: 00000000000006e0 [ 106.541569] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 106.541915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 106.542255] Call Trace: [ 106.542383] <IRQ> [ 106.542487] __pskb_pull_tail+0x4b/0x3e0 [ 106.542681] skb_ensure_writable+0x85/0xa0 [ 106.542882] sk_skb_pull_data+0x18/0x20 [ 106.543084] bpf_prog_b517a65a242018b0_bpf_skskb_http_verdict+0x3a9/0x4aa9 [ 106.543536] ? migrate_disable+0x66/0x80 [ 106.543871] sk_psock_verdict_recv+0xe2/0x310 [ 106.544258] ? 
sk_psock_write_space+0x1f0/0x1f0 [ 106.544561] tcp_read_skb+0x7b/0x120 [ 106.544740] tcp_data_queue+0x904/0xee0 [ 106.544931] tcp_rcv_established+0x212/0x7c0 [ 106.545142] tcp_v4_do_rcv+0x174/0x2a0 [ 106.545326] tcp_v4_rcv+0xe70/0xf60 [ 106.545500] ip_protocol_deliver_rcu+0x48/0x290 [ 106.545744] ip_local_deliver_finish+0xa7/0x150 Fixes: 04919be ("tcp: Introduce tcp_read_skb()") Reported-by: William Findlay <will@isovalent.com> Signed-off-by: John Fastabend <john.fastabend@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Tested-by: William Findlay <will@isovalent.com> Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com> Link: https://lore.kernel.org/bpf/20230523025618.113937-2-john.fastabend@gmail.com Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Kamal Mostafa <kamal@canonical.com> Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
1 parent e0d3cb2 commit 45926a9

File tree

4 files changed

+4
-13
lines changed

4 files changed

+4
-13
lines changed

net/core/skmsg.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,8 +1180,6 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
11801180
int ret = __SK_DROP;
11811181
int len = skb->len;
11821182

1183-
skb_get(skb);
1184-
11851183
rcu_read_lock();
11861184
psock = sk_psock(sk);
11871185
if (unlikely(!psock)) {

net/ipv4/tcp.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1772,7 +1772,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
17721772
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
17731773
tcp_flags = TCP_SKB_CB(skb)->tcp_flags;
17741774
used = recv_actor(sk, skb);
1775-
consume_skb(skb);
17761775
if (used < 0) {
17771776
if (!copied)
17781777
copied = used;

net/ipv4/udp.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1813,7 +1813,7 @@ EXPORT_SYMBOL(__skb_recv_udp);
18131813
int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
18141814
{
18151815
struct sk_buff *skb;
1816-
int err, copied;
1816+
int err;
18171817

18181818
try_again:
18191819
skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
@@ -1832,10 +1832,7 @@ int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
18321832
}
18331833

18341834
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
1835-
copied = recv_actor(sk, skb);
1836-
kfree_skb(skb);
1837-
1838-
return copied;
1835+
return recv_actor(sk, skb);
18391836
}
18401837
EXPORT_SYMBOL(udp_read_skb);
18411838

net/unix/af_unix.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2552,18 +2552,15 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
25522552
{
25532553
struct unix_sock *u = unix_sk(sk);
25542554
struct sk_buff *skb;
2555-
int err, copied;
2555+
int err;
25562556

25572557
mutex_lock(&u->iolock);
25582558
skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
25592559
mutex_unlock(&u->iolock);
25602560
if (!skb)
25612561
return err;
25622562

2563-
copied = recv_actor(sk, skb);
2564-
kfree_skb(skb);
2565-
2566-
return copied;
2563+
return recv_actor(sk, skb);
25672564
}
25682565

25692566
/*

0 commit comments

Comments
 (0)