Skip to content

Commit 78fa0d6

Browse files
jrfastabborkmann
authored andcommitted
bpf, sockmap: Pass skb ownership through read_skb
The read_skb hook calls consume_skb() now, but this means that if the recv_actor program wants to use the skb it needs to inc the ref cnt so that the consume_skb() doesn't kfree the sk_buff. This is problematic because in some error cases under memory pressure we may need to linearize the sk_buff from sk_psock_skb_ingress_enqueue(). Then we get this, skb_linearize() __pskb_pull_tail() pskb_expand_head() BUG_ON(skb_shared(skb)) Because we incremented users refcnt from sk_psock_verdict_recv() we hit the bug on with refcnt > 1 and trip it. To fix lets simply pass ownership of the sk_buff through the skb_read call. Then we can drop the consume from read_skb handlers and assume the verdict recv does any required kfree. Bug found while testing in our CI which runs in VMs that hit memory constraints rather regularly. William tested TCP read_skb handlers. [ 106.536188] ------------[ cut here ]------------ [ 106.536197] kernel BUG at net/core/skbuff.c:1693! [ 106.536479] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 106.536726] CPU: 3 PID: 1495 Comm: curl Not tainted 5.19.0-rc5 #1 [ 106.537023] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.16.0-1 04/01/2014 [ 106.537467] RIP: 0010:pskb_expand_head+0x269/0x330 [ 106.538585] RSP: 0018:ffffc90000138b68 EFLAGS: 00010202 [ 106.538839] RAX: 000000000000003f RBX: ffff8881048940e8 RCX: 0000000000000a20 [ 106.539186] RDX: 0000000000000002 RSI: 0000000000000000 RDI: ffff8881048940e8 [ 106.539529] RBP: ffffc90000138be8 R08: 00000000e161fd1a R09: 0000000000000000 [ 106.539877] R10: 0000000000000018 R11: 0000000000000000 R12: ffff8881048940e8 [ 106.540222] R13: 0000000000000003 R14: 0000000000000000 R15: ffff8881048940e8 [ 106.540568] FS: 00007f277dde9f00(0000) GS:ffff88813bd80000(0000) knlGS:0000000000000000 [ 106.540954] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 106.541227] CR2: 00007f277eeede64 CR3: 000000000ad3e000 CR4: 00000000000006e0 [ 106.541569] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 106.541915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 106.542255] Call Trace: [ 106.542383] <IRQ> [ 106.542487] __pskb_pull_tail+0x4b/0x3e0 [ 106.542681] skb_ensure_writable+0x85/0xa0 [ 106.542882] sk_skb_pull_data+0x18/0x20 [ 106.543084] bpf_prog_b517a65a242018b0_bpf_skskb_http_verdict+0x3a9/0x4aa9 [ 106.543536] ? migrate_disable+0x66/0x80 [ 106.543871] sk_psock_verdict_recv+0xe2/0x310 [ 106.544258] ? sk_psock_write_space+0x1f0/0x1f0 [ 106.544561] tcp_read_skb+0x7b/0x120 [ 106.544740] tcp_data_queue+0x904/0xee0 [ 106.544931] tcp_rcv_established+0x212/0x7c0 [ 106.545142] tcp_v4_do_rcv+0x174/0x2a0 [ 106.545326] tcp_v4_rcv+0xe70/0xf60 [ 106.545500] ip_protocol_deliver_rcu+0x48/0x290 [ 106.545744] ip_local_deliver_finish+0xa7/0x150 Fixes: 04919be ("tcp: Introduce tcp_read_skb()") Reported-by: William Findlay <[email protected]> Signed-off-by: John Fastabend <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]> Tested-by: William Findlay <[email protected]> Reviewed-by: Jakub Sitnicki <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent b34ffb0 commit 78fa0d6

File tree

5 files changed

+5
-17
lines changed

5 files changed

+5
-17
lines changed

net/core/skmsg.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,8 +1183,6 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
11831183
int ret = __SK_DROP;
11841184
int len = skb->len;
11851185

1186-
skb_get(skb);
1187-
11881186
rcu_read_lock();
11891187
psock = sk_psock(sk);
11901188
if (unlikely(!psock)) {

net/ipv4/tcp.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1773,7 +1773,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
17731773
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
17741774
tcp_flags = TCP_SKB_CB(skb)->tcp_flags;
17751775
used = recv_actor(sk, skb);
1776-
consume_skb(skb);
17771776
if (used < 0) {
17781777
if (!copied)
17791778
copied = used;

net/ipv4/udp.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1818,7 +1818,7 @@ EXPORT_SYMBOL(__skb_recv_udp);
18181818
int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
18191819
{
18201820
struct sk_buff *skb;
1821-
int err, copied;
1821+
int err;
18221822

18231823
try_again:
18241824
skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
@@ -1837,10 +1837,7 @@ int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
18371837
}
18381838

18391839
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
1840-
copied = recv_actor(sk, skb);
1841-
kfree_skb(skb);
1842-
1843-
return copied;
1840+
return recv_actor(sk, skb);
18441841
}
18451842
EXPORT_SYMBOL(udp_read_skb);
18461843

net/unix/af_unix.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2553,18 +2553,15 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
25532553
{
25542554
struct unix_sock *u = unix_sk(sk);
25552555
struct sk_buff *skb;
2556-
int err, copied;
2556+
int err;
25572557

25582558
mutex_lock(&u->iolock);
25592559
skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
25602560
mutex_unlock(&u->iolock);
25612561
if (!skb)
25622562
return err;
25632563

2564-
copied = recv_actor(sk, skb);
2565-
kfree_skb(skb);
2566-
2567-
return copied;
2564+
return recv_actor(sk, skb);
25682565
}
25692566

25702567
/*

net/vmw_vsock/virtio_transport_common.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,7 +1441,6 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
14411441
struct sock *sk = sk_vsock(vsk);
14421442
struct sk_buff *skb;
14431443
int off = 0;
1444-
int copied;
14451444
int err;
14461445

14471446
spin_lock_bh(&vvs->rx_lock);
@@ -1454,9 +1453,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
14541453
if (!skb)
14551454
return err;
14561455

1457-
copied = recv_actor(sk, skb);
1458-
kfree_skb(skb);
1459-
return copied;
1456+
return recv_actor(sk, skb);
14601457
}
14611458
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);
14621459

0 commit comments

Comments
 (0)