Skip to content

Commit de99ca5

Browse files
mrpreKernel Patches Daemon
authored andcommitted
bpf, sockmap: Fix incorrect copied_seq calculation
A socket using sockmap has its own independent receive queue: ingress_msg. This queue may contain data from its own protocol stack or from other sockets. The issue is that when reading from ingress_msg, we update tp->copied_seq by default. However, if the data is not from its own protocol stack, tcp->rcv_nxt is not increased. Later, if we convert this socket to a native socket, reading from this socket may fail because copied_seq might be significantly larger than rcv_nxt. This fix also addresses the syzkaller-reported bug referenced in the Closes tag. This patch marks the skmsg objects in ingress_msg. When reading, we update copied_seq only if the data is from its own protocol stack. FD1:read() -- FD1->copied_seq++ | [read data] | [enqueue data] v [sockmap] -> ingress to self -> ingress_msg queue FD1 native stack ------> ^ -- FD1->rcv_nxt++ -> redirect to other | [enqueue data] | | | ingress to FD1 v ^ ... | [sockmap] FD2 native stack Closes: https://syzkaller.appspot.com/bug?extid=06dbd397158ec0ea4983 Fixes: 04919be ("tcp: Introduce tcp_read_skb()") Signed-off-by: Jiayuan Chen <[email protected]>
1 parent 1559a3a commit de99ca5

File tree

3 files changed

+27
-5
lines changed

3 files changed

+27
-5
lines changed

include/linux/skmsg.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
141141
struct sk_msg *msg, u32 bytes);
142142
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
143143
int len, int flags);
144+
int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
145+
int len, int flags, int *from_self_copied);
144146
bool sk_msg_is_readable(struct sock *sk);
145147

146148
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)

net/core/skmsg.c

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -409,14 +409,14 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
409409
}
410410
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
411411

412-
/* Receive sk_msg from psock->ingress_msg to @msg. */
413-
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
414-
int len, int flags)
412+
int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
413+
int len, int flags, int *from_self_copied)
415414
{
416415
struct iov_iter *iter = &msg->msg_iter;
417416
int peek = flags & MSG_PEEK;
418417
struct sk_msg *msg_rx;
419418
int i, copied = 0;
419+
bool to_self;
420420

421421
msg_rx = sk_psock_peek_msg(psock);
422422
while (copied != len) {
@@ -425,6 +425,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
425425
if (unlikely(!msg_rx))
426426
break;
427427

428+
to_self = msg_rx->sk == sk;
428429
i = msg_rx->sg.start;
429430
do {
430431
struct page *page;
@@ -443,6 +444,9 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
443444
}
444445

445446
copied += copy;
447+
if (to_self && from_self_copied)
448+
*from_self_copied += copy;
449+
446450
if (likely(!peek)) {
447451
sge->offset += copy;
448452
sge->length -= copy;
@@ -487,6 +491,14 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
487491
out:
488492
return copied;
489493
}
494+
EXPORT_SYMBOL_GPL(__sk_msg_recvmsg);
495+
496+
/* Receive sk_msg from psock->ingress_msg to @msg. */
497+
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
498+
int len, int flags)
499+
{
500+
return __sk_msg_recvmsg(sk, psock, msg, len, flags, NULL);
501+
}
490502
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
491503

492504
bool sk_msg_is_readable(struct sock *sk)
@@ -616,6 +628,12 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
616628
if (unlikely(!msg))
617629
return -EAGAIN;
618630
skb_set_owner_r(skb, sk);
631+
632+
/* This is used in tcp_bpf_recvmsg_parser() to determine whether the
633+
* data originates from the socket's own protocol stack. No need to
634+
* refcount sk because msg's lifetime is bound to sk via the ingress_msg.
635+
*/
636+
msg->sk = sk;
619637
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref);
620638
if (err < 0)
621639
kfree(msg);
@@ -909,6 +927,7 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
909927
sk_msg_compute_data_pointers(msg);
910928
msg->sk = sk;
911929
ret = bpf_prog_run_pin_on_cpu(prog, msg);
930+
msg->sk = NULL;
912931
ret = sk_psock_map_verd(ret, msg->sk_redir);
913932
psock->apply_bytes = msg->apply_bytes;
914933
if (ret == __SK_REDIRECT) {

net/ipv4/tcp_bpf.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
226226
int peek = flags & MSG_PEEK;
227227
struct sk_psock *psock;
228228
struct tcp_sock *tcp;
229+
int from_self_copied = 0;
229230
int copied = 0;
230231
u32 seq;
231232

@@ -262,7 +263,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
262263
}
263264

264265
msg_bytes_ready:
265-
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
266+
copied = __sk_msg_recvmsg(sk, psock, msg, len, flags, &from_self_copied);
266267
/* The typical case for EFAULT is the socket was gracefully
267268
* shutdown with a FIN pkt. So check here the other case is
268269
* some error on copy_page_to_iter which would be unexpected.
@@ -277,7 +278,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
277278
goto out;
278279
}
279280
}
280-
seq += copied;
281+
seq += from_self_copied;
281282
if (!copied) {
282283
long timeo;
283284
int data;

0 commit comments

Comments
 (0)