Skip to content

Commit 1eb3dee

Browse files
committed
Daniel Borkmann says:

====================
pull-request: bpf 2023-10-02

We've added 11 non-merge commits during the last 12 day(s) which contain
a total of 12 files changed, 176 insertions(+), 41 deletions(-).

The main changes are:

1) Fix BPF verifier to reset backtrack_state masks on global function exit as otherwise subsequent precision tracking would reuse them, from Andrii Nakryiko.

2) Several sockmap fixes for available bytes accounting, from John Fastabend.

3) Reject sk_msg egress redirects to non-TCP sockets given this is only supported for TCP sockets today, from Jakub Sitnicki.

4) Fix a syzkaller splat in bpf_mprog when hitting maximum program limits with BPF_F_BEFORE directive, from Daniel Borkmann and Nikolay Aleksandrov.

5) Fix BPF memory allocator to use kmalloc_size_roundup() to adjust size_index for selecting a bpf_mem_cache, from Hou Tao.

6) Fix arch_prepare_bpf_trampoline return code for s390 JIT, from Song Liu.

7) Fix bpf_trampoline_get when CONFIG_BPF_JIT is turned off, from Leon Hwang.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  - bpf: Use kmalloc_size_roundup() to adjust size_index
  - selftest/bpf: Add various selftests for program limits
  - bpf, mprog: Fix maximum program check on mprog attachment
  - bpf, sockmap: Reject sk_msg egress redirects to non-TCP sockets
  - bpf, sockmap: Add tests for MSG_F_PEEK
  - bpf, sockmap: Do not inc copied_seq when PEEK flag set
  - bpf: tcp_read_skb needs to pop skb regardless of seq
  - bpf: unconditionally reset backtrack_state masks on global func exit
  - bpf: Fix tr dereferencing
  - selftests/bpf: Check bpf_cubic_acked() is called via struct_ops
  - s390/bpf: Let arch_prepare_bpf_trampoline return program size
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 51e7a66 + 9077fc2 commit 1eb3dee

File tree

12 files changed

+176
-41
lines changed

12 files changed

+176
-41
lines changed

arch/s390/net/bpf_jit_comp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2513,7 +2513,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
25132513
return -E2BIG;
25142514
}
25152515

2516-
return ret;
2516+
return tjit.common.prg;
25172517
}
25182518

25192519
bool bpf_jit_supports_subprog_tailcalls(void)

include/linux/bpf.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1307,7 +1307,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
13071307
static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
13081308
struct bpf_attach_target_info *tgt_info)
13091309
{
1310-
return ERR_PTR(-EOPNOTSUPP);
1310+
return NULL;
13111311
}
13121312
static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
13131313
#define DEFINE_BPF_DISPATCHER(name)

kernel/bpf/memalloc.c

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -965,37 +965,31 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
965965
return !ret ? NULL : ret + LLIST_NODE_SZ;
966966
}
967967

968-
/* Most of the logic is taken from setup_kmalloc_cache_index_table() */
969968
static __init int bpf_mem_cache_adjust_size(void)
970969
{
971-
unsigned int size, index;
970+
unsigned int size;
972971

973-
/* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be
974-
* up-to 256-bytes.
972+
/* Adjusting the indexes in size_index() according to the object_size
973+
* of underlying slab cache, so bpf_mem_alloc() will select a
974+
* bpf_mem_cache with unit_size equal to the object_size of
975+
* the underlying slab cache.
976+
*
977+
* The maximal value of KMALLOC_MIN_SIZE and __kmalloc_minalign() is
978+
* 256-bytes, so only do adjustment for [8-bytes, 192-bytes].
975979
*/
976-
size = KMALLOC_MIN_SIZE;
977-
if (size <= 192)
978-
index = size_index[(size - 1) / 8];
979-
else
980-
index = fls(size - 1) - 1;
981-
for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8)
982-
size_index[(size - 1) / 8] = index;
980+
for (size = 192; size >= 8; size -= 8) {
981+
unsigned int kmalloc_size, index;
983982

984-
/* The minimal alignment is 64-bytes, so disable 96-bytes cache and
985-
* use 128-bytes cache instead.
986-
*/
987-
if (KMALLOC_MIN_SIZE >= 64) {
988-
index = size_index[(128 - 1) / 8];
989-
for (size = 64 + 8; size <= 96; size += 8)
990-
size_index[(size - 1) / 8] = index;
991-
}
983+
kmalloc_size = kmalloc_size_roundup(size);
984+
if (kmalloc_size == size)
985+
continue;
992986

993-
/* The minimal alignment is 128-bytes, so disable 192-bytes cache and
994-
* use 256-bytes cache instead.
995-
*/
996-
if (KMALLOC_MIN_SIZE >= 128) {
997-
index = fls(256 - 1) - 1;
998-
for (size = 128 + 8; size <= 192; size += 8)
987+
if (kmalloc_size <= 192)
988+
index = size_index[(kmalloc_size - 1) / 8];
989+
else
990+
index = fls(kmalloc_size - 1) - 1;
991+
/* Only overwrite if necessary */
992+
if (size_index[(size - 1) / 8] != index)
999993
size_index[(size - 1) / 8] = index;
1000994
}
1001995

kernel/bpf/mprog.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,9 @@ int bpf_mprog_attach(struct bpf_mprog_entry *entry,
253253
goto out;
254254
}
255255
idx = tidx;
256+
} else if (bpf_mprog_total(entry) == bpf_mprog_max()) {
257+
ret = -ERANGE;
258+
goto out;
256259
}
257260
if (flags & BPF_F_BEFORE) {
258261
tidx = bpf_mprog_pos_before(entry, &rtuple);

kernel/bpf/verifier.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4047,11 +4047,9 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
40474047
bitmap_from_u64(mask, bt_reg_mask(bt));
40484048
for_each_set_bit(i, mask, 32) {
40494049
reg = &st->frame[0]->regs[i];
4050-
if (reg->type != SCALAR_VALUE) {
4051-
bt_clear_reg(bt, i);
4052-
continue;
4053-
}
4054-
reg->precise = true;
4050+
bt_clear_reg(bt, i);
4051+
if (reg->type == SCALAR_VALUE)
4052+
reg->precise = true;
40554053
}
40564054
return 0;
40574055
}

net/core/sock_map.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,8 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
668668
sk = __sock_map_lookup_elem(map, key);
669669
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
670670
return SK_DROP;
671+
if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk))
672+
return SK_DROP;
671673

672674
msg->flags = flags;
673675
msg->sk_redir = sk;
@@ -1267,6 +1269,8 @@ BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg,
12671269
sk = __sock_hash_lookup_elem(map, key);
12681270
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
12691271
return SK_DROP;
1272+
if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk))
1273+
return SK_DROP;
12701274

12711275
msg->flags = flags;
12721276
msg->sk_redir = sk;

net/ipv4/tcp.c

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1621,16 +1621,13 @@ EXPORT_SYMBOL(tcp_read_sock);
16211621

16221622
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
16231623
{
1624-
struct tcp_sock *tp = tcp_sk(sk);
1625-
u32 seq = tp->copied_seq;
16261624
struct sk_buff *skb;
16271625
int copied = 0;
1628-
u32 offset;
16291626

16301627
if (sk->sk_state == TCP_LISTEN)
16311628
return -ENOTCONN;
16321629

1633-
while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
1630+
while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
16341631
u8 tcp_flags;
16351632
int used;
16361633

@@ -1643,13 +1640,10 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
16431640
copied = used;
16441641
break;
16451642
}
1646-
seq += used;
16471643
copied += used;
16481644

1649-
if (tcp_flags & TCPHDR_FIN) {
1650-
++seq;
1645+
if (tcp_flags & TCPHDR_FIN)
16511646
break;
1652-
}
16531647
}
16541648
return copied;
16551649
}

net/ipv4/tcp_bpf.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
222222
int *addr_len)
223223
{
224224
struct tcp_sock *tcp = tcp_sk(sk);
225+
int peek = flags & MSG_PEEK;
225226
u32 seq = tcp->copied_seq;
226227
struct sk_psock *psock;
227228
int copied = 0;
@@ -311,7 +312,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
311312
copied = -EAGAIN;
312313
}
313314
out:
314-
WRITE_ONCE(tcp->copied_seq, seq);
315+
if (!peek)
316+
WRITE_ONCE(tcp->copied_seq, seq);
315317
tcp_rcv_space_adjust(sk);
316318
if (copied > 0)
317319
__tcp_cleanup_rbuf(sk, copied);

tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ static void test_cubic(void)
185185

186186
do_test("bpf_cubic", NULL);
187187

188+
ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called");
189+
188190
bpf_link__destroy(link);
189191
bpf_cubic__destroy(cubic_skel);
190192
}

tools/testing/selftests/bpf/prog_tests/sockmap_basic.c

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,55 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
475475
test_sockmap_drop_prog__destroy(drop);
476476
}
477477

478+
static void test_sockmap_skb_verdict_peek(void)
479+
{
480+
int err, map, verdict, s, c1, p1, zero = 0, sent, recvd, avail;
481+
struct test_sockmap_pass_prog *pass;
482+
char snd[256] = "0123456789";
483+
char rcv[256] = "0";
484+
485+
pass = test_sockmap_pass_prog__open_and_load();
486+
if (!ASSERT_OK_PTR(pass, "open_and_load"))
487+
return;
488+
verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
489+
map = bpf_map__fd(pass->maps.sock_map_rx);
490+
491+
err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
492+
if (!ASSERT_OK(err, "bpf_prog_attach"))
493+
goto out;
494+
495+
s = socket_loopback(AF_INET, SOCK_STREAM);
496+
if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
497+
goto out;
498+
499+
err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
500+
if (!ASSERT_OK(err, "create_pairs(s)"))
501+
goto out;
502+
503+
err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
504+
if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
505+
goto out_close;
506+
507+
sent = xsend(p1, snd, sizeof(snd), 0);
508+
ASSERT_EQ(sent, sizeof(snd), "xsend(p1)");
509+
recvd = recv(c1, rcv, sizeof(rcv), MSG_PEEK);
510+
ASSERT_EQ(recvd, sizeof(rcv), "recv(c1)");
511+
err = ioctl(c1, FIONREAD, &avail);
512+
ASSERT_OK(err, "ioctl(FIONREAD) error");
513+
ASSERT_EQ(avail, sizeof(snd), "after peek ioctl(FIONREAD)");
514+
recvd = recv(c1, rcv, sizeof(rcv), 0);
515+
ASSERT_EQ(recvd, sizeof(rcv), "recv(p0)");
516+
err = ioctl(c1, FIONREAD, &avail);
517+
ASSERT_OK(err, "ioctl(FIONREAD) error");
518+
ASSERT_EQ(avail, 0, "after read ioctl(FIONREAD)");
519+
520+
out_close:
521+
close(c1);
522+
close(p1);
523+
out:
524+
test_sockmap_pass_prog__destroy(pass);
525+
}
526+
478527
void test_sockmap_basic(void)
479528
{
480529
if (test__start_subtest("sockmap create_update_free"))
@@ -515,4 +564,6 @@ void test_sockmap_basic(void)
515564
test_sockmap_skb_verdict_fionread(true);
516565
if (test__start_subtest("sockmap skb_verdict fionread on drop"))
517566
test_sockmap_skb_verdict_fionread(false);
567+
if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
568+
test_sockmap_skb_verdict_peek();
518569
}

0 commit comments

Comments
 (0)