
Commit 5635f18

Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull bpf fixes from Daniel Borkmann:

 - Fix BPF verifier to force a checkpoint when the program's jump
   history becomes too long (Eduard Zingerman)

 - Add several fixes to the BPF bits iterator addressing issues like
   memory leaks and overflow problems (Hou Tao)

 - Fix an out-of-bounds write in trie_get_next_key (Byeonguk Jeong)

 - Fix BPF test infra's LIVE_FRAME frame update after a page has been
   recycled (Toke Høiland-Jørgensen)

 - Fix BPF verifier and undo the 40-bytes extra stack space for
   bpf_fastcall patterns due to various bugs (Eduard Zingerman)

 - Fix a BPF sockmap race condition which could trigger a NULL pointer
   dereference in sock_map_link_update_prog (Cong Wang)

 - Fix tcp_bpf_recvmsg_parser to retrieve seq_copied from tcp_sk under
   the socket lock (Jiayuan Chen)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf, test_run: Fix LIVE_FRAME frame update after a page has been recycled
  selftests/bpf: Add three test cases for bits_iter
  bpf: Use __u64 to save the bits in bits iterator
  bpf: Check the validity of nr_words in bpf_iter_bits_new()
  bpf: Add bpf_mem_alloc_check_size() helper
  bpf: Free dynamically allocated bits in bpf_iter_bits_destroy()
  bpf: disallow 40-bytes extra stack for bpf_fastcall patterns
  selftests/bpf: Add test for trie_get_next_key()
  bpf: Fix out-of-bounds write in trie_get_next_key()
  selftests/bpf: Test with a very short loop
  bpf: Force checkpoint when jmp history is too long
  bpf: fix filed access without lock
  sock_map: fix a NULL pointer dereference in sock_map_link_update_prog()
2 parents: 90602c2 + c40dd8c

13 files changed: +269 −88 lines changed


include/linux/bpf_mem_alloc.h

Lines changed: 3 additions & 0 deletions
@@ -33,6 +33,9 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
 int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);
 void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
 
+/* Check the allocation size for kmalloc equivalent allocator */
+int bpf_mem_alloc_check_size(bool percpu, size_t size);
+
 /* kmalloc/kfree equivalent: */
 void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size);
 void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
kernel/bpf/helpers.c

Lines changed: 44 additions & 10 deletions
@@ -2851,21 +2851,47 @@ struct bpf_iter_bits {
 	__u64 __opaque[2];
 } __aligned(8);
 
+#define BITS_ITER_NR_WORDS_MAX 511
+
 struct bpf_iter_bits_kern {
 	union {
-		unsigned long *bits;
-		unsigned long bits_copy;
+		__u64 *bits;
+		__u64 bits_copy;
 	};
-	u32 nr_bits;
+	int nr_bits;
 	int bit;
 } __aligned(8);
 
+/* On 64-bit hosts, unsigned long and u64 have the same size, so passing
+ * a u64 pointer and an unsigned long pointer to find_next_bit() will
+ * return the same result, as both point to the same 8-byte area.
+ *
+ * For 32-bit little-endian hosts, using a u64 pointer or unsigned long
+ * pointer also makes no difference. This is because the first iterated
+ * unsigned long is composed of bits 0-31 of the u64 and the second unsigned
+ * long is composed of bits 32-63 of the u64.
+ *
+ * However, for 32-bit big-endian hosts, this is not the case. The first
+ * iterated unsigned long will be bits 32-63 of the u64, so swap these two
+ * ulong values within the u64.
+ */
+static void swap_ulong_in_u64(u64 *bits, unsigned int nr)
+{
+#if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN)
+	unsigned int i;
+
+	for (i = 0; i < nr; i++)
+		bits[i] = (bits[i] >> 32) | ((u64)(u32)bits[i] << 32);
+#endif
+}
+
 /**
  * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
  * @it: The new bpf_iter_bits to be created
  * @unsafe_ptr__ign: A pointer pointing to a memory area to be iterated over
 * @nr_words: The size of the specified memory area, measured in 8-byte units.
- * Due to the limitation of memalloc, it can't be greater than 512.
+ * The maximum value of @nr_words is @BITS_ITER_NR_WORDS_MAX. This limit may be
+ * further reduced by the BPF memory allocator implementation.
  *
  * This function initializes a new bpf_iter_bits structure for iterating over
 * a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It
@@ -2892,17 +2918,24 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
 
 	if (!unsafe_ptr__ign || !nr_words)
 		return -EINVAL;
+	if (nr_words > BITS_ITER_NR_WORDS_MAX)
+		return -E2BIG;
 
 	/* Optimization for u64 mask */
 	if (nr_bits == 64) {
 		err = bpf_probe_read_kernel_common(&kit->bits_copy, nr_bytes, unsafe_ptr__ign);
 		if (err)
 			return -EFAULT;
 
+		swap_ulong_in_u64(&kit->bits_copy, nr_words);
+
 		kit->nr_bits = nr_bits;
 		return 0;
 	}
 
+	if (bpf_mem_alloc_check_size(false, nr_bytes))
+		return -E2BIG;
+
 	/* Fallback to memalloc */
 	kit->bits = bpf_mem_alloc(&bpf_global_ma, nr_bytes);
 	if (!kit->bits)
@@ -2914,6 +2947,8 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
 		return err;
 	}
 
+	swap_ulong_in_u64(kit->bits, nr_words);
+
 	kit->nr_bits = nr_bits;
 	return 0;
 }
@@ -2930,17 +2965,16 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
 __bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
 {
 	struct bpf_iter_bits_kern *kit = (void *)it;
-	u32 nr_bits = kit->nr_bits;
-	const unsigned long *bits;
-	int bit;
+	int bit = kit->bit, nr_bits = kit->nr_bits;
+	const void *bits;
 
-	if (nr_bits == 0)
+	if (!nr_bits || bit >= nr_bits)
 		return NULL;
 
 	bits = nr_bits == 64 ? &kit->bits_copy : kit->bits;
-	bit = find_next_bit(bits, nr_bits, kit->bit + 1);
+	bit = find_next_bit(bits, nr_bits, bit + 1);
 	if (bit >= nr_bits) {
-		kit->nr_bits = 0;
+		kit->bit = bit;
 		return NULL;
 	}
 
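Note: the word swap only changes anything on 32-bit big-endian builds, where find_next_bit() walks 32-bit words in memory order and would otherwise see the halves of each u64 reversed. A minimal userspace sketch of the same transform (the big-endian conditional is dropped here so the effect is visible on any host; not kernel code):

/* Userspace sketch of swap_ulong_in_u64(); compiled unconditionally for
 * illustration, whereas the kernel compiles the body only when
 * BITS_PER_LONG == 32 and __BIG_ENDIAN is defined. */
#include <stdint.h>
#include <stdio.h>

static void swap_halves(uint64_t *bits, unsigned int nr)
{
	unsigned int i;

	for (i = 0; i < nr; i++)
		bits[i] = (bits[i] >> 32) | ((uint64_t)(uint32_t)bits[i] << 32);
}

int main(void)
{
	uint64_t word = 0x1; /* bit 0 set */

	swap_halves(&word, 1);
	/* After the swap, a 32-bit big-endian bitmap walk sees bits 0-31
	 * in the first 32-bit word it visits. */
	printf("0x%016llx\n", (unsigned long long)word); /* 0x0000000100000000 */
	return 0;
}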

kernel/bpf/lpm_trie.c

Lines changed: 1 addition & 1 deletion
@@ -655,7 +655,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
 	if (!key || key->prefixlen > trie->max_prefixlen)
 		goto find_leftmost;
 
-	node_stack = kmalloc_array(trie->max_prefixlen,
+	node_stack = kmalloc_array(trie->max_prefixlen + 1,
 				   sizeof(struct lpm_trie_node *),
 				   GFP_ATOMIC | __GFP_NOWARN);
 	if (!node_stack)
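The fencepost: the walk can push one node per prefix length, and valid lengths run from 0 to max_prefixlen inclusive, i.e. max_prefixlen + 1 levels, so the old array was one slot short. A trivial standalone sketch of the counting (illustrative only, not kernel code):

#include <assert.h>

int main(void)
{
	const unsigned int max_prefixlen = 32; /* e.g. an IPv4 LPM trie */
	unsigned int levels = 0, len;

	/* One stack slot may be needed per prefix length 0..max_prefixlen. */
	for (len = 0; len <= max_prefixlen; len++)
		levels++;

	/* kmalloc_array(max_prefixlen, ...) allocated one slot too few. */
	assert(levels == max_prefixlen + 1);
	return 0;
}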

kernel/bpf/memalloc.c

Lines changed: 13 additions & 1 deletion
@@ -35,6 +35,8 @@
  */
 #define LLIST_NODE_SZ sizeof(struct llist_node)
 
+#define BPF_MEM_ALLOC_SIZE_MAX 4096
+
 /* similar to kmalloc, but sizeof == 8 bucket is gone */
 static u8 size_index[24] __ro_after_init = {
 	3, /* 8 */
@@ -65,7 +67,7 @@ static u8 size_index[24] __ro_after_init = {
 
 static int bpf_mem_cache_idx(size_t size)
 {
-	if (!size || size > 4096)
+	if (!size || size > BPF_MEM_ALLOC_SIZE_MAX)
 		return -1;
 
 	if (size <= 192)
@@ -1005,3 +1007,13 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
 
 	return !ret ? NULL : ret + LLIST_NODE_SZ;
 }
+
+int bpf_mem_alloc_check_size(bool percpu, size_t size)
+{
+	/* The size of percpu allocation doesn't have LLIST_NODE_SZ overhead */
+	if ((percpu && size > BPF_MEM_ALLOC_SIZE_MAX) ||
+	    (!percpu && size > BPF_MEM_ALLOC_SIZE_MAX - LLIST_NODE_SZ))
+		return -E2BIG;
+
+	return 0;
+}
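This check is also where BITS_ITER_NR_WORDS_MAX = 511 in helpers.c comes from: on a 64-bit host a non-percpu allocation carries sizeof(struct llist_node) = 8 bytes of overhead, leaving 4096 - 8 = 4088 usable bytes, i.e. 511 8-byte words. A quick standalone check of the arithmetic (assumes the 64-bit, single-pointer llist_node layout):

#include <assert.h>

/* Stand-in mirroring the kernel's struct llist_node (one pointer). */
struct llist_node { struct llist_node *next; };

#define BPF_MEM_ALLOC_SIZE_MAX 4096
#define LLIST_NODE_SZ sizeof(struct llist_node)

int main(void)
{
	/* 64-bit host: (4096 - 8) / 8 == 511 == BITS_ITER_NR_WORDS_MAX */
	assert((BPF_MEM_ALLOC_SIZE_MAX - LLIST_NODE_SZ) / 8 == 511);
	return 0;
}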

kernel/bpf/verifier.c

Lines changed: 8 additions & 15 deletions
@@ -6804,20 +6804,10 @@ static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
 					   struct bpf_func_state *state,
 					   enum bpf_access_type t)
 {
-	struct bpf_insn_aux_data *aux = &env->insn_aux_data[env->insn_idx];
-	int min_valid_off, max_bpf_stack;
-
-	/* If accessing instruction is a spill/fill from bpf_fastcall pattern,
-	 * add room for all caller saved registers below MAX_BPF_STACK.
-	 * In case if bpf_fastcall rewrite won't happen maximal stack depth
-	 * would be checked by check_max_stack_depth_subprog().
-	 */
-	max_bpf_stack = MAX_BPF_STACK;
-	if (aux->fastcall_pattern)
-		max_bpf_stack += CALLER_SAVED_REGS * BPF_REG_SIZE;
+	int min_valid_off;
 
 	if (t == BPF_WRITE || env->allow_uninit_stack)
-		min_valid_off = -max_bpf_stack;
+		min_valid_off = -MAX_BPF_STACK;
 	else
 		min_valid_off = -state->allocated_stack;
 
@@ -17886,9 +17876,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	struct bpf_verifier_state_list *sl, **pprev;
 	struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
 	int i, j, n, err, states_cnt = 0;
-	bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
-	bool add_new_state = force_new_state;
-	bool force_exact;
+	bool force_new_state, add_new_state, force_exact;
+
+	force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
+			  /* Avoid accumulating infinitely long jmp history */
+			  cur->jmp_history_cnt > 40;
 
 	/* bpf progs typically have pruning point every 4 instructions
 	 * http://vger.kernel.org/bpfconf2019.html#session-1
@@ -17898,6 +17890,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	 * In tests that amounts to up to 50% reduction into total verifier
 	 * memory consumption and 20% verifier time speedup.
 	 */
+	add_new_state = force_new_state;
 	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
 	    env->insn_processed - env->prev_insn_processed >= 8)
 		add_new_state = true;
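The new cur->jmp_history_cnt > 40 condition targets programs like the selftest's very short loop, where nearly every verifier step is a jump but the existing "at least 2 jumps and 8 insns" heuristic rarely fires, so a single state's jump history keeps growing. A hedged sketch of such a program (names and bounds are illustrative, not the selftest's exact code):

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("socket")
int very_short_loop(struct __sk_buff *skb)
{
	volatile int sum = 0; /* volatile keeps the loop from being folded */
	int i;

	/* A near-empty loop body: each iteration appends to the verifier's
	 * jmp history while adding few processed insns, so without the
	 * forced checkpoint the history grows with every unrolled trip. */
	for (i = 0; i < 1000; i++)
		sum += i;

	return sum & 1;
}

char _license[] SEC("license") = "GPL";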

net/bpf/test_run.c

Lines changed: 1 addition & 0 deletions
@@ -246,6 +246,7 @@ static void reset_ctx(struct xdp_page_head *head)
 	head->ctx.data_meta = head->orig_ctx.data_meta;
 	head->ctx.data_end = head->orig_ctx.data_end;
 	xdp_update_frame_from_buff(&head->ctx, head->frame);
+	head->frame->mem = head->orig_ctx.rxq->mem;
 }
 
 static int xdp_recv_frames(struct xdp_frame **frames, int nframes,

net/core/sock_map.c

Lines changed: 4 additions & 0 deletions
@@ -1760,6 +1760,10 @@ static int sock_map_link_update_prog(struct bpf_link *link,
 		ret = -EINVAL;
 		goto out;
 	}
+	if (!sockmap_link->map) {
+		ret = -ENOLINK;
+		goto out;
+	}
 
 	ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink,
 					sockmap_link->attach_type);

net/ipv4/tcp_bpf.c

Lines changed: 4 additions & 3 deletions
@@ -221,11 +221,11 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
 				  int flags,
 				  int *addr_len)
 {
-	struct tcp_sock *tcp = tcp_sk(sk);
 	int peek = flags & MSG_PEEK;
-	u32 seq = tcp->copied_seq;
 	struct sk_psock *psock;
+	struct tcp_sock *tcp;
 	int copied = 0;
+	u32 seq;
 
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
@@ -238,7 +238,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
 		return tcp_recvmsg(sk, msg, len, flags, addr_len);
 
 	lock_sock(sk);
-
+	tcp = tcp_sk(sk);
+	seq = tcp->copied_seq;
 	/* We may have received data on the sk_receive_queue pre-accept and
 	 * then we can not use read_skb in this context because we haven't
 	 * assigned a sk_socket yet so have no link to the ops. The work-around
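The bug pattern is a snapshot of tcp->copied_seq taken before lock_sock(): the receive path can update the field concurrently, so the pre-lock value may already be stale when it is used. A userspace model of the before/after ordering (pthread mutex standing in for the socket lock; all types illustrative, not kernel code):

#include <pthread.h>
#include <stdint.h>

struct sock_model {
	pthread_mutex_t lock; /* stands in for lock_sock()/release_sock() */
	uint32_t copied_seq;  /* updated concurrently by the receive path */
};

/* Before the fix: the unlocked read may be stale once the lock is held. */
static uint32_t snapshot_buggy(struct sock_model *sk)
{
	uint32_t seq = sk->copied_seq; /* racy read outside the lock */

	pthread_mutex_lock(&sk->lock);
	/* ... seq may no longer match sk->copied_seq here ... */
	pthread_mutex_unlock(&sk->lock);
	return seq;
}

/* After the fix: read copied_seq only while holding the lock. */
static uint32_t snapshot_fixed(struct sock_model *sk)
{
	uint32_t seq;

	pthread_mutex_lock(&sk->lock);
	seq = sk->copied_seq; /* stable for as long as the lock is held */
	pthread_mutex_unlock(&sk->lock);
	return seq;
}

int main(void)
{
	struct sock_model sk = { .lock = PTHREAD_MUTEX_INITIALIZER };

	sk.copied_seq = 42;
	return snapshot_fixed(&sk) != snapshot_buggy(&sk);
}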
tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <linux/bpf.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+struct test_lpm_key {
+	__u32 prefix;
+	__u32 data;
+};
+
+struct get_next_key_ctx {
+	struct test_lpm_key key;
+	bool start;
+	bool stop;
+	int map_fd;
+	int loop;
+};
+
+static void *get_next_key_fn(void *arg)
+{
+	struct get_next_key_ctx *ctx = arg;
+	struct test_lpm_key next_key;
+	int i = 0;
+
+	while (!ctx->start)
+		usleep(1);
+
+	while (!ctx->stop && i++ < ctx->loop)
+		bpf_map_get_next_key(ctx->map_fd, &ctx->key, &next_key);
+
+	return NULL;
+}
+
+static void abort_get_next_key(struct get_next_key_ctx *ctx, pthread_t *tids,
+			       unsigned int nr)
+{
+	unsigned int i;
+
+	ctx->stop = true;
+	ctx->start = true;
+	for (i = 0; i < nr; i++)
+		pthread_join(tids[i], NULL);
+}
+
+/* This test aims to prevent regression of future. As long as the kernel does
+ * not panic, it is considered as success.
+ */
+void test_lpm_trie_map_get_next_key(void)
+{
+#define MAX_NR_THREADS 8
+	LIBBPF_OPTS(bpf_map_create_opts, create_opts,
+		    .map_flags = BPF_F_NO_PREALLOC);
+	struct test_lpm_key key = {};
+	__u32 val = 0;
+	int map_fd;
+	const __u32 max_prefixlen = 8 * (sizeof(key) - sizeof(key.prefix));
+	const __u32 max_entries = max_prefixlen + 1;
+	unsigned int i, nr = MAX_NR_THREADS, loop = 65536;
+	pthread_t tids[MAX_NR_THREADS];
+	struct get_next_key_ctx ctx;
+	int err;
+
+	map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map",
+				sizeof(struct test_lpm_key), sizeof(__u32),
+				max_entries, &create_opts);
+	CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n",
+	      strerror(errno));
+
+	for (i = 0; i <= max_prefixlen; i++) {
+		key.prefix = i;
+		err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+		CHECK(err, "bpf_map_update_elem()", "error:%s\n",
+		      strerror(errno));
+	}
+
+	ctx.start = false;
+	ctx.stop = false;
+	ctx.map_fd = map_fd;
+	ctx.loop = loop;
+	memcpy(&ctx.key, &key, sizeof(key));
+
+	for (i = 0; i < nr; i++) {
+		err = pthread_create(&tids[i], NULL, get_next_key_fn, &ctx);
+		if (err) {
+			abort_get_next_key(&ctx, tids, i);
+			CHECK(err, "pthread_create", "error %d\n", err);
+		}
+	}
+
+	ctx.start = true;
+	for (i = 0; i < nr; i++)
+		pthread_join(tids[i], NULL);
+
+	printf("%s:PASS\n", __func__);
+
+	close(map_fd);
+}
