Skip to content

Commit 1cfa2f1

Browse files
committed
Daniel Borkmann says:

====================
pull-request: bpf 2024-04-04

We've added 7 non-merge commits during the last 5 day(s) which contain
a total of 9 files changed, 75 insertions(+), 24 deletions(-).

The main changes are:

1) Fix x86 BPF JIT under retbleed=stuff which causes kernel panics due to
   incorrect destination IP calculation and incorrect IP for relocations,
   from Uros Bizjak and Joan Bruguera Micó.

2) Fix BPF arena file descriptor leaks in the verifier,
   from Anton Protopopov.

3) Defer bpf_link deallocation to after RCU grace period as currently
   running multi-{kprobes,uprobes} programs might still access cookie
   information from the link, from Andrii Nakryiko.

4) Fix a BPF sockmap lock inversion deadlock in map_delete_elem reported
   by syzkaller, from Jakub Sitnicki.

5) Fix resolve_btfids build with musl libc due to missing linux/types.h
   include, from Natanael Copa.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf, sockmap: Prevent lock inversion deadlock in map delete elem
  x86/bpf: Fix IP for relocating call depth accounting
  x86/bpf: Fix IP after emitting call depth accounting
  bpf: fix possible file descriptor leaks in verifier
  tools/resolve_btfids: fix build with musl libc
  bpf: support deferring bpf_link dealloc to after RCU grace period
  bpf: put uprobe link's path and task in release callback
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 0c83842 + ff91059 commit 1cfa2f1

File tree

9 files changed

+75
-24
lines changed

9 files changed

+75
-24
lines changed

arch/x86/include/asm/alternative.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void);
117117
extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
118118
struct module *mod);
119119
extern void *callthunks_translate_call_dest(void *dest);
120-
extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
120+
extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
121121
#else
122122
static __always_inline void callthunks_patch_builtin_calls(void) {}
123123
static __always_inline void
@@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
128128
return dest;
129129
}
130130
static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
131-
void *func)
131+
void *func, void *ip)
132132
{
133133
return 0;
134134
}

arch/x86/kernel/callthunks.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ static bool is_callthunk(void *addr)
314314
return !bcmp(pad, insn_buff, tmpl_size);
315315
}
316316

317-
int x86_call_depth_emit_accounting(u8 **pprog, void *func)
317+
int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
318318
{
319319
unsigned int tmpl_size = SKL_TMPL_SIZE;
320320
u8 insn_buff[MAX_PATCH_LEN];
@@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
327327
return 0;
328328

329329
memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
330-
apply_relocation(insn_buff, tmpl_size, *pprog,
330+
apply_relocation(insn_buff, tmpl_size, ip,
331331
skl_call_thunk_template, tmpl_size);
332332

333333
memcpy(*pprog, insn_buff, tmpl_size);

arch/x86/net/bpf_jit_comp.c

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ static int emit_call(u8 **pprog, void *func, void *ip)
480480
static int emit_rsb_call(u8 **pprog, void *func, void *ip)
481481
{
482482
OPTIMIZER_HIDE_VAR(func);
483-
x86_call_depth_emit_accounting(pprog, func);
483+
ip += x86_call_depth_emit_accounting(pprog, func, ip);
484484
return emit_patch(pprog, func, ip, 0xE8);
485485
}
486486

@@ -1972,20 +1972,17 @@ st: if (is_imm8(insn->off))
19721972

19731973
/* call */
19741974
case BPF_JMP | BPF_CALL: {
1975-
int offs;
1975+
u8 *ip = image + addrs[i - 1];
19761976

19771977
func = (u8 *) __bpf_call_base + imm32;
19781978
if (tail_call_reachable) {
19791979
RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
1980-
if (!imm32)
1981-
return -EINVAL;
1982-
offs = 7 + x86_call_depth_emit_accounting(&prog, func);
1983-
} else {
1984-
if (!imm32)
1985-
return -EINVAL;
1986-
offs = x86_call_depth_emit_accounting(&prog, func);
1980+
ip += 7;
19871981
}
1988-
if (emit_call(&prog, func, image + addrs[i - 1] + offs))
1982+
if (!imm32)
1983+
return -EINVAL;
1984+
ip += x86_call_depth_emit_accounting(&prog, func, ip);
1985+
if (emit_call(&prog, func, ip))
19891986
return -EINVAL;
19901987
break;
19911988
}
@@ -2835,7 +2832,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
28352832
* Direct-call fentry stub, as such it needs accounting for the
28362833
* __fentry__ call.
28372834
*/
2838-
x86_call_depth_emit_accounting(&prog, NULL);
2835+
x86_call_depth_emit_accounting(&prog, NULL, image);
28392836
}
28402837
EMIT1(0x55); /* push rbp */
28412838
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */

include/linux/bpf.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1574,12 +1574,26 @@ struct bpf_link {
15741574
enum bpf_link_type type;
15751575
const struct bpf_link_ops *ops;
15761576
struct bpf_prog *prog;
1577-
struct work_struct work;
1577+
/* rcu is used before freeing, work can be used to schedule that
1578+
* RCU-based freeing before that, so they never overlap
1579+
*/
1580+
union {
1581+
struct rcu_head rcu;
1582+
struct work_struct work;
1583+
};
15781584
};
15791585

15801586
struct bpf_link_ops {
15811587
void (*release)(struct bpf_link *link);
1588+
/* deallocate link resources callback, called without RCU grace period
1589+
* waiting
1590+
*/
15821591
void (*dealloc)(struct bpf_link *link);
1592+
/* deallocate link resources callback, called after RCU grace period;
1593+
* if underlying BPF program is sleepable we go through tasks trace
1594+
* RCU GP and then "classic" RCU GP
1595+
*/
1596+
void (*dealloc_deferred)(struct bpf_link *link);
15831597
int (*detach)(struct bpf_link *link);
15841598
int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
15851599
struct bpf_prog *old_prog);

kernel/bpf/syscall.c

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3024,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link)
30243024
atomic64_inc(&link->refcnt);
30253025
}
30263026

3027+
static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
3028+
{
3029+
struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
3030+
3031+
/* free bpf_link and its containing memory */
3032+
link->ops->dealloc_deferred(link);
3033+
}
3034+
3035+
static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
3036+
{
3037+
if (rcu_trace_implies_rcu_gp())
3038+
bpf_link_defer_dealloc_rcu_gp(rcu);
3039+
else
3040+
call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
3041+
}
3042+
30273043
/* bpf_link_free is guaranteed to be called from process context */
30283044
static void bpf_link_free(struct bpf_link *link)
30293045
{
3046+
bool sleepable = false;
3047+
30303048
bpf_link_free_id(link->id);
30313049
if (link->prog) {
3050+
sleepable = link->prog->sleepable;
30323051
/* detach BPF program, clean up used resources */
30333052
link->ops->release(link);
30343053
bpf_prog_put(link->prog);
30353054
}
3036-
/* free bpf_link and its containing memory */
3037-
link->ops->dealloc(link);
3055+
if (link->ops->dealloc_deferred) {
3056+
/* schedule BPF link deallocation; if underlying BPF program
3057+
* is sleepable, we need to first wait for RCU tasks trace
3058+
* sync, then go through "classic" RCU grace period
3059+
*/
3060+
if (sleepable)
3061+
call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
3062+
else
3063+
call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
3064+
}
3065+
if (link->ops->dealloc)
3066+
link->ops->dealloc(link);
30383067
}
30393068

30403069
static void bpf_link_put_deferred(struct work_struct *work)
@@ -3544,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
35443573

35453574
static const struct bpf_link_ops bpf_raw_tp_link_lops = {
35463575
.release = bpf_raw_tp_link_release,
3547-
.dealloc = bpf_raw_tp_link_dealloc,
3576+
.dealloc_deferred = bpf_raw_tp_link_dealloc,
35483577
.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
35493578
.fill_link_info = bpf_raw_tp_link_fill_link_info,
35503579
};

kernel/bpf/verifier.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18379,15 +18379,18 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
1837918379
}
1838018380
if (!env->prog->jit_requested) {
1838118381
verbose(env, "JIT is required to use arena\n");
18382+
fdput(f);
1838218383
return -EOPNOTSUPP;
1838318384
}
1838418385
if (!bpf_jit_supports_arena()) {
1838518386
verbose(env, "JIT doesn't support arena\n");
18387+
fdput(f);
1838618388
return -EOPNOTSUPP;
1838718389
}
1838818390
env->prog->aux->arena = (void *)map;
1838918391
if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
1839018392
verbose(env, "arena's user address must be set via map_extra or mmap()\n");
18393+
fdput(f);
1839118394
return -EINVAL;
1839218395
}
1839318396
}

kernel/trace/bpf_trace.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2728,7 +2728,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
27282728

27292729
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
27302730
.release = bpf_kprobe_multi_link_release,
2731-
.dealloc = bpf_kprobe_multi_link_dealloc,
2731+
.dealloc_deferred = bpf_kprobe_multi_link_dealloc,
27322732
.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
27332733
};
27342734

@@ -3157,16 +3157,16 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link)
31573157

31583158
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
31593159
bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
3160+
if (umulti_link->task)
3161+
put_task_struct(umulti_link->task);
3162+
path_put(&umulti_link->path);
31603163
}
31613164

31623165
static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
31633166
{
31643167
struct bpf_uprobe_multi_link *umulti_link;
31653168

31663169
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3167-
if (umulti_link->task)
3168-
put_task_struct(umulti_link->task);
3169-
path_put(&umulti_link->path);
31703170
kvfree(umulti_link->uprobes);
31713171
kfree(umulti_link);
31723172
}
@@ -3242,7 +3242,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
32423242

32433243
static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
32443244
.release = bpf_uprobe_multi_link_release,
3245-
.dealloc = bpf_uprobe_multi_link_dealloc,
3245+
.dealloc_deferred = bpf_uprobe_multi_link_dealloc,
32463246
.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
32473247
};
32483248

net/core/sock_map.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
411411
struct sock *sk;
412412
int err = 0;
413413

414+
if (irqs_disabled())
415+
return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
416+
414417
spin_lock_bh(&stab->lock);
415418
sk = *psk;
416419
if (!sk_test || sk_test == sk)
@@ -933,6 +936,9 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
933936
struct bpf_shtab_elem *elem;
934937
int ret = -ENOENT;
935938

939+
if (irqs_disabled())
940+
return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
941+
936942
hash = sock_hash_bucket_hash(key, key_size);
937943
bucket = sock_hash_select_bucket(htab, hash);
938944

tools/include/linux/btf_ids.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
#ifndef _LINUX_BTF_IDS_H
44
#define _LINUX_BTF_IDS_H
55

6+
#include <linux/types.h> /* for u32 */
7+
68
struct btf_id_set {
79
u32 cnt;
810
u32 ids[];

0 commit comments

Comments
 (0)