
Commit a819ff0

Authored and committed by Paolo Abeni
Daniel Borkmann says:

====================
pull-request: bpf 2024-07-11

The following pull-request contains BPF updates for your *net* tree.

We've added 4 non-merge commits during the last 2 day(s) which contain
a total of 4 files changed, 262 insertions(+), 19 deletions(-).

The main changes are:

1) Fixes for a BPF timer lockup and a use-after-free scenario when
   timers are used concurrently, from Kumar Kartikeya Dwivedi.

2) Fix the argument order in the call to bpf_map_kvcalloc() which could
   otherwise lead to a compilation error, from Mohammad Shehar Yaar
   Tausif.

bpf-for-netdev

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: Add timer lockup selftest
  bpf: Defer work in bpf_timer_cancel_and_free
  bpf: Fail bpf_timer_cancel when callback is being cancelled
  bpf: fix order of args in call to bpf_map_kvcalloc
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Paolo Abeni <[email protected]>
2 parents (626dfed + 50bd5a0), commit a819ff0

4 files changed: +262 -19 lines

kernel/bpf/bpf_local_storage.c

Lines changed: 2 additions & 2 deletions
@@ -782,8 +782,8 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
 	nbuckets = max_t(u32, 2, nbuckets);
 	smap->bucket_log = ilog2(nbuckets);
 
-	smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
-					 nbuckets, GFP_USER | __GFP_NOWARN);
+	smap->buckets = bpf_map_kvcalloc(&smap->map, nbuckets,
+					 sizeof(*smap->buckets), GFP_USER | __GFP_NOWARN);
 	if (!smap->buckets) {
 		err = -ENOMEM;
 		goto free_smap;
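Note on the hunk above: bpf_map_kvcalloc() follows the kvcalloc()/calloc() convention of taking the element count before the element size, and the fix swaps the call-site arguments back into that order. A minimal userspace sketch of the convention, assuming that (count, size) ordering; the struct and helper names here are illustrative, not from the patch:

#include <stdlib.h>

struct bucket { void *head; };

static struct bucket *alloc_buckets(size_t nbuckets)
{
	/* calloc-style order: element count first, element size second.
	 * Swapping the two still requests the same number of bytes, but it
	 * defeats compile-time size tracking and can trigger diagnostics
	 * such as GCC's -Wcalloc-transposed-args on annotated prototypes.
	 */
	return calloc(nbuckets, sizeof(struct bucket));
}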

kernel/bpf/helpers.c

Lines changed: 82 additions & 17 deletions
@@ -1084,7 +1084,10 @@ struct bpf_async_cb {
 	struct bpf_prog *prog;
 	void __rcu *callback_fn;
 	void *value;
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		struct work_struct delete_work;
+	};
 	u64 flags;
 };
 
@@ -1107,6 +1110,7 @@ struct bpf_async_cb {
 struct bpf_hrtimer {
 	struct bpf_async_cb cb;
 	struct hrtimer timer;
+	atomic_t cancelling;
 };
 
 struct bpf_work {
@@ -1219,6 +1223,21 @@ static void bpf_wq_delete_work(struct work_struct *work)
 	kfree_rcu(w, cb.rcu);
 }
 
+static void bpf_timer_delete_work(struct work_struct *work)
+{
+	struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work);
+
+	/* Cancel the timer and wait for callback to complete if it was running.
+	 * If hrtimer_cancel() can be safely called it's safe to call
+	 * kfree_rcu(t) right after for both preallocated and non-preallocated
+	 * maps. The async->cb = NULL was already done and no code path can see
+	 * address 't' anymore. Timer if armed for existing bpf_hrtimer before
+	 * bpf_timer_cancel_and_free will have been cancelled.
+	 */
+	hrtimer_cancel(&t->timer);
+	kfree_rcu(t, cb.rcu);
+}
+
 static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
 			    enum bpf_async_type type)
 {
@@ -1262,6 +1281,8 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
 		clockid = flags & (MAX_CLOCKS - 1);
 		t = (struct bpf_hrtimer *)cb;
 
+		atomic_set(&t->cancelling, 0);
+		INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work);
 		hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
 		t->timer.function = bpf_timer_cb;
 		cb->value = (void *)async - map->record->timer_off;
@@ -1440,7 +1461,8 @@ static void drop_prog_refcnt(struct bpf_async_cb *async)
 
 BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
 {
-	struct bpf_hrtimer *t;
+	struct bpf_hrtimer *t, *cur_t;
+	bool inc = false;
 	int ret = 0;
 
 	if (in_nmi())
@@ -1452,21 +1474,50 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
 		ret = -EINVAL;
 		goto out;
 	}
-	if (this_cpu_read(hrtimer_running) == t) {
+
+	cur_t = this_cpu_read(hrtimer_running);
+	if (cur_t == t) {
 		/* If bpf callback_fn is trying to bpf_timer_cancel()
 		 * its own timer the hrtimer_cancel() will deadlock
-		 * since it waits for callback_fn to finish
+		 * since it waits for callback_fn to finish.
 		 */
 		ret = -EDEADLK;
 		goto out;
 	}
+
+	/* Only account in-flight cancellations when invoked from a timer
+	 * callback, since we want to avoid waiting only if other _callbacks_
+	 * are waiting on us, to avoid introducing lockups. Non-callback paths
+	 * are ok, since nobody would synchronously wait for their completion.
+	 */
+	if (!cur_t)
+		goto drop;
+	atomic_inc(&t->cancelling);
+	/* Need full barrier after relaxed atomic_inc */
+	smp_mb__after_atomic();
+	inc = true;
+	if (atomic_read(&cur_t->cancelling)) {
+		/* We're cancelling timer t, while some other timer callback is
+		 * attempting to cancel us. In such a case, it might be possible
+		 * that timer t belongs to the other callback, or some other
+		 * callback waiting upon it (creating transitive dependencies
+		 * upon us), and we will enter a deadlock if we continue
+		 * cancelling and waiting for it synchronously, since it might
+		 * do the same. Bail!
+		 */
+		ret = -EDEADLK;
+		goto out;
+	}
+drop:
 	drop_prog_refcnt(&t->cb);
 out:
 	__bpf_spin_unlock_irqrestore(&timer->lock);
 	/* Cancel the timer and wait for associated callback to finish
 	 * if it was running.
 	 */
 	ret = ret ?: hrtimer_cancel(&t->timer);
+	if (inc)
+		atomic_dec(&t->cancelling);
 	rcu_read_unlock();
 	return ret;
 }
@@ -1512,25 +1563,39 @@ void bpf_timer_cancel_and_free(void *val)
 
 	if (!t)
 		return;
-	/* Cancel the timer and wait for callback to complete if it was running.
-	 * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
-	 * right after for both preallocated and non-preallocated maps.
-	 * The async->cb = NULL was already done and no code path can
-	 * see address 't' anymore.
-	 *
-	 * Check that bpf_map_delete/update_elem() wasn't called from timer
-	 * callback_fn. In such case don't call hrtimer_cancel() (since it will
-	 * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
-	 * return -1). Though callback_fn is still running on this cpu it's
+	/* We check that bpf_map_delete/update_elem() was called from timer
+	 * callback_fn. In such case we don't call hrtimer_cancel() (since it
+	 * will deadlock) and don't call hrtimer_try_to_cancel() (since it will
+	 * just return -1). Though callback_fn is still running on this cpu it's
 	 * safe to do kfree(t) because bpf_timer_cb() read everything it needed
 	 * from 't'. The bpf subprog callback_fn won't be able to access 't',
 	 * since async->cb = NULL was already done. The timer will be
 	 * effectively cancelled because bpf_timer_cb() will return
 	 * HRTIMER_NORESTART.
+	 *
+	 * However, it is possible the timer callback_fn calling us armed the
+	 * timer _before_ calling us, such that failing to cancel it here will
+	 * cause it to possibly use struct hrtimer after freeing bpf_hrtimer.
+	 * Therefore, we _need_ to cancel any outstanding timers before we do
+	 * kfree_rcu, even though no more timers can be armed.
+	 *
	 * Moreover, we need to schedule work even if timer does not belong to
+	 * the calling callback_fn, as on two different CPUs, we can end up in a
+	 * situation where both sides run in parallel, try to cancel one
+	 * another, and we end up waiting on both sides in hrtimer_cancel
+	 * without making forward progress, since timer1 depends on time2
+	 * callback to finish, and vice versa.
+	 *
+	 *   CPU 1 (timer1_cb)                    CPU 2 (timer2_cb)
+	 *   bpf_timer_cancel_and_free(timer2)    bpf_timer_cancel_and_free(timer1)
+	 *
+	 * To avoid these issues, punt to workqueue context when we are in a
+	 * timer callback.
 	 */
-	if (this_cpu_read(hrtimer_running) != t)
-		hrtimer_cancel(&t->timer);
-	kfree_rcu(t, cb.rcu);
+	if (this_cpu_read(hrtimer_running))
+		queue_work(system_unbound_wq, &t->cb.delete_work);
+	else
+		bpf_timer_delete_work(&t->cb.delete_work);
 }
 
 /* This function is called by map_delete/update_elem for individual element and
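With the change above, bpf_timer_cancel() can return -EDEADLK not only when a callback cancels its own timer but also when two timer callbacks try to cancel each other concurrently, so BPF programs that cancel timers from inside a timer callback should treat -EDEADLK as a non-fatal "back off" result. A minimal sketch of such a callback, not taken from the patch; the map and function names (other_timer_map, my_timer_cb) are illustrative, and the callback would be installed elsewhere with bpf_timer_set_callback():

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

struct elem {
	struct bpf_timer t;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct elem);
} other_timer_map SEC(".maps");

static int my_timer_cb(void *map, int *key, struct elem *val)
{
	struct bpf_timer *other;
	int k = 0;
	long err;

	other = bpf_map_lookup_elem(&other_timer_map, &k);
	if (!other)
		return 0;

	err = bpf_timer_cancel(other);
	if (err == -EDEADLK) {
		/* The other timer's callback is concurrently cancelling this
		 * one (or this is a self-cancel); don't wait for it, just
		 * give up and retry on a later invocation if needed.
		 */
	}
	return 0;
}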
tools/testing/selftests/bpf/prog_tests/timer_lockup.c

Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include <pthread.h>
+#include <network_helpers.h>
+
+#include "timer_lockup.skel.h"
+
+static long cpu;
+static int *timer1_err;
+static int *timer2_err;
+static bool skip;
+
+volatile int k = 0;
+
+static void *timer_lockup_thread(void *arg)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, opts,
+		.data_in	= &pkt_v4,
+		.data_size_in	= sizeof(pkt_v4),
+		.repeat		= 1000,
+	);
+	int i, prog_fd = *(int *)arg;
+	cpu_set_t cpuset;
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
+	ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset),
+					 &cpuset),
+		  "cpu affinity");
+
+	for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) {
+		bpf_prog_test_run_opts(prog_fd, &opts);
+		/* Skip the test if we can't reproduce the race in a reasonable
+		 * amount of time.
+		 */
+		if (i > 50) {
+			WRITE_ONCE(skip, true);
+			break;
+		}
+	}
+
+	return NULL;
+}
+
+void test_timer_lockup(void)
+{
+	int timer1_prog, timer2_prog;
+	struct timer_lockup *skel;
+	pthread_t thrds[2];
+	void *ret;
+
+	skel = timer_lockup__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load"))
+		return;
+
+	timer1_prog = bpf_program__fd(skel->progs.timer1_prog);
+	timer2_prog = bpf_program__fd(skel->progs.timer2_prog);
+
+	timer1_err = &skel->bss->timer1_err;
+	timer2_err = &skel->bss->timer2_err;
+
+	if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread,
+				      &timer1_prog),
+		       "pthread_create thread1"))
+		goto out;
+	if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread,
+				      &timer2_prog),
+		       "pthread_create thread2")) {
+		pthread_exit(&thrds[0]);
+		goto out;
+	}
+
+	pthread_join(thrds[1], &ret);
+	pthread_join(thrds[0], &ret);
+
+	if (skip) {
+		test__skip();
+		goto out;
+	}
+
+	if (*timer1_err != -EDEADLK && *timer1_err != 0)
+		ASSERT_FAIL("timer1_err bad value");
+	if (*timer2_err != -EDEADLK && *timer2_err != 0)
+		ASSERT_FAIL("timer2_err bad value");
+out:
+	timer_lockup__destroy(skel);
+	return;
+}
tools/testing/selftests/bpf/progs/timer_lockup.c

Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+	struct bpf_timer t;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct elem);
+} timer1_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct elem);
+} timer2_map SEC(".maps");
+
+int timer1_err;
+int timer2_err;
+
+static int timer_cb1(void *map, int *k, struct elem *v)
+{
+	struct bpf_timer *timer;
+	int key = 0;
+
+	timer = bpf_map_lookup_elem(&timer2_map, &key);
+	if (timer)
+		timer2_err = bpf_timer_cancel(timer);
+
+	return 0;
+}
+
+static int timer_cb2(void *map, int *k, struct elem *v)
+{
+	struct bpf_timer *timer;
+	int key = 0;
+
+	timer = bpf_map_lookup_elem(&timer1_map, &key);
+	if (timer)
+		timer1_err = bpf_timer_cancel(timer);
+
+	return 0;
+}
+
+SEC("tc")
+int timer1_prog(void *ctx)
+{
+	struct bpf_timer *timer;
+	int key = 0;
+
+	timer = bpf_map_lookup_elem(&timer1_map, &key);
+	if (timer) {
+		bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME);
+		bpf_timer_set_callback(timer, timer_cb1);
+		bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+	}
+
+	return 0;
+}
+
+SEC("tc")
+int timer2_prog(void *ctx)
+{
+	struct bpf_timer *timer;
+	int key = 0;
+
+	timer = bpf_map_lookup_elem(&timer2_map, &key);
+	if (timer) {
+		bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME);
+		bpf_timer_set_callback(timer, timer_cb2);
+		bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+	}
+
+	return 0;
+}
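Usage note: as with other cases in the BPF selftest suite, the new test can presumably be run from tools/testing/selftests/bpf via ./test_progs -t timer_lockup. Per the prog_tests code above, it pins each thread to its own CPU, skips itself if the race cannot be reproduced within roughly 50 iterations per thread, and otherwise expects timer1_err/timer2_err to end up as either 0 or -EDEADLK.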
