diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index e7a2fc60523f6..38fddcb1e28c7 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -307,9 +307,10 @@ static void bpf_lru_list_push_free(struct bpf_lru_list *l,
 	if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)))
 		return;
 
-	raw_spin_lock_irqsave(&l->lock, flags);
+	if (raw_res_spin_lock_irqsave(&l->lock, flags))
+		return;
 	__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
-	raw_spin_unlock_irqrestore(&l->lock, flags);
+	raw_res_spin_unlock_irqrestore(&l->lock, flags);
 }
 
 static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru,
@@ -319,7 +320,8 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru,
 	struct bpf_lru_node *node, *tmp_node;
 	unsigned int nfree = 0;
 
-	raw_spin_lock(&l->lock);
+	if (raw_res_spin_lock(&l->lock))
+		return;
 
 	__local_list_flush(l, loc_l);
 
@@ -338,7 +340,7 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru,
 				      local_free_list(loc_l),
 				      BPF_LRU_LOCAL_LIST_T_FREE);
 
-	raw_spin_unlock(&l->lock);
+	raw_res_spin_unlock(&l->lock);
 }
 
 static void __local_list_add_pending(struct bpf_lru *lru,
@@ -404,7 +406,8 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
 
 	l = per_cpu_ptr(lru->percpu_lru, cpu);
 
-	raw_spin_lock_irqsave(&l->lock, flags);
+	if (raw_res_spin_lock_irqsave(&l->lock, flags))
+		return NULL;
 
 	__bpf_lru_list_rotate(lru, l);
 
@@ -420,7 +423,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
 		__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
 	}
 
-	raw_spin_unlock_irqrestore(&l->lock, flags);
+	raw_res_spin_unlock_irqrestore(&l->lock, flags);
 
 	return node;
 }
@@ -437,7 +440,8 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
 
 	loc_l = per_cpu_ptr(clru->local_list, cpu);
 
-	raw_spin_lock_irqsave(&loc_l->lock, flags);
+	if (raw_res_spin_lock_irqsave(&loc_l->lock, flags))
+		return NULL;
 
 	node = __local_list_pop_free(loc_l);
 	if (!node) {
@@ -448,7 +452,7 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
 	if (node)
 		__local_list_add_pending(lru, loc_l, cpu, node, hash);
 
-	raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+	raw_res_spin_unlock_irqrestore(&loc_l->lock, flags);
 
 	if (node)
 		return node;
@@ -466,23 +470,26 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
 	do {
 		steal_loc_l = per_cpu_ptr(clru->local_list, steal);
 
-		raw_spin_lock_irqsave(&steal_loc_l->lock, flags);
+		if (raw_res_spin_lock_irqsave(&steal_loc_l->lock, flags))
+			goto out_next;
 
 		node = __local_list_pop_free(steal_loc_l);
 		if (!node)
 			node = __local_list_pop_pending(lru, steal_loc_l);
 
-		raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags);
+		raw_res_spin_unlock_irqrestore(&steal_loc_l->lock, flags);
 
+out_next:
 		steal = cpumask_next_wrap(steal, cpu_possible_mask);
 	} while (!node && steal != first_steal);
 
 	loc_l->next_steal = steal;
 
 	if (node) {
-		raw_spin_lock_irqsave(&loc_l->lock, flags);
+		if (raw_res_spin_lock_irqsave(&loc_l->lock, flags))
+			return NULL;
 		__local_list_add_pending(lru, loc_l, cpu, node, hash);
-		raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+		raw_res_spin_unlock_irqrestore(&loc_l->lock, flags);
 	}
 
 	return node;
@@ -511,10 +518,11 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru,
 
 		loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu);
 
-		raw_spin_lock_irqsave(&loc_l->lock, flags);
+		if (raw_res_spin_lock_irqsave(&loc_l->lock, flags))
+			return;
 
 		if (unlikely(node->type != BPF_LRU_LOCAL_LIST_T_PENDING)) {
-			raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+			raw_res_spin_unlock_irqrestore(&loc_l->lock, flags);
 			goto check_lru_list;
 		}
 
@@ -522,7 +530,7 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru,
 		bpf_lru_node_clear_ref(node);
 		list_move(&node->list, local_free_list(loc_l));
 
-		raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+		raw_res_spin_unlock_irqrestore(&loc_l->lock, flags);
 		return;
 	}
 
@@ -538,11 +546,12 @@ static void bpf_percpu_lru_push_free(struct bpf_lru *lru,
 
 	l = per_cpu_ptr(lru->percpu_lru, node->cpu);
 
-	raw_spin_lock_irqsave(&l->lock, flags);
+	if (raw_res_spin_lock_irqsave(&l->lock, flags))
+		return;
 
 	__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
 
-	raw_spin_unlock_irqrestore(&l->lock, flags);
+	raw_res_spin_unlock_irqrestore(&l->lock, flags);
 }
 
 void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node)
@@ -625,7 +634,7 @@ static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu)
 
 	loc_l->next_steal = cpu;
 
-	raw_spin_lock_init(&loc_l->lock);
+	raw_res_spin_lock_init(&loc_l->lock);
 }
 
 static void bpf_lru_list_init(struct bpf_lru_list *l)
@@ -640,7 +649,7 @@ static void bpf_lru_list_init(struct bpf_lru_list *l)
 
 	l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE];
 
-	raw_spin_lock_init(&l->lock);
+	raw_res_spin_lock_init(&l->lock);
 }
 
 int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
index fe2661a58ea94..61fc7d7f9de12 100644
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -7,6 +7,7 @@
 #include <linux/cache.h>
 #include <linux/list.h>
 #include <linux/spinlock_types.h>
+#include <asm-generic/rqspinlock.h>
 
 #define NR_BPF_LRU_LIST_T	(3)
 #define NR_BPF_LRU_LIST_COUNT	(2)
@@ -34,13 +35,13 @@ struct bpf_lru_list {
 	/* The next inactive list rotation starts from here */
 	struct list_head *next_inactive_rotation;
 
-	raw_spinlock_t lock ____cacheline_aligned_in_smp;
+	rqspinlock_t lock ____cacheline_aligned_in_smp;
 };
 
 struct bpf_lru_locallist {
 	struct list_head lists[NR_BPF_LRU_LOCAL_LIST_T];
 	u16 next_steal;
-	raw_spinlock_t lock;
+	rqspinlock_t lock;
 };
 
 struct bpf_common_lru {
diff --git a/tools/testing/selftests/bpf/prog_tests/map_deadlock.c b/tools/testing/selftests/bpf/prog_tests/map_deadlock.c
new file mode 100644
index 0000000000000..17fcf1f5efa65
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_deadlock.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <linux/perf_event.h>
+#include <bpf/bpf.h>
+#include "map_deadlock.skel.h"
+
+
+static int perf_open_all_cpus(struct perf_event_attr *attr, int fds[], int max_cpus)
+{
+	int n = 0;
+
+	for (int cpu = 0; cpu < max_cpus; cpu++) {
+		int fd = syscall(__NR_perf_event_open, attr, -1 /* pid: all */, cpu,
+				 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+		if (fd < 0)
+			continue;
+		fds[cpu] = fd;
+		n++;
+	}
+	return n;
+}
+
+struct thread_arg {
+	int map_fd;
+	bool *stop;
+};
+
+static void *user_update_thread(void *argp)
+{
+	struct thread_arg *arg = argp;
+	__u32 key = 0;
+	__u64 val = 1;
+
+	while (!*arg->stop) {
+		key++;
+		val++;
+		bpf_map_update_elem(arg->map_fd, &key, &val, BPF_ANY);
+		if ((key & 0x7) == 0)
+			bpf_map_delete_elem(arg->map_fd, &key);
+	}
+	return NULL;
+}
+
+static void test_map(const char *map_name, int map_index)
+{
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_HARDWARE,
+		.size = sizeof(struct perf_event_attr),
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.sample_period = 1000000,
+		.freq = 0,
+		.disabled = 0,
+		.wakeup_events = 1,
+	};
+	int map_fd, nfd = 0, max_cpus, err;
+	struct bpf_link **links = NULL;
+	struct map_deadlock *skel;
+	struct bpf_program *prog;
+	struct thread_arg targ;
+	bool stop = false;
+	int *fds = NULL;
+	pthread_t thr;
+
+	skel = map_deadlock__open();
+	if (!ASSERT_OK_PTR(skel, "map_deadlock__open"))
+		return;
+	skel->rodata->map_index = map_index;
+	err = map_deadlock__load(skel);
+	if (!ASSERT_OK(err, "map_deadlock__load"))
+		goto out;
+
+	prog = skel->progs.on_perf;
+	map_fd = bpf_object__find_map_fd_by_name(skel->obj, map_name);
+	if (!ASSERT_GE(map_fd, 0, map_name))
+		goto out;
+
+	max_cpus = libbpf_num_possible_cpus();
+	if (!ASSERT_GT(max_cpus, 0, "num cpus"))
+		goto out;
+
+	links = calloc(max_cpus, sizeof(*links));
+	ASSERT_OK_PTR(links, "alloc links");
+	fds = calloc(max_cpus, sizeof(*fds));
+	ASSERT_OK_PTR(fds, "alloc fds");
+	for (int i = 0; i < max_cpus; i++)
+		fds[i] = -1;
+
+	nfd = perf_open_all_cpus(&attr, fds, max_cpus);
+	if (!ASSERT_GT(nfd, 0, "perf fds"))
+		goto out;
+
+	for (int cpu = 0; cpu < max_cpus; cpu++) {
+		if (fds[cpu] < 0)
+			continue;
+		links[cpu] = bpf_program__attach_perf_event(prog, fds[cpu]);
+		if (!ASSERT_OK_PTR(links[cpu], "attach perf"))
+			goto out;
+	}
+
+	targ.map_fd = map_fd;
+	targ.stop = &stop;
+	err = pthread_create(&thr, NULL, user_update_thread, &targ);
+	if (!ASSERT_OK(err, "create thr"))
+		goto out;
+
+	/* 1 second should be enough to trigger the deadlock */
+	sleep(1);
+	stop = true;
+	(void)pthread_join(thr, NULL);
+	/* TODO: read dmesg to check the deadlock? */
+out:
+	if (links) {
+		for (int cpu = 0; cpu < max_cpus; cpu++) {
+			if (links[cpu])
+				bpf_link__destroy(links[cpu]);
+		}
+	}
+	if (fds) {
+		for (int cpu = 0; cpu < max_cpus; cpu++) {
+			if (fds[cpu] >= 0)
+				close(fds[cpu]);
+		}
+	}
+	free(links);
+	free(fds);
+	map_deadlock__destroy(skel);
+}
+
+void test_map_deadlock(void)
+{
+	if (test__start_subtest("lru"))
+		test_map("lru_map", 0);
+}
diff --git a/tools/testing/selftests/bpf/progs/map_deadlock.c b/tools/testing/selftests/bpf/progs/map_deadlock.c
new file mode 100644
index 0000000000000..6966224955fc1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_deadlock.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+struct lru_map {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__uint(max_entries, 1024);
+	__type(key, u32);
+	__type(value, u64);
+} lru_map SEC(".maps");
+
+struct map_list {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__array(values, struct lru_map);
+} map_list SEC(".maps") = {
+	.values = { [0] = &lru_map },
+};
+
+const volatile int map_index;
+
+static __always_inline void do_update_delete(void *map)
+{
+	u64 ts = bpf_ktime_get_ns();
+	u32 key = (u32)(ts >> 12);
+	u64 val = ts;
+
+	if ((ts & 1) == 0)
+		bpf_map_update_elem(map, &key, &val, BPF_ANY);
+	else
+		bpf_map_delete_elem(map, &key);
+}
+
+SEC("perf_event")
+int on_perf(struct bpf_perf_event_data *ctx)
+{
+	int key = map_index;
+	void *target_map;
+
+	target_map = bpf_map_lookup_elem(&map_list, &key);
+	if (!target_map)
+		return 0;
+
+	for (int i = 0; i < 4; i++)
+		do_update_delete(target_map);
+	return 0;
+}