Commit 5c8fd7e

mykyta5 authored and Alexei Starovoitov committed
bpf: bpf task work plumbing
This patch adds the necessary plumbing in the verifier, syscall and map
code to support the new kfunc bpf_task_work_schedule and the kernel
structure bpf_task_work. The idea is similar to how bpf_wq and bpf_timer
are already handled.

The verifier changes validate calls to bpf_task_work_schedule, making
sure they are safe and that the expected invariants hold. The BTF part
is required to detect the bpf_task_work structure inside a map value and
store its offset, which will be used in the next patch to calculate key
and value addresses. The arraymap and hashtab changes handle freeing of
bpf_task_work: they run the code needed to deinitialize it, for example
cancelling the task_work callback if possible.

The use of bpf_task_work and the proper implementation of the kfuncs are
introduced in the next patch.

Signed-off-by: Mykyta Yatsenko <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
Acked-by: Eduard Zingerman <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent d2699bd commit 5c8fd7e

File tree

9 files changed: +208 -18 lines changed


include/linux/bpf.h

Lines changed: 11 additions & 0 deletions
@@ -209,6 +209,7 @@ enum btf_field_type {
 	BPF_WORKQUEUE = (1 << 10),
 	BPF_UPTR = (1 << 11),
 	BPF_RES_SPIN_LOCK = (1 << 12),
+	BPF_TASK_WORK = (1 << 13),
 };

 enum bpf_cgroup_storage_type {
@@ -262,6 +263,7 @@ struct btf_record {
 	int timer_off;
 	int wq_off;
 	int refcount_off;
+	int task_work_off;
 	struct btf_field fields[];
 };

@@ -363,6 +365,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
 		return "bpf_rb_node";
 	case BPF_REFCOUNT:
 		return "bpf_refcount";
+	case BPF_TASK_WORK:
+		return "bpf_task_work";
 	default:
 		WARN_ON_ONCE(1);
 		return "unknown";
@@ -401,6 +405,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
 		return sizeof(struct bpf_rb_node);
 	case BPF_REFCOUNT:
 		return sizeof(struct bpf_refcount);
+	case BPF_TASK_WORK:
+		return sizeof(struct bpf_task_work);
 	default:
 		WARN_ON_ONCE(1);
 		return 0;
@@ -433,6 +439,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
 		return __alignof__(struct bpf_rb_node);
 	case BPF_REFCOUNT:
 		return __alignof__(struct bpf_refcount);
+	case BPF_TASK_WORK:
+		return __alignof__(struct bpf_task_work);
 	default:
 		WARN_ON_ONCE(1);
 		return 0;
@@ -464,6 +472,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
 	case BPF_KPTR_REF:
 	case BPF_KPTR_PERCPU:
 	case BPF_UPTR:
+	case BPF_TASK_WORK:
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -600,6 +609,7 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
 			   bool lock_src);
 void bpf_timer_cancel_and_free(void *timer);
 void bpf_wq_cancel_and_free(void *timer);
+void bpf_task_work_cancel_and_free(void *timer);
 void bpf_list_head_free(const struct btf_field *field, void *list_head,
 			struct bpf_spin_lock *spin_lock);
 void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
@@ -2426,6 +2436,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
 void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
 void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
+void bpf_obj_free_task_work(const struct btf_record *rec, void *obj);
 void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
 void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);

include/uapi/linux/bpf.h

Lines changed: 4 additions & 0 deletions
@@ -7436,6 +7436,10 @@ struct bpf_timer {
 	__u64 __opaque[2];
 } __attribute__((aligned(8)));

+struct bpf_task_work {
+	__u64 __opaque;
+} __attribute__((aligned(8)));
+
 struct bpf_wq {
 	__u64 __opaque[2];
 } __attribute__((aligned(8)));
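The new structure is deliberately opaque to BPF programs. As a rough illustration (not part of this patch; the map name tw_map and struct elem are hypothetical), a program would embed it in a map value like this, and btf_parse_fields() below detects the field and caches its offset:

/* Hypothetical BPF program snippet: embedding struct bpf_task_work
 * in a map value. Only hash, LRU hash and array maps accept the
 * field, per the map_check_btf() change later in this commit.
 */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

struct elem {
	struct bpf_task_work tw;	/* detected via BTF, offset cached */
	__u64 payload;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 128);
	__type(key, __u32);
	__type(value, struct elem);
} tw_map SEC(".maps");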

kernel/bpf/arraymap.c

Lines changed: 5 additions & 3 deletions
@@ -443,20 +443,22 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
 	return (void *)round_down((unsigned long)array, PAGE_SIZE);
 }

-static void array_map_free_timers_wq(struct bpf_map *map)
+static void array_map_free_internal_structs(struct bpf_map *map)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	int i;

 	/* We don't reset or free fields other than timer and workqueue
 	 * on uref dropping to zero.
 	 */
-	if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) {
+	if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
 		for (i = 0; i < array->map.max_entries; i++) {
 			if (btf_record_has_field(map->record, BPF_TIMER))
 				bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
 			if (btf_record_has_field(map->record, BPF_WORKQUEUE))
 				bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
+			if (btf_record_has_field(map->record, BPF_TASK_WORK))
+				bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i));
 		}
 	}
 }
@@ -795,7 +797,7 @@ const struct bpf_map_ops array_map_ops = {
 	.map_alloc = array_map_alloc,
 	.map_free = array_map_free,
 	.map_get_next_key = array_map_get_next_key,
-	.map_release_uref = array_map_free_timers_wq,
+	.map_release_uref = array_map_free_internal_structs,
 	.map_lookup_elem = array_map_lookup_elem,
 	.map_update_elem = array_map_update_elem,
 	.map_delete_elem = array_map_delete_elem,

kernel/bpf/btf.c

Lines changed: 7 additions & 0 deletions
@@ -3490,6 +3490,7 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_
 	{ BPF_RES_SPIN_LOCK, "bpf_res_spin_lock", true },
 	{ BPF_TIMER, "bpf_timer", true },
 	{ BPF_WORKQUEUE, "bpf_wq", true },
+	{ BPF_TASK_WORK, "bpf_task_work", true },
 	{ BPF_LIST_HEAD, "bpf_list_head", false },
 	{ BPF_LIST_NODE, "bpf_list_node", false },
 	{ BPF_RB_ROOT, "bpf_rb_root", false },
@@ -3675,6 +3676,7 @@ static int btf_find_field_one(const struct btf *btf,
 	case BPF_LIST_NODE:
 	case BPF_RB_NODE:
 	case BPF_REFCOUNT:
+	case BPF_TASK_WORK:
 		ret = btf_find_struct(btf, var_type, off, sz, field_type,
 				      info_cnt ? &info[0] : &tmp);
 		if (ret < 0)
@@ -3967,6 +3969,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 	rec->timer_off = -EINVAL;
 	rec->wq_off = -EINVAL;
 	rec->refcount_off = -EINVAL;
+	rec->task_work_off = -EINVAL;
 	for (i = 0; i < cnt; i++) {
 		field_type_size = btf_field_type_size(info_arr[i].type);
 		if (info_arr[i].off + field_type_size > value_size) {
@@ -4006,6 +4009,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 			/* Cache offset for faster lookup at runtime */
 			rec->wq_off = rec->fields[i].offset;
 			break;
+		case BPF_TASK_WORK:
+			WARN_ON_ONCE(rec->task_work_off >= 0);
+			rec->task_work_off = rec->fields[i].offset;
+			break;
 		case BPF_REFCOUNT:
 			WARN_ON_ONCE(rec->refcount_off >= 0);
 			/* Cache offset for faster lookup at runtime */
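The cached task_work_off is what the commit message refers to: the next patch can use it to recover the map value (and from there the key) address from a bare bpf_task_work pointer. A sketch of the idea, with an illustrative helper name that does not exist in this series:

/* Sketch only: given a pointer to the embedded bpf_task_work, the
 * enclosing map value starts task_work_off bytes earlier.
 */
static void *task_work_to_value(struct bpf_map *map, struct bpf_task_work *tw)
{
	return (void *)tw - map->record->task_work_off;
}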

kernel/bpf/hashtab.c

Lines changed: 11 additions & 8 deletions
@@ -223,9 +223,12 @@ static void htab_free_internal_structs(struct bpf_htab *htab, struct htab_elem *
 	if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
 		bpf_obj_free_workqueue(htab->map.record,
 				       htab_elem_value(elem, htab->map.key_size));
+	if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
+		bpf_obj_free_task_work(htab->map.record,
+				       htab_elem_value(elem, htab->map.key_size));
 }

-static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
+static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
 {
 	u32 num_entries = htab->map.max_entries;
 	int i;
@@ -1495,7 +1498,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 	}
 }

-static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
+static void htab_free_malloced_internal_structs(struct bpf_htab *htab)
 {
 	int i;

@@ -1514,16 +1517,16 @@ static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
 	rcu_read_unlock();
 }

-static void htab_map_free_timers_and_wq(struct bpf_map *map)
+static void htab_map_free_internal_structs(struct bpf_map *map)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

 	/* We only free timer and workqueue on uref dropping to zero */
-	if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE)) {
+	if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
 		if (!htab_is_prealloc(htab))
-			htab_free_malloced_timers_and_wq(htab);
+			htab_free_malloced_internal_structs(htab);
 		else
-			htab_free_prealloced_timers_and_wq(htab);
+			htab_free_prealloced_internal_structs(htab);
 	}
 }

@@ -2255,7 +2258,7 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
-	.map_release_uref = htab_map_free_timers_and_wq,
+	.map_release_uref = htab_map_free_internal_structs,
 	.map_lookup_elem = htab_map_lookup_elem,
 	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
 	.map_update_elem = htab_map_update_elem,
@@ -2276,7 +2279,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
-	.map_release_uref = htab_map_free_timers_and_wq,
+	.map_release_uref = htab_map_free_internal_structs,
 	.map_lookup_elem = htab_lru_map_lookup_elem,
 	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
 	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,

kernel/bpf/helpers.c

Lines changed: 40 additions & 0 deletions
@@ -3906,8 +3906,48 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
 }
 #endif /* CONFIG_KEYS */

+typedef int (*bpf_task_work_callback_t)(struct bpf_map *map, void *key, void *value);
+
+/**
+ * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode
+ * @task: Task struct for which callback should be scheduled
+ * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
+ * @map__map: bpf_map that embeds struct bpf_task_work in the values
+ * @callback: pointer to BPF subprogram to call
+ * @aux__prog: user should pass NULL
+ *
+ * Return: 0 if task work has been scheduled successfully, negative error code otherwise
+ */
+__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw,
+					      void *map__map, bpf_task_work_callback_t callback,
+					      void *aux__prog)
+{
+	return 0;
+}
+
+/**
+ * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME mode
+ * @task: Task struct for which callback should be scheduled
+ * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
+ * @map__map: bpf_map that embeds struct bpf_task_work in the values
+ * @callback: pointer to BPF subprogram to call
+ * @aux__prog: user should pass NULL
+ *
+ * Return: 0 if task work has been scheduled successfully, negative error code otherwise
+ */
+__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw,
+					      void *map__map, bpf_task_work_callback_t callback,
+					      void *aux__prog)
+{
+	return 0;
+}
+
 __bpf_kfunc_end_defs();

+void bpf_task_work_cancel_and_free(void *val)
+{
+}
+
 BTF_KFUNCS_START(generic_btf_ids)
 #ifdef CONFIG_CRASH_DUMP
 BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
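The kfuncs are intentionally empty stubs here; the real implementation arrives in the next patch. To make the intended calling convention concrete, a hypothetical BPF-side caller, reusing the illustrative tw_map/struct elem from the earlier sketch; the extern merely mirrors the stub signature above, and the program section and function names are arbitrary:

/* Hypothetical caller; bpf_task_work_schedule_signal() is a stub
 * until the next patch, so this only shows the call shape.
 */
typedef int (*bpf_task_work_callback_t)(struct bpf_map *map, void *key, void *value);

extern int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw,
					 void *map__map, bpf_task_work_callback_t callback,
					 void *aux__prog) __ksym __weak;

static int process_work(struct bpf_map *map, void *key, void *value)
{
	return 0;	/* invoked in task context once the task_work runs */
}

SEC("tp_btf/sched_switch")
int schedule_tw(void *ctx)
{
	struct task_struct *task = bpf_get_current_task_btf();
	__u32 key = 0;
	struct elem *val = bpf_map_lookup_elem(&tw_map, &key);

	if (!val)
		return 0;
	/* aux__prog stays NULL per the kdoc; the kernel supplies the prog aux */
	bpf_task_work_schedule_signal(task, &val->tw, &tw_map, process_work, NULL);
	return 0;
}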

kernel/bpf/syscall.c

Lines changed: 15 additions & 1 deletion
@@ -674,6 +674,7 @@ void btf_record_free(struct btf_record *rec)
 		case BPF_TIMER:
 		case BPF_REFCOUNT:
 		case BPF_WORKQUEUE:
+		case BPF_TASK_WORK:
 			/* Nothing to release */
 			break;
 		default:
@@ -727,6 +728,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
 		case BPF_TIMER:
 		case BPF_REFCOUNT:
 		case BPF_WORKQUEUE:
+		case BPF_TASK_WORK:
 			/* Nothing to acquire */
 			break;
 		default:
@@ -785,6 +787,13 @@ void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj)
 	bpf_wq_cancel_and_free(obj + rec->wq_off);
 }

+void bpf_obj_free_task_work(const struct btf_record *rec, void *obj)
+{
+	if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK)))
+		return;
+	bpf_task_work_cancel_and_free(obj + rec->task_work_off);
+}
+
 void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 {
 	const struct btf_field *fields;
@@ -809,6 +818,9 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 		case BPF_WORKQUEUE:
 			bpf_wq_cancel_and_free(field_ptr);
 			break;
+		case BPF_TASK_WORK:
+			bpf_task_work_cancel_and_free(field_ptr);
+			break;
 		case BPF_KPTR_UNREF:
 			WRITE_ONCE(*(u64 *)field_ptr, 0);
 			break;
@@ -1240,7 +1252,8 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,

 	map->record = btf_parse_fields(btf, value_type,
 				       BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
-				       BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR,
+				       BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR |
+				       BPF_TASK_WORK,
 				       map->value_size);
 	if (!IS_ERR_OR_NULL(map->record)) {
 		int i;
@@ -1272,6 +1285,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
 			break;
 		case BPF_TIMER:
 		case BPF_WORKQUEUE:
+		case BPF_TASK_WORK:
 			if (map->map_type != BPF_MAP_TYPE_HASH &&
 			    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
 			    map->map_type != BPF_MAP_TYPE_ARRAY) {