
Commit 42faedf

bpf: bpf task work plumbing
This patch adds the necessary plumbing in the verifier, syscall and map code to support handling the new kfunc bpf_task_work_schedule and the kernel structure bpf_task_work. The idea is similar to how bpf_wq and bpf_timer are already handled.

The verifier changes validate calls to bpf_task_work_schedule to make sure they are safe and the expected invariants hold. The BTF part is required to detect the bpf_task_work structure inside a map value and store its offset, which the next patch will use to calculate key and value addresses. The arraymap and hashtab changes are needed to handle freeing of the bpf_task_work: they run the code needed to deinitialize it, for example cancelling the task_work callback if possible.

The actual use of bpf_task_work and the proper implementation of the kfuncs are introduced in the next patch.

Signed-off-by: Mykyta Yatsenko <[email protected]>
1 parent cd7c97f commit 42faedf
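
For illustration, this is roughly how a BPF program would embed the new structure in a map value once the follow-up patch lands. A minimal hypothetical sketch — the struct and map names (struct elem, tw_map) are made up, and the scheduling kfuncs added below are still stubs at this point in the series:

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>

	struct elem {
		struct bpf_task_work tw;	/* found by BTF parsing; its offset is recorded in btf_record */
	};

	struct {
		__uint(type, BPF_MAP_TYPE_HASH);	/* hash, LRU hash and array maps are supported */
		__uint(max_entries, 64);
		__type(key, int);
		__type(value, struct elem);
	} tw_map SEC(".maps");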

9 files changed: +247 −16 lines

include/linux/bpf.h

Lines changed: 11 additions & 0 deletions
@@ -206,6 +206,7 @@ enum btf_field_type {
 	BPF_WORKQUEUE = (1 << 10),
 	BPF_UPTR = (1 << 11),
 	BPF_RES_SPIN_LOCK = (1 << 12),
+	BPF_TASK_WORK = (1 << 13),
 };
 
 typedef void (*btf_dtor_kfunc_t)(void *);
@@ -245,6 +246,7 @@ struct btf_record {
 	int timer_off;
 	int wq_off;
 	int refcount_off;
+	int task_work_off;
 	struct btf_field fields[];
 };
 
@@ -340,6 +342,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
 		return "bpf_rb_node";
 	case BPF_REFCOUNT:
 		return "bpf_refcount";
+	case BPF_TASK_WORK:
+		return "bpf_task_work";
 	default:
 		WARN_ON_ONCE(1);
 		return "unknown";
@@ -378,6 +382,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
 		return sizeof(struct bpf_rb_node);
 	case BPF_REFCOUNT:
 		return sizeof(struct bpf_refcount);
+	case BPF_TASK_WORK:
+		return sizeof(struct bpf_task_work);
 	default:
 		WARN_ON_ONCE(1);
 		return 0;
@@ -410,6 +416,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
 		return __alignof__(struct bpf_rb_node);
 	case BPF_REFCOUNT:
 		return __alignof__(struct bpf_refcount);
+	case BPF_TASK_WORK:
+		return __alignof__(struct bpf_task_work);
 	default:
 		WARN_ON_ONCE(1);
 		return 0;
@@ -441,6 +449,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
 	case BPF_KPTR_REF:
 	case BPF_KPTR_PERCPU:
 	case BPF_UPTR:
+	case BPF_TASK_WORK:
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -577,6 +586,7 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
 			   bool lock_src);
 void bpf_timer_cancel_and_free(void *timer);
 void bpf_wq_cancel_and_free(void *timer);
+void bpf_task_work_cancel_and_free(void *timer);
 void bpf_list_head_free(const struct btf_field *field, void *list_head,
 			struct bpf_spin_lock *spin_lock);
 void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
@@ -2391,6 +2401,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
 void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
 void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
+void bpf_obj_free_task_work(const struct btf_record *rec, void *obj);
 void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
 void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);

include/uapi/linux/bpf.h

Lines changed: 4 additions & 0 deletions
@@ -7418,6 +7418,10 @@ struct bpf_timer {
 	__u64 __opaque[2];
 } __attribute__((aligned(8)));
 
+struct bpf_task_work {
+	__u64 __opaque[16];
+} __attribute__((aligned(8)));
+
 struct bpf_wq {
 	__u64 __opaque[2];
 } __attribute__((aligned(8)));

kernel/bpf/arraymap.c

Lines changed: 5 additions & 3 deletions
@@ -431,20 +431,22 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
 	return (void *)round_down((unsigned long)array, PAGE_SIZE);
 }
 
-static void array_map_free_timers_wq(struct bpf_map *map)
+static void array_map_free_internal_structs(struct bpf_map *map)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	int i;
 
 	/* We don't reset or free fields other than timer and workqueue
 	 * on uref dropping to zero.
 	 */
-	if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) {
+	if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
 		for (i = 0; i < array->map.max_entries; i++) {
 			if (btf_record_has_field(map->record, BPF_TIMER))
 				bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
 			if (btf_record_has_field(map->record, BPF_WORKQUEUE))
 				bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
+			if (btf_record_has_field(map->record, BPF_TASK_WORK))
+				bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i));
 		}
 	}
 }
@@ -783,7 +785,7 @@ const struct bpf_map_ops array_map_ops = {
 	.map_alloc = array_map_alloc,
 	.map_free = array_map_free,
 	.map_get_next_key = array_map_get_next_key,
-	.map_release_uref = array_map_free_timers_wq,
+	.map_release_uref = array_map_free_internal_structs,
 	.map_lookup_elem = array_map_lookup_elem,
 	.map_update_elem = array_map_update_elem,
 	.map_delete_elem = array_map_delete_elem,

kernel/bpf/btf.c

Lines changed: 15 additions & 0 deletions
@@ -3527,6 +3527,15 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_
 			goto end;
 		}
 	}
+	if (field_mask & BPF_TASK_WORK) {
+		if (!strcmp(name, "bpf_task_work")) {
+			if (*seen_mask & BPF_TASK_WORK)
+				return -E2BIG;
+			*seen_mask |= BPF_TASK_WORK;
+			type = BPF_TASK_WORK;
+			goto end;
+		}
+	}
 	field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
 	field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
 	field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
@@ -3693,6 +3702,7 @@ static int btf_find_field_one(const struct btf *btf,
 	case BPF_LIST_NODE:
 	case BPF_RB_NODE:
 	case BPF_REFCOUNT:
+	case BPF_TASK_WORK:
 		ret = btf_find_struct(btf, var_type, off, sz, field_type,
 				      info_cnt ? &info[0] : &tmp);
 		if (ret < 0)
@@ -3985,6 +3995,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 	rec->timer_off = -EINVAL;
 	rec->wq_off = -EINVAL;
 	rec->refcount_off = -EINVAL;
+	rec->task_work_off = -EINVAL;
 	for (i = 0; i < cnt; i++) {
 		field_type_size = btf_field_type_size(info_arr[i].type);
 		if (info_arr[i].off + field_type_size > value_size) {
@@ -4050,6 +4061,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 		case BPF_LIST_NODE:
 		case BPF_RB_NODE:
 			break;
+		case BPF_TASK_WORK:
+			WARN_ON_ONCE(rec->task_work_off >= 0);
+			rec->task_work_off = rec->fields[i].offset;
+			break;
 		default:
 			ret = -EFAULT;
 			goto end;
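
Note the *seen_mask check in the first hunk: at most one bpf_task_work field is accepted per map value. A hypothetical value type like the following would make btf_parse_fields fail with -E2BIG:

	struct bad_elem {
		struct bpf_task_work a;
		struct bpf_task_work b;	/* duplicate field: btf_get_field_type returns -E2BIG */
	};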

kernel/bpf/hashtab.c

Lines changed: 14 additions & 8 deletions
@@ -215,7 +215,7 @@ static bool htab_has_extra_elems(struct bpf_htab *htab)
 	return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab);
 }
 
-static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
+static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
 {
 	u32 num_entries = htab->map.max_entries;
 	int i;
@@ -233,6 +233,9 @@ static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
 		if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
 			bpf_obj_free_workqueue(htab->map.record,
 					       htab_elem_value(elem, htab->map.key_size));
+		if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
+			bpf_obj_free_task_work(htab->map.record,
+					       htab_elem_value(elem, htab->map.key_size));
 		cond_resched();
 	}
 }
@@ -1490,7 +1493,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 	}
 }
 
-static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
+static void htab_free_malloced_internal_structs(struct bpf_htab *htab)
 {
 	int i;
 
@@ -1508,22 +1511,25 @@ static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
 			if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
 				bpf_obj_free_workqueue(htab->map.record,
 						       htab_elem_value(l, htab->map.key_size));
+			if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
+				bpf_obj_free_task_work(htab->map.record,
+						       htab_elem_value(l, htab->map.key_size));
 		}
 		cond_resched_rcu();
 	}
 	rcu_read_unlock();
 }
 
-static void htab_map_free_timers_and_wq(struct bpf_map *map)
+static void htab_map_free_internal_structs(struct bpf_map *map)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 
 	/* We only free timer and workqueue on uref dropping to zero */
-	if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE)) {
+	if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
 		if (!htab_is_prealloc(htab))
-			htab_free_malloced_timers_and_wq(htab);
+			htab_free_malloced_internal_structs(htab);
 		else
-			htab_free_prealloced_timers_and_wq(htab);
+			htab_free_prealloced_internal_structs(htab);
 	}
 }
 
@@ -2255,7 +2261,7 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
-	.map_release_uref = htab_map_free_timers_and_wq,
+	.map_release_uref = htab_map_free_internal_structs,
 	.map_lookup_elem = htab_map_lookup_elem,
 	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
 	.map_update_elem = htab_map_update_elem,
@@ -2276,7 +2282,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
-	.map_release_uref = htab_map_free_timers_and_wq,
+	.map_release_uref = htab_map_free_internal_structs,
 	.map_lookup_elem = htab_lru_map_lookup_elem,
 	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
 	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,

kernel/bpf/helpers.c

Lines changed: 45 additions & 0 deletions
@@ -3703,8 +3703,53 @@ __bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign)
 	return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX);
 }
 
+typedef void (*bpf_task_work_callback_t)(struct bpf_map *, void *, void *);
+
+/**
+ * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode
+ * @task: Task struct for which the callback should be scheduled
+ * @tw: Pointer to the bpf_task_work struct, used by the kernel internally for bookkeeping
+ * @map__map: bpf_map that contains bpf_task_work in one of its values
+ * @callback: pointer to the BPF subprogram to call
+ * @aux__prog: user should pass NULL
+ *
+ * Return: 0 if the task work has been scheduled successfully, negative error code otherwise
+ */
+__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task,
+					      struct bpf_task_work *tw,
+					      struct bpf_map *map__map,
+					      bpf_task_work_callback_t callback,
+					      void *aux__prog)
+{
+	return 0;
+}
+
+/**
+ * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME mode,
+ *				   or TWA_NMI_CURRENT mode if scheduling for the current task in NMI
+ * @task: Task struct for which the callback should be scheduled
+ * @tw: Pointer to the bpf_task_work struct, used by the kernel internally for bookkeeping
+ * @map__map: bpf_map that contains bpf_task_work in one of its values
+ * @callback: pointer to the BPF subprogram to call
+ * @aux__prog: user should pass NULL
+ *
+ * Return: 0 if the task work has been scheduled successfully, negative error code otherwise
+ */
+__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task,
+					      struct bpf_task_work *tw,
+					      struct bpf_map *map__map,
+					      bpf_task_work_callback_t callback,
+					      void *aux__prog)
+{
+	return 0;
+}
+
 __bpf_kfunc_end_defs();
 
+void bpf_task_work_cancel_and_free(void *val)
+{
+}
+
 BTF_KFUNCS_START(generic_btf_ids)
 #ifdef CONFIG_CRASH_DUMP
 BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
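
Taken together with the map sketch above, a program would eventually invoke the kfunc roughly as follows. This is a hypothetical sketch: the callback and program names are made up, the __ksym declaration mirrors the kernel signature above as an assumption, and both kfuncs still return 0 without doing any work until the next patch:

	extern int bpf_task_work_schedule_resume(struct task_struct *task,
						 struct bpf_task_work *tw,
						 void *map__map, void *callback,
						 void *aux__prog) __ksym;	/* assumed BPF-side decl */

	static void process_work(struct bpf_map *map, void *key, void *value)
	{
		/* matches bpf_task_work_callback_t; runs in task context when the task_work fires */
	}

	SEC("tp/syscalls/sys_enter_nanosleep")
	int schedule_tw(void *ctx)
	{
		struct task_struct *task = bpf_get_current_task_btf();
		int key = 0;
		struct elem *val = bpf_map_lookup_elem(&tw_map, &key);

		if (!val)
			return 0;
		/* aux__prog must be NULL; the kernel supplies the program aux */
		bpf_task_work_schedule_resume(task, &val->tw, &tw_map, process_work, NULL);
		return 0;
	}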

kernel/bpf/syscall.c

Lines changed: 22 additions & 1 deletion
@@ -670,6 +670,7 @@ void btf_record_free(struct btf_record *rec)
 		case BPF_TIMER:
 		case BPF_REFCOUNT:
 		case BPF_WORKQUEUE:
+		case BPF_TASK_WORK:
 			/* Nothing to release */
 			break;
 		default:
@@ -723,6 +724,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
 		case BPF_TIMER:
 		case BPF_REFCOUNT:
 		case BPF_WORKQUEUE:
+		case BPF_TASK_WORK:
 			/* Nothing to acquire */
 			break;
 		default:
@@ -781,6 +783,13 @@ void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj)
 	bpf_wq_cancel_and_free(obj + rec->wq_off);
 }
 
+void bpf_obj_free_task_work(const struct btf_record *rec, void *obj)
+{
+	if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK)))
+		return;
+	bpf_task_work_cancel_and_free(obj + rec->task_work_off);
+}
+
 void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 {
 	const struct btf_field *fields;
@@ -838,6 +847,9 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 				continue;
 			bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
 			break;
+		case BPF_TASK_WORK:
+			bpf_task_work_cancel_and_free(field_ptr);
+			break;
 		case BPF_LIST_NODE:
 		case BPF_RB_NODE:
 		case BPF_REFCOUNT:
@@ -1234,7 +1246,8 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
 
 	map->record = btf_parse_fields(btf, value_type,
 				       BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
-				       BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR,
+				       BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR |
+				       BPF_TASK_WORK,
 				       map->value_size);
 	if (!IS_ERR_OR_NULL(map->record)) {
 		int i;
@@ -1306,6 +1319,14 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
 				goto free_map_tab;
 			}
 			break;
+		case BPF_TASK_WORK:
+			if (map->map_type != BPF_MAP_TYPE_HASH &&
+			    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+			    map->map_type != BPF_MAP_TYPE_ARRAY) {
+				ret = -EOPNOTSUPP;
+				goto free_map_tab;
+			}
+			break;
 		default:
 			/* Fail if map_type checks are missing for a field type */
 			ret = -EOPNOTSUPP;