Skip to content

Commit e799423

Browse files
committed
bpf: bpf task work plumbing
This patch adds the necessary plumbing in the verifier, syscall and maps to support handling of the new kfunc bpf_task_work_schedule and the kernel structure bpf_task_work. The idea is similar to how bpf_wq and bpf_timer are already handled. Verifier changes validate calls to bpf_task_work_schedule to make sure they are safe and the expected invariants hold. The BTF part is required to detect the bpf_task_work structure inside a map value and store its offset, which will be used in the next patch to calculate key and value addresses. The arraymap and hashtab changes are needed to handle freeing of the bpf_task_work: run the code needed to deinitialize it, for example cancel the task_work callback if possible. The use of bpf_task_work and the proper implementation of the kfuncs are introduced in the next patch. Signed-off-by: Mykyta Yatsenko <[email protected]>
1 parent fa47913 commit e799423

File tree

9 files changed

+247
-16
lines changed

9 files changed

+247
-16
lines changed

include/linux/bpf.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ enum btf_field_type {
206206
BPF_WORKQUEUE = (1 << 10),
207207
BPF_UPTR = (1 << 11),
208208
BPF_RES_SPIN_LOCK = (1 << 12),
209+
BPF_TASK_WORK = (1 << 13),
209210
};
210211

211212
enum bpf_cgroup_storage_type {
@@ -259,6 +260,7 @@ struct btf_record {
259260
int timer_off;
260261
int wq_off;
261262
int refcount_off;
263+
int task_work_off;
262264
struct btf_field fields[];
263265
};
264266

@@ -358,6 +360,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
358360
return "bpf_rb_node";
359361
case BPF_REFCOUNT:
360362
return "bpf_refcount";
363+
case BPF_TASK_WORK:
364+
return "bpf_task_work";
361365
default:
362366
WARN_ON_ONCE(1);
363367
return "unknown";
@@ -396,6 +400,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
396400
return sizeof(struct bpf_rb_node);
397401
case BPF_REFCOUNT:
398402
return sizeof(struct bpf_refcount);
403+
case BPF_TASK_WORK:
404+
return sizeof(struct bpf_task_work);
399405
default:
400406
WARN_ON_ONCE(1);
401407
return 0;
@@ -428,6 +434,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
428434
return __alignof__(struct bpf_rb_node);
429435
case BPF_REFCOUNT:
430436
return __alignof__(struct bpf_refcount);
437+
case BPF_TASK_WORK:
438+
return __alignof__(struct bpf_task_work);
431439
default:
432440
WARN_ON_ONCE(1);
433441
return 0;
@@ -459,6 +467,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
459467
case BPF_KPTR_REF:
460468
case BPF_KPTR_PERCPU:
461469
case BPF_UPTR:
470+
case BPF_TASK_WORK:
462471
break;
463472
default:
464473
WARN_ON_ONCE(1);
@@ -595,6 +604,7 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
595604
bool lock_src);
596605
void bpf_timer_cancel_and_free(void *timer);
597606
void bpf_wq_cancel_and_free(void *timer);
607+
void bpf_task_work_cancel_and_free(void *timer);
598608
void bpf_list_head_free(const struct btf_field *field, void *list_head,
599609
struct bpf_spin_lock *spin_lock);
600610
void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
@@ -2412,6 +2422,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
24122422
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
24132423
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
24142424
void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
2425+
void bpf_obj_free_task_work(const struct btf_record *rec, void *obj);
24152426
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
24162427
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
24172428

include/uapi/linux/bpf.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7418,6 +7418,10 @@ struct bpf_timer {
74187418
__u64 __opaque[2];
74197419
} __attribute__((aligned(8)));
74207420

7421+
/* Opaque storage embedded in a map value, consumed by the
 * bpf_task_work_schedule*() kfuncs. Managed entirely by the kernel;
 * BPF programs must not read or write it directly.
 * NOTE(review): ctx presumably holds a kernel bookkeeping pointer/state —
 * confirm against the follow-up patch that implements the kfuncs.
 */
struct bpf_task_work {
7422+
__u64 ctx;
7423+
} __attribute__((aligned(8)));
7424+
74217425
struct bpf_wq {
74227426
__u64 __opaque[2];
74237427
} __attribute__((aligned(8)));

kernel/bpf/arraymap.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -431,20 +431,22 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
431431
return (void *)round_down((unsigned long)array, PAGE_SIZE);
432432
}
433433

434-
static void array_map_free_timers_wq(struct bpf_map *map)
434+
static void array_map_free_internal_structs(struct bpf_map *map)
435435
{
436436
struct bpf_array *array = container_of(map, struct bpf_array, map);
437437
int i;
438438

439439
/* We don't reset or free fields other than timer and workqueue
440440
* on uref dropping to zero.
441441
*/
442-
if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) {
442+
if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
443443
for (i = 0; i < array->map.max_entries; i++) {
444444
if (btf_record_has_field(map->record, BPF_TIMER))
445445
bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
446446
if (btf_record_has_field(map->record, BPF_WORKQUEUE))
447447
bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
448+
if (btf_record_has_field(map->record, BPF_TASK_WORK))
449+
bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i));
448450
}
449451
}
450452
}
@@ -783,7 +785,7 @@ const struct bpf_map_ops array_map_ops = {
783785
.map_alloc = array_map_alloc,
784786
.map_free = array_map_free,
785787
.map_get_next_key = array_map_get_next_key,
786-
.map_release_uref = array_map_free_timers_wq,
788+
.map_release_uref = array_map_free_internal_structs,
787789
.map_lookup_elem = array_map_lookup_elem,
788790
.map_update_elem = array_map_update_elem,
789791
.map_delete_elem = array_map_delete_elem,

kernel/bpf/btf.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3527,6 +3527,15 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_
35273527
goto end;
35283528
}
35293529
}
3530+
if (field_mask & BPF_TASK_WORK) {
3531+
if (!strcmp(name, "bpf_task_work")) {
3532+
if (*seen_mask & BPF_TASK_WORK)
3533+
return -E2BIG;
3534+
*seen_mask |= BPF_TASK_WORK;
3535+
type = BPF_TASK_WORK;
3536+
goto end;
3537+
}
3538+
}
35303539
field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
35313540
field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
35323541
field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
@@ -3693,6 +3702,7 @@ static int btf_find_field_one(const struct btf *btf,
36933702
case BPF_LIST_NODE:
36943703
case BPF_RB_NODE:
36953704
case BPF_REFCOUNT:
3705+
case BPF_TASK_WORK:
36963706
ret = btf_find_struct(btf, var_type, off, sz, field_type,
36973707
info_cnt ? &info[0] : &tmp);
36983708
if (ret < 0)
@@ -3985,6 +3995,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
39853995
rec->timer_off = -EINVAL;
39863996
rec->wq_off = -EINVAL;
39873997
rec->refcount_off = -EINVAL;
3998+
rec->task_work_off = -EINVAL;
39883999
for (i = 0; i < cnt; i++) {
39894000
field_type_size = btf_field_type_size(info_arr[i].type);
39904001
if (info_arr[i].off + field_type_size > value_size) {
@@ -4050,6 +4061,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
40504061
case BPF_LIST_NODE:
40514062
case BPF_RB_NODE:
40524063
break;
4064+
case BPF_TASK_WORK:
4065+
WARN_ON_ONCE(rec->task_work_off >= 0);
4066+
rec->task_work_off = rec->fields[i].offset;
4067+
break;
40534068
default:
40544069
ret = -EFAULT;
40554070
goto end;

kernel/bpf/hashtab.c

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ static bool htab_has_extra_elems(struct bpf_htab *htab)
215215
return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab);
216216
}
217217

218-
static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
218+
static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
219219
{
220220
u32 num_entries = htab->map.max_entries;
221221
int i;
@@ -233,6 +233,9 @@ static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
233233
if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
234234
bpf_obj_free_workqueue(htab->map.record,
235235
htab_elem_value(elem, htab->map.key_size));
236+
if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
237+
bpf_obj_free_task_work(htab->map.record,
238+
htab_elem_value(elem, htab->map.key_size));
236239
cond_resched();
237240
}
238241
}
@@ -1490,7 +1493,7 @@ static void delete_all_elements(struct bpf_htab *htab)
14901493
}
14911494
}
14921495

1493-
static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
1496+
static void htab_free_malloced_internal_structs(struct bpf_htab *htab)
14941497
{
14951498
int i;
14961499

@@ -1508,22 +1511,25 @@ static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
15081511
if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
15091512
bpf_obj_free_workqueue(htab->map.record,
15101513
htab_elem_value(l, htab->map.key_size));
1514+
if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
1515+
bpf_obj_free_task_work(htab->map.record,
1516+
htab_elem_value(l, htab->map.key_size));
15111517
}
15121518
cond_resched_rcu();
15131519
}
15141520
rcu_read_unlock();
15151521
}
15161522

1517-
static void htab_map_free_timers_and_wq(struct bpf_map *map)
1523+
static void htab_map_free_internal_structs(struct bpf_map *map)
15181524
{
15191525
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
15201526

15211527
/* We only free timer and workqueue on uref dropping to zero */
1522-
if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE)) {
1528+
if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
15231529
if (!htab_is_prealloc(htab))
1524-
htab_free_malloced_timers_and_wq(htab);
1530+
htab_free_malloced_internal_structs(htab);
15251531
else
1526-
htab_free_prealloced_timers_and_wq(htab);
1532+
htab_free_prealloced_internal_structs(htab);
15271533
}
15281534
}
15291535

@@ -2255,7 +2261,7 @@ const struct bpf_map_ops htab_map_ops = {
22552261
.map_alloc = htab_map_alloc,
22562262
.map_free = htab_map_free,
22572263
.map_get_next_key = htab_map_get_next_key,
2258-
.map_release_uref = htab_map_free_timers_and_wq,
2264+
.map_release_uref = htab_map_free_internal_structs,
22592265
.map_lookup_elem = htab_map_lookup_elem,
22602266
.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
22612267
.map_update_elem = htab_map_update_elem,
@@ -2276,7 +2282,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
22762282
.map_alloc = htab_map_alloc,
22772283
.map_free = htab_map_free,
22782284
.map_get_next_key = htab_map_get_next_key,
2279-
.map_release_uref = htab_map_free_timers_and_wq,
2285+
.map_release_uref = htab_map_free_internal_structs,
22802286
.map_lookup_elem = htab_lru_map_lookup_elem,
22812287
.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
22822288
.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,

kernel/bpf/helpers.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3703,8 +3703,53 @@ __bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign)
37033703
return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX);
37043704
}
37053705

3706+
typedef void (*bpf_task_work_callback_t)(struct bpf_map *, void *, void *);
3707+
3708+
/**
3709+
* bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode
3710+
* @task: Task struct for which callback should be scheduled
3711+
* @tw: Pointer to the bpf_task_work struct, to use by kernel internally for bookkeeping
3712+
* @map__map: bpf_map which contains bpf_task_work in one of the values
3713+
* @callback: pointer to BPF subprogram to call
3714+
* @aux__prog: user should pass NULL
3715+
*
3716+
* Return: 0 if task work has been scheduled successfully, negative error code otherwise
3717+
*/
3718+
__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task,
3719+
struct bpf_task_work *tw,
3720+
struct bpf_map *map__map,
3721+
bpf_task_work_callback_t callback,
3722+
void *aux__prog)
3723+
{
3724+
return 0;
3725+
}
3726+
3727+
/**
3728+
* bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME or
3729+
* TWA_NMI_CURRENT mode if scheduling for the current task in the NMI
3730+
* @task: Task struct for which callback should be scheduled
3731+
* @tw: Pointer to the bpf_task_work struct, to use by kernel internally for bookkeeping
3732+
* @map__map: bpf_map which contains bpf_task_work in one of the values
3733+
* @callback: pointer to BPF subprogram to call
3734+
* @aux__prog: user should pass NULL
3735+
*
3736+
* Return: 0 if task work has been scheduled successfully, negative error code otherwise
3737+
*/
3738+
__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task,
3739+
struct bpf_task_work *tw,
3740+
struct bpf_map *map__map,
3741+
bpf_task_work_callback_t callback,
3742+
void *aux__prog)
3743+
{
3744+
return 0;
3745+
}
3746+
37063747
__bpf_kfunc_end_defs();
37073748

3749+
/* Destructor for a struct bpf_task_work embedded in a map value, called from
 * bpf_obj_free_task_work()/bpf_obj_free_fields() when an element is freed.
 * Currently a stub; the real cancel/free logic is added in the next patch.
 */
void bpf_task_work_cancel_and_free(void *val)
3750+
{
3751+
	/* Intentionally empty for now. */
}
3752+
37083753
BTF_KFUNCS_START(generic_btf_ids)
37093754
#ifdef CONFIG_CRASH_DUMP
37103755
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)

kernel/bpf/syscall.c

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,7 @@ void btf_record_free(struct btf_record *rec)
672672
case BPF_TIMER:
673673
case BPF_REFCOUNT:
674674
case BPF_WORKQUEUE:
675+
case BPF_TASK_WORK:
675676
/* Nothing to release */
676677
break;
677678
default:
@@ -725,6 +726,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
725726
case BPF_TIMER:
726727
case BPF_REFCOUNT:
727728
case BPF_WORKQUEUE:
729+
case BPF_TASK_WORK:
728730
/* Nothing to acquire */
729731
break;
730732
default:
@@ -783,6 +785,13 @@ void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj)
783785
bpf_wq_cancel_and_free(obj + rec->wq_off);
784786
}
785787

788+
/* Cancel and free the bpf_task_work embedded in map element @obj.
 * The field offset was recorded in @rec->task_work_off during BTF parsing;
 * warn and bail out if the record does not actually carry a BPF_TASK_WORK
 * field, so we never dereference an -EINVAL offset.
 */
void bpf_obj_free_task_work(const struct btf_record *rec, void *obj)
789+
{
790+
if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK)))
791+
return;
792+
bpf_task_work_cancel_and_free(obj + rec->task_work_off);
793+
}
794+
786795
void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
787796
{
788797
const struct btf_field *fields;
@@ -840,6 +849,9 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
840849
continue;
841850
bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
842851
break;
852+
case BPF_TASK_WORK:
853+
bpf_task_work_cancel_and_free(field_ptr);
854+
break;
843855
case BPF_LIST_NODE:
844856
case BPF_RB_NODE:
845857
case BPF_REFCOUNT:
@@ -1237,7 +1249,8 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
12371249

12381250
map->record = btf_parse_fields(btf, value_type,
12391251
BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
1240-
BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR,
1252+
BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR |
1253+
BPF_TASK_WORK,
12411254
map->value_size);
12421255
if (!IS_ERR_OR_NULL(map->record)) {
12431256
int i;
@@ -1309,6 +1322,14 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
13091322
goto free_map_tab;
13101323
}
13111324
break;
1325+
case BPF_TASK_WORK:
1326+
if (map->map_type != BPF_MAP_TYPE_HASH &&
1327+
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
1328+
map->map_type != BPF_MAP_TYPE_ARRAY) {
1329+
ret = -EOPNOTSUPP;
1330+
goto free_map_tab;
1331+
}
1332+
break;
13121333
default:
13131334
/* Fail if map_type checks are missing for a field type */
13141335
ret = -EOPNOTSUPP;

0 commit comments

Comments
 (0)