Skip to content

Commit cf563df

Browse files
mykyta5 and Kernel Patches Daemon
authored and committed
bpf: bpf task work plumbing
This patch adds necessary plumbing in verifier, syscall and maps to support handling new kfunc bpf_task_work_schedule and kernel structure bpf_task_work. The idea is similar to how we already handle bpf_wq and bpf_timer. verifier changes validate calls to bpf_task_work_schedule to make sure it is safe and expected invariants hold. btf part is required to detect bpf_task_work structure inside map value and store its offset, which will be used in the next patch to calculate key and value addresses. arraymap and hashtab changes are needed to handle freeing of the bpf_task_work: run code needed to deinitialize it, for example cancel task_work callback if possible. The use of bpf_task_work and proper implementation for kfuncs are introduced in the next patch. Signed-off-by: Mykyta Yatsenko <[email protected]> Acked-by: Andrii Nakryiko <[email protected]>
1 parent 44267b4 commit cf563df

File tree

9 files changed

+193
-15
lines changed

9 files changed

+193
-15
lines changed

include/linux/bpf.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ enum btf_field_type {
206206
BPF_WORKQUEUE = (1 << 10),
207207
BPF_UPTR = (1 << 11),
208208
BPF_RES_SPIN_LOCK = (1 << 12),
209+
BPF_TASK_WORK = (1 << 13),
209210
};
210211

211212
enum bpf_cgroup_storage_type {
@@ -259,6 +260,7 @@ struct btf_record {
259260
int timer_off;
260261
int wq_off;
261262
int refcount_off;
263+
int task_work_off;
262264
struct btf_field fields[];
263265
};
264266

@@ -358,6 +360,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
358360
return "bpf_rb_node";
359361
case BPF_REFCOUNT:
360362
return "bpf_refcount";
363+
case BPF_TASK_WORK:
364+
return "bpf_task_work";
361365
default:
362366
WARN_ON_ONCE(1);
363367
return "unknown";
@@ -396,6 +400,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
396400
return sizeof(struct bpf_rb_node);
397401
case BPF_REFCOUNT:
398402
return sizeof(struct bpf_refcount);
403+
case BPF_TASK_WORK:
404+
return sizeof(struct bpf_task_work);
399405
default:
400406
WARN_ON_ONCE(1);
401407
return 0;
@@ -428,6 +434,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
428434
return __alignof__(struct bpf_rb_node);
429435
case BPF_REFCOUNT:
430436
return __alignof__(struct bpf_refcount);
437+
case BPF_TASK_WORK:
438+
return __alignof__(struct bpf_task_work);
431439
default:
432440
WARN_ON_ONCE(1);
433441
return 0;
@@ -459,6 +467,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
459467
case BPF_KPTR_REF:
460468
case BPF_KPTR_PERCPU:
461469
case BPF_UPTR:
470+
case BPF_TASK_WORK:
462471
break;
463472
default:
464473
WARN_ON_ONCE(1);
@@ -595,6 +604,7 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
595604
bool lock_src);
596605
void bpf_timer_cancel_and_free(void *timer);
597606
void bpf_wq_cancel_and_free(void *timer);
607+
void bpf_task_work_cancel_and_free(void *timer);
598608
void bpf_list_head_free(const struct btf_field *field, void *list_head,
599609
struct bpf_spin_lock *spin_lock);
600610
void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
@@ -2417,6 +2427,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
24172427
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
24182428
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
24192429
void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
2430+
void bpf_obj_free_task_work(const struct btf_record *rec, void *obj);
24202431
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
24212432
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
24222433

include/uapi/linux/bpf.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7418,6 +7418,10 @@ struct bpf_timer {
74187418
__u64 __opaque[2];
74197419
} __attribute__((aligned(8)));
74207420

7421+
struct bpf_task_work {
7422+
__u64 __opaque;
7423+
} __attribute__((aligned(8)));
7424+
74217425
struct bpf_wq {
74227426
__u64 __opaque[2];
74237427
} __attribute__((aligned(8)));

kernel/bpf/arraymap.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -431,20 +431,22 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
431431
return (void *)round_down((unsigned long)array, PAGE_SIZE);
432432
}
433433

434-
static void array_map_free_timers_wq(struct bpf_map *map)
434+
static void array_map_free_internal_structs(struct bpf_map *map)
435435
{
436436
struct bpf_array *array = container_of(map, struct bpf_array, map);
437437
int i;
438438

439439
/* We don't reset or free fields other than timer and workqueue
440440
* on uref dropping to zero.
441441
*/
442-
if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) {
442+
if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
443443
for (i = 0; i < array->map.max_entries; i++) {
444444
if (btf_record_has_field(map->record, BPF_TIMER))
445445
bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
446446
if (btf_record_has_field(map->record, BPF_WORKQUEUE))
447447
bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
448+
if (btf_record_has_field(map->record, BPF_TASK_WORK))
449+
bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i));
448450
}
449451
}
450452
}
@@ -783,7 +785,7 @@ const struct bpf_map_ops array_map_ops = {
783785
.map_alloc = array_map_alloc,
784786
.map_free = array_map_free,
785787
.map_get_next_key = array_map_get_next_key,
786-
.map_release_uref = array_map_free_timers_wq,
788+
.map_release_uref = array_map_free_internal_structs,
787789
.map_lookup_elem = array_map_lookup_elem,
788790
.map_update_elem = array_map_update_elem,
789791
.map_delete_elem = array_map_delete_elem,

kernel/bpf/btf.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3494,7 +3494,8 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_
34943494
} field_types[] = { { BPF_SPIN_LOCK, "bpf_spin_lock" },
34953495
{ BPF_RES_SPIN_LOCK, "bpf_res_spin_lock" },
34963496
{ BPF_TIMER, "bpf_timer" },
3497-
{ BPF_WORKQUEUE, "bpf_wq" }};
3497+
{ BPF_WORKQUEUE, "bpf_wq" },
3498+
{ BPF_TASK_WORK, "bpf_task_work" } };
34983499
int type = 0, i;
34993500
const char *name = __btf_name_by_offset(btf, var_type->name_off);
35003501
const char *field_type_name;
@@ -3677,6 +3678,7 @@ static int btf_find_field_one(const struct btf *btf,
36773678
case BPF_LIST_NODE:
36783679
case BPF_RB_NODE:
36793680
case BPF_REFCOUNT:
3681+
case BPF_TASK_WORK:
36803682
ret = btf_find_struct(btf, var_type, off, sz, field_type,
36813683
info_cnt ? &info[0] : &tmp);
36823684
if (ret < 0)
@@ -3969,6 +3971,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
39693971
rec->timer_off = -EINVAL;
39703972
rec->wq_off = -EINVAL;
39713973
rec->refcount_off = -EINVAL;
3974+
rec->task_work_off = -EINVAL;
39723975
for (i = 0; i < cnt; i++) {
39733976
field_type_size = btf_field_type_size(info_arr[i].type);
39743977
if (info_arr[i].off + field_type_size > value_size) {
@@ -4034,6 +4037,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
40344037
case BPF_LIST_NODE:
40354038
case BPF_RB_NODE:
40364039
break;
4040+
case BPF_TASK_WORK:
4041+
WARN_ON_ONCE(rec->task_work_off >= 0);
4042+
rec->task_work_off = rec->fields[i].offset;
4043+
break;
40374044
default:
40384045
ret = -EFAULT;
40394046
goto end;

kernel/bpf/hashtab.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,12 @@ static void htab_free_internal_structs(struct bpf_htab *htab, struct htab_elem *
223223
if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
224224
bpf_obj_free_workqueue(htab->map.record,
225225
htab_elem_value(elem, htab->map.key_size));
226+
if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
227+
bpf_obj_free_task_work(htab->map.record,
228+
htab_elem_value(elem, htab->map.key_size));
226229
}
227230

228-
static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
231+
static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
229232
{
230233
u32 num_entries = htab->map.max_entries;
231234
int i;
@@ -1495,7 +1498,7 @@ static void delete_all_elements(struct bpf_htab *htab)
14951498
}
14961499
}
14971500

1498-
static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
1501+
static void htab_free_malloced_internal_structs(struct bpf_htab *htab)
14991502
{
15001503
int i;
15011504

@@ -1514,16 +1517,16 @@ static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
15141517
rcu_read_unlock();
15151518
}
15161519

1517-
static void htab_map_free_timers_and_wq(struct bpf_map *map)
1520+
static void htab_map_free_internal_structs(struct bpf_map *map)
15181521
{
15191522
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
15201523

15211524
/* We only free timer and workqueue on uref dropping to zero */
1522-
if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE)) {
1525+
if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
15231526
if (!htab_is_prealloc(htab))
1524-
htab_free_malloced_timers_and_wq(htab);
1527+
htab_free_malloced_internal_structs(htab);
15251528
else
1526-
htab_free_prealloced_timers_and_wq(htab);
1529+
htab_free_prealloced_internal_structs(htab);
15271530
}
15281531
}
15291532

@@ -2255,7 +2258,7 @@ const struct bpf_map_ops htab_map_ops = {
22552258
.map_alloc = htab_map_alloc,
22562259
.map_free = htab_map_free,
22572260
.map_get_next_key = htab_map_get_next_key,
2258-
.map_release_uref = htab_map_free_timers_and_wq,
2261+
.map_release_uref = htab_map_free_internal_structs,
22592262
.map_lookup_elem = htab_map_lookup_elem,
22602263
.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
22612264
.map_update_elem = htab_map_update_elem,
@@ -2276,7 +2279,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
22762279
.map_alloc = htab_map_alloc,
22772280
.map_free = htab_map_free,
22782281
.map_get_next_key = htab_map_get_next_key,
2279-
.map_release_uref = htab_map_free_timers_and_wq,
2282+
.map_release_uref = htab_map_free_internal_structs,
22802283
.map_lookup_elem = htab_lru_map_lookup_elem,
22812284
.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
22822285
.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,

kernel/bpf/helpers.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3738,8 +3738,48 @@ __bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign)
37383738
return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX);
37393739
}
37403740

3741+
typedef void (*bpf_task_work_callback_t)(struct bpf_map *map, void *key, void *value);
3742+
3743+
/**
3744+
* bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode
3745+
* @task: Task struct for which callback should be scheduled
3746+
* @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
3747+
* @map__map: bpf_map that embeds struct bpf_task_work in the values
3748+
* @callback: pointer to BPF subprogram to call
3749+
* @aux__prog: user should pass NULL
3750+
*
3751+
* Return: 0 if task work has been scheduled successfully, negative error code otherwise
3752+
*/
3753+
__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw,
3754+
struct bpf_map *map__map,
3755+
bpf_task_work_callback_t callback, void *aux__prog)
3756+
{
3757+
return 0;
3758+
}
3759+
3760+
/**
3761+
* bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME mode
3762+
* @task: Task struct for which callback should be scheduled
3763+
* @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
3764+
* @map__map: bpf_map that embeds struct bpf_task_work in the values
3765+
* @callback: pointer to BPF subprogram to call
3766+
* @aux__prog: user should pass NULL
3767+
*
3768+
* Return: 0 if task work has been scheduled successfully, negative error code otherwise
3769+
*/
3770+
__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw,
3771+
struct bpf_map *map__map,
3772+
bpf_task_work_callback_t callback, void *aux__prog)
3773+
{
3774+
return 0;
3775+
}
3776+
37413777
__bpf_kfunc_end_defs();
37423778

3779+
void bpf_task_work_cancel_and_free(void *val)
3780+
{
3781+
}
3782+
37433783
BTF_KFUNCS_START(generic_btf_ids)
37443784
#ifdef CONFIG_CRASH_DUMP
37453785
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)

kernel/bpf/syscall.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,7 @@ void btf_record_free(struct btf_record *rec)
672672
case BPF_TIMER:
673673
case BPF_REFCOUNT:
674674
case BPF_WORKQUEUE:
675+
case BPF_TASK_WORK:
675676
/* Nothing to release */
676677
break;
677678
default:
@@ -725,6 +726,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
725726
case BPF_TIMER:
726727
case BPF_REFCOUNT:
727728
case BPF_WORKQUEUE:
729+
case BPF_TASK_WORK:
728730
/* Nothing to acquire */
729731
break;
730732
default:
@@ -783,6 +785,13 @@ void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj)
783785
bpf_wq_cancel_and_free(obj + rec->wq_off);
784786
}
785787

788+
void bpf_obj_free_task_work(const struct btf_record *rec, void *obj)
789+
{
790+
if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK)))
791+
return;
792+
bpf_task_work_cancel_and_free(obj + rec->task_work_off);
793+
}
794+
786795
void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
787796
{
788797
const struct btf_field *fields;
@@ -840,6 +849,9 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
840849
continue;
841850
bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
842851
break;
852+
case BPF_TASK_WORK:
853+
bpf_task_work_cancel_and_free(field_ptr);
854+
break;
843855
case BPF_LIST_NODE:
844856
case BPF_RB_NODE:
845857
case BPF_REFCOUNT:
@@ -1237,7 +1249,8 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
12371249

12381250
map->record = btf_parse_fields(btf, value_type,
12391251
BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
1240-
BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR,
1252+
BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR |
1253+
BPF_TASK_WORK,
12411254
map->value_size);
12421255
if (!IS_ERR_OR_NULL(map->record)) {
12431256
int i;
@@ -1269,6 +1282,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
12691282
break;
12701283
case BPF_TIMER:
12711284
case BPF_WORKQUEUE:
1285+
case BPF_TASK_WORK:
12721286
if (map->map_type != BPF_MAP_TYPE_HASH &&
12731287
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
12741288
map->map_type != BPF_MAP_TYPE_ARRAY) {

0 commit comments

Comments
 (0)