Skip to content

Commit 5cd0aea

Browse files
author
Alexei Starovoitov
committed
Merge branch 'support-bpf_kptr_xchg-into-local-kptr'
Amery Hung says: ==================== Support bpf_kptr_xchg into local kptr This revision adds substantial changes to patch 2 to support structures with kptr as the only special btf type. The test is split into local_kptr_stash and task_kfunc_success to remove dependencies on bpf_testmod that would break veristat results. This series allows stashing kptr into local kptr. Currently, kptrs are only allowed to be stashed into map value with bpf_kptr_xchg(). A motivating use case of this series is to enable adding referenced kptr to bpf_rbtree or bpf_list by using allocated object as graph node and the storage of referenced kptr. For example, a bpf qdisc [0] enqueuing a referenced kptr to a struct sk_buff* to a bpf_list serving as a fifo: struct skb_node { struct sk_buff __kptr *skb; struct bpf_list_node node; }; private(A) struct bpf_spin_lock fifo_lock; private(A) struct bpf_list_head fifo __contains(skb_node, node); /* In Qdisc_ops.enqueue */ struct skb_node *skbn; skbn = bpf_obj_new(typeof(*skbn)); if (!skbn) goto drop; /* skb is a referenced kptr to struct sk_buff acquired earlier * but not shown in this code snippet. */ skb = bpf_kptr_xchg(&skbn->skb, skb); if (skb) /* should not happen; do something below releasing skb to * satisfy the verifier */ ... bpf_spin_lock(&fifo_lock); bpf_list_push_back(&fifo, &skbn->node); bpf_spin_unlock(&fifo_lock); The implementation first searches for BPF_KPTR when generating program BTF. Then, we teach the verifier that the destination argument of bpf_kptr_xchg() can be local kptr, and use the btf_record in program BTF to check against the source argument. This series is mostly developed by Dave, who kindly helped and sent me the patchset. The selftests in bpf qdisc (WIP) rely on this series to work. 
[0] https://lore.kernel.org/netdev/[email protected]/ --- v3 -> v4 - Allow struct in prog btf w/ kptr as the only special field type - Split tests of stashing referenced kptr and local kptr - v3: https://lore.kernel.org/bpf/[email protected]/ v2 -> v3 - Fix prog btf memory leak - Test stashing kptr in prog btf - Test unstashing kptrs after stashing into local kptrs - v2: https://lore.kernel.org/bpf/[email protected]/ v1 -> v2 - Fix the document for bpf_kptr_xchg() - Add a comment explaining changes in the verifier - v1: https://lore.kernel.org/bpf/[email protected]/ ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents f727b13 + 91c9684 commit 5cd0aea

File tree

8 files changed

+151
-48
lines changed

8 files changed

+151
-48
lines changed

include/linux/bpf.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -744,7 +744,7 @@ enum bpf_arg_type {
744744
ARG_PTR_TO_STACK, /* pointer to stack */
745745
ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
746746
ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
747-
ARG_PTR_TO_KPTR, /* pointer to referenced kptr */
747+
ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */
748748
ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */
749749
__BPF_ARG_TYPE_MAX,
750750

include/uapi/linux/bpf.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5519,11 +5519,12 @@ union bpf_attr {
55195519
* **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if
55205520
* invalid arguments are passed.
55215521
*
5522-
* void *bpf_kptr_xchg(void *map_value, void *ptr)
5522+
* void *bpf_kptr_xchg(void *dst, void *ptr)
55235523
* Description
5524-
* Exchange kptr at pointer *map_value* with *ptr*, and return the
5525-
* old value. *ptr* can be NULL, otherwise it must be a referenced
5526-
* pointer which will be released when this helper is called.
5524+
* Exchange kptr at pointer *dst* with *ptr*, and return the old value.
5525+
* *dst* can be map value or local kptr. *ptr* can be NULL, otherwise
5526+
* it must be a referenced pointer which will be released when this helper
5527+
* is called.
55275528
* Return
55285529
* The old value of kptr (which can be NULL). The returned pointer
55295530
* if not NULL, is a reference which must be released using its

kernel/bpf/btf.c

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3754,6 +3754,7 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
37543754
return -EINVAL;
37553755
}
37563756

3757+
/* Callers have to ensure the life cycle of btf if it is program BTF */
37573758
static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
37583759
struct btf_field_info *info)
37593760
{
@@ -3782,7 +3783,6 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
37823783
field->kptr.dtor = NULL;
37833784
id = info->kptr.type_id;
37843785
kptr_btf = (struct btf *)btf;
3785-
btf_get(kptr_btf);
37863786
goto found_dtor;
37873787
}
37883788
if (id < 0)
@@ -5512,36 +5512,70 @@ static const char *alloc_obj_fields[] = {
55125512
static struct btf_struct_metas *
55135513
btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
55145514
{
5515-
union {
5516-
struct btf_id_set set;
5517-
struct {
5518-
u32 _cnt;
5519-
u32 _ids[ARRAY_SIZE(alloc_obj_fields)];
5520-
} _arr;
5521-
} aof;
55225515
struct btf_struct_metas *tab = NULL;
5516+
struct btf_id_set *aof;
55235517
int i, n, id, ret;
55245518

55255519
BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0);
55265520
BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32));
55275521

5528-
memset(&aof, 0, sizeof(aof));
5522+
aof = kmalloc(sizeof(*aof), GFP_KERNEL | __GFP_NOWARN);
5523+
if (!aof)
5524+
return ERR_PTR(-ENOMEM);
5525+
aof->cnt = 0;
5526+
55295527
for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) {
55305528
/* Try to find whether this special type exists in user BTF, and
55315529
* if so remember its ID so we can easily find it among members
55325530
* of structs that we iterate in the next loop.
55335531
*/
5532+
struct btf_id_set *new_aof;
5533+
55345534
id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT);
55355535
if (id < 0)
55365536
continue;
5537-
aof.set.ids[aof.set.cnt++] = id;
5537+
5538+
new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
5539+
GFP_KERNEL | __GFP_NOWARN);
5540+
if (!new_aof) {
5541+
ret = -ENOMEM;
5542+
goto free_aof;
5543+
}
5544+
aof = new_aof;
5545+
aof->ids[aof->cnt++] = id;
5546+
}
5547+
5548+
n = btf_nr_types(btf);
5549+
for (i = 1; i < n; i++) {
5550+
/* Try to find if there are kptrs in user BTF and remember their ID */
5551+
struct btf_id_set *new_aof;
5552+
struct btf_field_info tmp;
5553+
const struct btf_type *t;
5554+
5555+
t = btf_type_by_id(btf, i);
5556+
if (!t) {
5557+
ret = -EINVAL;
5558+
goto free_aof;
5559+
}
5560+
5561+
ret = btf_find_kptr(btf, t, 0, 0, &tmp);
5562+
if (ret != BTF_FIELD_FOUND)
5563+
continue;
5564+
5565+
new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
5566+
GFP_KERNEL | __GFP_NOWARN);
5567+
if (!new_aof) {
5568+
ret = -ENOMEM;
5569+
goto free_aof;
5570+
}
5571+
aof = new_aof;
5572+
aof->ids[aof->cnt++] = i;
55385573
}
55395574

5540-
if (!aof.set.cnt)
5575+
if (!aof->cnt)
55415576
return NULL;
5542-
sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL);
5577+
sort(&aof->ids, aof->cnt, sizeof(aof->ids[0]), btf_id_cmp_func, NULL);
55435578

5544-
n = btf_nr_types(btf);
55455579
for (i = 1; i < n; i++) {
55465580
struct btf_struct_metas *new_tab;
55475581
const struct btf_member *member;
@@ -5551,17 +5585,13 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
55515585
int j, tab_cnt;
55525586

55535587
t = btf_type_by_id(btf, i);
5554-
if (!t) {
5555-
ret = -EINVAL;
5556-
goto free;
5557-
}
55585588
if (!__btf_type_is_struct(t))
55595589
continue;
55605590

55615591
cond_resched();
55625592

55635593
for_each_member(j, t, member) {
5564-
if (btf_id_set_contains(&aof.set, member->type))
5594+
if (btf_id_set_contains(aof, member->type))
55655595
goto parse;
55665596
}
55675597
continue;
@@ -5580,7 +5610,8 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
55805610
type = &tab->types[tab->cnt];
55815611
type->btf_id = i;
55825612
record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
5583-
BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size);
5613+
BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT |
5614+
BPF_KPTR, t->size);
55845615
/* The record cannot be unset, treat it as an error if so */
55855616
if (IS_ERR_OR_NULL(record)) {
55865617
ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
@@ -5589,9 +5620,12 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
55895620
type->record = record;
55905621
tab->cnt++;
55915622
}
5623+
kfree(aof);
55925624
return tab;
55935625
free:
55945626
btf_struct_metas_free(tab);
5627+
free_aof:
5628+
kfree(aof);
55955629
return ERR_PTR(ret);
55965630
}
55975631

kernel/bpf/helpers.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1619,9 +1619,9 @@ void bpf_wq_cancel_and_free(void *val)
16191619
schedule_work(&work->delete_work);
16201620
}
16211621

1622-
BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
1622+
BPF_CALL_2(bpf_kptr_xchg, void *, dst, void *, ptr)
16231623
{
1624-
unsigned long *kptr = map_value;
1624+
unsigned long *kptr = dst;
16251625

16261626
/* This helper may be inlined by verifier. */
16271627
return xchg(kptr, (unsigned long)ptr);
@@ -1636,7 +1636,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
16361636
.gpl_only = false,
16371637
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
16381638
.ret_btf_id = BPF_PTR_POISON,
1639-
.arg1_type = ARG_PTR_TO_KPTR,
1639+
.arg1_type = ARG_KPTR_XCHG_DEST,
16401640
.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
16411641
.arg2_btf_id = BPF_PTR_POISON,
16421642
};

kernel/bpf/syscall.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,8 @@ void btf_record_free(struct btf_record *rec)
550550
case BPF_KPTR_PERCPU:
551551
if (rec->fields[i].kptr.module)
552552
module_put(rec->fields[i].kptr.module);
553-
btf_put(rec->fields[i].kptr.btf);
553+
if (btf_is_kernel(rec->fields[i].kptr.btf))
554+
btf_put(rec->fields[i].kptr.btf);
554555
break;
555556
case BPF_LIST_HEAD:
556557
case BPF_LIST_NODE:
@@ -596,7 +597,8 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
596597
case BPF_KPTR_UNREF:
597598
case BPF_KPTR_REF:
598599
case BPF_KPTR_PERCPU:
599-
btf_get(fields[i].kptr.btf);
600+
if (btf_is_kernel(fields[i].kptr.btf))
601+
btf_get(fields[i].kptr.btf);
600602
if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
601603
ret = -ENXIO;
602604
goto free;

kernel/bpf/verifier.c

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7803,29 +7803,38 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
78037803
struct bpf_call_arg_meta *meta)
78047804
{
78057805
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7806-
struct bpf_map *map_ptr = reg->map_ptr;
78077806
struct btf_field *kptr_field;
7807+
struct bpf_map *map_ptr;
7808+
struct btf_record *rec;
78087809
u32 kptr_off;
78097810

7811+
if (type_is_ptr_alloc_obj(reg->type)) {
7812+
rec = reg_btf_record(reg);
7813+
} else { /* PTR_TO_MAP_VALUE */
7814+
map_ptr = reg->map_ptr;
7815+
if (!map_ptr->btf) {
7816+
verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
7817+
map_ptr->name);
7818+
return -EINVAL;
7819+
}
7820+
rec = map_ptr->record;
7821+
meta->map_ptr = map_ptr;
7822+
}
7823+
78107824
if (!tnum_is_const(reg->var_off)) {
78117825
verbose(env,
78127826
"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
78137827
regno);
78147828
return -EINVAL;
78157829
}
7816-
if (!map_ptr->btf) {
7817-
verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
7818-
map_ptr->name);
7819-
return -EINVAL;
7820-
}
7821-
if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
7822-
verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
7830+
7831+
if (!btf_record_has_field(rec, BPF_KPTR)) {
7832+
verbose(env, "R%d has no valid kptr\n", regno);
78237833
return -EINVAL;
78247834
}
78257835

7826-
meta->map_ptr = map_ptr;
78277836
kptr_off = reg->off + reg->var_off.value;
7828-
kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
7837+
kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
78297838
if (!kptr_field) {
78307839
verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
78317840
return -EACCES;
@@ -8412,7 +8421,12 @@ static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
84128421
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
84138422
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
84148423
static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
8415-
static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
8424+
static const struct bpf_reg_types kptr_xchg_dest_types = {
8425+
.types = {
8426+
PTR_TO_MAP_VALUE,
8427+
PTR_TO_BTF_ID | MEM_ALLOC
8428+
}
8429+
};
84168430
static const struct bpf_reg_types dynptr_types = {
84178431
.types = {
84188432
PTR_TO_STACK,
@@ -8444,7 +8458,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
84448458
[ARG_PTR_TO_STACK] = &stack_ptr_types,
84458459
[ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
84468460
[ARG_PTR_TO_TIMER] = &timer_types,
8447-
[ARG_PTR_TO_KPTR] = &kptr_types,
8461+
[ARG_KPTR_XCHG_DEST] = &kptr_xchg_dest_types,
84488462
[ARG_PTR_TO_DYNPTR] = &dynptr_types,
84498463
};
84508464

@@ -8483,7 +8497,8 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
84838497
if (base_type(arg_type) == ARG_PTR_TO_MEM)
84848498
type &= ~DYNPTR_TYPE_FLAG_MASK;
84858499

8486-
if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) {
8500+
/* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
8501+
if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) {
84878502
type &= ~MEM_ALLOC;
84888503
type &= ~MEM_PERCPU;
84898504
}
@@ -8576,7 +8591,8 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
85768591
verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
85778592
return -EFAULT;
85788593
}
8579-
if (meta->func_id == BPF_FUNC_kptr_xchg) {
8594+
/* Check if local kptr in src arg matches kptr in dst arg */
8595+
if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) {
85808596
if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
85818597
return -EACCES;
85828598
}
@@ -8887,7 +8903,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
88878903
meta->release_regno = regno;
88888904
}
88898905

8890-
if (reg->ref_obj_id) {
8906+
if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) {
88918907
if (meta->ref_obj_id) {
88928908
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
88938909
regno, reg->ref_obj_id,
@@ -9044,7 +9060,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
90449060
return err;
90459061
break;
90469062
}
9047-
case ARG_PTR_TO_KPTR:
9063+
case ARG_KPTR_XCHG_DEST:
90489064
err = process_kptr_func(env, regno, meta);
90499065
if (err)
90509066
return err;

tools/testing/selftests/bpf/progs/local_kptr_stash.c

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,12 @@
88
#include "../bpf_experimental.h"
99
#include "../bpf_testmod/bpf_testmod_kfunc.h"
1010

11+
struct plain_local;
12+
1113
struct node_data {
1214
long key;
1315
long data;
16+
struct plain_local __kptr * stashed_in_local_kptr;
1417
struct bpf_rb_node node;
1518
};
1619

@@ -85,18 +88,33 @@ static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
8588

8689
static int create_and_stash(int idx, int val)
8790
{
91+
struct plain_local *inner_local_kptr;
8892
struct map_value *mapval;
8993
struct node_data *res;
9094

9195
mapval = bpf_map_lookup_elem(&some_nodes, &idx);
9296
if (!mapval)
9397
return 1;
9498

99+
inner_local_kptr = bpf_obj_new(typeof(*inner_local_kptr));
100+
if (!inner_local_kptr)
101+
return 2;
102+
95103
res = bpf_obj_new(typeof(*res));
96-
if (!res)
97-
return 1;
104+
if (!res) {
105+
bpf_obj_drop(inner_local_kptr);
106+
return 3;
107+
}
98108
res->key = val;
99109

110+
inner_local_kptr = bpf_kptr_xchg(&res->stashed_in_local_kptr, inner_local_kptr);
111+
if (inner_local_kptr) {
112+
/* Should never happen, we just obj_new'd res */
113+
bpf_obj_drop(inner_local_kptr);
114+
bpf_obj_drop(res);
115+
return 4;
116+
}
117+
100118
res = bpf_kptr_xchg(&mapval->node, res);
101119
if (res)
102120
bpf_obj_drop(res);
@@ -169,6 +187,7 @@ long stash_local_with_root(void *ctx)
169187
SEC("tc")
170188
long unstash_rb_node(void *ctx)
171189
{
190+
struct plain_local *inner_local_kptr = NULL;
172191
struct map_value *mapval;
173192
struct node_data *res;
174193
long retval;
@@ -180,6 +199,13 @@ long unstash_rb_node(void *ctx)
180199

181200
res = bpf_kptr_xchg(&mapval->node, NULL);
182201
if (res) {
202+
inner_local_kptr = bpf_kptr_xchg(&res->stashed_in_local_kptr, inner_local_kptr);
203+
if (!inner_local_kptr) {
204+
bpf_obj_drop(res);
205+
return 1;
206+
}
207+
bpf_obj_drop(inner_local_kptr);
208+
183209
retval = res->key;
184210
bpf_obj_drop(res);
185211
return retval;

0 commit comments

Comments
 (0)