Skip to content

Commit 03c22ad

Browse files
committed
bpf: Add batch and iter ops for resizable hashtab
Signed-off-by: Mykyta Yatsenko <[email protected]>
1 parent c912740 commit 03c22ad

File tree

1 file changed

+228
-16
lines changed

kernel/bpf/hashtab.c

Lines changed: 228 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2982,66 +2982,278 @@ static long bpf_for_each_rhash_elem(struct bpf_map *map,
29822982

29832983
static u64 rhtab_map_mem_usage(const struct bpf_map *map)
29842984
{
2985-
return 1;
2985+
struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
2986+
u64 num_entries;
2987+
u64 usage = sizeof(struct bpf_rhtab);
2988+
2989+
num_entries = atomic_read(&rhtab->ht.nelems);
2990+
usage += rhtab->elem_size * num_entries;
2991+
2992+
/* Add rhashtable internal overhead estimate */
2993+
usage += rhtab->ht.tbl ? sizeof(struct bucket_table) : 0;
2994+
2995+
return usage;
2996+
}
2997+
2998+
static int __rhtab_map_lookup_and_delete_batch(struct bpf_map *map,
2999+
const union bpf_attr *attr,
3000+
union bpf_attr __user *uattr,
3001+
bool do_delete)
3002+
{
3003+
struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
3004+
void __user *uvalues = u64_to_user_ptr(attr->batch.values);
3005+
void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
3006+
void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
3007+
u32 batch, max_count, total, key_size, value_size;
3008+
void *keys = NULL, *values = NULL, *dst_key, *dst_val;
3009+
struct rhtab_elem **elems_to_delete = NULL;
3010+
struct rhashtable_iter iter;
3011+
struct rhtab_elem *l;
3012+
u64 elem_map_flags, map_flags;
3013+
u32 buf_size;
3014+
int ret = 0;
3015+
3016+
elem_map_flags = attr->batch.elem_flags;
3017+
if ((elem_map_flags & ~BPF_F_LOCK) ||
3018+
((elem_map_flags & BPF_F_LOCK) &&
3019+
!btf_record_has_field(map->record, BPF_SPIN_LOCK)))
3020+
return -EINVAL;
3021+
3022+
map_flags = attr->batch.flags;
3023+
if (map_flags)
3024+
return -EINVAL;
3025+
3026+
max_count = attr->batch.count;
3027+
if (!max_count)
3028+
return 0;
3029+
3030+
if (put_user(0, &uattr->batch.count))
3031+
return -EFAULT;
3032+
3033+
batch = 0;
3034+
if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
3035+
return -EFAULT;
3036+
3037+
key_size = map->key_size;
3038+
value_size = map->value_size;
3039+
3040+
/* Allocate buffers for keys and values */
3041+
buf_size = max_count;
3042+
keys = kvmalloc_array(key_size, buf_size, GFP_USER | __GFP_NOWARN);
3043+
values = kvmalloc_array(value_size, buf_size, GFP_USER | __GFP_NOWARN);
3044+
if (!keys || !values) {
3045+
ret = -ENOMEM;
3046+
goto out;
3047+
}
3048+
3049+
if (do_delete) {
3050+
elems_to_delete = kvmalloc_array(sizeof(*elems_to_delete),
3051+
buf_size,
3052+
GFP_USER | __GFP_NOWARN);
3053+
if (!elems_to_delete) {
3054+
ret = -ENOMEM;
3055+
goto out;
3056+
}
3057+
}
3058+
3059+
dst_key = keys;
3060+
dst_val = values;
3061+
total = 0;
3062+
3063+
rcu_read_lock();
3064+
rhashtable_walk_enter(&rhtab->ht, &iter);
3065+
rhashtable_walk_start(&iter);
3066+
3067+
/* Skip elements up to batch position */
3068+
while (batch > 0 && (l = rhashtable_walk_next(&iter))) {
3069+
if (IS_ERR(l)) {
3070+
if (PTR_ERR(l) == -EAGAIN)
3071+
continue;
3072+
break;
3073+
}
3074+
batch--;
3075+
}
3076+
3077+
/* Collect elements */
3078+
while (total < max_count && (l = rhashtable_walk_next(&iter))) {
3079+
if (IS_ERR(l)) {
3080+
if (PTR_ERR(l) == -EAGAIN)
3081+
continue;
3082+
break;
3083+
}
3084+
3085+
memcpy(dst_key, l->data, key_size);
3086+
memcpy(dst_val, rhtab_elem_value(l), value_size);
3087+
check_and_init_map_value(map, dst_val);
3088+
3089+
if (do_delete)
3090+
elems_to_delete[total] = l;
3091+
3092+
dst_key += key_size;
3093+
dst_val += value_size;
3094+
total++;
3095+
}
3096+
3097+
/* Delete collected elements while still in RCU critical section */
3098+
if (do_delete && total > 0) {
3099+
u32 i;
3100+
3101+
for (i = 0; i < total; i++) {
3102+
l = elems_to_delete[i];
3103+
rhashtable_remove_fast(&rhtab->ht, &l->node,
3104+
rhtab->params);
3105+
bpf_mem_cache_free_rcu(&rhtab->ma, l);
3106+
}
3107+
}
3108+
3109+
rhashtable_walk_stop(&iter);
3110+
rhashtable_walk_exit(&iter);
3111+
rcu_read_unlock();
3112+
3113+
if (total == 0) {
3114+
ret = -ENOENT;
3115+
goto out;
3116+
}
3117+
3118+
/* Copy results to userspace */
3119+
if (copy_to_user(ukeys, keys, total * key_size) ||
3120+
copy_to_user(uvalues, values, total * value_size)) {
3121+
ret = -EFAULT;
3122+
goto out;
3123+
}
3124+
3125+
if (put_user(total, &uattr->batch.count))
3126+
ret = -EFAULT;
3127+
3128+
/* Update batch cursor for next iteration */
3129+
if (ubatch) {
3130+
u32 next_batch;
3131+
3132+
if (copy_from_user(&next_batch, ubatch, sizeof(next_batch))) {
3133+
ret = -EFAULT;
3134+
goto out;
3135+
}
3136+
next_batch += total;
3137+
if (copy_to_user(ubatch, &next_batch, sizeof(next_batch)))
3138+
ret = -EFAULT;
3139+
}
3140+
3141+
out:
3142+
kvfree(keys);
3143+
kvfree(values);
3144+
kvfree(elems_to_delete);
3145+
return ret;
29863146
}
29873147

29883148
/*
 * BPF_MAP_LOOKUP_BATCH handler: copy a batch of key/value pairs to
 * user space without removing them from the table.
 */
static int rhtab_map_lookup_batch(struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	return __rhtab_map_lookup_and_delete_batch(map, attr, uattr, false);
}
29943154

29953155
/*
 * BPF_MAP_LOOKUP_AND_DELETE_BATCH handler: copy a batch of key/value
 * pairs to user space and remove the returned elements from the table.
 */
static int rhtab_map_lookup_and_delete_batch(struct bpf_map *map,
					     const union bpf_attr *attr,
					     union bpf_attr __user *uattr)
{
	return __rhtab_map_lookup_and_delete_batch(map, attr, uattr, true);
}
30013161

30023162
/* Private seq_file state for a bpf_iter over a resizable hashtab map. */
struct bpf_iter_seq_rhash_map_info {
	struct bpf_map *map;		/* iterated map; init takes a uref, fini drops it */
	struct bpf_rhtab *rhtab;	/* container of @map, cached for walker setup */
	struct rhashtable_iter iter;	/* rhashtable walker state */
	bool iter_active;		/* walker entered and not yet exited */
};
30093168

3010-
static struct htab_elem *
3011-
bpf_rhash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
3012-
struct htab_elem *prev_elem)
3169+
/*
 * seq_file ->start: (re)enter the rhashtable walker and return the
 * element at position *pos.
 *
 * ->stop tears the walker down at the end of every read cycle, so on a
 * resumed read (*pos > 0, e.g. after the seq buffer filled up) we must
 * skip the *pos elements already emitted; returning the first element
 * unconditionally would duplicate output.  Positioning across a
 * concurrent resize is best-effort, as for any rhashtable walk.
 */
static void *bpf_rhash_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_rhash_map_info *info = seq->private;
	struct rhtab_elem *elem;
	loff_t skip = *pos;

	rhashtable_walk_enter(&info->rhtab->ht, &info->iter);
	rhashtable_walk_start(&info->iter);
	info->iter_active = true;

	elem = rhtab_iter_next(&info->iter);
	while (elem && skip-- > 0)
		elem = rhtab_iter_next(&info->iter);
	if (!elem)
		return NULL;

	if (*pos == 0)
		++*pos;
	return elem;
}
30163186

3017-
static void *bpf_rhash_map_seq_start(struct seq_file *seq, loff_t *pos)
3187+
/* seq_file ->next: advance the position and hand back the next element. */
static void *bpf_rhash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_rhash_map_info *priv = seq->private;

	(*pos)++;
	return rhtab_iter_next(&priv->iter);
}
30213195

3022-
static void *bpf_rhash_map_seq_next(struct seq_file *seq, void *v,
3023-
loff_t *pos) /* */
3196+
static int __bpf_rhash_map_seq_show(struct seq_file *seq,
3197+
struct rhtab_elem *elem)
30243198
{
3025-
return NULL;
3199+
struct bpf_iter_seq_rhash_map_info *info = seq->private;
3200+
struct bpf_iter__bpf_map_elem ctx = {};
3201+
struct bpf_iter_meta meta;
3202+
struct bpf_prog *prog;
3203+
int ret = 0;
3204+
3205+
meta.seq = seq;
3206+
prog = bpf_iter_get_info(&meta, elem == NULL);
3207+
if (prog) {
3208+
ctx.meta = &meta;
3209+
ctx.map = info->map;
3210+
if (elem) {
3211+
ctx.key = elem->data;
3212+
ctx.value = rhtab_elem_value(elem);
3213+
}
3214+
ret = bpf_iter_run_prog(prog, &ctx);
3215+
}
3216+
3217+
return ret;
30263218
}
30273219

30283220
/* seq_file ->show: run the iterator program for element @v. */
static int bpf_rhash_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_rhash_map_seq_show(seq, v);
}
30323224

30333225
static void bpf_rhash_map_seq_stop(struct seq_file *seq, void *v)
30343226
{
3227+
struct bpf_iter_seq_rhash_map_info *info = seq->private;
3228+
3229+
if (!v)
3230+
(void)__bpf_rhash_map_seq_show(seq, NULL);
3231+
3232+
if (info->iter_active) {
3233+
rhashtable_walk_stop(&info->iter);
3234+
rhashtable_walk_exit(&info->iter);
3235+
info->iter_active = false;
3236+
}
30353237
}
30363238

30373239
static int bpf_iter_init_rhash_map(void *priv_data,
30383240
struct bpf_iter_aux_info *aux)
30393241
{
3242+
struct bpf_iter_seq_rhash_map_info *info = priv_data;
3243+
struct bpf_map *map = aux->map;
3244+
3245+
bpf_map_inc_with_uref(map);
3246+
info->map = map;
3247+
info->rhtab = container_of(map, struct bpf_rhtab, map);
3248+
info->iter_active = false;
30403249
return 0;
30413250
}
30423251

30433252
/* bpf_iter fini: drop the map reference taken by bpf_iter_init_rhash_map(). */
static void bpf_iter_fini_rhash_map(void *priv_data)
{
	struct bpf_iter_seq_rhash_map_info *info = priv_data;

	bpf_map_put_with_uref(info->map);
}
30463258

30473259
static const struct seq_operations bpf_rhash_map_seq_ops = {

0 commit comments

Comments
 (0)