
Commit b13849e

Asphaltt authored and Kernel Patches Daemon committed
bpf: Add BPF_F_CPU and BPF_F_ALL_CPUS flags support for percpu_hash and lru_percpu_hash maps
Introduce BPF_F_ALL_CPUS flag support for percpu_hash and lru_percpu_hash maps to allow updating values for all CPUs with a single value for both update_elem and update_batch APIs.

Introduce BPF_F_CPU flag support for percpu_hash and lru_percpu_hash maps to allow:

* update value for specified CPU for both update_elem and update_batch APIs.
* lookup value for specified CPU for both lookup_elem and lookup_batch APIs.

The BPF_F_CPU flag is passed via:

* map_flags along with embedded cpu info.
* elem_flags along with embedded cpu info.

Signed-off-by: Leon Hwang <[email protected]>
1 parent 5a49c5f commit b13849e
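
For context, a minimal userspace sketch of how the new flags are meant to be used through libbpf's bpf_map_update_elem()/bpf_map_lookup_elem_flags() wrappers. This is not code from the commit: it assumes UAPI headers that carry this series (so BPF_F_CPU and BPF_F_ALL_CPUS are defined) and a hypothetical percpu_hash map fd with u64 values. Per the diff below, the target CPU is embedded in the upper 32 bits of the flags.

/* Sketch only, not part of this commit: assumes UAPI headers from this
 * series so that BPF_F_CPU and BPF_F_ALL_CPUS are defined, and a
 * hypothetical percpu_hash map fd (map_fd) with u64 values.
 */
#include <stdio.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

static int demo_cpu_flags(int map_fd, __u32 key, int cpu)
{
        __u64 val = 42;
        __u64 cpu_flags = BPF_F_CPU | ((__u64)cpu << 32);
        int err;

        /* One value replicated into every CPU's slot; the value buffer is a
         * single value_size element, not an array of per-CPU values.
         */
        err = bpf_map_update_elem(map_fd, &key, &val, BPF_F_ALL_CPUS);
        if (err)
                return err;

        /* Update only the slot of the CPU embedded in the upper 32 bits. */
        val = 100;
        err = bpf_map_update_elem(map_fd, &key, &val, cpu_flags);
        if (err)
                return err;

        /* Lookup reads back just that CPU's value instead of all CPUs. */
        err = bpf_map_lookup_elem_flags(map_fd, &key, &val, cpu_flags);
        if (!err)
                printf("cpu %d: %llu\n", cpu, (unsigned long long)val);
        return err;
}

Without either flag, lookups and updates keep the existing behaviour of passing an array of per-CPU values.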

3 files changed: +59 −26 lines changed


include/linux/bpf.h

Lines changed: 3 additions & 1 deletion
@@ -2717,7 +2717,7 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
                                    struct bpf_func_state *caller,
                                    struct bpf_func_state *callee);
 
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
                            u64 flags);
@@ -3772,6 +3772,8 @@ static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type)
 {
         switch (map_type) {
         case BPF_MAP_TYPE_PERCPU_ARRAY:
+        case BPF_MAP_TYPE_PERCPU_HASH:
+        case BPF_MAP_TYPE_LRU_PERCPU_HASH:
                 return true;
         default:
                 return false;

kernel/bpf/hashtab.c

Lines changed: 55 additions & 24 deletions
@@ -945,7 +945,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 }
 
 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
-                            void *value, bool onallcpus)
+                            void *value, bool onallcpus, u64 map_flags)
 {
         if (!onallcpus) {
                 /* copy true value_size bytes */
@@ -954,15 +954,26 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
                 u32 size = round_up(htab->map.value_size, 8);
                 int off = 0, cpu;
 
+                if (map_flags & BPF_F_CPU) {
+                        cpu = map_flags >> 32;
+                        copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value);
+                        return;
+                }
+
                 for_each_possible_cpu(cpu) {
                         copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
-                        off += size;
+                        /* same user-provided value is used if BPF_F_ALL_CPUS
+                         * is specified, otherwise value is an array of per-CPU
+                         * values.
+                         */
+                        if (!(map_flags & BPF_F_ALL_CPUS))
+                                off += size;
                 }
         }
 }
 
 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
-                            void *value, bool onallcpus)
+                            void *value, bool onallcpus, u64 map_flags)
 {
         /* When not setting the initial value on all cpus, zero-fill element
          * values for other cpus. Otherwise, bpf program has no way to ensure
@@ -980,7 +991,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
                         zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
                 }
         } else {
-                pcpu_copy_value(htab, pptr, value, onallcpus);
+                pcpu_copy_value(htab, pptr, value, onallcpus, map_flags);
         }
 }
 
@@ -992,7 +1003,7 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                                          void *value, u32 key_size, u32 hash,
                                          bool percpu, bool onallcpus,
-                                         struct htab_elem *old_elem)
+                                         struct htab_elem *old_elem, u64 map_flags)
 {
         u32 size = htab->map.value_size;
         bool prealloc = htab_is_prealloc(htab);
@@ -1050,7 +1061,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                 pptr = *(void __percpu **)ptr;
         }
 
-        pcpu_init_value(htab, pptr, value, onallcpus);
+        pcpu_init_value(htab, pptr, value, onallcpus, map_flags);
 
         if (!prealloc)
                 htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1155,7 +1166,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
         }
 
         l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-                                l_old);
+                                l_old, map_flags);
         if (IS_ERR(l_new)) {
                 /* all pre-allocated elements are in use or memory exhausted */
                 ret = PTR_ERR(l_new);
@@ -1271,9 +1282,11 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
         u32 key_size, hash;
         int ret;
 
-        if (unlikely(map_flags > BPF_EXIST))
+        if (unlikely(!onallcpus && map_flags > BPF_EXIST))
                 /* unknown flags */
                 return -EINVAL;
+        if (unlikely(onallcpus && ((map_flags & BPF_F_LOCK) || (u32)map_flags > BPF_F_ALL_CPUS)))
+                return -EINVAL;
 
         WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
                      !rcu_read_lock_bh_held());
@@ -1299,7 +1312,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
                 /* Update value in-place */
                 if (percpu) {
                         pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
-                                        value, onallcpus);
+                                        value, onallcpus, map_flags);
                 } else {
                         void **inner_map_pptr = htab_elem_value(l_old, key_size);
 
@@ -1308,7 +1321,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
                 }
         } else {
                 l_new = alloc_htab_elem(htab, key, value, key_size,
-                                        hash, percpu, onallcpus, NULL);
+                                        hash, percpu, onallcpus, NULL, map_flags);
                 if (IS_ERR(l_new)) {
                         ret = PTR_ERR(l_new);
                         goto err;
@@ -1334,9 +1347,11 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
         u32 key_size, hash;
         int ret;
 
-        if (unlikely(map_flags > BPF_EXIST))
+        if (unlikely(!onallcpus && map_flags > BPF_EXIST))
                 /* unknown flags */
                 return -EINVAL;
+        if (unlikely(onallcpus && ((map_flags & BPF_F_LOCK) || (u32)map_flags > BPF_F_ALL_CPUS)))
+                return -EINVAL;
 
         WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
                      !rcu_read_lock_bh_held());
@@ -1374,10 +1389,10 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 
                 /* per-cpu hash map can update value in-place */
                 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
-                                value, onallcpus);
+                                value, onallcpus, map_flags);
         } else {
                 pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
-                                value, onallcpus);
+                                value, onallcpus, map_flags);
                 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
                 l_new = NULL;
         }
@@ -1689,9 +1704,9 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
         void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
         void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
         u32 batch, max_count, size, bucket_size, map_id;
+        u64 elem_map_flags, map_flags, allowed_flags;
         u32 bucket_cnt, total, key_size, value_size;
         struct htab_elem *node_to_free = NULL;
-        u64 elem_map_flags, map_flags;
         struct hlist_nulls_head *head;
         struct hlist_nulls_node *n;
         unsigned long flags = 0;
@@ -1701,9 +1716,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
         int ret = 0;
 
         elem_map_flags = attr->batch.elem_flags;
-        if ((elem_map_flags & ~BPF_F_LOCK) ||
-            ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
-                return -EINVAL;
+        allowed_flags = BPF_F_LOCK;
+        if (!do_delete && is_percpu)
+                allowed_flags |= BPF_F_CPU;
+        ret = bpf_map_check_op_flags(map, elem_map_flags, allowed_flags);
+        if (ret)
+                return ret;
 
         map_flags = attr->batch.flags;
         if (map_flags)
@@ -1726,7 +1744,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
         key_size = htab->map.key_size;
         value_size = htab->map.value_size;
         size = round_up(value_size, 8);
-        if (is_percpu)
+        if (is_percpu && !(elem_map_flags & BPF_F_CPU))
                 value_size = size * num_possible_cpus();
         total = 0;
         /* while experimenting with hash tables with sizes ranging from 10 to
@@ -1809,10 +1827,17 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
                         void __percpu *pptr;
 
                         pptr = htab_elem_get_ptr(l, map->key_size);
-                        for_each_possible_cpu(cpu) {
-                                copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
-                                check_and_init_map_value(&htab->map, dst_val + off);
-                                off += size;
+                        if (elem_map_flags & BPF_F_CPU) {
+                                cpu = elem_map_flags >> 32;
+                                copy_map_value(&htab->map, dst_val, per_cpu_ptr(pptr, cpu));
+                                check_and_init_map_value(&htab->map, dst_val);
+                        } else {
+                                for_each_possible_cpu(cpu) {
+                                        copy_map_value_long(&htab->map, dst_val + off,
+                                                            per_cpu_ptr(pptr, cpu));
+                                        check_and_init_map_value(&htab->map, dst_val + off);
+                                        off += size;
+                                }
                         }
                 } else {
                         value = htab_elem_value(l, key_size);
@@ -2368,7 +2393,7 @@ static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *k
                 return NULL;
 }
 
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 map_flags)
 {
         struct htab_elem *l;
         void __percpu *pptr;
@@ -2385,16 +2410,22 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
         l = __htab_map_lookup_elem(map, key);
         if (!l)
                 goto out;
+        ret = 0;
         /* We do not mark LRU map element here in order to not mess up
          * eviction heuristics when user space does a map walk.
          */
         pptr = htab_elem_get_ptr(l, map->key_size);
+        if (map_flags & BPF_F_CPU) {
+                cpu = map_flags >> 32;
+                copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
+                check_and_init_map_value(map, value);
+                goto out;
+        }
         for_each_possible_cpu(cpu) {
                 copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
                 check_and_init_map_value(map, value + off);
                 off += size;
         }
-        ret = 0;
 out:
         rcu_read_unlock();
         return ret;
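
The batch paths carry the same flags in elem_flags, as validated in __htab_map_lookup_and_delete_batch() above. Below is a hedged userspace sketch of how that might look with libbpf's batch wrappers; it is not code from this commit and assumes UAPI headers that define BPF_F_CPU, a percpu_hash map fd, and caller-provided keys[]/vals[] arrays of 'count' entries each.

/* Sketch only: BPF_F_CPU is passed via elem_flags with the cpu index in the
 * upper 32 bits, matching the check in the batch lookup path above.
 */
#include <linux/bpf.h>
#include <bpf/bpf.h>

static int batch_single_cpu(int map_fd, __u32 *keys, __u64 *vals,
                            __u32 count, int cpu)
{
        LIBBPF_OPTS(bpf_map_batch_opts, opts,
                .elem_flags = BPF_F_CPU | ((__u64)cpu << 32));
        __u32 n = count, out_batch;
        int err;

        /* Each key gets a single value for the chosen CPU. */
        err = bpf_map_update_batch(map_fd, keys, vals, &n, &opts);
        if (err)
                return err;

        /* Read back only that CPU's values; vals[] holds one value per key,
         * not value_size * num_possible_cpus() bytes per key.
         */
        n = count;
        return bpf_map_lookup_batch(map_fd, NULL, &out_batch, keys, vals,
                                    &n, &opts);
}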

kernel/bpf/syscall.c

Lines changed: 1 addition & 1 deletion
@@ -316,7 +316,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
         bpf_disable_instrumentation();
         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
-                err = bpf_percpu_hash_copy(map, key, value);
+                err = bpf_percpu_hash_copy(map, key, value, flags);
         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                 err = bpf_percpu_array_copy(map, key, value, flags);
         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
