
Commit 9befe75

Asphaltt authored and Kernel Patches Daemon committed
bpf: Introduce BPF_F_CPU flag for percpu_hash and lru_percpu_hash maps
Introduce BPF_F_ALL_CPUS flag support for percpu_hash and lru_percpu_hash maps to allow updating values for all CPUs with a single value. Introduce BPF_F_CPU flag support for percpu_hash and lru_percpu_hash maps to allow updating the value for a specified CPU.

This enhancement enables:

* Efficient value updates across all CPUs with a single value when BPF_F_ALL_CPUS is set for the update_elem and update_batch APIs.
* Targeted update or lookup for a specified CPU when BPF_F_CPU is set.

The BPF_F_CPU flag is passed via:

* map_flags of the lookup_elem and update_elem APIs, along with the embedded cpu field.
* elem_flags of the lookup_batch and update_batch APIs, along with the embedded cpu field.

Signed-off-by: Leon Hwang <[email protected]>
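For context, a minimal user-space sketch of how these flags would be exercised through libbpf's bpf_map_update_elem() and bpf_map_lookup_elem_flags() wrappers. It is not part of this commit; map_fd, key, value and target_cpu are illustrative, and it assumes BPF_F_CPU / BPF_F_ALL_CPUS are exposed via the uapi header from this series. The cpu number travels in the upper 32 bits of the flags word.

#include <bpf/bpf.h>
#include <linux/bpf.h>

/* Sketch only: assumes map_fd refers to a BPF_MAP_TYPE_PERCPU_HASH map
 * whose value_size is sizeof(__u64), created elsewhere.
 */
static int percpu_hash_cpu_flags_example(int map_fd, __u32 key, __u32 target_cpu)
{
	__u64 value = 42;
	__u64 cpu_flags = BPF_F_CPU | ((__u64)target_cpu << 32);
	int err;

	/* Spread one value to every CPU's copy with a single syscall. */
	err = bpf_map_update_elem(map_fd, &key, &value, BPF_F_ALL_CPUS);
	if (err)
		return err;

	/* Update only target_cpu's copy; the cpu is embedded in bits 63..32. */
	err = bpf_map_update_elem(map_fd, &key, &value, cpu_flags);
	if (err)
		return err;

	/* Read back just target_cpu's copy instead of one value per CPU. */
	return bpf_map_lookup_elem_flags(map_fd, &key, &value, cpu_flags);
}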
1 parent e9d68d9 commit 9befe75


4 files changed: +147 −77 lines changed


include/linux/bpf.h

Lines changed: 53 additions & 1 deletion
@@ -2696,7 +2696,7 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
 				      struct bpf_func_state *caller,
 				      struct bpf_func_state *callee);
 
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value,
 			  u64 flags);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
@@ -3710,4 +3710,56 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char *
 			   const char **linep, int *nump);
 struct bpf_prog *bpf_prog_find_from_stack(void);
 
+static inline int bpf_map_check_cpu_flags(u64 flags, bool check_all_cpus)
+{
+	const u64 cpu_flags = BPF_F_CPU | BPF_F_ALL_CPUS;
+	u32 cpu;
+
+	if (check_all_cpus) {
+		if (unlikely((u32)flags > BPF_F_ALL_CPUS))
+			/* unknown flags */
+			return -EINVAL;
+		if (unlikely((flags & cpu_flags) == cpu_flags))
+			return -EINVAL;
+	} else {
+		if (unlikely((u32)flags & ~BPF_F_CPU))
+			return -EINVAL;
+	}
+
+	cpu = flags >> 32;
+	if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
+		return -ERANGE;
+
+	return 0;
+}
+
+static inline bool bpf_map_support_cpu_flags(enum bpf_map_type map_type)
+{
+	switch (map_type) {
+	case BPF_MAP_TYPE_PERCPU_ARRAY:
+	case BPF_MAP_TYPE_PERCPU_HASH:
+	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static inline int bpf_map_check_flags(struct bpf_map *map, u64 flags, bool check_flag)
+{
+	if (check_flag && ((u32)flags & ~(BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS)))
+		return -EINVAL;
+
+	if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
+		return -EINVAL;
+
+	if (!(flags & BPF_F_CPU) && flags >> 32)
+		return -EINVAL;
+
+	if ((flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) && !bpf_map_support_cpu_flags(map->map_type))
+		return -EINVAL;
+
+	return 0;
+}
+
 #endif /* _LINUX_BPF_H */

kernel/bpf/arraymap.c

Lines changed: 10 additions & 19 deletions
@@ -300,18 +300,15 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
 	void __percpu *pptr;
+	int off = 0, err;
 	u32 size, cpu;
-	int off = 0;
 
 	if (unlikely(index >= array->map.max_entries))
 		return -ENOENT;
 
-	if (unlikely((u32)flags & ~BPF_F_CPU))
-		return -EINVAL;
-
-	cpu = flags >> 32;
-	if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
-		return -ERANGE;
+	err = bpf_map_check_cpu_flags(flags, false);
+	if (unlikely(err))
+		return err;
 
 	/* per_cpu areas are zero-filled and bpf programs can only
 	 * access 'value_size' of them, so copying rounded areas
@@ -321,6 +318,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
 	if (flags & BPF_F_CPU) {
+		cpu = flags >> 32;
 		copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
 		check_and_init_map_value(map, value);
 	} else {
@@ -397,22 +395,14 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 			    u64 map_flags)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
-	const u64 cpu_flags = BPF_F_CPU | BPF_F_ALL_CPUS;
 	u32 index = *(u32 *)key;
 	void __percpu *pptr;
+	int off = 0, err;
 	u32 size, cpu;
-	int off = 0;
-
-	if (unlikely((u32)map_flags > BPF_F_ALL_CPUS))
-		/* unknown flags */
-		return -EINVAL;
-	if (unlikely((map_flags & cpu_flags) == cpu_flags))
-		return -EINVAL;
 
-	cpu = map_flags >> 32;
-	if (unlikely((map_flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
-		/* invalid cpu */
-		return -ERANGE;
+	err = bpf_map_check_cpu_flags(map_flags, true);
+	if (unlikely(err))
+		return err;
 
 	if (unlikely(index >= array->map.max_entries))
 		/* all elements were pre-allocated, cannot insert a new one */
@@ -432,6 +422,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
 	if (map_flags & BPF_F_CPU) {
+		cpu = map_flags >> 32;
 		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value);
 		bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
 	} else {

kernel/bpf/hashtab.c

Lines changed: 78 additions & 33 deletions
@@ -937,24 +937,39 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 }
 
 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
-			    void *value, bool onallcpus)
+			    void *value, bool onallcpus, u64 map_flags)
 {
+	int cpu = map_flags & BPF_F_CPU ? map_flags >> 32 : 0;
+	int current_cpu = raw_smp_processor_id();
+
 	if (!onallcpus) {
 		/* copy true value_size bytes */
-		copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
+		copy_map_value(&htab->map, (map_flags & BPF_F_CPU) && cpu != current_cpu ?
+			       per_cpu_ptr(pptr, cpu) : this_cpu_ptr(pptr), value);
 	} else {
 		u32 size = round_up(htab->map.value_size, 8);
-		int off = 0, cpu;
+		int off = 0;
+
+		if (map_flags & BPF_F_CPU) {
+			copy_map_value_long(&htab->map, cpu != current_cpu ?
+					    per_cpu_ptr(pptr, cpu) : this_cpu_ptr(pptr), value);
+			return;
+		}
 
 		for_each_possible_cpu(cpu) {
 			copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
-			off += size;
+			/* same user-provided value is used if
+			 * BPF_F_ALL_CPUS is specified, otherwise value is
+			 * an array of per-cpu values.
+			 */
+			if (!(map_flags & BPF_F_ALL_CPUS))
+				off += size;
 		}
 	}
 }
 
 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
-			    void *value, bool onallcpus)
+			    void *value, bool onallcpus, u64 map_flags)
 {
 	/* When not setting the initial value on all cpus, zero-fill element
 	 * values for other cpus. Otherwise, bpf program has no way to ensure
@@ -972,7 +987,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
 			zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
 		}
 	} else {
-		pcpu_copy_value(htab, pptr, value, onallcpus);
+		pcpu_copy_value(htab, pptr, value, onallcpus, map_flags);
 	}
 }
 
@@ -984,7 +999,7 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
 					 bool percpu, bool onallcpus,
-					 struct htab_elem *old_elem)
+					 struct htab_elem *old_elem, u64 map_flags)
 {
 	u32 size = htab->map.value_size;
 	bool prealloc = htab_is_prealloc(htab);
@@ -1042,7 +1057,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 		pptr = *(void __percpu **)ptr;
 	}
 
-	pcpu_init_value(htab, pptr, value, onallcpus);
+	pcpu_init_value(htab, pptr, value, onallcpus, map_flags);
 
 	if (!prealloc)
 		htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1147,7 +1162,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	}
 
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-				l_old);
+				l_old, map_flags);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -1263,9 +1278,15 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
-		/* unknown flags */
-		return -EINVAL;
+	if (percpu) {
+		ret = bpf_map_check_cpu_flags(map_flags, true);
+		if (unlikely(ret))
+			return ret;
+	} else {
+		if (unlikely(map_flags > BPF_EXIST))
+			/* unknown flags */
+			return -EINVAL;
+	}
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
 		     !rcu_read_lock_bh_held());
@@ -1291,7 +1312,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
 		/* Update value in-place */
 		if (percpu) {
 			pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
-					value, onallcpus);
+					value, onallcpus, map_flags);
 		} else {
 			void **inner_map_pptr = htab_elem_value(l_old, key_size);
 
@@ -1300,7 +1321,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
 		}
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, percpu, onallcpus, NULL);
+					hash, percpu, onallcpus, NULL, map_flags);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -1326,9 +1347,9 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
-		/* unknown flags */
-		return -EINVAL;
+	ret = bpf_map_check_cpu_flags(map_flags, true);
+	if (unlikely(ret))
+		return ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
 		     !rcu_read_lock_bh_held());
@@ -1366,10 +1387,10 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 
 		/* per-cpu hash map can update value in-place */
 		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
-				value, onallcpus);
+				value, onallcpus, map_flags);
 	} else {
 		pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
-				value, onallcpus);
+				value, onallcpus, map_flags);
 		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 		l_new = NULL;
 	}
@@ -1698,9 +1719,16 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 	int ret = 0;
 
 	elem_map_flags = attr->batch.elem_flags;
-	if ((elem_map_flags & ~BPF_F_LOCK) ||
-	    ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
-		return -EINVAL;
+	if (!do_delete && is_percpu) {
+		ret = bpf_map_check_flags(map, elem_map_flags, false);
+		if (ret)
+			return ret;
+	} else {
+		if ((elem_map_flags & ~BPF_F_LOCK) ||
+		    ((elem_map_flags & BPF_F_LOCK) &&
+		     !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
+			return -EINVAL;
+	}
 
 	map_flags = attr->batch.flags;
 	if (map_flags)
@@ -1806,10 +1834,17 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 			void __percpu *pptr;
 
 			pptr = htab_elem_get_ptr(l, map->key_size);
-			for_each_possible_cpu(cpu) {
-				copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
-				check_and_init_map_value(&htab->map, dst_val + off);
-				off += size;
+			if (!do_delete && (elem_map_flags & BPF_F_CPU)) {
+				cpu = elem_map_flags >> 32;
+				copy_map_value_long(&htab->map, dst_val, per_cpu_ptr(pptr, cpu));
+				check_and_init_map_value(&htab->map, dst_val);
+			} else {
+				for_each_possible_cpu(cpu) {
+					copy_map_value_long(&htab->map, dst_val + off,
							    per_cpu_ptr(pptr, cpu));
+					check_and_init_map_value(&htab->map, dst_val + off);
+					off += size;
+				}
 			}
 		} else {
 			value = htab_elem_value(l, key_size);
@@ -2365,14 +2400,18 @@ static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *k
 	return NULL;
 }
 
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 map_flags)
 {
+	int ret, cpu, off = 0;
 	struct htab_elem *l;
 	void __percpu *pptr;
-	int ret = -ENOENT;
-	int cpu, off = 0;
 	u32 size;
 
+	ret = bpf_map_check_cpu_flags(map_flags, false);
+	if (unlikely(ret))
+		return ret;
+	ret = -ENOENT;
+
 	/* per_cpu areas are zero-filled and bpf programs can only
 	 * access 'value_size' of them, so copying rounded areas
 	 * will not leak any kernel data
@@ -2386,10 +2425,16 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
 	 * eviction heuristics when user space does a map walk.
 	 */
 	pptr = htab_elem_get_ptr(l, map->key_size);
-	for_each_possible_cpu(cpu) {
-		copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
-		check_and_init_map_value(map, value + off);
-		off += size;
+	if (map_flags & BPF_F_CPU) {
+		cpu = map_flags >> 32;
+		copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
+		check_and_init_map_value(map, value);
+	} else {
+		for_each_possible_cpu(cpu) {
+			copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
+			check_and_init_map_value(map, value + off);
+			off += size;
+		}
 	}
 	ret = 0;
 out:
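For the batch path changed above, a hedged user-space sketch of a per-CPU batch lookup via libbpf's bpf_map_lookup_batch(); it is not part of this commit, and the lookup_batch_on_cpu() helper name, buffer shapes and cursor type are illustrative.

#include <bpf/bpf.h>
#include <linux/bpf.h>

/* Sketch only: fetch up to *count entries of a percpu_hash map, returning
 * only target_cpu's value for each key via the new elem_flags encoding.
 */
static int lookup_batch_on_cpu(int map_fd, __u32 *keys, __u64 *values,
			       __u32 *count, __u32 target_cpu)
{
	__u32 out_batch;	/* opaque iteration cursor for the next call */
	LIBBPF_OPTS(bpf_map_batch_opts, opts,
		/* cpu number is carried in the upper 32 bits of elem_flags */
		.elem_flags = BPF_F_CPU | ((__u64)target_cpu << 32),
	);

	/* With BPF_F_CPU set, each returned value is a single value_size
	 * entry for target_cpu rather than one entry per possible CPU.
	 * A NULL in_batch starts iteration from the beginning of the map.
	 */
	return bpf_map_lookup_batch(map_fd, NULL, &out_batch,
				    keys, values, count, &opts);
}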
