
Commit e7bec0f

Asphaltt authored and Kernel Patches Daemon committed
bpf: Introduce BPF_F_CPU flag for percpu_array maps
Introduce support for the BPF_F_ALL_CPUS flag in percpu_array maps to allow updating values for all CPUs with a single value. Introduce support for the BPF_F_CPU flag in percpu_array maps to allow updating the value for a specified CPU.

This enhancement enables:

* Efficiently updating values across all CPUs with a single value when BPF_F_ALL_CPUS is set, for the update_elem and update_batch APIs.
* Targeted updates or lookups for a specified CPU when BPF_F_CPU is set.

The BPF_F_CPU flag is passed via:

* map_flags of the lookup_elem and update_elem APIs, with the cpu number embedded in the upper 32 bits.
* elem_flags of the lookup_batch and update_batch APIs, with the cpu number embedded in the upper 32 bits.

Signed-off-by: Leon Hwang <[email protected]>
1 parent cdaa18f commit e7bec0f
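For orientation, a hedged userspace sketch of the new flags in action. The helper names (update_one_cpu, update_all_cpus) and parameters (map_fd, key, value) are illustrative, not part of the patch; bpf_map_update_elem() is libbpf's existing wrapper, and the flags only exist on kernels and uapi headers carrying this patch:

#include <bpf/bpf.h>
#include <linux/bpf.h>

/* Illustrative only: assumes map_fd refers to a BPF_MAP_TYPE_PERCPU_ARRAY
 * map and that the running kernel carries this patch.
 */
static int update_one_cpu(int map_fd, __u32 key, const void *value, __u32 cpu)
{
	/* the cpu number rides in the upper 32 bits of the flags word */
	__u64 flags = BPF_F_CPU | ((__u64)cpu << 32);

	return bpf_map_update_elem(map_fd, &key, value, flags);
}

static int update_all_cpus(int map_fd, __u32 key, const void *value)
{
	/* one user-provided value is replicated to every possible CPU */
	return bpf_map_update_elem(map_fd, &key, value, BPF_F_ALL_CPUS);
}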

File tree

5 files changed: 67 additions & 23 deletions

include/linux/bpf.h
include/uapi/linux/bpf.h
kernel/bpf/arraymap.c
kernel/bpf/syscall.c
tools/include/uapi/linux/bpf.h

include/linux/bpf.h

Lines changed: 2 additions & 1 deletion
@@ -2697,7 +2697,8 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
 				     struct bpf_func_state *callee);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value,
+			  u64 flags);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
 			   u64 flags);
 int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,

include/uapi/linux/bpf.h

Lines changed: 2 additions & 0 deletions
@@ -1372,6 +1372,8 @@ enum {
 	BPF_NOEXIST	= 1, /* create new element if it didn't exist */
 	BPF_EXIST	= 2, /* update existing element */
 	BPF_F_LOCK	= 4, /* spin_lock-ed map_lookup/map_update */
+	BPF_F_CPU	= 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */
+	BPF_F_ALL_CPUS	= 16, /* update value across all CPUs for percpu maps */
 };
 
 /* flags for BPF_MAP_CREATE command */
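The enum comment above encodes the ABI contract: flag bits live in the low 32 bits of the flags word, the target cpu in the high 32. A pair of hypothetical helpers (not part of the patch) makes the packing explicit:

/* Hypothetical helpers illustrating the flags layout. */
static inline __u64 bpf_pack_cpu_flags(__u32 cpu)
{
	return BPF_F_CPU | ((__u64)cpu << 32);
}

static inline __u32 bpf_unpack_cpu(__u64 flags)
{
	return flags >> 32;
}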

kernel/bpf/arraymap.c

Lines changed: 43 additions & 13 deletions
@@ -295,28 +295,40 @@ static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key,
 	return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
 }
 
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
 	void __percpu *pptr;
-	int cpu, off = 0;
-	u32 size;
+	u32 size, cpu;
+	int off = 0;
 
 	if (unlikely(index >= array->map.max_entries))
 		return -ENOENT;
 
+	if (unlikely((u32)flags & ~BPF_F_CPU))
+		return -EINVAL;
+
+	cpu = flags >> 32;
+	if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
+		return -ERANGE;
+
 	/* per_cpu areas are zero-filled and bpf programs can only
 	 * access 'value_size' of them, so copying rounded areas
 	 * will not leak any kernel data
 	 */
 	size = array->elem_size;
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
-	for_each_possible_cpu(cpu) {
-		copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
-		check_and_init_map_value(map, value + off);
-		off += size;
+	if (flags & BPF_F_CPU) {
+		copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
+		check_and_init_map_value(map, value);
+	} else {
+		for_each_possible_cpu(cpu) {
+			copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
+			check_and_init_map_value(map, value + off);
+			off += size;
+		}
 	}
 	rcu_read_unlock();
 	return 0;
@@ -385,14 +397,22 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 			    u64 map_flags)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	const u64 cpu_flags = BPF_F_CPU | BPF_F_ALL_CPUS;
 	u32 index = *(u32 *)key;
 	void __percpu *pptr;
-	int cpu, off = 0;
-	u32 size;
+	u32 size, cpu;
+	int off = 0;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((u32)map_flags > BPF_F_ALL_CPUS))
 		/* unknown flags */
 		return -EINVAL;
+	if (unlikely((map_flags & cpu_flags) == cpu_flags))
+		return -EINVAL;
+
+	cpu = map_flags >> 32;
+	if (unlikely((map_flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
+		/* invalid cpu */
+		return -ERANGE;
 
 	if (unlikely(index >= array->map.max_entries))
 		/* all elements were pre-allocated, cannot insert a new one */
@@ -411,10 +431,20 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 	size = array->elem_size;
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
-	for_each_possible_cpu(cpu) {
-		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
+	if (map_flags & BPF_F_CPU) {
+		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value);
 		bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
-		off += size;
+	} else {
+		for_each_possible_cpu(cpu) {
+			copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
+			/* same user-provided value is used if
+			 * BPF_F_ALL_CPUS is specified, otherwise value is
+			 * an array of per-cpu values.
+			 */
+			if (!(map_flags & BPF_F_ALL_CPUS))
+				off += size;
+			bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
+		}
 	}
 	rcu_read_unlock();
 	return 0;
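These copy paths imply the user buffer layout: without the new flags, value is an array of num_possible_cpus() slots, each rounded up to 8 bytes; with BPF_F_CPU (or BPF_F_ALL_CPUS on update) a single slot suffices. A hedged userspace sizing sketch, using libbpf's existing libbpf_num_possible_cpus(); alloc_value_buf is a made-up name:

#include <bpf/libbpf.h>
#include <linux/types.h>
#include <stdbool.h>
#include <stdlib.h>

/* Sketch: size the value buffer to match what the kernel will copy. */
static void *alloc_value_buf(__u32 value_size, bool single_slot)
{
	size_t slot = (value_size + 7) & ~(size_t)7; /* round_up(value_size, 8) */
	int ncpus = single_slot ? 1 : libbpf_num_possible_cpus();

	if (ncpus < 0)
		return NULL; /* libbpf reports errors as negative values */
	return calloc(ncpus, slot);
}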

kernel/bpf/syscall.c

Lines changed: 18 additions & 9 deletions
@@ -131,9 +131,11 @@ bool bpf_map_write_active(const struct bpf_map *map)
 	return atomic64_read(&map->writecnt) != 0;
 }
 
-static u32 bpf_map_value_size(const struct bpf_map *map)
+static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags)
 {
-	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY && (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)))
+		return round_up(map->value_size, 8);
+	else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
 	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
@@ -314,7 +316,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 		err = bpf_percpu_hash_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
-		err = bpf_percpu_array_copy(map, key, value);
+		err = bpf_percpu_array_copy(map, key, value, flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
 		err = bpf_percpu_cgroup_storage_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
@@ -1656,12 +1658,19 @@ static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
 
 static int check_map_flags(struct bpf_map *map, u64 flags, bool check_flag)
 {
-	if (check_flag && (flags & ~BPF_F_LOCK))
+	if (check_flag && ((u32)flags & ~(BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS)))
 		return -EINVAL;
 
 	if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
 		return -EINVAL;
 
+	if (!(flags & BPF_F_CPU) && flags >> 32)
+		return -EINVAL;
+
+	if ((flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) &&
+	    map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
+		return -EINVAL;
+
 	return 0;
 }
 
@@ -1695,7 +1704,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (IS_ERR(key))
 		return PTR_ERR(key);
 
-	value_size = bpf_map_value_size(map);
+	value_size = bpf_map_value_size(map, attr->flags);
 
 	err = -ENOMEM;
 	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
@@ -1762,7 +1771,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 		goto err_put;
 	}
 
-	value_size = bpf_map_value_size(map);
+	value_size = bpf_map_value_size(map, attr->flags);
 	value = kvmemdup_bpfptr(uvalue, value_size);
 	if (IS_ERR(value)) {
 		err = PTR_ERR(value);
@@ -1962,7 +1971,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
 	if (err)
 		return err;
 
-	value_size = bpf_map_value_size(map);
+	value_size = bpf_map_value_size(map, attr->batch.elem_flags);
 
 	max_count = attr->batch.count;
 	if (!max_count)
@@ -2021,7 +2030,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
 	if (err)
 		return err;
 
-	value_size = bpf_map_value_size(map);
+	value_size = bpf_map_value_size(map, attr->batch.elem_flags);
 
 	max_count = attr->batch.count;
 	if (!max_count)
@@ -2143,7 +2152,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
-	value_size = bpf_map_value_size(map);
+	value_size = bpf_map_value_size(map, 0);
 
 	err = -ENOMEM;
 	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
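The batch paths pick the new flags up through attr->batch.elem_flags, which userspace can set via libbpf's existing bpf_map_update_batch() and struct bpf_map_batch_opts. A hedged sketch; update_batch_all_cpus and its parameters are illustrative:

#include <bpf/bpf.h>
#include <linux/types.h>

/* Sketch: replicate one value per key across all CPUs in a single batch
 * call, assuming a kernel carrying this patch.
 */
static int update_batch_all_cpus(int map_fd, const __u32 *keys,
				 const void *vals, __u32 *count)
{
	LIBBPF_OPTS(bpf_map_batch_opts, opts,
		.elem_flags = BPF_F_ALL_CPUS,
	);

	return bpf_map_update_batch(map_fd, keys, vals, count, &opts);
}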

tools/include/uapi/linux/bpf.h

Lines changed: 2 additions & 0 deletions
@@ -1372,6 +1372,8 @@ enum {
 	BPF_NOEXIST	= 1, /* create new element if it didn't exist */
 	BPF_EXIST	= 2, /* update existing element */
 	BPF_F_LOCK	= 4, /* spin_lock-ed map_lookup/map_update */
+	BPF_F_CPU	= 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */
+	BPF_F_ALL_CPUS	= 16, /* update value across all CPUs for percpu maps */
 };
 
 /* flags for BPF_MAP_CREATE command */
