Commit 4667283

Asphaltt authored and Kernel Patches Daemon committed
bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags for percpu_hash and lru_percpu_hash maps
Introduce BPF_F_ALL_CPUS flag support for percpu_hash and lru_percpu_hash maps to allow updating values for all CPUs with a single value for both update_elem and update_batch APIs.

Introduce BPF_F_CPU flag support for percpu_hash and lru_percpu_hash maps to allow:

* update value for specified CPU for both update_elem and update_batch APIs.
* lookup value for specified CPU for both lookup_elem and lookup_batch APIs.

The BPF_F_CPU flag is passed via:

* map_flags along with embedded cpu info.
* elem_flags along with embedded cpu info.

Signed-off-by: Leon Hwang <[email protected]>
1 parent 5734554 commit 4667283
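
For orientation, here is a minimal user-space sketch of how the new single-CPU flag is intended to be driven through the existing libbpf wrappers (bpf_map_update_elem and bpf_map_lookup_elem_flags). BPF_F_CPU is the flag introduced by this series, and the cpu-in-the-upper-32-bits encoding mirrors the map_flags >> 32 read in pcpu_copy_value() in the hashtab.c diff below; the map fd, key/value types, and the cpu_flags() helper are illustrative assumptions, not part of this commit.

#include <bpf/bpf.h>
#include <stdio.h>

/* Hypothetical helper: pack the target CPU into the upper 32 bits of the
 * flags word, matching how pcpu_copy_value() extracts it (map_flags >> 32).
 */
static __u64 cpu_flags(__u32 cpu)
{
	return BPF_F_CPU | ((__u64)cpu << 32);
}

/* Update and read back a single CPU's slot of a BPF_MAP_TYPE_PERCPU_HASH
 * element, without touching the other CPUs' values.
 */
static int update_and_read_one_cpu(int map_fd, __u32 key, __u64 new_val, __u32 cpu)
{
	__u64 out;
	int err;

	err = bpf_map_update_elem(map_fd, &key, &new_val, cpu_flags(cpu));
	if (err)
		return err;

	err = bpf_map_lookup_elem_flags(map_fd, &key, &out, cpu_flags(cpu));
	if (err)
		return err;

	printf("cpu %u: %llu\n", cpu, (unsigned long long)out);
	return 0;
}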

File tree

include/linux/bpf.h
kernel/bpf/hashtab.c
kernel/bpf/syscall.c

3 files changed: +63 −38 lines


include/linux/bpf.h

Lines changed: 3 additions & 1 deletion
@@ -2745,7 +2745,7 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
 				      struct bpf_func_state *caller,
 				      struct bpf_func_state *callee);
 
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value,
 			  u64 flags);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
@@ -3763,6 +3763,8 @@ static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type)
 {
 	switch (map_type) {
 	case BPF_MAP_TYPE_PERCPU_ARRAY:
+	case BPF_MAP_TYPE_PERCPU_HASH:
+	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
 		return true;
 	default:
 		return false;

kernel/bpf/hashtab.c

Lines changed: 59 additions & 36 deletions
@@ -937,24 +937,39 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 }
 
 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
-			    void *value, bool onallcpus)
+			    void *value, bool onallcpus, u64 map_flags)
 {
+	int cpu = map_flags & BPF_F_CPU ? map_flags >> 32 : 0;
+	int current_cpu = raw_smp_processor_id();
+
 	if (!onallcpus) {
 		/* copy true value_size bytes */
-		copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
+		copy_map_value(&htab->map, (map_flags & BPF_F_CPU) && cpu != current_cpu ?
+			       per_cpu_ptr(pptr, cpu) : this_cpu_ptr(pptr), value);
 	} else {
 		u32 size = round_up(htab->map.value_size, 8);
-		int off = 0, cpu;
+		int off = 0;
+
+		if (map_flags & BPF_F_CPU) {
+			copy_map_value_long(&htab->map, cpu != current_cpu ?
+					    per_cpu_ptr(pptr, cpu) : this_cpu_ptr(pptr), value);
+			return;
+		}
 
 		for_each_possible_cpu(cpu) {
 			copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
-			off += size;
+			/* same user-provided value is used if
+			 * BPF_F_ALL_CPUS is specified, otherwise value is
+			 * an array of per-cpu values.
+			 */
+			if (!(map_flags & BPF_F_ALL_CPUS))
+				off += size;
 		}
 	}
 }
 
 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
-			    void *value, bool onallcpus)
+			    void *value, bool onallcpus, u64 map_flags)
 {
 	/* When not setting the initial value on all cpus, zero-fill element
 	 * values for other cpus. Otherwise, bpf program has no way to ensure
@@ -972,7 +987,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
 			zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
 		}
 	} else {
-		pcpu_copy_value(htab, pptr, value, onallcpus);
+		pcpu_copy_value(htab, pptr, value, onallcpus, map_flags);
 	}
 }
 
@@ -984,7 +999,7 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
 					 bool percpu, bool onallcpus,
-					 struct htab_elem *old_elem)
+					 struct htab_elem *old_elem, u64 map_flags)
 {
 	u32 size = htab->map.value_size;
 	bool prealloc = htab_is_prealloc(htab);
@@ -1042,7 +1057,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 		pptr = *(void __percpu **)ptr;
 	}
 
-	pcpu_init_value(htab, pptr, value, onallcpus);
+	pcpu_init_value(htab, pptr, value, onallcpus, map_flags);
 
 	if (!prealloc)
 		htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1147,7 +1162,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	}
 
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-				l_old);
+				l_old, map_flags);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -1263,9 +1278,15 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
-		/* unknown flags */
-		return -EINVAL;
+	if (percpu) {
+		ret = bpf_map_check_cpu_flags(map_flags, true);
+		if (unlikely(ret))
+			return ret;
+	} else {
+		if (unlikely(map_flags > BPF_EXIST))
+			/* unknown flags */
+			return -EINVAL;
+	}
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
 		     !rcu_read_lock_bh_held());
@@ -1291,7 +1312,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
 		/* Update value in-place */
 		if (percpu) {
 			pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
-					value, onallcpus);
+					value, onallcpus, map_flags);
 		} else {
 			void **inner_map_pptr = htab_elem_value(l_old, key_size);
 
@@ -1300,7 +1321,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
 		}
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, percpu, onallcpus, NULL);
+					hash, percpu, onallcpus, NULL, map_flags);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -1326,9 +1347,9 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
-		/* unknown flags */
-		return -EINVAL;
+	ret = bpf_map_check_cpu_flags(map_flags, true);
+	if (unlikely(ret))
+		return ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
 		     !rcu_read_lock_bh_held());
@@ -1366,10 +1387,10 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 
 		/* per-cpu hash map can update value in-place */
 		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
-				value, onallcpus);
+				value, onallcpus, map_flags);
 	} else {
 		pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
-				value, onallcpus);
+				value, onallcpus, map_flags);
 		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 		l_new = NULL;
 	}
@@ -1698,9 +1719,16 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 	int ret = 0;
 
 	elem_map_flags = attr->batch.elem_flags;
-	if ((elem_map_flags & ~BPF_F_LOCK) ||
-	    ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
-		return -EINVAL;
+	if (!do_delete && is_percpu) {
+		ret = bpf_map_check_lookup_flags(map, elem_map_flags);
+		if (ret)
+			return ret;
+	} else {
+		if ((elem_map_flags & ~BPF_F_LOCK) ||
+		    ((elem_map_flags & BPF_F_LOCK) &&
+		     !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
+			return -EINVAL;
+	}
 
 	map_flags = attr->batch.flags;
 	if (map_flags)
@@ -1802,15 +1830,10 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 		memcpy(dst_key, l->key, key_size);
 
 		if (is_percpu) {
-			int off = 0, cpu;
 			void __percpu *pptr;
 
 			pptr = htab_elem_get_ptr(l, map->key_size);
-			for_each_possible_cpu(cpu) {
-				copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
-				check_and_init_map_value(&htab->map, dst_val + off);
-				off += size;
-			}
+			bpf_percpu_copy_to_user(&htab->map, pptr, dst_val, size, elem_map_flags);
 		} else {
 			value = htab_elem_value(l, key_size);
 			if (is_fd_htab(htab)) {
@@ -2365,13 +2388,17 @@ static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *k
 	return NULL;
 }
 
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 map_flags)
 {
 	struct htab_elem *l;
 	void __percpu *pptr;
-	int ret = -ENOENT;
-	int cpu, off = 0;
 	u32 size;
+	int ret;
+
+	ret = bpf_map_check_cpu_flags(map_flags, false);
+	if (unlikely(ret))
+		return ret;
+	ret = -ENOENT;
 
 	/* per_cpu areas are zero-filled and bpf programs can only
 	 * access 'value_size' of them, so copying rounded areas
@@ -2386,11 +2413,7 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
 	 * eviction heuristics when user space does a map walk.
 	 */
 	pptr = htab_elem_get_ptr(l, map->key_size);
-	for_each_possible_cpu(cpu) {
-		copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
-		check_and_init_map_value(map, value + off);
-		off += size;
-	}
+	bpf_percpu_copy_to_user(map, pptr, value, size, map_flags);
 	ret = 0;
 out:
 	rcu_read_unlock();
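
The batch-lookup hunk above routes the per-CPU flags through bpf_percpu_copy_to_user(); on the update side, BPF_F_ALL_CPUS lets user space supply one value per key instead of one value per possible CPU. A hedged sketch using the existing libbpf batch API follows: bpf_map_update_batch() and struct bpf_map_batch_opts are existing libbpf interfaces, BPF_F_ALL_CPUS comes from this series, and fill_all_cpus() is an illustrative helper, not part of this commit.

#include <bpf/bpf.h>

/* Spread a single 8-byte value per key across every CPU slot of a
 * percpu_hash map in one batched syscall. With BPF_F_ALL_CPUS the
 * values array holds one entry per key, not num_possible_cpus() entries.
 */
static int fill_all_cpus(int map_fd, __u32 *keys, __u64 *vals, __u32 count)
{
	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
		.elem_flags = BPF_F_ALL_CPUS,
	);

	return bpf_map_update_batch(map_fd, keys, vals, &count, &opts);
}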

kernel/bpf/syscall.c

Lines changed: 1 addition & 1 deletion
@@ -314,7 +314,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 	bpf_disable_instrumentation();
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
-		err = bpf_percpu_hash_copy(map, key, value);
+		err = bpf_percpu_hash_copy(map, key, value, flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		err = bpf_percpu_array_copy(map, key, value, flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
