
Commit 6bbb21d

Asphaltt authored and Kernel Patches Daemon committed
bpf: Add BPF_F_CPU and BPF_F_ALL_CPUS flags support for percpu_hash and lru_percpu_hash maps
Introduce BPF_F_ALL_CPUS flag support for percpu_hash and lru_percpu_hash
maps to allow updating values for all CPUs with a single value for both
update_elem and update_batch APIs.

Introduce BPF_F_CPU flag support for percpu_hash and lru_percpu_hash maps
to allow:

* update value for specified CPU for both update_elem and update_batch APIs.
* lookup value for specified CPU for both lookup_elem and lookup_batch APIs.

The BPF_F_CPU flag is passed via:

* map_flags along with embedded cpu info.
* elem_flags along with embedded cpu info.

Signed-off-by: Leon Hwang <[email protected]>
1 parent 47308ea commit 6bbb21d
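
For illustration only (not part of this commit): a minimal userspace sketch of how the new flags would be used through libbpf's existing bpf_map_update_elem() wrapper. BPF_F_CPU and BPF_F_ALL_CPUS are the UAPI flags this series introduces, and the target CPU index is assumed to ride in the upper 32 bits of the flags value, matching the cpu = map_flags >> 32 extraction in the diff below.

#include <bpf/bpf.h>

/* Update only the slot of @cpu in a percpu_hash/lru_percpu_hash element.
 * BPF_F_CPU is new UAPI from this series; the CPU index is carried in the
 * upper 32 bits of the flags. */
static int update_one_cpu(int map_fd, const void *key, const void *val, __u32 cpu)
{
        __u64 flags = BPF_F_CPU | ((__u64)cpu << 32);

        return bpf_map_update_elem(map_fd, key, val, flags);
}

/* Replicate a single value_size-sized value to every possible CPU, instead
 * of supplying a num_possible_cpus()-sized buffer. */
static int update_all_cpus(int map_fd, const void *key, const void *val)
{
        return bpf_map_update_elem(map_fd, key, val, BPF_F_ALL_CPUS);
}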

File tree

include/linux/bpf.h
kernel/bpf/hashtab.c
kernel/bpf/syscall.c

3 files changed: +68 -32 lines changed


include/linux/bpf.h

Lines changed: 3 additions & 1 deletion
@@ -2761,7 +2761,7 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
                                    struct bpf_func_state *caller,
                                    struct bpf_func_state *callee);
 
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
                            u64 flags);
@@ -3833,6 +3833,8 @@ static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type)
 {
         switch (map_type) {
         case BPF_MAP_TYPE_PERCPU_ARRAY:
+        case BPF_MAP_TYPE_PERCPU_HASH:
+        case BPF_MAP_TYPE_LRU_PERCPU_HASH:
                 return true;
         default:
                 return false;

kernel/bpf/hashtab.c

Lines changed: 64 additions & 30 deletions
@@ -932,7 +932,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 }
 
 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
-                            void *value, bool onallcpus)
+                            void *value, bool onallcpus, u64 map_flags)
 {
         void *ptr;
 
@@ -943,19 +943,28 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
                 bpf_obj_free_fields(htab->map.record, ptr);
         } else {
                 u32 size = round_up(htab->map.value_size, 8);
-                int off = 0, cpu;
+                void *val;
+                int cpu;
+
+                if (map_flags & BPF_F_CPU) {
+                        cpu = map_flags >> 32;
+                        ptr = per_cpu_ptr(pptr, cpu);
+                        copy_map_value(&htab->map, ptr, value);
+                        bpf_obj_free_fields(htab->map.record, ptr);
+                        return;
+                }
 
                 for_each_possible_cpu(cpu) {
                         ptr = per_cpu_ptr(pptr, cpu);
-                        copy_map_value_long(&htab->map, ptr, value + off);
+                        val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu;
+                        copy_map_value(&htab->map, ptr, val);
                         bpf_obj_free_fields(htab->map.record, ptr);
-                        off += size;
                 }
         }
 }
 
 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
-                            void *value, bool onallcpus)
+                            void *value, bool onallcpus, u64 map_flags)
 {
         /* When not setting the initial value on all cpus, zero-fill element
          * values for other cpus. Otherwise, bpf program has no way to ensure
@@ -973,7 +982,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
                         zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
                 }
         } else {
-                pcpu_copy_value(htab, pptr, value, onallcpus);
+                pcpu_copy_value(htab, pptr, value, onallcpus, map_flags);
         }
 }
 
@@ -985,7 +994,7 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                                          void *value, u32 key_size, u32 hash,
                                          bool percpu, bool onallcpus,
-                                         struct htab_elem *old_elem)
+                                         struct htab_elem *old_elem, u64 map_flags)
 {
         u32 size = htab->map.value_size;
         bool prealloc = htab_is_prealloc(htab);
@@ -1043,7 +1052,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                 pptr = *(void __percpu **)ptr;
         }
 
-        pcpu_init_value(htab, pptr, value, onallcpus);
+        pcpu_init_value(htab, pptr, value, onallcpus, map_flags);
 
         if (!prealloc)
                 htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1147,7 +1156,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
         }
 
         l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-                                l_old);
+                                l_old, map_flags);
         if (IS_ERR(l_new)) {
                 /* all pre-allocated elements are in use or memory exhausted */
                 ret = PTR_ERR(l_new);
@@ -1249,6 +1258,15 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
         return ret;
 }
 
+static int htab_map_check_update_flags(bool onallcpus, u64 map_flags)
+{
+        if (unlikely(!onallcpus && map_flags > BPF_EXIST))
+                return -EINVAL;
+        if (unlikely(onallcpus && ((map_flags & BPF_F_LOCK) || (u32)map_flags > BPF_F_ALL_CPUS)))
+                return -EINVAL;
+        return 0;
+}
+
 static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
                                           void *value, u64 map_flags,
                                           bool percpu, bool onallcpus)
@@ -1262,9 +1280,9 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
         u32 key_size, hash;
         int ret;
 
-        if (unlikely(map_flags > BPF_EXIST))
-                /* unknown flags */
-                return -EINVAL;
+        ret = htab_map_check_update_flags(onallcpus, map_flags);
+        if (unlikely(ret))
+                return ret;
 
         WARN_ON_ONCE(!bpf_rcu_lock_held());
 
@@ -1289,7 +1307,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
                 /* Update value in-place */
                 if (percpu) {
                         pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
-                                        value, onallcpus);
+                                        value, onallcpus, map_flags);
                 } else {
                         void **inner_map_pptr = htab_elem_value(l_old, key_size);
 
@@ -1298,7 +1316,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
                 }
         } else {
                 l_new = alloc_htab_elem(htab, key, value, key_size,
-                                        hash, percpu, onallcpus, NULL);
+                                        hash, percpu, onallcpus, NULL, map_flags);
                 if (IS_ERR(l_new)) {
                         ret = PTR_ERR(l_new);
                         goto err;
@@ -1324,9 +1342,9 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
         u32 key_size, hash;
         int ret;
 
-        if (unlikely(map_flags > BPF_EXIST))
-                /* unknown flags */
-                return -EINVAL;
+        ret = htab_map_check_update_flags(onallcpus, map_flags);
+        if (unlikely(ret))
+                return ret;
 
         WARN_ON_ONCE(!bpf_rcu_lock_held());
 
@@ -1363,10 +1381,10 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 
                 /* per-cpu hash map can update value in-place */
                 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
-                                value, onallcpus);
+                                value, onallcpus, map_flags);
         } else {
                 pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
-                                value, onallcpus);
+                                value, onallcpus, map_flags);
                 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
                 l_new = NULL;
         }
@@ -1678,9 +1696,9 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
         void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
         void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
         u32 batch, max_count, size, bucket_size, map_id;
+        u64 elem_map_flags, map_flags, allowed_flags;
         u32 bucket_cnt, total, key_size, value_size;
         struct htab_elem *node_to_free = NULL;
-        u64 elem_map_flags, map_flags;
         struct hlist_nulls_head *head;
         struct hlist_nulls_node *n;
         unsigned long flags = 0;
@@ -1690,9 +1708,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
         int ret = 0;
 
         elem_map_flags = attr->batch.elem_flags;
-        if ((elem_map_flags & ~BPF_F_LOCK) ||
-            ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
-                return -EINVAL;
+        allowed_flags = BPF_F_LOCK;
+        if (!do_delete && is_percpu)
+                allowed_flags |= BPF_F_CPU;
+        ret = bpf_map_check_op_flags(map, elem_map_flags, allowed_flags);
+        if (ret)
+                return ret;
 
         map_flags = attr->batch.flags;
         if (map_flags)
@@ -1715,7 +1736,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
         key_size = htab->map.key_size;
         value_size = htab->map.value_size;
         size = round_up(value_size, 8);
-        if (is_percpu)
+        if (is_percpu && !(elem_map_flags & BPF_F_CPU))
                 value_size = size * num_possible_cpus();
         total = 0;
         /* while experimenting with hash tables with sizes ranging from 10 to
@@ -1798,10 +1819,17 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
                         void __percpu *pptr;
 
                         pptr = htab_elem_get_ptr(l, map->key_size);
-                        for_each_possible_cpu(cpu) {
-                                copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
-                                check_and_init_map_value(&htab->map, dst_val + off);
-                                off += size;
+                        if (elem_map_flags & BPF_F_CPU) {
+                                cpu = elem_map_flags >> 32;
+                                copy_map_value(&htab->map, dst_val, per_cpu_ptr(pptr, cpu));
+                                check_and_init_map_value(&htab->map, dst_val);
+                        } else {
+                                for_each_possible_cpu(cpu) {
+                                        copy_map_value_long(&htab->map, dst_val + off,
+                                                            per_cpu_ptr(pptr, cpu));
+                                        check_and_init_map_value(&htab->map, dst_val + off);
+                                        off += size;
+                                }
                         }
                 } else {
                         value = htab_elem_value(l, key_size);
@@ -2357,7 +2385,7 @@ static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *k
         return NULL;
 }
 
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 map_flags)
 {
         struct htab_elem *l;
         void __percpu *pptr;
@@ -2374,16 +2402,22 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
         l = __htab_map_lookup_elem(map, key);
         if (!l)
                 goto out;
+        ret = 0;
         /* We do not mark LRU map element here in order to not mess up
          * eviction heuristics when user space does a map walk.
          */
         pptr = htab_elem_get_ptr(l, map->key_size);
+        if (map_flags & BPF_F_CPU) {
+                cpu = map_flags >> 32;
+                copy_map_value(map, value, per_cpu_ptr(pptr, cpu));
+                check_and_init_map_value(map, value);
+                goto out;
+        }
         for_each_possible_cpu(cpu) {
                 copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
                 check_and_init_map_value(map, value + off);
                 off += size;
         }
-        ret = 0;
 out:
         rcu_read_unlock();
         return ret;

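For illustration only (not part of this commit): a sketch of a per-CPU batch lookup using BPF_F_CPU in elem_flags, mirroring the __htab_map_lookup_and_delete_batch() changes above. bpf_map_lookup_batch() and LIBBPF_OPTS() are existing libbpf API; the BPF_F_CPU flag and its upper-32-bit CPU encoding are assumptions based on this series.

#include <bpf/bpf.h>

/* Fetch a batch of elements, but only the value slot belonging to @cpu for
 * each element. With BPF_F_CPU set, each entry in @values is value_size
 * bytes instead of value_size * num_possible_cpus(). */
static int lookup_batch_one_cpu(int map_fd, void *keys, void *values,
                                __u32 *count, __u32 cpu)
{
        LIBBPF_OPTS(bpf_map_batch_opts, opts,
                .elem_flags = BPF_F_CPU | ((__u64)cpu << 32),
        );
        __u32 out_batch;

        /* NULL in_batch starts the walk from the beginning of the map. */
        return bpf_map_lookup_batch(map_fd, NULL, &out_batch, keys, values,
                                    count, &opts);
}
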
kernel/bpf/syscall.c

Lines changed: 1 addition & 1 deletion
@@ -316,7 +316,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
         bpf_disable_instrumentation();
         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
-                err = bpf_percpu_hash_copy(map, key, value);
+                err = bpf_percpu_hash_copy(map, key, value, flags);
         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                 err = bpf_percpu_array_copy(map, key, value, flags);
         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {

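For illustration only (not part of this commit): with bpf_map_copy_value() now forwarding flags into bpf_percpu_hash_copy(), a single-CPU lookup from userspace might look like the sketch below. bpf_map_lookup_elem_flags() is existing libbpf API; BPF_F_CPU and the CPU-in-upper-32-bits encoding are assumptions from this series.

#include <bpf/bpf.h>

/* Read one CPU's value from a percpu_hash/lru_percpu_hash map; @value only
 * needs room for value_size bytes rather than one slot per possible CPU. */
static int lookup_one_cpu(int map_fd, const void *key, void *value, __u32 cpu)
{
        return bpf_map_lookup_elem_flags(map_fd, key, value,
                                         BPF_F_CPU | ((__u64)cpu << 32));
}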