Commit 1612cc4

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Alexei Starovoitov says:

====================
The following pull-request contains BPF updates for your *net* tree.

We've added 21 non-merge commits during the last 8 day(s) which contain
a total of 21 files changed, 450 insertions(+), 36 deletions(-).

The main changes are:

1) Adjust bpf_mem_alloc buckets to match ksize(), from Hou Tao.

2) Check whether override is allowed in kprobe mult, from Jiri Olsa.

3) Fix btf_id symbol generation with ld.lld, from Jiri and Nick.

4) Fix potential deadlock when using queue and stack maps from NMI,
   from Toke Høiland-Jørgensen.

Please consider pulling these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git

Thanks a lot! Also thanks to reporters, reviewers and testers of
commits in this pull-request:

Alan Maguire, Biju Das, Björn Töpel, Dan Carpenter, Daniel Borkmann,
Eduard Zingerman, Hsin-Wei Hung, Marcus Seyfarth, Nathan Chancellor,
Satya Durga Srinivasu Prabhala, Song Liu, Stephen Rothwell
====================

Signed-off-by: David S. Miller <[email protected]>

2 parents 615efed + c0bb9fb; commit 1612cc4

File tree: 21 files changed (+450, -36 lines)


include/linux/btf_ids.h

Lines changed: 1 addition & 1 deletion

@@ -49,7 +49,7 @@ word \
 	____BTF_ID(symbol, word)
 
 #define __ID(prefix) \
-	__PASTE(prefix, __COUNTER__)
+	__PASTE(__PASTE(prefix, __COUNTER__), __LINE__)
 
 /*
  * The BTF_ID defines unique symbol for each ID pointing
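The extra __LINE__ matters because __COUNTER__ values can coincide for the same symbol in two different translation units (e.g. when ld.lld links LTO-merged inline asm), producing duplicate-symbol link errors (change 3 in the pull request). Below is a minimal userspace sketch of the pasting scheme; ID_OLD, ID_NEW and the sym_ prefix are illustrative names, not kernel identifiers:

    #include <stdio.h>

    /* Local re-definitions mirroring the kernel's paste helpers from
     * linux/compiler_types.h; everything here is illustrative only.
     */
    #define ___PASTE(a, b)  a##b
    #define __PASTE(a, b)   ___PASTE(a, b)

    #define ID_OLD(prefix)  __PASTE(prefix, __COUNTER__)
    #define ID_NEW(prefix)  __PASTE(__PASTE(prefix, __COUNTER__), __LINE__)

    #define STR(x)          #x
    #define XSTR(x)         STR(x)

    int main(void)
    {
            /* Old scheme: token is prefix + counter, e.g. "sym_0"; two
             * units whose counters line up emit colliding symbols.
             */
            puts(XSTR(ID_OLD(sym_)));
            /* New scheme: prefix + counter + line, e.g. "sym_124"; the
             * line number disambiguates equal counter values.
             */
            puts(XSTR(ID_NEW(sym_)));
            return 0;
    }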

include/uapi/linux/bpf.h

Lines changed: 3 additions & 1 deletion

@@ -1962,7 +1962,9 @@ union bpf_attr {
  *		performed again, if the helper is used in combination with
  *		direct packet access.
  *	Return
- *		0 on success, or a negative error in case of failure.
+ *		0 on success, or a negative error in case of failure. Positive
+ *		error indicates a potential drop or congestion in the target
+ *		device. The particular positive error codes are not defined.
  *
  * u64 bpf_get_current_pid_tgid(void)
  *	Description
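In bpf.h this Return clause belongs to bpf_clone_redirect(). A hedged sketch of how a tc program might act on the clarified contract follows; the ifindex value 2 is a placeholder and the pass/drop policy is only an example:

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <linux/pkt_cls.h>
    #include <bpf/bpf_helpers.h>

    SEC("tc")
    int clone_and_pass(struct __sk_buff *skb)
    {
            long ret = bpf_clone_redirect(skb, 2 /* placeholder ifindex */, 0);

            if (ret < 0)    /* negative: the helper itself failed */
                    return TC_ACT_SHOT;
            /* positive: clone was handed off but the target reported a
             * drop or congestion; the exact codes are unspecified, so
             * treat the clone as best-effort and pass the original on.
             */
            return TC_ACT_OK;
    }

    char _license[] SEC("license") = "GPL";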

kernel/bpf/btf.c

Lines changed: 1 addition & 1 deletion

@@ -8501,7 +8501,7 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
 	tname = btf_name_by_offset(btf, walk_type->name_off);
 
 	ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix);
-	if (ret < 0)
+	if (ret >= sizeof(safe_tname))
 		return false;
 
 	safe_id = btf_find_by_name_kind(btf, safe_tname, BTF_INFO_KIND(walk_type->info));
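The old check could never fire on truncation: snprintf() returns the length the formatted string would have had, which stays non-negative even when the output is cut short. A small userspace demonstration (buffer size and strings are arbitrary):

    #include <stdio.h>

    int main(void)
    {
            char safe_tname[8];     /* deliberately tiny to force truncation */
            int ret;

            /* Returns 25 (the untruncated length), not a negative value, so
             * "ret < 0" misses the overflow; "ret >= sizeof(buf)" catches it.
             */
            ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s",
                           "task_struct", "__safe_trusted");
            printf("ret=%d stored=\"%s\"\n", ret, safe_tname);
            if (ret >= (int)sizeof(safe_tname))
                    puts("truncated -> reject the name, as the patched check does");
            return 0;
    }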

kernel/bpf/cgroup.c

Lines changed: 7 additions & 6 deletions

@@ -785,7 +785,8 @@ static void replace_effective_prog(struct cgroup *cgrp,
  *                          to descendants
  * @cgrp: The cgroup which descendants to traverse
  * @link: A link for which to replace BPF program
- * @type: Type of attach operation
+ * @new_prog: &struct bpf_prog for the target BPF program with its refcnt
+ *            incremented
  *
  * Must be called with cgroup_mutex held.
  */
@@ -1334,7 +1335,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
  * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
  * @sk: The socket sending or receiving traffic
  * @skb: The skb that is being sent or received
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
  *
  * If no socket is passed, or the socket is not of type INET or INET6,
  * this function does nothing and returns 0.
@@ -1424,7 +1425,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
 /**
  * __cgroup_bpf_run_filter_sk() - Run a program on a sock
  * @sk: sock structure to manipulate
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
  *
  * socket is passed is expected to be of type INET or INET6.
  *
@@ -1449,7 +1450,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
  *                                       provided by user sockaddr
  * @sk: sock struct that will use sockaddr
  * @uaddr: sockaddr struct provided by user
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
  * @t_ctx: Pointer to attach type specific context
  * @flags: Pointer to u32 which contains higher bits of BPF program
  *         return value (OR'ed together).
@@ -1496,7 +1497,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
  * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
  *            sk with connection information (IP addresses, etc.) May not contain
  *            cgroup info if it is a req sock.
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
  *
  * socket passed is expected to be of type INET or INET6.
  *
@@ -1670,7 +1671,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
  * @ppos: value-result argument: value is position at which read from or write
  *        to sysctl is happening, result is new position if program overrode it,
  *        initial value otherwise
- * @type: type of program to be executed
+ * @atype: type of program to be executed
  *
  * Program is run when sysctl is being accessed, either read or written, and
  * can allow or deny such access.

kernel/bpf/memalloc.c

Lines changed: 90 additions & 4 deletions

@@ -459,8 +459,7 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c)
  * Typical case will be between 11K and 116K closer to 11K.
  * bpf progs can and should share bpf_mem_cache when possible.
  */
-
-static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
+static void init_refill_work(struct bpf_mem_cache *c)
 {
 	init_irq_work(&c->refill_work, bpf_mem_refill);
 	if (c->unit_size <= 256) {
@@ -476,14 +475,42 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 		c->high_watermark = max(96 * 256 / c->unit_size, 3);
 	}
 	c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1);
+}
 
+static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
+{
 	/* To avoid consuming memory assume that 1st run of bpf
 	 * prog won't be doing more than 4 map_update_elem from
 	 * irq disabled region
 	 */
 	alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
 }
 
+static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
+{
+	struct llist_node *first;
+	unsigned int obj_size;
+
+	/* For per-cpu allocator, the size of free objects in free list doesn't
+	 * match with unit_size and now there is no way to get the size of
+	 * per-cpu pointer saved in free object, so just skip the checking.
+	 */
+	if (c->percpu_size)
+		return 0;
+
+	first = c->free_llist.first;
+	if (!first)
+		return 0;
+
+	obj_size = ksize(first);
+	if (obj_size != c->unit_size) {
+		WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
+			  idx, obj_size, c->unit_size);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /* When size != 0 bpf_mem_cache for each cpu.
  * This is typical bpf hash map use case when all elements have equal size.
  *
@@ -494,10 +521,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 {
 	static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
+	int cpu, i, err, unit_size, percpu_size = 0;
 	struct bpf_mem_caches *cc, __percpu *pcc;
 	struct bpf_mem_cache *c, __percpu *pc;
 	struct obj_cgroup *objcg = NULL;
-	int cpu, i, unit_size, percpu_size = 0;
 
 	if (size) {
 		pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
@@ -521,6 +548,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 			c->objcg = objcg;
 			c->percpu_size = percpu_size;
 			c->tgt = c;
+			init_refill_work(c);
 			prefill_mem_cache(c, cpu);
 		}
 		ma->cache = pc;
@@ -534,6 +562,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 	pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
 	if (!pcc)
 		return -ENOMEM;
+	err = 0;
 #ifdef CONFIG_MEMCG_KMEM
 	objcg = get_obj_cgroup_from_current();
 #endif
@@ -544,11 +573,30 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 			c->unit_size = sizes[i];
 			c->objcg = objcg;
 			c->tgt = c;
+
+			init_refill_work(c);
+			/* Another bpf_mem_cache will be used when allocating
+			 * c->unit_size in bpf_mem_alloc(), so doesn't prefill
+			 * for the bpf_mem_cache because these free objects will
+			 * never be used.
+			 */
+			if (i != bpf_mem_cache_idx(c->unit_size))
+				continue;
 			prefill_mem_cache(c, cpu);
+			err = check_obj_size(c, i);
+			if (err)
+				goto out;
 		}
 	}
+
+out:
 	ma->caches = pcc;
-	return 0;
+	/* refill_work is either zeroed or initialized, so it is safe to
+	 * call irq_work_sync().
+	 */
+	if (err)
+		bpf_mem_alloc_destroy(ma);
+	return err;
 }
 
 static void drain_mem_cache(struct bpf_mem_cache *c)
@@ -916,3 +964,41 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
 
 	return !ret ? NULL : ret + LLIST_NODE_SZ;
 }
+
+/* Most of the logic is taken from setup_kmalloc_cache_index_table() */
+static __init int bpf_mem_cache_adjust_size(void)
+{
+	unsigned int size, index;
+
+	/* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be
+	 * up-to 256-bytes.
+	 */
+	size = KMALLOC_MIN_SIZE;
+	if (size <= 192)
+		index = size_index[(size - 1) / 8];
+	else
+		index = fls(size - 1) - 1;
+	for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8)
+		size_index[(size - 1) / 8] = index;
+
+	/* The minimal alignment is 64-bytes, so disable 96-bytes cache and
+	 * use 128-bytes cache instead.
+	 */
+	if (KMALLOC_MIN_SIZE >= 64) {
+		index = size_index[(128 - 1) / 8];
+		for (size = 64 + 8; size <= 96; size += 8)
+			size_index[(size - 1) / 8] = index;
+	}
+
+	/* The minimal alignment is 128-bytes, so disable 192-bytes cache and
+	 * use 256-bytes cache instead.
+	 */
+	if (KMALLOC_MIN_SIZE >= 128) {
+		index = fls(256 - 1) - 1;
+		for (size = 128 + 8; size <= 192; size += 8)
+			size_index[(size - 1) / 8] = index;
+	}
+
+	return 0;
+}
+subsys_initcall(bpf_mem_cache_adjust_size);
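check_obj_size() only holds if ksize() of a fresh object equals the bucket's unit_size, which is exactly what this initcall arranges: when KMALLOC_MIN_SIZE is 64 or more the kernel has no real 96-byte (and, at 128, no 192-byte) kmalloc cache, so such requests silently land in the next power-of-two cache. A toy userspace model of that rounding — model_ksize() is hypothetical, not a kernel API:

    #include <stdio.h>

    /* Assumption: 96- and 192-byte caches exist only while the minimum
     * slab alignment is small enough; otherwise requests round up to the
     * next power of two, so ksize(kmalloc(96)) would report 128.
     */
    static unsigned int model_ksize(unsigned int size, unsigned int min_align)
    {
            if (size == 96 && min_align >= 64)
                    return 128;
            if (size == 192 && min_align >= 128)
                    return 256;
            return size;    /* simplified: an exact-fit cache exists */
    }

    int main(void)
    {
            printf("kmalloc(96), 8-byte align    -> ksize %u\n", model_ksize(96, 8));
            printf("kmalloc(96), 64-byte align   -> ksize %u\n", model_ksize(96, 64));
            printf("kmalloc(192), 128-byte align -> ksize %u\n", model_ksize(192, 128));
            return 0;
    }

Editing size_index[] redirects bpf_mem_alloc's own 96/192 buckets the same way, so unit_size keeps matching what ksize() later reports.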

kernel/bpf/offload.c

Lines changed: 7 additions & 5 deletions

@@ -199,12 +199,14 @@ static int __bpf_prog_dev_bound_init(struct bpf_prog *prog, struct net_device *n
 	offload->netdev = netdev;
 
 	ondev = bpf_offload_find_netdev(offload->netdev);
+	/* When program is offloaded require presence of "true"
+	 * bpf_offload_netdev, avoid the one created for !ondev case below.
+	 */
+	if (bpf_prog_is_offloaded(prog->aux) && (!ondev || !ondev->offdev)) {
+		err = -EINVAL;
+		goto err_free;
+	}
 	if (!ondev) {
-		if (bpf_prog_is_offloaded(prog->aux)) {
-			err = -EINVAL;
-			goto err_free;
-		}
-
 		/* When only binding to the device, explicitly
 		 * create an entry in the hashtable.
 		 */

kernel/bpf/queue_stack_maps.c

Lines changed: 18 additions & 3 deletions

@@ -98,7 +98,12 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
 	int err = 0;
 	void *ptr;
 
-	raw_spin_lock_irqsave(&qs->lock, flags);
+	if (in_nmi()) {
+		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
+			return -EBUSY;
+	} else {
+		raw_spin_lock_irqsave(&qs->lock, flags);
+	}
 
 	if (queue_stack_map_is_empty(qs)) {
 		memset(value, 0, qs->map.value_size);
@@ -128,7 +133,12 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
 	void *ptr;
 	u32 index;
 
-	raw_spin_lock_irqsave(&qs->lock, flags);
+	if (in_nmi()) {
+		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
+			return -EBUSY;
+	} else {
+		raw_spin_lock_irqsave(&qs->lock, flags);
+	}
 
 	if (queue_stack_map_is_empty(qs)) {
 		memset(value, 0, qs->map.value_size);
@@ -193,7 +203,12 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
 	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
 		return -EINVAL;
 
-	raw_spin_lock_irqsave(&qs->lock, irq_flags);
+	if (in_nmi()) {
+		if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
+			return -EBUSY;
+	} else {
+		raw_spin_lock_irqsave(&qs->lock, irq_flags);
+	}
 
 	if (queue_stack_map_is_full(qs)) {
 		if (!replace) {
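All three paths apply the same rule: an NMI can fire while the interrupted CPU already holds qs->lock, so spinning for it unconditionally can deadlock; NMI context therefore only tries the lock and returns -EBUSY. A userspace analog of the pattern using pthreads — the single-threaded "re-entry" below stands in for an NMI interrupting the lock holder:

    #include <errno.h>
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* A context that must never block (NMI in the kernel) only *tries*
     * the lock and reports -EBUSY; ordinary contexts take it outright.
     */
    static int grab_lock(bool cannot_block)
    {
            if (cannot_block) {
                    if (pthread_mutex_trylock(&lock) != 0)
                            return -EBUSY;
            } else {
                    pthread_mutex_lock(&lock);
            }
            return 0;
    }

    int main(void)  /* build with: cc -pthread demo.c */
    {
            if (grab_lock(true) == 0) {
                    puts("uncontended: trylock succeeded");
                    pthread_mutex_unlock(&lock);
            }

            pthread_mutex_lock(&lock);      /* simulate the interrupted holder */
            if (grab_lock(true) == -EBUSY)
                    puts("contended: -EBUSY instead of deadlock");
            pthread_mutex_unlock(&lock);
            return 0;
    }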

kernel/trace/bpf_trace.c

Lines changed: 19 additions & 1 deletion

@@ -2853,6 +2853,17 @@ static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u3
 	return arr.mods_cnt;
 }
 
+static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt)
+{
+	u32 i;
+
+	for (i = 0; i < cnt; i++) {
+		if (!within_error_injection_list(addrs[i]))
+			return -EINVAL;
+	}
+	return 0;
+}
+
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
 	struct bpf_kprobe_multi_link *link = NULL;
@@ -2930,6 +2941,11 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
 		goto error;
 	}
 
+	if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) {
+		err = -EINVAL;
+		goto error;
+	}
+
 	link = kzalloc(sizeof(*link), GFP_KERNEL);
 	if (!link) {
 		err = -ENOMEM;
@@ -3207,8 +3223,10 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
 	rcu_read_lock();
 	task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
 	rcu_read_unlock();
-	if (!task)
+	if (!task) {
+		err = -ESRCH;
 		goto error_path_put;
+	}
 	}
 
 	err = -ENOMEM;

net/netfilter/nf_conntrack_bpf.c

Lines changed: 2 additions & 0 deletions

@@ -381,6 +381,8 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
 	struct nf_conn *nfct = (struct nf_conn *)nfct_i;
 	int err;
 
+	if (!nf_ct_is_confirmed(nfct))
+		nfct->timeout += nfct_time_stamp;
 	nfct->status |= IPS_CONFIRMED;
 	err = nf_conntrack_hash_check_insert(nfct);
 	if (err < 0) {
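The two added lines fix a unit mismatch: before an entry is confirmed, a timeout set on it (e.g. via bpf_ct_set_timeout()) is a relative interval, while a confirmed entry's timeout is an absolute timestamp, so it must be rebased against nfct_time_stamp exactly once at insertion. A toy model of that rebase — the struct and function names are illustrative, not kernel API:

    #include <stdbool.h>
    #include <stdio.h>

    struct toy_ct {
            unsigned int timeout;   /* relative until confirmed, absolute after */
            bool confirmed;
    };

    /* 'now' stands in for nfct_time_stamp (jiffies in the kernel). */
    static void toy_ct_insert(struct toy_ct *ct, unsigned int now)
    {
            /* Rebase the relative timeout to an absolute expiry exactly
             * once, mirroring "nfct->timeout += nfct_time_stamp".
             */
            if (!ct->confirmed)
                    ct->timeout += now;
            ct->confirmed = true;
    }

    int main(void)
    {
            struct toy_ct ct = { .timeout = 30, .confirmed = false };

            toy_ct_insert(&ct, 1000);
            printf("expires at %u (now=1000, interval=30)\n", ct.timeout);
            return 0;
    }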

tools/include/linux/btf_ids.h

Lines changed: 1 addition & 1 deletion

@@ -38,7 +38,7 @@ asm( \
 	____BTF_ID(symbol)
 
 #define __ID(prefix) \
-	__PASTE(prefix, __COUNTER__)
+	__PASTE(__PASTE(prefix, __COUNTER__), __LINE__)
 
 /*
  * The BTF_ID defines unique symbol for each ID pointing
