Skip to content

Commit 3b7dc70

Browse files
committed
Daniel Borkmann says:

====================
pull-request: bpf-next 2024-09-11

We've added 12 non-merge commits during the last 16 day(s) which contain
a total of 20 files changed, 228 insertions(+), 30 deletions(-).

There's a minor merge conflict in drivers/net/netkit.c:
  00d066a ("netdev_features: convert NETIF_F_LLTX to dev->lltx")
  d966087 ("netkit: Disable netpoll support")

The main changes are:

1) Enable bpf_dynptr_from_skb for tp_btf such that this can be used
   to easily parse skbs in BPF programs attached to tracepoints,
   from Philo Lu.

2) Add a cond_resched() point in BPF's sock_hash_free() as there have
   been several syzbot soft lockup reports recently, from Eric Dumazet.

3) Fix xsk_buff_can_alloc() to account for queue_empty_descs which got
   noticed when zero copy ice driver started to use it,
   from Maciej Fijalkowski.

4) Move the xdp:xdp_cpumap_kthread tracepoint before cpumap pushes skbs
   up via netif_receive_skb_list() to better measure latencies,
   from Daniel Xu.

5) Follow-up to disable netpoll support from netkit, from Daniel Borkmann.

6) Improve xsk selftests to not assume a fixed MAX_SKB_FRAGS of 17 but
   instead gather the actual value via /proc/sys/net/core/max_skb_frags,
   also from Maciej Fijalkowski.
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  sock_map: Add a cond_resched() in sock_hash_free()
  selftests/bpf: Expand skb dynptr selftests for tp_btf
  bpf: Allow bpf_dynptr_from_skb() for tp_btf
  tcp: Use skb__nullable in trace_tcp_send_reset
  selftests/bpf: Add test for __nullable suffix in tp_btf
  bpf: Support __nullable argument suffix for tp_btf
  bpf, cpumap: Move xdp:xdp_cpumap_kthread tracepoint before rcv
  selftests/xsk: Read current MAX_SKB_FRAGS from sysctl knob
  xsk: Bump xsk_queue::queue_empty_descs in xp_can_alloc()
  tcp_bpf: Remove an unused parameter for bpf_tcp_ingress()
  bpf, sockmap: Correct spelling skmsg.c
  netkit: Disable netpoll support

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents f1bcd48 + b1339be commit 3b7dc70

File tree

20 files changed

+228
-30
lines changed

20 files changed

+228
-30
lines changed

drivers/net/netkit.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ static void netkit_setup(struct net_device *dev)
255255
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
256256
dev->priv_flags |= IFF_PHONY_HEADROOM;
257257
dev->priv_flags |= IFF_NO_QUEUE;
258+
dev->priv_flags |= IFF_DISABLE_NETPOLL;
258259
dev->lltx = true;
259260

260261
dev->ethtool_ops = &netkit_ethtool_ops;

include/trace/events/tcp.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,10 @@ DEFINE_RST_REASON(FN, FN)
9191
TRACE_EVENT(tcp_send_reset,
9292

9393
TP_PROTO(const struct sock *sk,
94-
const struct sk_buff *skb,
94+
const struct sk_buff *skb__nullable,
9595
const enum sk_rst_reason reason),
9696

97-
TP_ARGS(sk, skb, reason),
97+
TP_ARGS(sk, skb__nullable, reason),
9898

9999
TP_STRUCT__entry(
100100
__field(const void *, skbaddr)
@@ -106,7 +106,7 @@ TRACE_EVENT(tcp_send_reset,
106106
),
107107

108108
TP_fast_assign(
109-
__entry->skbaddr = skb;
109+
__entry->skbaddr = skb__nullable;
110110
__entry->skaddr = sk;
111111
/* Zero means unknown state. */
112112
__entry->state = sk ? sk->sk_state : 0;
@@ -118,13 +118,13 @@ TRACE_EVENT(tcp_send_reset,
118118
const struct inet_sock *inet = inet_sk(sk);
119119

120120
TP_STORE_ADDR_PORTS(__entry, inet, sk);
121-
} else if (skb) {
122-
const struct tcphdr *th = (const struct tcphdr *)skb->data;
121+
} else if (skb__nullable) {
122+
const struct tcphdr *th = (const struct tcphdr *)skb__nullable->data;
123123
/*
124124
* We should reverse the 4-tuple of skb, so later
125125
* it can print the right flow direction of rst.
126126
*/
127-
TP_STORE_ADDR_PORTS_SKB(skb, th, entry->daddr, entry->saddr);
127+
TP_STORE_ADDR_PORTS_SKB(skb__nullable, th, entry->daddr, entry->saddr);
128128
}
129129
__entry->reason = reason;
130130
),

kernel/bpf/btf.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6525,6 +6525,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
65256525
if (prog_args_trusted(prog))
65266526
info->reg_type |= PTR_TRUSTED;
65276527

6528+
if (btf_param_match_suffix(btf, &args[arg], "__nullable"))
6529+
info->reg_type |= PTR_MAYBE_NULL;
6530+
65286531
if (tgt_prog) {
65296532
enum bpf_prog_type tgt_type;
65306533

kernel/bpf/cpumap.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -354,12 +354,14 @@ static int cpu_map_kthread_run(void *data)
354354

355355
list_add_tail(&skb->list, &list);
356356
}
357-
netif_receive_skb_list(&list);
358357

359-
/* Feedback loop via tracepoint */
358+
/* Feedback loop via tracepoint.
359+
* NB: keep before recv to allow measuring enqueue/dequeue latency.
360+
*/
360361
trace_xdp_cpumap_kthread(rcpu->map_id, n, kmem_alloc_drops,
361362
sched, &stats);
362363

364+
netif_receive_skb_list(&list);
363365
local_bh_enable(); /* resched point, may call do_softirq() */
364366
}
365367
__set_current_state(TASK_RUNNING);

kernel/bpf/verifier.c

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
#include <linux/cpumask.h>
2929
#include <linux/bpf_mem_alloc.h>
3030
#include <net/xdp.h>
31+
#include <linux/trace_events.h>
32+
#include <linux/kallsyms.h>
3133

3234
#include "disasm.h"
3335

@@ -21154,11 +21156,13 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
2115421156
{
2115521157
bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
2115621158
bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
21159+
char trace_symbol[KSYM_SYMBOL_LEN];
2115721160
const char prefix[] = "btf_trace_";
21161+
struct bpf_raw_event_map *btp;
2115821162
int ret = 0, subprog = -1, i;
2115921163
const struct btf_type *t;
2116021164
bool conservative = true;
21161-
const char *tname;
21165+
const char *tname, *fname;
2116221166
struct btf *btf;
2116321167
long addr = 0;
2116421168
struct module *mod = NULL;
@@ -21289,10 +21293,34 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
2128921293
return -EINVAL;
2129021294
}
2129121295
tname += sizeof(prefix) - 1;
21292-
t = btf_type_by_id(btf, t->type);
21293-
if (!btf_type_is_ptr(t))
21294-
/* should never happen in valid vmlinux build */
21296+
21297+
/* The func_proto of "btf_trace_##tname" is generated from typedef without argument
21298+
* names. Thus using bpf_raw_event_map to get argument names.
21299+
*/
21300+
btp = bpf_get_raw_tracepoint(tname);
21301+
if (!btp)
2129521302
return -EINVAL;
21303+
fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
21304+
trace_symbol);
21305+
bpf_put_raw_tracepoint(btp);
21306+
21307+
if (fname)
21308+
ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);
21309+
21310+
if (!fname || ret < 0) {
21311+
bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
21312+
prefix, tname);
21313+
t = btf_type_by_id(btf, t->type);
21314+
if (!btf_type_is_ptr(t))
21315+
/* should never happen in valid vmlinux build */
21316+
return -EINVAL;
21317+
} else {
21318+
t = btf_type_by_id(btf, ret);
21319+
if (!btf_type_is_func(t))
21320+
/* should never happen in valid vmlinux build */
21321+
return -EINVAL;
21322+
}
21323+
2129621324
t = btf_type_by_id(btf, t->type);
2129721325
if (!btf_type_is_func_proto(t))
2129821326
/* should never happen in valid vmlinux build */

net/core/filter.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12063,7 +12063,7 @@ int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
1206312063
}
1206412064

1206512065
BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
12066-
BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
12066+
BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
1206712067
BTF_KFUNCS_END(bpf_kfunc_check_set_skb)
1206812068

1206912069
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
@@ -12112,6 +12112,7 @@ static int __init bpf_kfunc_init(void)
1211212112
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb);
1211312113
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
1211412114
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
12115+
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_kfunc_set_skb);
1211512116
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
1211612117
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
1211712118
&bpf_kfunc_set_sock_addr);

net/core/skmsg.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len)
293293
/* If we trim data a full sg elem before curr pointer update
294294
* copybreak and current so that any future copy operations
295295
* start at new copy location.
296-
* However trimed data that has not yet been used in a copy op
296+
* However trimmed data that has not yet been used in a copy op
297297
* does not require an update.
298298
*/
299299
if (!msg->sg.size) {

net/core/sock_map.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,6 +1183,7 @@ static void sock_hash_free(struct bpf_map *map)
11831183
sock_put(elem->sk);
11841184
sock_hash_free_elem(htab, elem);
11851185
}
1186+
cond_resched();
11861187
}
11871188

11881189
/* wait for psock readers accessing its map link */

net/ipv4/tcp_bpf.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
3030
}
3131

3232
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
33-
struct sk_msg *msg, u32 apply_bytes, int flags)
33+
struct sk_msg *msg, u32 apply_bytes)
3434
{
3535
bool apply = apply_bytes;
3636
struct scatterlist *sge;
@@ -167,7 +167,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
167167
if (unlikely(!psock))
168168
return -EPIPE;
169169

170-
ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
170+
ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes) :
171171
tcp_bpf_push_locked(sk, msg, bytes, flags, false);
172172
sk_psock_put(sk, psock);
173173
return ret;

net/xdp/xsk_buff_pool.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -661,9 +661,17 @@ EXPORT_SYMBOL(xp_alloc_batch);
661661

662662
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
663663
{
664+
u32 req_count, avail_count;
665+
664666
if (pool->free_list_cnt >= count)
665667
return true;
666-
return xskq_cons_has_entries(pool->fq, count - pool->free_list_cnt);
668+
669+
req_count = count - pool->free_list_cnt;
670+
avail_count = xskq_cons_nb_entries(pool->fq, req_count);
671+
if (!avail_count)
672+
pool->fq->queue_empty_descs++;
673+
674+
return avail_count >= req_count;
667675
}
668676
EXPORT_SYMBOL(xp_can_alloc);
669677

0 commit comments

Comments
 (0)