Skip to content

Commit acfadf2

Browse files
author
Alexei Starovoitov
committed
Merge branch 'samples-bpf-make-bpf-programs-more-libbpf-aware'
Daniel T. Lee says: ==================== samples/bpf: make BPF programs more libbpf aware The existing tracing programs have been developed for a considerable period of time and, as a result, do not properly incorporate the features of the current libbpf, such as CO-RE. This is evident in frequent usage of functions like PT_REGS* and the persistence of "hack" methods using underscore-style bpf_probe_read_kernel from the past. These programs are far behind the current level of libbpf and can potentially confuse users. The kernel has undergone significant changes, and some of these changes have broken these programs, but on the other hand, more robust APIs have been developed for increased stability. To list some of the kernel changes that this patch set is focusing on, - symbol mismatch occurs due to compiler optimization [1] - inline of blk_account_io* breaks BPF kprobe program [2] - new tracepoints for the block_io_start/done are introduced [3] - map lookup probes can't be triggered (bpf_disable_instrumentation)[4] - BPF_KSYSCALL has been introduced to simplify argument fetching [5] - convert to vmlinux.h and use tp argument structure within it - make tracing programs to be more CO-RE centric In this regard, this patch set aims not only to integrate the latest features of libbpf into BPF programs but also to reduce confusion and clarify the BPF programs. This will help with the potential confusion among users and make the programs more intuitive. [1]: iovisor/bcc#1754 [2]: iovisor/bcc#4261 [3]: commit 5a80bd0 ("block: introduce block_io_start/block_io_done tracepoints") [4]: commit 7c4cd05 ("bpf: Fix syscall's stackmap lookup potential deadlock") [5]: commit 6f5d467 ("libbpf: improve BPF_KPROBE_SYSCALL macro and rename it to BPF_KSYSCALL") ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 5bebd3e + 456d535 commit acfadf2

21 files changed

+117
-171
lines changed

samples/bpf/Makefile

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -124,21 +124,21 @@ always-y := $(tprogs-y)
124124
always-y += sockex1_kern.o
125125
always-y += sockex2_kern.o
126126
always-y += sockex3_kern.o
127-
always-y += tracex1_kern.o
127+
always-y += tracex1.bpf.o
128128
always-y += tracex2.bpf.o
129-
always-y += tracex3_kern.o
130-
always-y += tracex4_kern.o
131-
always-y += tracex5_kern.o
132-
always-y += tracex6_kern.o
133-
always-y += tracex7_kern.o
129+
always-y += tracex3.bpf.o
130+
always-y += tracex4.bpf.o
131+
always-y += tracex5.bpf.o
132+
always-y += tracex6.bpf.o
133+
always-y += tracex7.bpf.o
134134
always-y += sock_flags.bpf.o
135135
always-y += test_probe_write_user.bpf.o
136136
always-y += trace_output.bpf.o
137137
always-y += tcbpf1_kern.o
138138
always-y += tc_l2_redirect_kern.o
139139
always-y += lathist_kern.o
140-
always-y += offwaketime_kern.o
141-
always-y += spintest_kern.o
140+
always-y += offwaketime.bpf.o
141+
always-y += spintest.bpf.o
142142
always-y += map_perf_test.bpf.o
143143
always-y += test_overhead_tp.bpf.o
144144
always-y += test_overhead_raw_tp.bpf.o
@@ -333,7 +333,7 @@ $(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h
333333
$(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h
334334
$(obj)/xdp_router_ipv4_user.o: $(obj)/xdp_router_ipv4.skel.h
335335

336-
$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
336+
$(obj)/tracex5.bpf.o: $(obj)/syscall_nrs.h
337337
$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
338338
$(obj)/hbm.o: $(src)/hbm.h
339339
$(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
@@ -440,7 +440,7 @@ $(obj)/%.o: $(src)/%.c
440440
-Wno-gnu-variable-sized-type-not-at-end \
441441
-Wno-address-of-packed-member -Wno-tautological-compare \
442442
-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
443-
-fno-asynchronous-unwind-tables \
443+
-fno-asynchronous-unwind-tables -fcf-protection \
444444
-I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
445445
-O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \
446446
$(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \

samples/bpf/net_shared.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#define TC_ACT_OK 0
1818
#define TC_ACT_SHOT 2
1919

20+
#define IFNAMSIZ 16
21+
2022
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
2123
__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
2224
#define bpf_ntohs(x) __builtin_bswap16(x)

samples/bpf/offwaketime_kern.c renamed to samples/bpf/offwaketime.bpf.c

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,15 @@
44
* modify it under the terms of version 2 of the GNU General Public
55
* License as published by the Free Software Foundation.
66
*/
7-
#include <uapi/linux/bpf.h>
8-
#include <uapi/linux/ptrace.h>
9-
#include <uapi/linux/perf_event.h>
7+
#include "vmlinux.h"
108
#include <linux/version.h>
11-
#include <linux/sched.h>
129
#include <bpf/bpf_helpers.h>
1310
#include <bpf/bpf_tracing.h>
11+
#include <bpf/bpf_core_read.h>
1412

15-
#define _(P) \
16-
({ \
17-
typeof(P) val; \
18-
bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
19-
val; \
20-
})
13+
#ifndef PERF_MAX_STACK_DEPTH
14+
#define PERF_MAX_STACK_DEPTH 127
15+
#endif
2116

2217
#define MINBLOCK_US 1
2318
#define MAX_ENTRIES 10000
@@ -67,11 +62,9 @@ struct {
6762
SEC("kprobe/try_to_wake_up")
6863
int waker(struct pt_regs *ctx)
6964
{
70-
struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
65+
struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx);
66+
u32 pid = BPF_CORE_READ(p, pid);
7167
struct wokeby_t woke;
72-
u32 pid;
73-
74-
pid = _(p->pid);
7568

7669
bpf_get_current_comm(&woke.name, sizeof(woke.name));
7770
woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
@@ -111,28 +104,18 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta)
111104

112105
#if 1
113106
/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
114-
struct sched_switch_args {
115-
unsigned long long pad;
116-
char prev_comm[TASK_COMM_LEN];
117-
int prev_pid;
118-
int prev_prio;
119-
long long prev_state;
120-
char next_comm[TASK_COMM_LEN];
121-
int next_pid;
122-
int next_prio;
123-
};
124107
SEC("tracepoint/sched/sched_switch")
125-
int oncpu(struct sched_switch_args *ctx)
108+
int oncpu(struct trace_event_raw_sched_switch *ctx)
126109
{
127110
/* record previous thread sleep time */
128111
u32 pid = ctx->prev_pid;
129112
#else
130-
SEC("kprobe/finish_task_switch")
113+
SEC("kprobe.multi/finish_task_switch*")
131114
int oncpu(struct pt_regs *ctx)
132115
{
133-
struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
116+
struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx);
134117
/* record previous thread sleep time */
135-
u32 pid = _(p->pid);
118+
u32 pid = BPF_CORE_READ(p, pid);
136119
#endif
137120
u64 delta, ts, *tsp;
138121

samples/bpf/offwaketime_user.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ int main(int argc, char **argv)
105105
return 2;
106106
}
107107

108-
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
108+
snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
109109
obj = bpf_object__open_file(filename, NULL);
110110
if (libbpf_get_error(obj)) {
111111
fprintf(stderr, "ERROR: opening BPF object file failed\n");

samples/bpf/spintest_kern.c renamed to samples/bpf/spintest.bpf.c

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,15 @@
44
* modify it under the terms of version 2 of the GNU General Public
55
* License as published by the Free Software Foundation.
66
*/
7-
#include <linux/skbuff.h>
8-
#include <linux/netdevice.h>
7+
#include "vmlinux.h"
98
#include <linux/version.h>
10-
#include <uapi/linux/bpf.h>
11-
#include <uapi/linux/perf_event.h>
129
#include <bpf/bpf_helpers.h>
1310
#include <bpf/bpf_tracing.h>
1411

12+
#ifndef PERF_MAX_STACK_DEPTH
13+
#define PERF_MAX_STACK_DEPTH 127
14+
#endif
15+
1516
struct {
1617
__uint(type, BPF_MAP_TYPE_HASH);
1718
__type(key, long);
@@ -46,20 +47,10 @@ int foo(struct pt_regs *ctx) \
4647
}
4748

4849
/* add kprobes to all possible *spin* functions */
49-
SEC("kprobe/spin_unlock")PROG(p1)
50-
SEC("kprobe/spin_lock")PROG(p2)
51-
SEC("kprobe/mutex_spin_on_owner")PROG(p3)
52-
SEC("kprobe/rwsem_spin_on_owner")PROG(p4)
53-
SEC("kprobe/spin_unlock_irqrestore")PROG(p5)
54-
SEC("kprobe/_raw_spin_unlock_irqrestore")PROG(p6)
55-
SEC("kprobe/_raw_spin_unlock_bh")PROG(p7)
56-
SEC("kprobe/_raw_spin_unlock")PROG(p8)
57-
SEC("kprobe/_raw_spin_lock_irqsave")PROG(p9)
58-
SEC("kprobe/_raw_spin_trylock_bh")PROG(p10)
59-
SEC("kprobe/_raw_spin_lock_irq")PROG(p11)
60-
SEC("kprobe/_raw_spin_trylock")PROG(p12)
61-
SEC("kprobe/_raw_spin_lock")PROG(p13)
62-
SEC("kprobe/_raw_spin_lock_bh")PROG(p14)
50+
SEC("kprobe.multi/spin_*lock*")PROG(spin_lock)
51+
SEC("kprobe.multi/*_spin_on_owner")PROG(spin_on_owner)
52+
SEC("kprobe.multi/_raw_spin_*lock*")PROG(raw_spin_lock)
53+
6354
/* and to inner bpf helpers */
6455
SEC("kprobe/htab_map_update_elem")PROG(p15)
6556
SEC("kprobe/__htab_percpu_map_update_elem")PROG(p16)

samples/bpf/spintest_user.c

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,20 @@
99

1010
int main(int ac, char **argv)
1111
{
12-
char filename[256], symbol[256];
1312
struct bpf_object *obj = NULL;
1413
struct bpf_link *links[20];
1514
long key, next_key, value;
1615
struct bpf_program *prog;
1716
int map_fd, i, j = 0;
18-
const char *section;
17+
char filename[256];
1918
struct ksym *sym;
2019

2120
if (load_kallsyms()) {
2221
printf("failed to process /proc/kallsyms\n");
2322
return 2;
2423
}
2524

26-
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
25+
snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
2726
obj = bpf_object__open_file(filename, NULL);
2827
if (libbpf_get_error(obj)) {
2928
fprintf(stderr, "ERROR: opening BPF object file failed\n");
@@ -44,20 +43,13 @@ int main(int ac, char **argv)
4443
}
4544

4645
bpf_object__for_each_program(prog, obj) {
47-
section = bpf_program__section_name(prog);
48-
if (sscanf(section, "kprobe/%s", symbol) != 1)
49-
continue;
50-
51-
/* Attach prog only when symbol exists */
52-
if (ksym_get_addr(symbol)) {
53-
links[j] = bpf_program__attach(prog);
54-
if (libbpf_get_error(links[j])) {
55-
fprintf(stderr, "bpf_program__attach failed\n");
56-
links[j] = NULL;
57-
goto cleanup;
58-
}
59-
j++;
46+
links[j] = bpf_program__attach(prog);
47+
if (libbpf_get_error(links[j])) {
48+
fprintf(stderr, "bpf_program__attach failed\n");
49+
links[j] = NULL;
50+
goto cleanup;
6051
}
52+
j++;
6153
}
6254

6355
for (i = 0; i < 5; i++) {

samples/bpf/test_map_in_map.bpf.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,19 +103,15 @@ static __always_inline int do_inline_hash_lookup(void *inner_map, u32 port)
103103
return result ? *result : -ENOENT;
104104
}
105105

106-
SEC("kprobe/__sys_connect")
107-
int trace_sys_connect(struct pt_regs *ctx)
106+
SEC("ksyscall/connect")
107+
int BPF_KSYSCALL(trace_sys_connect, unsigned int fd, struct sockaddr_in6 *in6, int addrlen)
108108
{
109-
struct sockaddr_in6 *in6;
110109
u16 test_case, port, dst6[8];
111-
int addrlen, ret, inline_ret, ret_key = 0;
110+
int ret, inline_ret, ret_key = 0;
112111
u32 port_key;
113112
void *outer_map, *inner_map;
114113
bool inline_hash = false;
115114

116-
in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(ctx);
117-
addrlen = (int)PT_REGS_PARM3_CORE(ctx);
118-
119115
if (addrlen != sizeof(*in6))
120116
return 0;
121117

samples/bpf/test_overhead_kprobe.bpf.c

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,7 @@
88
#include <linux/version.h>
99
#include <bpf/bpf_helpers.h>
1010
#include <bpf/bpf_tracing.h>
11-
12-
#define _(P) \
13-
({ \
14-
typeof(P) val = 0; \
15-
bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
16-
val; \
17-
})
11+
#include <bpf/bpf_core_read.h>
1812

1913
SEC("kprobe/__set_task_comm")
2014
int prog(struct pt_regs *ctx)
@@ -26,14 +20,14 @@ int prog(struct pt_regs *ctx)
2620
u16 oom_score_adj;
2721
u32 pid;
2822

29-
tsk = (void *)PT_REGS_PARM1(ctx);
23+
tsk = (void *)PT_REGS_PARM1_CORE(ctx);
3024

31-
pid = _(tsk->pid);
32-
bpf_probe_read_kernel_str(oldcomm, sizeof(oldcomm), &tsk->comm);
33-
bpf_probe_read_kernel_str(newcomm, sizeof(newcomm),
25+
pid = BPF_CORE_READ(tsk, pid);
26+
bpf_core_read_str(oldcomm, sizeof(oldcomm), &tsk->comm);
27+
bpf_core_read_str(newcomm, sizeof(newcomm),
3428
(void *)PT_REGS_PARM2(ctx));
35-
signal = _(tsk->signal);
36-
oom_score_adj = _(signal->oom_score_adj);
29+
signal = BPF_CORE_READ(tsk, signal);
30+
oom_score_adj = BPF_CORE_READ(signal, oom_score_adj);
3731
return 0;
3832
}
3933

samples/bpf/test_overhead_tp.bpf.c

Lines changed: 2 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,40 +8,15 @@
88
#include <bpf/bpf_helpers.h>
99

1010
/* from /sys/kernel/tracing/events/task/task_rename/format */
11-
struct task_rename {
12-
__u64 pad;
13-
__u32 pid;
14-
char oldcomm[TASK_COMM_LEN];
15-
char newcomm[TASK_COMM_LEN];
16-
__u16 oom_score_adj;
17-
};
1811
SEC("tracepoint/task/task_rename")
19-
int prog(struct task_rename *ctx)
12+
int prog(struct trace_event_raw_task_rename *ctx)
2013
{
2114
return 0;
2215
}
2316

2417
/* from /sys/kernel/tracing/events/fib/fib_table_lookup/format */
25-
struct fib_table_lookup {
26-
__u64 pad;
27-
__u32 tb_id;
28-
int err;
29-
int oif;
30-
int iif;
31-
__u8 proto;
32-
__u8 tos;
33-
__u8 scope;
34-
__u8 flags;
35-
__u8 src[4];
36-
__u8 dst[4];
37-
__u8 gw4[4];
38-
__u8 gw6[16];
39-
__u16 sport;
40-
__u16 dport;
41-
char name[16];
42-
};
4318
SEC("tracepoint/fib/fib_table_lookup")
44-
int prog2(struct fib_table_lookup *ctx)
19+
int prog2(struct trace_event_raw_fib_table_lookup *ctx)
4520
{
4621
return 0;
4722
}

samples/bpf/tracex1_kern.c renamed to samples/bpf/tracex1.bpf.c

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,42 +4,35 @@
44
* modify it under the terms of version 2 of the GNU General Public
55
* License as published by the Free Software Foundation.
66
*/
7-
#include <linux/skbuff.h>
8-
#include <linux/netdevice.h>
9-
#include <uapi/linux/bpf.h>
7+
#include "vmlinux.h"
8+
#include "net_shared.h"
109
#include <linux/version.h>
1110
#include <bpf/bpf_helpers.h>
11+
#include <bpf/bpf_core_read.h>
1212
#include <bpf/bpf_tracing.h>
1313

14-
#define _(P) \
15-
({ \
16-
typeof(P) val = 0; \
17-
bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
18-
val; \
19-
})
20-
2114
/* kprobe is NOT a stable ABI
2215
* kernel functions can be removed, renamed or completely change semantics.
2316
* Number of arguments and their positions can change, etc.
2417
* In such case this bpf+kprobe example will no longer be meaningful
2518
*/
26-
SEC("kprobe/__netif_receive_skb_core")
19+
SEC("kprobe.multi/__netif_receive_skb_core*")
2720
int bpf_prog1(struct pt_regs *ctx)
2821
{
2922
/* attaches to kprobe __netif_receive_skb_core,
3023
* looks for packets on loobpack device and prints them
24+
* (wildcard is used for avoiding symbol mismatch due to optimization)
3125
*/
3226
char devname[IFNAMSIZ];
3327
struct net_device *dev;
3428
struct sk_buff *skb;
3529
int len;
3630

37-
/* non-portable! works for the given kernel only */
38-
bpf_probe_read_kernel(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx));
39-
dev = _(skb->dev);
40-
len = _(skb->len);
31+
bpf_core_read(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx));
32+
dev = BPF_CORE_READ(skb, dev);
33+
len = BPF_CORE_READ(skb, len);
4134

42-
bpf_probe_read_kernel(devname, sizeof(devname), dev->name);
35+
BPF_CORE_READ_STR_INTO(&devname, dev, name);
4336

4437
if (devname[0] == 'l' && devname[1] == 'o') {
4538
char fmt[] = "skb %p len %d\n";

0 commit comments

Comments
 (0)