@@ -31,6 +31,67 @@ struct bpf_stack_map {
 	struct stack_map_bucket *buckets[] __counted_by(n_buckets);
 };
 
+struct bpf_perf_callchain_entry {
+	u64 nr;
+	u64 ip[PERF_MAX_STACK_DEPTH];
+};
+
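+/*
+ * Per-CPU scratch entries holding a copy of the perf callchain. Up to
+ * three nested users per CPU are supported (e.g. task, softirq and
+ * hardirq context); deeper nesting is rejected with -EBUSY below.
+ */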
+#define MAX_PERF_CALLCHAIN_PREEMPT 3
+static DEFINE_PER_CPU(struct bpf_perf_callchain_entry[MAX_PERF_CALLCHAIN_PREEMPT],
+		      bpf_perf_callchain_entries);
+static DEFINE_PER_CPU(int, bpf_perf_callchain_preempt_cnt);
+
+static int bpf_get_perf_callchain(struct bpf_perf_callchain_entry **entry,
+				  struct pt_regs *regs, u32 init_nr, bool kernel,
+				  bool user, u32 max_stack, bool crosstask,
+				  bool add_mark)
+{
+	struct bpf_perf_callchain_entry *bpf_entry;
+	struct perf_callchain_entry *perf_entry;
+	int preempt_cnt;
+
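+	/* Reserve a per-CPU entry slot; the counter also bounds nesting depth. */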
+	preempt_cnt = this_cpu_inc_return(bpf_perf_callchain_preempt_cnt);
+	if (WARN_ON_ONCE(preempt_cnt > MAX_PERF_CALLCHAIN_PREEMPT)) {
+		this_cpu_dec(bpf_perf_callchain_preempt_cnt);
+		return -EBUSY;
+	}
+
+	bpf_entry = this_cpu_ptr(&bpf_perf_callchain_entries[preempt_cnt - 1]);
+
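+	/*
+	 * Block preemption so that perf's per-CPU callchain entry cannot be
+	 * reused by another callchain user on this CPU before it has been
+	 * copied into our own per-CPU entry.
+	 */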
+	preempt_disable();
+	perf_entry = get_perf_callchain(regs, init_nr, kernel, user, max_stack,
+					crosstask, add_mark);
+	if (unlikely(!perf_entry)) {
+		preempt_enable();
+		this_cpu_dec(bpf_perf_callchain_preempt_cnt);
+		return -EFAULT;
+	}
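+	/* Copy the nr field plus nr ips, i.e. nr + 1 u64 words. */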
+	memcpy(bpf_entry, perf_entry, sizeof(u64) * (perf_entry->nr + 1));
+	*entry = bpf_entry;
+	preempt_enable();
+
+	return 0;
+}
+
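+/* Release the per-CPU slot reserved by a successful bpf_get_perf_callchain(). */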
+static void bpf_put_perf_callchain(void)
+{
+	if (WARN_ON_ONCE(this_cpu_read(bpf_perf_callchain_preempt_cnt) == 0))
+		return;
+	this_cpu_dec(bpf_perf_callchain_preempt_cnt);
+}
+
 static inline bool stack_map_use_build_id(struct bpf_map *map)
 {
 	return (map->map_flags & BPF_F_STACK_BUILD_ID);
@@ -303,8 +364,9 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	u32 max_depth = map->value_size / stack_map_data_size(map);
 	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
 	bool user = flags & BPF_F_USER_STACK;
-	struct perf_callchain_entry *trace;
+	struct bpf_perf_callchain_entry *trace;
 	bool kernel = !user;
+	int err;
 
 	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
 			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
@@ -314,14 +376,16 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	if (max_depth > sysctl_perf_event_max_stack)
 		max_depth = sysctl_perf_event_max_stack;
 
-	trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
-				   false, false);
+	err = bpf_get_perf_callchain(&trace, regs, 0, kernel, user, max_depth,
+				     false, false);
+	if (err)
+		return err;
 
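+	/* trace stays valid until bpf_put_perf_callchain() releases the slot. */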
-	if (unlikely(!trace))
-		/* couldn't fetch the stack trace */
-		return -EFAULT;
+	err = __bpf_get_stackid(map, (struct perf_callchain_entry *)trace, flags);
+	bpf_put_perf_callchain();
 
-	return __bpf_get_stackid(map, trace, flags);
+	return err;
 }
 
 const struct bpf_func_proto bpf_get_stackid_proto = {
@@ -443,8 +507,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	if (sysctl_perf_event_max_stack < max_depth)
 		max_depth = sysctl_perf_event_max_stack;
 
-	if (may_fault)
-		rcu_read_lock(); /* need RCU for perf's callchain below */
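+	/* perf's callchain entry below is per-CPU; pin this CPU while it is in use */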
+	preempt_disable();
 
 	if (trace_in)
 		trace = trace_in;
@@ -455,8 +519,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 					crosstask, false);
 
 	if (unlikely(!trace) || trace->nr < skip) {
-		if (may_fault)
-			rcu_read_unlock();
+		preempt_enable();
 		goto err_fault;
 	}
 
@@ -474,10 +537,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	} else {
 		memcpy(buf, ips, copy_len);
 	}
-
-	/* trace/ips should not be dereferenced after this point */
-	if (may_fault)
-		rcu_read_unlock();
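+	/* trace/ips should not be dereferenced after this point */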
+	preempt_enable();
 
 	if (user_build_id)
 		stack_map_get_build_id_offset(buf, trace_nr, user, may_fault);