@@ -31,6 +31,52 @@ struct bpf_stack_map {
3131 struct stack_map_bucket * buckets [] __counted_by (n_buckets );
3232};
3333
/*
 * Fixed-size per-CPU backing storage for a stack trace.  The layout
 * (nr followed by the ip[] array) mirrors struct perf_callchain_entry,
 * which this is cast to below, so the two must stay layout-compatible.
 */
struct bpf_perf_callchain_entry {
	u64 nr;
	u64 ip[PERF_MAX_STACK_DEPTH];
};
38+
/*
 * Maximum nesting depth of concurrent callchain captures on one CPU
 * (presumably task / softirq / hardirq contexts preempting each other —
 * TODO confirm against the contexts BPF programs can run in).
 */
#define MAX_PERF_CALLCHAIN_PREEMPT 3
/* One scratch entry per nesting level, per CPU. */
static DEFINE_PER_CPU(struct bpf_perf_callchain_entry[MAX_PERF_CALLCHAIN_PREEMPT],
		      bpf_perf_callchain_entries);
/* Number of entries currently claimed on this CPU; indexes the array above. */
static DEFINE_PER_CPU(int, bpf_perf_callchain_preempt_cnt);
43+
44+ static int bpf_get_perf_callchain_or_entry (struct perf_callchain_entry * * entry ,
45+ struct pt_regs * regs , bool kernel ,
46+ bool user , u32 max_stack , bool crosstack ,
47+ bool add_mark , bool get_callchain )
48+ {
49+ struct bpf_perf_callchain_entry * bpf_entry ;
50+ struct perf_callchain_entry * perf_entry ;
51+ int preempt_cnt ;
52+
53+ preempt_cnt = this_cpu_inc_return (bpf_perf_callchain_preempt_cnt );
54+ if (WARN_ON_ONCE (preempt_cnt > MAX_PERF_CALLCHAIN_PREEMPT )) {
55+ this_cpu_dec (bpf_perf_callchain_preempt_cnt );
56+ return - EBUSY ;
57+ }
58+
59+ bpf_entry = this_cpu_ptr (& bpf_perf_callchain_entries [preempt_cnt - 1 ]);
60+ if (!get_callchain ) {
61+ * entry = (struct perf_callchain_entry * )bpf_entry ;
62+ return 0 ;
63+ }
64+
65+ perf_entry = get_perf_callchain (regs , (struct perf_callchain_entry * )bpf_entry ,
66+ kernel , user , max_stack ,
67+ crosstack , add_mark );
68+ * entry = perf_entry ;
69+
70+ return 0 ;
71+ }
72+
73+ static void bpf_put_perf_callchain (void )
74+ {
75+ if (WARN_ON_ONCE (this_cpu_read (bpf_perf_callchain_preempt_cnt ) == 0 ))
76+ return ;
77+ this_cpu_dec (bpf_perf_callchain_preempt_cnt );
78+ }
79+
3480static inline bool stack_map_use_build_id (struct bpf_map * map )
3581{
3682 return (map -> map_flags & BPF_F_STACK_BUILD_ID );
@@ -192,11 +238,11 @@ get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
192238{
193239#ifdef CONFIG_STACKTRACE
194240 struct perf_callchain_entry * entry ;
195- int rctx ;
196-
197- entry = get_callchain_entry (& rctx );
241+ int ret ;
198242
199- if (!entry )
243+ ret = bpf_get_perf_callchain_or_entry (& entry , NULL , false, false, 0 , false, false,
244+ false);
245+ if (ret )
200246 return NULL ;
201247
202248 entry -> nr = stack_trace_save_tsk (task , (unsigned long * )entry -> ip ,
@@ -216,7 +262,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
216262 to [i ] = (u64 )(from [i ]);
217263 }
218264
219- put_callchain_entry ( rctx );
265+ bpf_put_perf_callchain ( );
220266
221267 return entry ;
222268#else /* CONFIG_STACKTRACE */
@@ -305,6 +351,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
305351 bool user = flags & BPF_F_USER_STACK ;
306352 struct perf_callchain_entry * trace ;
307353 bool kernel = !user ;
354+ int err ;
308355
309356 if (unlikely (flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
310357 BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID )))
@@ -314,14 +361,15 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
314361 if (max_depth > sysctl_perf_event_max_stack )
315362 max_depth = sysctl_perf_event_max_stack ;
316363
317- trace = get_perf_callchain (regs , NULL , kernel , user , max_depth ,
318- false, false);
364+ err = bpf_get_perf_callchain_or_entry (& trace , regs , kernel , user , max_depth ,
365+ false, false, true);
366+ if (err )
367+ return err ;
319368
320- if (unlikely (!trace ))
321- /* couldn't fetch the stack trace */
322- return - EFAULT ;
369+ err = __bpf_get_stackid (map , trace , flags );
370+ bpf_put_perf_callchain ();
323371
324- return __bpf_get_stackid ( map , trace , flags ) ;
372+ return err ;
325373}
326374
327375const struct bpf_func_proto bpf_get_stackid_proto = {
@@ -443,20 +491,23 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
443491 if (sysctl_perf_event_max_stack < max_depth )
444492 max_depth = sysctl_perf_event_max_stack ;
445493
446- if (may_fault )
447- rcu_read_lock (); /* need RCU for perf's callchain below */
448-
449494 if (trace_in )
450495 trace = trace_in ;
451- else if (kernel && task )
496+ else if (kernel && task ) {
452497 trace = get_callchain_entry_for_task (task , max_depth );
453- else
454- trace = get_perf_callchain (regs , NULL , kernel , user , max_depth ,
455- crosstask , false);
498+ } else {
499+ err = bpf_get_perf_callchain_or_entry (& trace , regs , kernel , user , max_depth ,
500+ false, false, true);
501+ if (err )
502+ return err ;
503+ }
504+
505+ if (unlikely (!trace ))
506+ goto err_fault ;
456507
457- if (unlikely (! trace ) || trace -> nr < skip ) {
458- if (may_fault )
459- rcu_read_unlock ();
508+ if (trace -> nr < skip ) {
509+ if (! trace_in )
510+ bpf_put_perf_callchain ();
460511 goto err_fault ;
461512 }
462513
@@ -475,9 +526,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
475526 memcpy (buf , ips , copy_len );
476527 }
477528
478- /* trace/ips should not be dereferenced after this point */
479- if (may_fault )
480- rcu_read_unlock ();
529+ if (!trace_in )
530+ bpf_put_perf_callchain ();
481531
482532 if (user_build_id )
483533 stack_map_get_build_id_offset (buf , trace_nr , user , may_fault );
0 commit comments