@@ -42,6 +42,28 @@ static inline int stack_map_data_size(struct bpf_map *map)
 		sizeof(struct bpf_stack_build_id) : sizeof(u64);
 }
 
+/**
+ * stack_map_calculate_max_depth - Calculate maximum allowed stack trace depth
+ * @size: Size of the buffer/map value in bytes
+ * @elem_size: Size of each stack trace element
+ * @flags: BPF stack trace flags (BPF_F_USER_STACK, BPF_F_USER_BUILD_ID, ...)
+ *
+ * Return: Maximum number of stack trace entries that can be safely stored
+ */
+static u32 stack_map_calculate_max_depth(u32 size, u32 elem_size, u64 flags)
+{
+	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
+	u32 max_depth;
+	u32 curr_sysctl_max_stack = READ_ONCE(sysctl_perf_event_max_stack);
+
+	max_depth = size / elem_size;
+	max_depth += skip;
+	if (max_depth > curr_sysctl_max_stack)
+		return curr_sysctl_max_stack;
+
+	return max_depth;
+}
+
 static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
 {
 	u64 elem_size = sizeof(struct stack_map_bucket) +
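The new helper folds the skip count into the depth budget and clamps the result against sysctl_perf_event_max_stack. A minimal userspace sketch of the same arithmetic, not part of the patch, using assumed values (BPF_F_SKIP_FIELD_MASK is 0xff in the UAPI, and 127 stands in for the default sysctl):

#include <stdio.h>
#include <stdint.h>

#define BPF_F_SKIP_FIELD_MASK	0xffULL	/* low 8 bits of flags hold the skip count */
#define ASSUMED_SYSCTL_MAX_STACK 127U	/* assumed default of sysctl_perf_event_max_stack */

/* Mirrors the clamping logic of stack_map_calculate_max_depth() for illustration. */
static uint32_t calc_max_depth(uint32_t size, uint32_t elem_size, uint64_t flags)
{
	uint32_t skip = flags & BPF_F_SKIP_FIELD_MASK;
	uint32_t max_depth = size / elem_size + skip;

	return max_depth > ASSUMED_SYSCTL_MAX_STACK ?
	       ASSUMED_SYSCTL_MAX_STACK : max_depth;
}

int main(void)
{
	/* A map value holding 64 u64 slots with 3 skipped frames: 64 + 3 = 67. */
	printf("%u\n", calc_max_depth(64 * 8, 8, 3));
	/* 200 slots would exceed the sysctl, so the result is clamped to 127. */
	printf("%u\n", calc_max_depth(200 * 8, 8, 0));
	return 0;
}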
@@ -300,20 +322,17 @@ static long __bpf_get_stackid(struct bpf_map *map,
 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	   u64, flags)
 {
-	u32 max_depth = map->value_size / stack_map_data_size(map);
-	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
+	u32 elem_size = stack_map_data_size(map);
 	bool user = flags & BPF_F_USER_STACK;
 	struct perf_callchain_entry *trace;
 	bool kernel = !user;
+	u32 max_depth;
 
 	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
 			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
 		return -EINVAL;
 
-	max_depth += skip;
-	if (max_depth > sysctl_perf_event_max_stack)
-		max_depth = sysctl_perf_event_max_stack;
-
+	max_depth = stack_map_calculate_max_depth(map->value_size, elem_size, flags);
 	trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
 				   false, false);
 
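For context, bpf_get_stackid() is normally invoked from a BPF program against a BPF_MAP_TYPE_STACK_TRACE map whose value size determines the depth budget computed above. A minimal libbpf-style sketch, not part of the patch; the map name, attach point, and skip value are illustrative:

#include <linux/bpf.h>
#include <linux/ptrace.h>
#include <bpf/bpf_helpers.h>

#define PERF_MAX_STACK_DEPTH 127

struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, 1024);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(__u64));
} stack_traces SEC(".maps");

SEC("kprobe/do_sys_openat2")
int trace_open(struct pt_regs *ctx)
{
	/* The low bits of the flags word carry the skip count; the kernel
	 * clamps the resulting walk depth via stack_map_calculate_max_depth().
	 */
	long id = bpf_get_stackid(ctx, &stack_traces, 2 /* skip 2 frames */);

	bpf_printk("stackid=%ld", id);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";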
@@ -350,6 +369,7 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
 {
 	struct perf_event *event = ctx->event;
 	struct perf_callchain_entry *trace;
+	u32 elem_size, max_depth;
 	bool kernel, user;
 	__u64 nr_kernel;
 	int ret;
@@ -371,11 +391,14 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
 		return -EFAULT;
 
 	nr_kernel = count_kernel_ip(trace);
+	elem_size = stack_map_data_size(map);
 
 	if (kernel) {
 		__u64 nr = trace->nr;
 
-		trace->nr = nr_kernel;
+		max_depth =
+			stack_map_calculate_max_depth(map->value_size, elem_size, flags);
+		trace->nr = min_t(u32, nr_kernel, max_depth);
 		ret = __bpf_get_stackid(map, trace, flags);
 
 		/* restore nr */
@@ -388,6 +411,9 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
 			return -EFAULT;
 
 		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
+		max_depth =
+			stack_map_calculate_max_depth(map->value_size, elem_size, flags);
+		trace->nr = min_t(u32, trace->nr, max_depth);
 		ret = __bpf_get_stackid(map, trace, flags);
 	}
 	return ret;
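In the user-stack branch above, the number of kernel frames is folded into the skip field before the clamp is applied. A worked example with assumed numbers (not taken from the patch), runnable as plain C:

#include <stdio.h>
#include <stdint.h>

#define MIN_T(type, a, b) ((type)(a) < (type)(b) ? (type)(a) : (type)(b))

int main(void)
{
	/* Assumed numbers: 40 captured entries, 10 of them kernel frames that
	 * become the skip count, room for 32 u64 slots in the map value, and
	 * the assumed default sysctl of 127.
	 */
	uint32_t trace_nr = 40, nr_kernel = 10, slots = 32, sysctl_max = 127;

	uint32_t max_depth = slots + nr_kernel;		/* 32 + 10 = 42 */
	if (max_depth > sysctl_max)
		max_depth = sysctl_max;

	/* trace->nr stays 40, since 40 <= 42; a deeper trace would be cut here. */
	printf("clamped nr = %u\n", MIN_T(uint32_t, trace_nr, max_depth));
	return 0;
}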
@@ -406,8 +432,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 			    struct perf_callchain_entry *trace_in,
 			    void *buf, u32 size, u64 flags, bool may_fault)
 {
-	u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
 	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
+	u32 trace_nr, copy_len, elem_size, max_depth;
 	bool crosstask = task && task != current;
 	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
 	bool user = flags & BPF_F_USER_STACK;
@@ -438,21 +464,20 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 		goto clear;
 	}
 
-	num_elem = size / elem_size;
-	max_depth = num_elem + skip;
-	if (sysctl_perf_event_max_stack < max_depth)
-		max_depth = sysctl_perf_event_max_stack;
+	max_depth = stack_map_calculate_max_depth(size, elem_size, flags);
 
 	if (may_fault)
 		rcu_read_lock(); /* need RCU for perf's callchain below */
 
-	if (trace_in)
+	if (trace_in) {
 		trace = trace_in;
-	else if (kernel && task)
+		trace->nr = min_t(u32, trace->nr, max_depth);
+	} else if (kernel && task) {
 		trace = get_callchain_entry_for_task(task, max_depth);
-	else
+	} else {
 		trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
-					   crosstask, false);
+					   crosstask, false);
+	}
 
 	if (unlikely(!trace) || trace->nr < skip) {
 		if (may_fault)
@@ -461,7 +486,6 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	}
 
 	trace_nr = trace->nr - skip;
-	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
 	copy_len = trace_nr * elem_size;
 
 	ips = trace->ip + skip;
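On the __bpf_get_stack() path, the size passed to stack_map_calculate_max_depth() is the caller-supplied buffer size rather than a map value size. A minimal BPF-side sketch of the corresponding helper call, not part of the patch; the buffer size and attach point are illustrative:

#include <linux/bpf.h>
#include <linux/ptrace.h>
#include <bpf/bpf_helpers.h>

#define MAX_FRAMES 32

SEC("kprobe/do_sys_openat2")
int dump_user_stack(struct pt_regs *ctx)
{
	__u64 frames[MAX_FRAMES];

	/* size = 32 * 8 bytes and elem_size = 8, so the kernel caps the walk
	 * at min(32 + skip, sysctl_perf_event_max_stack) entries; skip is 0 here.
	 */
	long len = bpf_get_stack(ctx, frames, sizeof(frames), BPF_F_USER_STACK);

	if (len > 0)
		bpf_printk("copied %ld bytes of user stack", len);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";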