kernel/bpf/stackmap.c: 55 changes (40 additions, 15 deletions)
@@ -42,6 +42,28 @@ static inline int stack_map_data_size(struct bpf_map *map)
sizeof(struct bpf_stack_build_id) : sizeof(u64);
}

/**
* stack_map_calculate_max_depth - Calculate maximum allowed stack trace depth
* @size: Size of the buffer/map value in bytes
* @elem_size: Size of each stack trace element
* @flags: BPF stack trace flags (BPF_F_USER_STACK, BPF_F_USER_BUILD_ID, ...)
*
* Return: Maximum number of stack trace entries that can be safely stored
*/
static u32 stack_map_calculate_max_depth(u32 size, u32 elem_size, u64 flags)
{
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
u32 max_depth;
u32 curr_sysctl_max_stack = READ_ONCE(sysctl_perf_event_max_stack);

max_depth = size / elem_size;
max_depth += skip;
if (max_depth > curr_sysctl_max_stack)
return curr_sysctl_max_stack;

return max_depth;
}
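/*
 * Worked example (illustrative annotation, not part of this patch): with a
 * 512-byte map value, 8-byte entries and skip = 3, the helper computes
 * 512 / 8 + 3 = 67, which is below the default sysctl_perf_event_max_stack
 * of 127 and is returned as-is; a 2048-byte value would give 256 + 3 = 259
 * and be clamped to 127.
 */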

static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
{
u64 elem_size = sizeof(struct stack_map_bucket) +
@@ -300,20 +322,17 @@ static long __bpf_get_stackid(struct bpf_map *map,
BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags)
{
u32 max_depth = map->value_size / stack_map_data_size(map);
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
u32 elem_size = stack_map_data_size(map);
bool user = flags & BPF_F_USER_STACK;
struct perf_callchain_entry *trace;
bool kernel = !user;
u32 max_depth;

if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
return -EINVAL;

max_depth += skip;
if (max_depth > sysctl_perf_event_max_stack)
max_depth = sysctl_perf_event_max_stack;

max_depth = stack_map_calculate_max_depth(map->value_size, elem_size, flags);
trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
false, false);

@@ -350,6 +369,7 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
{
struct perf_event *event = ctx->event;
struct perf_callchain_entry *trace;
u32 elem_size, max_depth;
bool kernel, user;
__u64 nr_kernel;
int ret;
@@ -371,11 +391,15 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
return -EFAULT;

nr_kernel = count_kernel_ip(trace);
elem_size = stack_map_data_size(map);

if (kernel) {
__u64 nr = trace->nr;

trace->nr = nr_kernel;
max_depth =
stack_map_calculate_max_depth(map->value_size, elem_size, flags);
trace->nr = min_t(u32, nr_kernel, max_depth);
ret = __bpf_get_stackid(map, trace, flags);

/* restore nr */
@@ -388,6 +412,9 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
return -EFAULT;

flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
max_depth =
stack_map_calculate_max_depth(map->value_size, elem_size, flags);
trace->nr = min_t(u32, trace->nr, max_depth);
ret = __bpf_get_stackid(map, trace, flags);
}
return ret;
@@ -406,8 +433,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
struct perf_callchain_entry *trace_in,
void *buf, u32 size, u64 flags, bool may_fault)
{
u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
bool user_build_id = flags & BPF_F_USER_BUILD_ID;
u32 trace_nr, copy_len, elem_size, max_depth;
bool crosstask = task && task != current;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
bool user = flags & BPF_F_USER_STACK;
@@ -438,21 +465,20 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
goto clear;
}

num_elem = size / elem_size;
max_depth = num_elem + skip;
if (sysctl_perf_event_max_stack < max_depth)
max_depth = sysctl_perf_event_max_stack;
max_depth = stack_map_calculate_max_depth(size, elem_size, flags);

if (may_fault)
rcu_read_lock(); /* need RCU for perf's callchain below */

if (trace_in)
if (trace_in) {
trace = trace_in;
else if (kernel && task)
trace->nr = min_t(u32, trace->nr, max_depth);
} else if (kernel && task) {
trace = get_callchain_entry_for_task(task, max_depth);
else
} else {
trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
crosstask, false);
}

if (unlikely(!trace) || trace->nr < skip) {
if (may_fault)
@@ -461,7 +487,6 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
}

trace_nr = trace->nr - skip;
trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
copy_len = trace_nr * elem_size;

ips = trace->ip + skip;
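For reference, below is a minimal standalone C sketch (not part of the patch) of the clamping arithmetic that the new stack_map_calculate_max_depth() helper centralizes. BPF_F_SKIP_FIELD_MASK (0xff) matches the UAPI definition; calculate_max_depth() and the sample values are purely illustrative.

#include <stdint.h>
#include <stdio.h>

#define BPF_F_SKIP_FIELD_MASK 0xffULL	/* low 8 flag bits carry the skip count */

/* Mirror of the kernel helper: entries that fit in the buffer plus the
 * skipped frames, clamped to the perf_event_max_stack sysctl. */
static uint32_t calculate_max_depth(uint32_t size, uint32_t elem_size,
				    uint64_t flags, uint32_t sysctl_max_stack)
{
	uint32_t skip = flags & BPF_F_SKIP_FIELD_MASK;
	uint32_t max_depth = size / elem_size + skip;

	return max_depth > sysctl_max_stack ? sysctl_max_stack : max_depth;
}

int main(void)
{
	/* 512-byte value, 8-byte entries, skip 3 frames, default sysctl 127 -> 67 */
	printf("%u\n", calculate_max_depth(512, 8, 3, 127));
	/* a large buffer is still bounded by the sysctl -> 127 */
	printf("%u\n", calculate_max_depth(4096, 8, 0, 127));
	return 0;
}

In the patch itself, callers such as bpf_get_stackid_pe() and __bpf_get_stack() then clamp trace->nr to the computed depth with min_t() before copying, so a pre-collected callchain can never overrun the map value or buffer.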