@@ -124,6 +124,12 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	return ERR_PTR(err);
 }
 
+static int fetch_build_id(struct vm_area_struct *vma, unsigned char *build_id, bool may_fault)
+{
+	return may_fault ? build_id_parse(vma, build_id, NULL)
+			 : build_id_parse_nofault(vma, build_id, NULL);
+}
+
 /*
  * Expects all id_offs[i].ip values to be set to correct initial IPs.
  * They will be subsequently:
@@ -135,7 +141,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
  *   BPF_STACK_BUILD_ID_IP.
  */
 static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
-					  u32 trace_nr, bool user)
+					  u32 trace_nr, bool user, bool may_fault)
 {
 	int i;
 	struct mmap_unlock_irq_work *work = NULL;
@@ -166,7 +172,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 			goto build_id_valid;
 		}
 		vma = find_vma(current->mm, ip);
-		if (!vma || build_id_parse_nofault(vma, id_offs[i].build_id, NULL)) {
+		if (!vma || fetch_build_id(vma, id_offs[i].build_id, may_fault)) {
 			/* per entry fall back to ips */
 			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
 			memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
@@ -257,7 +263,7 @@ static long __bpf_get_stackid(struct bpf_map *map,
 		id_offs = (struct bpf_stack_build_id *)new_bucket->data;
 		for (i = 0; i < trace_nr; i++)
 			id_offs[i].ip = ips[i];
-		stack_map_get_build_id_offset(id_offs, trace_nr, user);
+		stack_map_get_build_id_offset(id_offs, trace_nr, user, false /* !may_fault */);
 		trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
 		if (hash_matches && bucket->nr == trace_nr &&
 		    memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
@@ -398,7 +404,7 @@ const struct bpf_func_proto bpf_get_stackid_proto_pe = {
 
 static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 			    struct perf_callchain_entry *trace_in,
-			    void *buf, u32 size, u64 flags)
+			    void *buf, u32 size, u64 flags, bool may_fault)
 {
 	u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
 	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
@@ -416,8 +422,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	if (kernel && user_build_id)
 		goto clear;
 
-	elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
-					      : sizeof(u64);
+	elem_size = user_build_id ? sizeof(struct bpf_stack_build_id) : sizeof(u64);
 	if (unlikely(size % elem_size))
 		goto clear;
 
@@ -438,35 +443,45 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	if (sysctl_perf_event_max_stack < max_depth)
 		max_depth = sysctl_perf_event_max_stack;
 
+	if (may_fault)
+		rcu_read_lock(); /* need RCU for perf's callchain below */
+
 	if (trace_in)
 		trace = trace_in;
 	else if (kernel && task)
 		trace = get_callchain_entry_for_task(task, max_depth);
 	else
 		trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
 					   crosstask, false);
-	if (unlikely(!trace))
-		goto err_fault;
 
-	if (trace->nr < skip)
+	if (unlikely(!trace) || trace->nr < skip) {
+		if (may_fault)
+			rcu_read_unlock();
 		goto err_fault;
+	}
 
 	trace_nr = trace->nr - skip;
 	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
 	copy_len = trace_nr * elem_size;
 
 	ips = trace->ip + skip;
-	if (user && user_build_id) {
+	if (user_build_id) {
 		struct bpf_stack_build_id *id_offs = buf;
 		u32 i;
 
 		for (i = 0; i < trace_nr; i++)
 			id_offs[i].ip = ips[i];
-		stack_map_get_build_id_offset(buf, trace_nr, user);
 	} else {
 		memcpy(buf, ips, copy_len);
 	}
 
+	/* trace/ips should not be dereferenced after this point */
+	if (may_fault)
+		rcu_read_unlock();
+
+	if (user_build_id)
+		stack_map_get_build_id_offset(buf, trace_nr, user, may_fault);
+
 	if (size > copy_len)
 		memset(buf + copy_len, 0, size - copy_len);
 	return copy_len;
@@ -481,7 +496,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
 	   u64, flags)
 {
-	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
+	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */);
 }
 
 const struct bpf_func_proto bpf_get_stack_proto = {
@@ -494,8 +509,24 @@ const struct bpf_func_proto bpf_get_stack_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
-	   u32, size, u64, flags)
+BPF_CALL_4(bpf_get_stack_sleepable, struct pt_regs *, regs, void *, buf, u32, size,
+	   u64, flags)
+{
+	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, true /* may_fault */);
+}
+
+const struct bpf_func_proto bpf_get_stack_sleepable_proto = {
+	.func		= bpf_get_stack_sleepable,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+static long __bpf_get_task_stack(struct task_struct *task, void *buf, u32 size,
+				 u64 flags, bool may_fault)
 {
 	struct pt_regs *regs;
 	long res = -EINVAL;
@@ -505,12 +536,18 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
 
 	regs = task_pt_regs(task);
 	if (regs)
-		res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
+		res = __bpf_get_stack(regs, task, NULL, buf, size, flags, may_fault);
 	put_task_stack(task);
 
 	return res;
 }
 
+BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
+	   u32, size, u64, flags)
+{
+	return __bpf_get_task_stack(task, buf, size, flags, false /* !may_fault */);
+}
+
 const struct bpf_func_proto bpf_get_task_stack_proto = {
 	.func		= bpf_get_task_stack,
 	.gpl_only	= false,
@@ -522,6 +559,23 @@ const struct bpf_func_proto bpf_get_task_stack_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_get_task_stack_sleepable, struct task_struct *, task, void *, buf,
+	   u32, size, u64, flags)
+{
+	return __bpf_get_task_stack(task, buf, size, flags, true /* may_fault */);
+}
+
+const struct bpf_func_proto bpf_get_task_stack_sleepable_proto = {
+	.func		= bpf_get_task_stack_sleepable,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
 	   void *, buf, u32, size, u64, flags)
 {
@@ -533,7 +587,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
 	__u64 nr_kernel;
 
 	if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
-		return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
+		return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */);
 
 	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
 			       BPF_F_USER_BUILD_ID)))
@@ -553,7 +607,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
 		__u64 nr = trace->nr;
 
 		trace->nr = nr_kernel;
-		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
+		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */);
 
 		/* restore nr */
 		trace->nr = nr;
@@ -565,7 +619,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
 			goto clear;
 
 		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
-		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
+		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */);
 	}
 	return err;
 
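For context on how the new sleepable variants are meant to be consumed, here is a minimal sketch of a sleepable uprobe BPF program requesting user stacks with build IDs. It is illustrative only and not part of this commit: the target binary path, the map and function names (scratch, capture_stack, example_func) are hypothetical, and it assumes the companion helper-proto wiring that routes sleepable programs to bpf_get_stack_sleepable_proto.

/* SPDX-License-Identifier: GPL-2.0 */
/* Hypothetical consumer sketch, not part of this commit: binary path,
 * map and function names are made up, and it assumes sleepable programs
 * are routed to the *_sleepable helper protos by a companion change.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

#define MAX_USER_FRAMES 64

struct stack_buf {
	struct bpf_stack_build_id entries[MAX_USER_FRAMES];
};

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, struct stack_buf);
} scratch SEC(".maps");

/* "uprobe.s" makes the program sleepable, so the helper may fault. */
SEC("uprobe.s//usr/bin/example:example_func")
int capture_stack(struct pt_regs *ctx)
{
	__u32 zero = 0;
	struct stack_buf *buf = bpf_map_lookup_elem(&scratch, &zero);
	long len;

	if (!buf)
		return 0;

	/* With BPF_F_USER_BUILD_ID each entry is a bpf_stack_build_id;
	 * in a sleepable program the build-id lookup may fault in
	 * file-backed pages, so it can succeed even when the ELF notes
	 * are not resident.
	 */
	len = bpf_get_stack(ctx, buf->entries, sizeof(buf->entries),
			    BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
	if (len < 0)
		return 0;

	/* forward buf->entries[0 .. len / sizeof(buf->entries[0])) here */
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

The point of the split in the patch is that every existing call site keeps passing false /* !may_fault */, so non-sleepable behavior is unchanged, while a sleepable caller like the sketch above can take the build_id_parse() path that is allowed to fault.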