@@ -124,6 +124,12 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	return ERR_PTR(err);
 }
 
+static int fetch_build_id(struct vm_area_struct *vma, unsigned char *build_id, bool may_fault)
+{
+	return may_fault ? build_id_parse(vma, build_id, NULL)
+			 : build_id_parse_nofault(vma, build_id, NULL);
+}
+
 /*
  * Expects all id_offs[i].ip values to be set to correct initial IPs.
  * They will be subsequently:
@@ -135,7 +141,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
  *     BPF_STACK_BUILD_ID_IP.
  */
 static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
-					  u32 trace_nr, bool user)
+					  u32 trace_nr, bool user, bool may_fault)
 {
 	int i;
 	struct mmap_unlock_irq_work *work = NULL;
@@ -166,7 +172,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 			goto build_id_valid;
 		}
 		vma = find_vma(current->mm, ip);
-		if (!vma || build_id_parse_nofault(vma, id_offs[i].build_id, NULL)) {
+		if (!vma || fetch_build_id(vma, id_offs[i].build_id, may_fault)) {
 			/* per entry fall back to ips */
 			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
 			memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
@@ -257,7 +263,7 @@ static long __bpf_get_stackid(struct bpf_map *map,
 		id_offs = (struct bpf_stack_build_id *)new_bucket->data;
 		for (i = 0; i < trace_nr; i++)
 			id_offs[i].ip = ips[i];
-		stack_map_get_build_id_offset(id_offs, trace_nr, user);
+		stack_map_get_build_id_offset(id_offs, trace_nr, user, false /* !may_fault */);
 		trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
 		if (hash_matches && bucket->nr == trace_nr &&
 		    memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
@@ -398,7 +404,7 @@ const struct bpf_func_proto bpf_get_stackid_proto_pe = {
 
 static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 			    struct perf_callchain_entry *trace_in,
-			    void *buf, u32 size, u64 flags)
+			    void *buf, u32 size, u64 flags, bool may_fault)
 {
 	u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
 	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
@@ -416,8 +422,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	if (kernel && user_build_id)
 		goto clear;
 
-	elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
-					    : sizeof(u64);
+	elem_size = user_build_id ? sizeof(struct bpf_stack_build_id) : sizeof(u64);
 	if (unlikely(size % elem_size))
 		goto clear;
 
@@ -438,35 +443,45 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	if (sysctl_perf_event_max_stack < max_depth)
 		max_depth = sysctl_perf_event_max_stack;
 
+	if (may_fault)
+		rcu_read_lock(); /* need RCU for perf's callchain below */
+
 	if (trace_in)
 		trace = trace_in;
 	else if (kernel && task)
 		trace = get_callchain_entry_for_task(task, max_depth);
 	else
 		trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
 					   crosstask, false);
-	if (unlikely(!trace))
-		goto err_fault;
 
-	if (trace->nr < skip)
+	if (unlikely(!trace) || trace->nr < skip) {
+		if (may_fault)
+			rcu_read_unlock();
 		goto err_fault;
+	}
 
 	trace_nr = trace->nr - skip;
 	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
 	copy_len = trace_nr * elem_size;
 
 	ips = trace->ip + skip;
-	if (user && user_build_id) {
+	if (user_build_id) {
 		struct bpf_stack_build_id *id_offs = buf;
 		u32 i;
 
 		for (i = 0; i < trace_nr; i++)
 			id_offs[i].ip = ips[i];
-		stack_map_get_build_id_offset(buf, trace_nr, user);
 	} else {
 		memcpy(buf, ips, copy_len);
 	}
 
+	/* trace/ips should not be dereferenced after this point */
+	if (may_fault)
+		rcu_read_unlock();
+
+	if (user_build_id)
+		stack_map_get_build_id_offset(buf, trace_nr, user, may_fault);
+
 	if (size > copy_len)
 		memset(buf + copy_len, 0, size - copy_len);
 	return copy_len;
@@ -481,7 +496,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
 	   u64, flags)
 {
-	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
+	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */);
 }
 
 const struct bpf_func_proto bpf_get_stack_proto = {
@@ -494,8 +509,24 @@ const struct bpf_func_proto bpf_get_stack_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
-	   u32, size, u64, flags)
+BPF_CALL_4(bpf_get_stack_sleepable, struct pt_regs *, regs, void *, buf, u32, size,
+	   u64, flags)
+{
+	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, true /* may_fault */);
+}
+
+const struct bpf_func_proto bpf_get_stack_sleepable_proto = {
+	.func		= bpf_get_stack_sleepable,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+static long __bpf_get_task_stack(struct task_struct *task, void *buf, u32 size,
+				 u64 flags, bool may_fault)
 {
 	struct pt_regs *regs;
 	long res = -EINVAL;
@@ -505,12 +536,18 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
 
 	regs = task_pt_regs(task);
 	if (regs)
-		res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
+		res = __bpf_get_stack(regs, task, NULL, buf, size, flags, may_fault);
 	put_task_stack(task);
 
 	return res;
 }
 
+BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
+	   u32, size, u64, flags)
+{
+	return __bpf_get_task_stack(task, buf, size, flags, false /* !may_fault */);
+}
+
 const struct bpf_func_proto bpf_get_task_stack_proto = {
 	.func		= bpf_get_task_stack,
 	.gpl_only	= false,
@@ -522,6 +559,23 @@ const struct bpf_func_proto bpf_get_task_stack_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_get_task_stack_sleepable, struct task_struct *, task, void *, buf,
+	   u32, size, u64, flags)
+{
+	return __bpf_get_task_stack(task, buf, size, flags, true /* may_fault */);
+}
+
+const struct bpf_func_proto bpf_get_task_stack_sleepable_proto = {
+	.func		= bpf_get_task_stack_sleepable,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
 	   void *, buf, u32, size, u64, flags)
 {
@@ -533,7 +587,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
 	__u64 nr_kernel;
 
 	if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
-		return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
+		return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */);
 
 	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
 			       BPF_F_USER_BUILD_ID)))
@@ -553,7 +607,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
 		__u64 nr = trace->nr;
 
 		trace->nr = nr_kernel;
-		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
+		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */);
 
 		/* restore nr */
 		trace->nr = nr;
@@ -565,7 +619,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
 			goto clear;
 
 		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
-		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
+		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */);
 	}
 	return err;
 
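
For context, here is a minimal sketch (not part of this patch) of how a sleepable BPF program could consume the new plumbing: it calls bpf_get_task_stack() with BPF_F_USER_STACK | BPF_F_USER_BUILD_ID on the current task, so build-id parsing may fault in file-backed pages instead of falling back to raw IPs. It assumes the rest of this series selects the *_sleepable protos for sleepable programs; the attach point (fentry.s/do_sys_openat2) and all names in the program are illustrative only.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative sketch, not part of the patch above.  bpf_get_task_stack(),
 * BPF_F_USER_STACK and BPF_F_USER_BUILD_ID are existing UAPI; the sleepable
 * (may_fault) path is what the patch wires up.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#define MAX_STACK_DEPTH 64

/* global array becomes a .bss map; one slot per captured frame */
struct bpf_stack_build_id stack_entries[MAX_STACK_DEPTH];

SEC("fentry.s/do_sys_openat2")	/* ".s" suffix marks the program sleepable */
int BPF_PROG(capture_user_stack)
{
	struct task_struct *task = bpf_get_current_task_btf();
	long len;

	/* with the sleepable proto, build-id lookup may fault pages in */
	len = bpf_get_task_stack(task, stack_entries, sizeof(stack_entries),
				 BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
	if (len > 0) {
		/* len bytes of struct bpf_stack_build_id entries are filled */
	}
	return 0;
}

char LICENSE[] SEC("license") = "GPL";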