
Commit a498ee7

mykyta5 authored and Alexei Starovoitov committed
bpf: Implement dynptr copy kfuncs
This patch introduces a new set of kfuncs for working with dynptrs in BPF programs, enabling reading variable-length user or kernel data into a dynptr directly. For memory safety, the verifier allows only constant-sized reads via the existing bpf_probe_read_{user|kernel} etc. kfuncs; the dynptr-based kfuncs allow dynamically-sized reads without those memory-safety shortcomings.

The following kfuncs are introduced:
 * `bpf_probe_read_kernel_dynptr()`: probes kernel-space data into a dynptr
 * `bpf_probe_read_user_dynptr()`: probes user-space data into a dynptr
 * `bpf_probe_read_kernel_str_dynptr()`: probes kernel-space string into a dynptr
 * `bpf_probe_read_user_str_dynptr()`: probes user-space string into a dynptr
 * `bpf_copy_from_user_dynptr()`: sleepable, copies user-space data into a dynptr for the current task
 * `bpf_copy_from_user_str_dynptr()`: sleepable, copies user-space string into a dynptr for the current task
 * `bpf_copy_from_user_task_dynptr()`: sleepable, copies user-space data of the task into a dynptr
 * `bpf_copy_from_user_task_str_dynptr()`: sleepable, copies user-space string of the task into a dynptr

The implementation is built on two generic functions:
 * __bpf_dynptr_copy
 * __bpf_dynptr_copy_str

These functions take function pointers as arguments, enabling the copying of data from various sources, including both kernel and user space. Use __always_inline for the generic functions and callbacks to make sure the compiler doesn't generate indirect calls into the callbacks, which are more expensive, especially on some kernel configurations. Inlining allows the compiler to put direct calls into all the specific callback implementations (copy_user_data_sleepable, copy_user_data_nofault, and so on).

Reviewed-by: Andrii Nakryiko <[email protected]>
Signed-off-by: Mykyta Yatsenko <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent d060b6a commit a498ee7
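For illustration only (not part of this commit): a minimal sketch of how a BPF program might call one of the non-sleepable variants, assuming the kfunc is declared via __ksym and the destination is a ringbuf-backed dynptr. The section, map, and variable names below are made up for the example.

/* Hypothetical usage sketch: read a variable-length user string into a
 * ringbuf-backed dynptr from a kprobe program.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* Assumed declaration of the new kfunc for this sketch. */
extern int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, __u32 off, __u32 size,
					  const void *unsafe_ptr__ign) __ksym __weak;

struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 4096);
} rb SEC(".maps");

SEC("kprobe/do_sys_openat2")
int BPF_KPROBE(trace_openat2, int dfd, const char *filename)
{
	struct bpf_dynptr dptr;
	__u32 len = 256;	/* with the dynptr kfuncs this size could be computed at runtime */

	/* Reserve ringbuf space as a dynptr; it must be submitted or discarded. */
	bpf_ringbuf_reserve_dynptr(&rb, len, 0, &dptr);

	/* Copy up to len bytes of the NUL-terminated user string into the dynptr. */
	if (bpf_probe_read_user_str_dynptr(&dptr, 0, len, filename) < 0) {
		bpf_ringbuf_discard_dynptr(&dptr, 0);
		return 0;
	}

	bpf_ringbuf_submit_dynptr(&dptr, 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

Compared with reading into a fixed-size on-stack buffer, the dynptr form lets both the destination memory and the read size be determined at runtime.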


2 files changed: +202 −0 lines changed


kernel/bpf/helpers.c

Lines changed: 8 additions & 0 deletions
@@ -3378,6 +3378,14 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLE
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_local_irq_save)
BTF_ID_FLAGS(func, bpf_local_irq_restore)
+BTF_ID_FLAGS(func, bpf_probe_read_user_dynptr)
+BTF_ID_FLAGS(func, bpf_probe_read_kernel_dynptr)
+BTF_ID_FLAGS(func, bpf_probe_read_user_str_dynptr)
+BTF_ID_FLAGS(func, bpf_probe_read_kernel_str_dynptr)
+BTF_ID_FLAGS(func, bpf_copy_from_user_dynptr, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_copy_from_user_str_dynptr, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
BTF_KFUNCS_END(common_btf_ids)

static const struct btf_kfunc_id_set common_kfunc_set = {

kernel/trace/bpf_trace.c

Lines changed: 194 additions & 0 deletions
@@ -3466,6 +3466,142 @@ static int __init bpf_kprobe_multi_kfuncs_init(void)

late_initcall(bpf_kprobe_multi_kfuncs_init);

+typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk);
+
+/*
+ * The __always_inline is to make sure the compiler doesn't
+ * generate indirect calls into callbacks, which is expensive,
+ * on some kernel configurations. This allows compiler to put
+ * direct calls into all the specific callback implementations
+ * (copy_user_data_sleepable, copy_user_data_nofault, and so on)
+ */
+static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size,
+						 const void *unsafe_src,
+						 copy_fn_t str_copy_fn,
+						 struct task_struct *tsk)
+{
+	struct bpf_dynptr_kern *dst;
+	u32 chunk_sz, off;
+	void *dst_slice;
+	int cnt, err;
+	char buf[256];
+
+	dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
+	if (likely(dst_slice))
+		return str_copy_fn(dst_slice, unsafe_src, size, tsk);
+
+	dst = (struct bpf_dynptr_kern *)dptr;
+	if (bpf_dynptr_check_off_len(dst, doff, size))
+		return -E2BIG;
+
+	for (off = 0; off < size; off += chunk_sz - 1) {
+		chunk_sz = min_t(u32, sizeof(buf), size - off);
+		/* Expect str_copy_fn to return count of copied bytes, including
+		 * zero terminator. Next iteration increment off by chunk_sz - 1 to
+		 * overwrite NUL.
+		 */
+		cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
+		if (cnt < 0)
+			return cnt;
+		err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0);
+		if (err)
+			return err;
+		if (cnt < chunk_sz || chunk_sz == 1) /* we are done */
+			return off + cnt;
+	}
+	return off;
+}
+
+static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff,
+					     u32 size, const void *unsafe_src,
+					     copy_fn_t copy_fn, struct task_struct *tsk)
+{
+	struct bpf_dynptr_kern *dst;
+	void *dst_slice;
+	char buf[256];
+	u32 off, chunk_sz;
+	int err;
+
+	dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
+	if (likely(dst_slice))
+		return copy_fn(dst_slice, unsafe_src, size, tsk);
+
+	dst = (struct bpf_dynptr_kern *)dptr;
+	if (bpf_dynptr_check_off_len(dst, doff, size))
+		return -E2BIG;
+
+	for (off = 0; off < size; off += chunk_sz) {
+		chunk_sz = min_t(u32, sizeof(buf), size - off);
+		err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
+		if (err)
+			return err;
+		err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src,
+						  u32 size, struct task_struct *tsk)
+{
+	return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
+}
+
+static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src,
+						    u32 size, struct task_struct *tsk)
+{
+	int ret;
+
+	if (!tsk) /* Read from the current task */
+		return copy_from_user(dst, (const void __user *)unsafe_src, size);
+
+	ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0);
+	if (ret != size)
+		return -EFAULT;
+	return 0;
+}
+
+static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src,
+						    u32 size, struct task_struct *tsk)
+{
+	return copy_from_kernel_nofault(dst, unsafe_src, size);
+}
+
+static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src,
+						 u32 size, struct task_struct *tsk)
+{
+	return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
+}
+
+static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src,
+						   u32 size, struct task_struct *tsk)
+{
+	int ret;
+
+	if (unlikely(size == 0))
+		return 0;
+
+	if (tsk) {
+		ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0);
+	} else {
+		ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1);
+		/* strncpy_from_user does not guarantee NUL termination */
+		if (ret >= 0)
+			((char *)dst)[ret] = '\0';
+	}
+
+	if (ret < 0)
+		return ret;
+	return ret + 1;
+}
+
+static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src,
+						   u32 size, struct task_struct *tsk)
+{
+	return strncpy_from_kernel_nofault(dst, unsafe_src, size);
+}
+
__bpf_kfunc_start_defs();

__bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type,
@@ -3477,4 +3613,62 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid
	return bpf_send_signal_common(sig, type, task, value);
}

+__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u32 off,
+					   u32 size, const void __user *unsafe_ptr__ign)
+{
+	return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+				 copy_user_data_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u32 off,
+					     u32 size, const void *unsafe_ptr__ign)
+{
+	return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign,
+				 copy_kernel_data_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+					       u32 size, const void __user *unsafe_ptr__ign)
+{
+	return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+				     copy_user_str_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+						 u32 size, const void *unsafe_ptr__ign)
+{
+	return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign,
+				     copy_kernel_str_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u32 off,
+					  u32 size, const void __user *unsafe_ptr__ign)
+{
+	return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+				 copy_user_data_sleepable, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+					      u32 size, const void __user *unsafe_ptr__ign)
+{
+	return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+				     copy_user_str_sleepable, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u32 off,
+					       u32 size, const void __user *unsafe_ptr__ign,
+					       struct task_struct *tsk)
+{
+	return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+				 copy_user_data_sleepable, tsk);
+}
+
+__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+						   u32 size, const void __user *unsafe_ptr__ign,
+						   struct task_struct *tsk)
+{
+	return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+				     copy_user_str_sleepable, tsk);
+}
+
__bpf_kfunc_end_defs();
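And a rough sketch (also hypothetical, not from this commit) of the task-targeting sleepable variant, assuming a sleepable program type (for example a SEC("syscall") program) and using the existing bpf_task_from_pid()/bpf_task_release() kfuncs to obtain the trusted task pointer that KF_TRUSTED_ARGS requires. The global variables and names are illustrative.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* Assumed declaration of the new kfunc for this sketch. */
extern int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, __u32 off, __u32 size,
					      const void *unsafe_ptr__ign,
					      struct task_struct *tsk) __ksym __weak;
extern struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
extern void bpf_task_release(struct task_struct *p) __ksym;

struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 4096);
} rb SEC(".maps");

int target_pid;		/* set by user space before the program is run */
__u64 target_uaddr;	/* user-space address inside the target task */

SEC("syscall")
int dump_remote_str(void *ctx)
{
	struct bpf_dynptr dptr;
	struct task_struct *tsk;
	int ret;

	/* Trusted, refcounted task pointer, as required by KF_TRUSTED_ARGS. */
	tsk = bpf_task_from_pid(target_pid);
	if (!tsk)
		return 0;

	bpf_ringbuf_reserve_dynptr(&rb, 512, 0, &dptr);

	/* Sleepable copy: may fault in pages of the target task's address space. */
	ret = bpf_copy_from_user_task_str_dynptr(&dptr, 0, 512,
						 (void *)target_uaddr, tsk);
	if (ret < 0)
		bpf_ringbuf_discard_dynptr(&dptr, 0);
	else
		bpf_ringbuf_submit_dynptr(&dptr, 0);

	bpf_task_release(tsk);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";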
