From 014658069bdd5fc38ecb9547eeb27dfe9570815c Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Thu, 2 Oct 2025 23:48:32 +0800 Subject: [PATCH 01/10] bpf: Extend bpf syscall with common attributes support Extend the 'bpf()' syscall to support a set of common attributes shared across all BPF commands: 1. 'log_buf': User-provided buffer for storing logs. 2. 'log_size': Size of the log buffer. 3. 'log_level': Log verbosity level. 4. 'log_true_size': The size of log reported by kernel. These common attributes are passed as the 4th argument to the 'bpf()' syscall, with the 5th argument specifying the size of this structure. To indicate the use of these common attributes from userspace, a new flag 'BPF_COMMON_ATTRS' ('1 << 16') is introduced. This flag is OR-ed into the 'cmd' field of the syscall. When 'cmd & BPF_COMMON_ATTRS' is set, the kernel will copy the common attributes from userspace into kernel space for use. Signed-off-by: Leon Hwang --- include/linux/syscalls.h | 3 ++- include/uapi/linux/bpf.h | 8 ++++++++ kernel/bpf/syscall.c | 23 +++++++++++++++++++---- tools/include/uapi/linux/bpf.h | 8 ++++++++ 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 77f45e5d44139..94408575dc49b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -933,7 +933,8 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); -asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size); +asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size, + struct bpf_common_attr __user *attr_common, unsigned int size_common); asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); diff --git 
a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ae83d8649ef1c..bb38afd265132 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -975,6 +975,7 @@ enum bpf_cmd { BPF_TOKEN_CREATE, BPF_PROG_STREAM_READ_BY_FD, __MAX_BPF_CMD, + BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying bpf_common_attr. */ }; enum bpf_map_type { @@ -1474,6 +1475,13 @@ struct bpf_stack_build_id { }; }; +struct bpf_common_attr { + __u64 log_buf; + __u32 log_size; + __u32 log_level; + __u32 log_true_size; +}; + #define BPF_OBJ_NAME_LEN 16U enum { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a48fa86f82a7f..8d97d67e6abaa 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -6092,8 +6092,10 @@ static int prog_stream_read(union bpf_attr *attr) return ret; } -static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) +static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, + bpfptr_t uattr_common, unsigned int size_common) { + struct bpf_common_attr common_attrs; union bpf_attr attr; int err; @@ -6107,6 +6109,18 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) if (copy_from_bpfptr(&attr, uattr, size) != 0) return -EFAULT; + memset(&common_attrs, 0, sizeof(common_attrs)); + if (cmd & BPF_COMMON_ATTRS) { + err = bpf_check_uarg_tail_zero(uattr_common, sizeof(common_attrs), size_common); + if (err) + return err; + + cmd &= ~BPF_COMMON_ATTRS; + size_common = min_t(u32, size_common, sizeof(common_attrs)); + if (copy_from_bpfptr(&common_attrs, uattr_common, size_common) != 0) + return -EFAULT; + } + err = security_bpf(cmd, &attr, size, uattr.is_kernel); if (err < 0) return err; @@ -6239,9 +6253,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) return err; } -SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) +SYSCALL_DEFINE5(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size, + struct bpf_common_attr __user *, 
uattr_common, unsigned int, size_common) { - return __sys_bpf(cmd, USER_BPFPTR(uattr), size); + return __sys_bpf(cmd, USER_BPFPTR(uattr), size, USER_BPFPTR(uattr_common), size_common); } static bool syscall_prog_is_valid_access(int off, int size, @@ -6272,7 +6287,7 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size) default: return -EINVAL; } - return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size); + return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size, KERNEL_BPFPTR(NULL), 0); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ae83d8649ef1c..bb38afd265132 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -975,6 +975,7 @@ enum bpf_cmd { BPF_TOKEN_CREATE, BPF_PROG_STREAM_READ_BY_FD, __MAX_BPF_CMD, + BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying bpf_common_attr. */ }; enum bpf_map_type { @@ -1474,6 +1475,13 @@ struct bpf_stack_build_id { }; }; +struct bpf_common_attr { + __u64 log_buf; + __u32 log_size; + __u32 log_level; + __u32 log_true_size; +}; + #define BPF_OBJ_NAME_LEN 16U enum { From ea127e0520c49366aa34825c7ed22aa70a9690da Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Thu, 2 Oct 2025 23:48:33 +0800 Subject: [PATCH 02/10] libbpf: Add support for extended bpf syscall To support the extended 'bpf()' syscall introduced in the previous commit, introduce the following internal APIs: * 'sys_bpf_ext()' * 'sys_bpf_ext_fd()' They wrap the raw 'syscall()' interface to support passing extended attributes. * 'probe_sys_bpf_ext()' Check whether current kernel supports the extended attributes. 
Signed-off-by: Leon Hwang --- tools/lib/bpf/bpf.c | 33 +++++++++++++++++++++++++++++++++ tools/lib/bpf/features.c | 8 ++++++++ tools/lib/bpf/libbpf_internal.h | 3 +++ 3 files changed, 44 insertions(+) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 339b197972374..9cd79beb13a2d 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -69,6 +69,39 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } +static inline int sys_bpf_ext(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size, + struct bpf_common_attr *common_attrs, + unsigned int size_common) +{ + cmd = common_attrs ? cmd | BPF_COMMON_ATTRS : cmd & ~BPF_COMMON_ATTRS; + return syscall(__NR_bpf, cmd, attr, size, common_attrs, size_common); +} + +static inline int sys_bpf_ext_fd(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size, + struct bpf_common_attr *common_attrs, + unsigned int size_common) +{ + int fd; + + fd = sys_bpf_ext(cmd, attr, size, common_attrs, size_common); + return ensure_good_fd(fd); +} + +int probe_sys_bpf_ext(void) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); + union bpf_attr attr; + int fd; + + memset(&attr, 0, attr_sz); + fd = syscall(__NR_bpf, BPF_PROG_LOAD | BPF_COMMON_ATTRS, &attr, attr_sz, NULL, + sizeof(struct bpf_common_attr)); + fd = errno == EFAULT ? 
syscall(__NR_memfd_create, "fd", 0) : fd; + return ensure_good_fd(fd); +} + static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size) { diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 760657f5224c2..d01df62394f89 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -507,6 +507,11 @@ static int probe_kern_arg_ctx_tag(int token_fd) return probe_fd(prog_fd); } +static int probe_kern_extended_syscall(int token_fd) +{ + return probe_fd(probe_sys_bpf_ext()); +} + typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -582,6 +587,9 @@ static struct kern_feature_desc { [FEAT_BTF_QMARK_DATASEC] = { "BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec, }, + [FEAT_EXTENDED_SYSCALL] = { + "Kernel supports extended syscall", probe_kern_extended_syscall, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index c93797dcaf5bc..af05df8d040ce 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -380,6 +380,8 @@ enum kern_feature_id { FEAT_ARG_CTX_TAG, /* Kernel supports '?' 
at the front of datasec names */ FEAT_BTF_QMARK_DATASEC, + /* Kernel supports extended syscall */ + FEAT_EXTENDED_SYSCALL, __FEAT_CNT, }; @@ -740,4 +742,5 @@ int probe_fd(int fd); #define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64) void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]); +int probe_sys_bpf_ext(void); #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ From 8c760d6f2aa090d23007970f29225bb757b8676c Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Thu, 2 Oct 2025 23:48:34 +0800 Subject: [PATCH 03/10] bpf: Refactor reporting log_true_size for prog_load The next commit adds support for reporting logs via the extended common attributes, which also requires reporting 'log_true_size' via the extended common attributes. Therefore, refactor how 'log_true_size' is reported so that it can easily be reported via the extended common attributes as well. Signed-off-by: Leon Hwang --- include/linux/bpf.h | 2 +- kernel/bpf/syscall.c | 24 ++++++++++++++++++++---- kernel/bpf/verifier.c | 12 ++---------- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a98c833461347..4f595439943d7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2738,7 +2738,7 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size, size_t actual_size); /* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size); +int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr); #ifndef CONFIG_BPF_JIT_ALWAYS_ON void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8d97d67e6abaa..2bdc0b43ec832 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2841,7 +2841,7 @@ static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr /* last field in 'union bpf_attr' used by this command */ #define 
BPF_PROG_LOAD_LAST_FIELD keyring_id -static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) +static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) { enum bpf_prog_type type = attr->prog_type; struct bpf_prog *prog, *dst_prog = NULL; @@ -3059,7 +3059,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) goto free_prog_sec; /* run eBPF verifier */ - err = bpf_check(&prog, attr, uattr, uattr_size); + err = bpf_check(&prog, attr, uattr); if (err < 0) goto free_used_maps; @@ -6092,12 +6092,25 @@ static int prog_stream_read(union bpf_attr *attr) return ret; } +static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size) +{ + if (!attr->log_true_size) + return 0; + + if (size >= offsetofend(union bpf_attr, log_true_size) && + copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size), + &attr->log_true_size, sizeof(attr->log_true_size))) + return -EFAULT; + + return 0; +} + static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, bpfptr_t uattr_common, unsigned int size_common) { struct bpf_common_attr common_attrs; union bpf_attr attr; - int err; + int err, ret; err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); if (err) @@ -6145,7 +6158,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, err = map_freeze(&attr); break; case BPF_PROG_LOAD: - err = bpf_prog_load(&attr, uattr, size); + attr.log_true_size = 0; + err = bpf_prog_load(&attr, uattr); + ret = copy_prog_load_log_true_size(&attr, uattr, size); + err = ret ? 
ret : err; break; case BPF_OBJ_PIN: err = bpf_obj_pin(&attr); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e892df386eed7..d5089f026f578 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -24499,12 +24499,11 @@ static int compute_scc(struct bpf_verifier_env *env) return err; } -int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) +int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) { u64 start_time = ktime_get_ns(); struct bpf_verifier_env *env; int i, len, ret = -EINVAL, err; - u32 log_true_size; bool is_priv; BTF_TYPE_EMIT(enum bpf_features); @@ -24700,17 +24699,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->prog->aux->verified_insns = env->insn_processed; /* preserve original error even if log finalization is successful */ - err = bpf_vlog_finalize(&env->log, &log_true_size); + err = bpf_vlog_finalize(&env->log, &attr->log_true_size); if (err) ret = err; - if (uattr_size >= offsetofend(union bpf_attr, log_true_size) && - copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size), - &log_true_size, sizeof(log_true_size))) { - ret = -EFAULT; - goto err_release_maps; - } - if (ret) goto err_release_maps; From b6d3656c077c9fad39c6fbd208b7a4242a7b409e Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Thu, 2 Oct 2025 23:48:35 +0800 Subject: [PATCH 04/10] bpf: Add common attr support for prog_load The log buffer of the common attributes could be confused with the one in 'union bpf_attr' for BPF_PROG_LOAD. To clarify the usage of these two log buffers, both can be used for logging if: * They are the same, including 'log_buf', 'log_level' and 'log_size'. * One of them is missing, in which case the other one is used for logging. If both have 'log_buf' set but they are not exactly the same, return -EUSERS.
Signed-off-by: Leon Hwang --- kernel/bpf/syscall.c | 55 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2bdc0b43ec832..698c30ff99486 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -6092,11 +6092,57 @@ static int prog_stream_read(union bpf_attr *attr) return ret; } -static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size) +static int check_log_attrs(u64 log_buf, u32 log_size, u32 log_level, + struct bpf_common_attr *common_attrs) +{ + if (log_buf && common_attrs->log_buf && (log_buf != common_attrs->log_buf || + log_size != common_attrs->log_size || + log_level != common_attrs->log_level)) + return -EUSERS; + + return 0; +} + +static int check_prog_load_log_attrs(union bpf_attr *attr, struct bpf_common_attr *common_attrs, + bool *log_common_attrs) +{ + int err; + + err = check_log_attrs(attr->log_buf, attr->log_size, attr->log_level, common_attrs); + if (err) + return err; + + if (!attr->log_buf && common_attrs->log_buf) { + *log_common_attrs = true; + attr->log_buf = common_attrs->log_buf; + attr->log_size = common_attrs->log_size; + attr->log_level = common_attrs->log_level; + } + + return 0; +} + +static int __copy_common_attr_log_true_size(bpfptr_t uattr, unsigned int size, u32 *log_true_size) +{ + if (size >= offsetofend(struct bpf_common_attr, log_true_size) && + copy_to_bpfptr_offset(uattr, offsetof(struct bpf_common_attr, log_true_size), + log_true_size, sizeof(*log_true_size))) + return -EFAULT; + + return 0; +} + +static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size, + struct bpf_common_attr *common_attrs, bpfptr_t uattr_common, + unsigned int size_common, bool log_common_attrs) { if (!attr->log_true_size) return 0; + if (log_common_attrs) + return __copy_common_attr_log_true_size(uattr_common, size_common, + &attr->log_true_size); + if (size >= 
offsetofend(union bpf_attr, log_true_size) && copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size), &attr->log_true_size, sizeof(attr->log_true_size))) @@ -6109,6 +6155,7 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, bpfptr_t uattr_common, unsigned int size_common) { struct bpf_common_attr common_attrs; + bool log_common_attrs = false; union bpf_attr attr; int err, ret; @@ -6158,9 +6205,13 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, err = map_freeze(&attr); break; case BPF_PROG_LOAD: + err = check_prog_load_log_attrs(&attr, &common_attrs, &log_common_attrs); + if (err) + break; attr.log_true_size = 0; err = bpf_prog_load(&attr, uattr); - ret = copy_prog_load_log_true_size(&attr, uattr, size); + ret = copy_prog_load_log_true_size(&attr, uattr, size, &common_attrs, uattr_common, + size_common, log_common_attrs); err = ret ? ret : err; break; case BPF_OBJ_PIN: From e5dff25e9c596de8d59c74dec5881fe53410f8c2 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Thu, 2 Oct 2025 23:48:36 +0800 Subject: [PATCH 05/10] bpf: Refactor reporting btf_log_true_size for btf_load The next commit adds support for reporting logs via the extended common attributes, which also requires reporting 'log_true_size' via the extended common attributes. Therefore, refactor how 'btf_log_true_size' is reported so that 'log_true_size' can easily be reported via the extended common attributes as well.
Signed-off-by: Leon Hwang --- include/linux/btf.h | 2 +- kernel/bpf/btf.c | 25 +++++-------------------- kernel/bpf/syscall.c | 22 +++++++++++++++++++--- 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index f06976ffb63f9..60e29d05a8a90 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -145,7 +145,7 @@ const char *btf_get_name(const struct btf *btf); void btf_get(struct btf *btf); void btf_put(struct btf *btf); const struct btf_header *btf_header(const struct btf *btf); -int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz); +int btf_new_fd(union bpf_attr *attr, bpfptr_t uattr); struct btf *btf_get_by_fd(int fd); int btf_get_info_by_fd(const struct btf *btf, const union bpf_attr *attr, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 0de8fc8a0e0b3..0d83ce16947d7 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5745,22 +5745,7 @@ static int btf_check_type_tags(struct btf_verifier_env *env, return 0; } -static int finalize_log(struct bpf_verifier_log *log, bpfptr_t uattr, u32 uattr_size) -{ - u32 log_true_size; - int err; - - err = bpf_vlog_finalize(log, &log_true_size); - - if (uattr_size >= offsetofend(union bpf_attr, btf_log_true_size) && - copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size), - &log_true_size, sizeof(log_true_size))) - err = -EFAULT; - - return err; -} - -static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) +static struct btf *btf_parse(union bpf_attr *attr, bpfptr_t uattr) { bpfptr_t btf_data = make_bpfptr(attr->btf, uattr.is_kernel); char __user *log_ubuf = u64_to_user_ptr(attr->btf_log_buf); @@ -5841,7 +5826,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat } } - err = finalize_log(&env->log, uattr, uattr_size); + err = bpf_vlog_finalize(&env->log, &attr->btf_log_true_size); if (err) goto errout_free; @@ -5853,7 +5838,7 @@ static struct btf 
*btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat btf_free_struct_meta_tab(btf); errout: /* overwrite err with -ENOSPC or -EFAULT */ - ret = finalize_log(&env->log, uattr, uattr_size); + ret = bpf_vlog_finalize(&env->log, &attr->btf_log_true_size); if (ret) err = ret; errout_free: @@ -8017,12 +8002,12 @@ static int __btf_new_fd(struct btf *btf) return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC); } -int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) +int btf_new_fd(union bpf_attr *attr, bpfptr_t uattr) { struct btf *btf; int ret; - btf = btf_parse(attr, uattr, uattr_size); + btf = btf_parse(attr, uattr); if (IS_ERR(btf)) return PTR_ERR(btf); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 698c30ff99486..3bdcd6c065039 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5397,7 +5397,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, #define BPF_BTF_LOAD_LAST_FIELD btf_token_fd -static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) +static int bpf_btf_load(union bpf_attr *attr, bpfptr_t uattr) { struct bpf_token *token = NULL; @@ -5424,7 +5424,7 @@ static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_ bpf_token_put(token); - return btf_new_fd(attr, uattr, uattr_size); + return btf_new_fd(attr, uattr); } #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd @@ -6151,6 +6151,19 @@ static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, un return 0; } +static int copy_btf_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size) +{ + if (!attr->btf_log_true_size) + return 0; + + if (size >= offsetofend(union bpf_attr, btf_log_true_size) && + copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size), + &attr->btf_log_true_size, sizeof(attr->btf_log_true_size))) + return -EFAULT; + + return 0; +} + static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned 
int size, bpfptr_t uattr_common, unsigned int size_common) { @@ -6257,7 +6270,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, err = bpf_raw_tracepoint_open(&attr); break; case BPF_BTF_LOAD: - err = bpf_btf_load(&attr, uattr, size); + attr.btf_log_true_size = 0; + err = bpf_btf_load(&attr, uattr); + ret = copy_btf_load_log_true_size(&attr, uattr, size); + err = ret ? ret : err; break; case BPF_BTF_GET_FD_BY_ID: err = bpf_btf_get_fd_by_id(&attr); From 88b413002f627d37e650238a049f4f4bc74c9af6 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Thu, 2 Oct 2025 23:48:37 +0800 Subject: [PATCH 06/10] bpf: Add common attr support for btf_load The log buffer of the common attributes could be confused with the one in 'union bpf_attr' for BPF_BTF_LOAD. To clarify the usage of these two log buffers, both can be used for logging if: * They are the same, including 'log_buf', 'log_level' and 'log_size'. * One of them is missing, in which case the other one is used for logging. If both have 'log_buf' set but they are not exactly the same, return -EUSERS.
Signed-off-by: Leon Hwang --- kernel/bpf/syscall.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3bdcd6c065039..fc1b5c8c5e82f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -6151,11 +6151,37 @@ static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, un return 0; } -static int copy_btf_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size) +static int check_btf_load_log_attrs(union bpf_attr *attr, struct bpf_common_attr *common_attrs, + bool *log_common_attrs) +{ + int err; + + err = check_log_attrs(attr->btf_log_buf, attr->btf_log_size, attr->btf_log_level, + common_attrs); + if (err) + return err; + + if (!attr->btf_log_buf && common_attrs->log_buf) { + *log_common_attrs = true; + attr->btf_log_buf = common_attrs->log_buf; + attr->btf_log_size = common_attrs->log_size; + attr->btf_log_level = common_attrs->log_level; + } + + return 0; +} + +static int copy_btf_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size, + struct bpf_common_attr *common_attrs, bpfptr_t uattr_common, + unsigned int size_common, bool log_common_attrs) { if (!attr->btf_log_true_size) return 0; + if (log_common_attrs) + return __copy_common_attr_log_true_size(uattr_common, size_common, + &attr->btf_log_true_size); + if (size >= offsetofend(union bpf_attr, btf_log_true_size) && copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size), &attr->btf_log_true_size, sizeof(attr->btf_log_true_size))) @@ -6270,9 +6296,13 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, err = bpf_raw_tracepoint_open(&attr); break; case BPF_BTF_LOAD: + err = check_btf_load_log_attrs(&attr, &common_attrs, &log_common_attrs); + if (err) + break; attr.btf_log_true_size = 0; err = bpf_btf_load(&attr, uattr); - ret = copy_btf_load_log_true_size(&attr, uattr, size); + ret = 
copy_btf_load_log_true_size(&attr, uattr, size, &common_attrs, uattr_common, + size_common, log_common_attrs); err = ret ? ret : err; break; case BPF_BTF_GET_FD_BY_ID: From 77fba268428d389cbb65d0ef01ee4dc141eda36d Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Thu, 2 Oct 2025 23:48:38 +0800 Subject: [PATCH 07/10] bpf: Add warnings for internal bugs in map_create The next commit will report the reason for -EINVAL in map_create to users. However, the '!ops' and '!ops->map_mem_usage' checks should not report a reason, because hitting them would indicate an internal bug. Instead, wrap them in WARN_ON_ONCE, so that the error details can be found in dmesg. Signed-off-by: Leon Hwang --- kernel/bpf/syscall.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index fc1b5c8c5e82f..49db250a2f5da 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1406,7 +1406,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) return -EINVAL; map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); ops = bpf_map_types[map_type]; - if (!ops) + if (WARN_ON_ONCE(!ops)) return -EINVAL; if (ops->map_alloc_check) { @@ -1416,7 +1416,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) } if (attr->map_ifindex) ops = &bpf_map_offload_ops; - if (!ops->map_mem_usage) + if (WARN_ON_ONCE(!ops->map_mem_usage)) return -EINVAL; if (token_flag) { From b402d84e4f4d171ca556bce3cd4d5938e1010911 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Thu, 2 Oct 2025 23:48:39 +0800 Subject: [PATCH 08/10] bpf: Add common attr support for map_create Currently, many 'BPF_MAP_CREATE' failures return '-EINVAL' without providing any explanation to user space. With the extended BPF syscall support, detailed error messages can now be reported. This allows users to understand the specific reason for a failed map creation, rather than just receiving a generic '-EINVAL'.
Signed-off-by: Leon Hwang --- kernel/bpf/syscall.c | 96 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 87 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 49db250a2f5da..24f46cf451bec 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1355,23 +1355,72 @@ static bool bpf_net_capable(void) return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); } +struct bpf_vlog_wrapper { + struct bpf_common_attr *attr; + struct bpf_verifier_log *log; +}; + +static void bpf_vlog_wrapper_destructor(struct bpf_vlog_wrapper *w) +{ + if (!w->log) + return; + + (void) bpf_vlog_finalize(w->log, &w->attr->log_true_size); + kfree(w->log); +} + +#define DEFINE_BPF_VLOG_WRAPPER(name, common_attrs) \ + struct bpf_vlog_wrapper name __cleanup(bpf_vlog_wrapper_destructor) = { \ + .attr = common_attrs, \ + } + +static int bpf_vlog_wrapper_init(struct bpf_vlog_wrapper *w) +{ + struct bpf_common_attr *attr = w->attr; + struct bpf_verifier_log *log; + int err; + + if (!attr->log_buf) + return 0; + + log = kzalloc(sizeof(*log), GFP_KERNEL); + if (!log) + return -ENOMEM; + + err = bpf_vlog_init(log, attr->log_level, u64_to_user_ptr(attr->log_buf), attr->log_size); + if (err) { + kfree(log); + return err; + } + + w->log = log; + return 0; +} + #define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size /* called via syscall */ -static int map_create(union bpf_attr *attr, bpfptr_t uattr) +static int map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_common_attr *common_attrs) { const struct bpf_map_ops *ops; struct bpf_token *token = NULL; int numa_node = bpf_map_attr_numa_node(attr); u32 map_type = attr->map_type; + struct bpf_verifier_log *log; struct bpf_map *map; bool token_flag; int f_flags; int err; + DEFINE_BPF_VLOG_WRAPPER(log_wrapper, common_attrs); err = CHECK_ATTR(BPF_MAP_CREATE); if (err) return -EINVAL; + err = bpf_vlog_wrapper_init(&log_wrapper); + if (err) + return err; + log = log_wrapper.log; + /* check 
BPF_F_TOKEN_FD flag, remember if it's set, and then clear it * to avoid per-map type checks tripping on unknown flag */ @@ -1379,17 +1428,25 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) attr->map_flags &= ~BPF_F_TOKEN_FD; if (attr->btf_vmlinux_value_type_id) { - if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || - attr->btf_key_type_id || attr->btf_value_type_id) + if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS) { + bpf_log(log, "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n"); + return -EINVAL; + } + if (attr->btf_key_type_id || attr->btf_value_type_id) { + bpf_log(log, "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n"); return -EINVAL; + } } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { + bpf_log(log, "Invalid btf_value_type_id.\n"); return -EINVAL; } if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && attr->map_type != BPF_MAP_TYPE_ARENA && - attr->map_extra != 0) + attr->map_extra != 0) { + bpf_log(log, "Invalid map_extra.\n"); return -EINVAL; + } f_flags = bpf_get_file_flag(attr->map_flags); if (f_flags < 0) @@ -1397,13 +1454,17 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) if (numa_node != NUMA_NO_NODE && ((unsigned int)numa_node >= nr_node_ids || - !node_online(numa_node))) + !node_online(numa_node))) { + bpf_log(log, "Invalid numa_node.\n"); return -EINVAL; + } /* find map type and init map: hashtable vs rbtree vs bloom vs ... 
*/ map_type = attr->map_type; - if (map_type >= ARRAY_SIZE(bpf_map_types)) + if (map_type >= ARRAY_SIZE(bpf_map_types)) { + bpf_log(log, "Invalid map_type.\n"); return -EINVAL; + } map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); ops = bpf_map_types[map_type]; if (WARN_ON_ONCE(!ops)) @@ -1421,8 +1482,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) if (token_flag) { token = bpf_token_get_from_fd(attr->map_token_fd); - if (IS_ERR(token)) + if (IS_ERR(token)) { + bpf_log(log, "Invalid map_token_fd.\n"); return PTR_ERR(token); + } /* if current token doesn't grant map creation permissions, * then we can't use this token, so ignore it and rely on @@ -1504,8 +1567,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) err = bpf_obj_name_cpy(map->name, attr->map_name, sizeof(attr->map_name)); - if (err < 0) + if (err < 0) { + bpf_log(log, "Invalid map_name.\n"); goto free_map; + } preempt_disable(); map->cookie = gen_cookie_next(&bpf_map_cookie); @@ -1528,6 +1593,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) btf = btf_get_by_fd(attr->btf_fd); if (IS_ERR(btf)) { + bpf_log(log, "Invalid btf_fd.\n"); err = PTR_ERR(btf); goto free_map; } @@ -6132,6 +6198,15 @@ static int __copy_common_attr_log_true_size(bpfptr_t uattr, unsigned int size, u return 0; } +static int copy_common_attr_log_true_size(bpfptr_t uattr, unsigned int size, + struct bpf_common_attr *attr) +{ + if (!attr->log_true_size) + return 0; + + return __copy_common_attr_log_true_size(uattr, size, &attr->log_true_size); +} + static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size, struct bpf_common_attr *common_attrs, bpfptr_t uattr_common, unsigned int size_common, bool log_common_attrs) @@ -6226,7 +6301,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, switch (cmd) { case BPF_MAP_CREATE: - err = map_create(&attr, uattr); + common_attrs.log_true_size = 0; + err = map_create(&attr, 
uattr, &common_attrs); + ret = copy_common_attr_log_true_size(uattr_common, size_common, &common_attrs); + err = ret ? ret : err; break; case BPF_MAP_LOOKUP_ELEM: err = map_lookup_elem(&attr); From 5ada586471cb902b2b9762c895a3074531f5e0c7 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Thu, 2 Oct 2025 23:48:40 +0800 Subject: [PATCH 09/10] libbpf: Add common attr support for map_create With the previous commit adding common attribute support for BPF_MAP_CREATE, it is now possible to retrieve detailed error messages when map creation fails by using the 'log_buf' field from the common attributes. Extend 'bpf_map_create_opts' with the new fields 'log_buf', 'log_size', 'log_level' and 'log_true_size', allowing users to capture and inspect those log messages. Signed-off-by: Leon Hwang --- tools/lib/bpf/bpf.c | 17 +++++++++++++++-- tools/lib/bpf/bpf.h | 9 +++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9cd79beb13a2d..ca66fcdb3f49f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -203,10 +203,13 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 key_size, __u32 value_size, __u32 max_entries, - const struct bpf_map_create_opts *opts) + struct bpf_map_create_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size); + const size_t common_attrs_sz = sizeof(struct bpf_common_attr); + struct bpf_common_attr common_attrs; union bpf_attr attr; + const char *log_buf; int fd; bump_rlimit_memlock(); @@ -239,7 +242,17 @@ int bpf_map_create(enum bpf_map_type map_type, attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL)); attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0); - fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); + log_buf = OPTS_GET(opts, log_buf, NULL); + if (log_buf && feat_supported(NULL, FEAT_EXTENDED_SYSCALL)) { + memset(&common_attrs, 0, common_attrs_sz); + common_attrs.log_buf = ptr_to_u64(log_buf); + 
common_attrs.log_size = OPTS_GET(opts, log_size, 0); + common_attrs.log_level = OPTS_GET(opts, log_level, 0); + fd = sys_bpf_ext_fd(BPF_MAP_CREATE, &attr, attr_sz, &common_attrs, common_attrs_sz); + OPTS_SET(opts, log_true_size, common_attrs.log_true_size); + } else { + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); + } return libbpf_err_errno(fd); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index e983a3e40d612..77d475e7274a0 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -57,16 +57,21 @@ struct bpf_map_create_opts { const void *excl_prog_hash; __u32 excl_prog_hash_size; + + const char *log_buf; + __u32 log_size; + __u32 log_level; + __u32 log_true_size; size_t :0; }; -#define bpf_map_create_opts__last_field excl_prog_hash_size +#define bpf_map_create_opts__last_field log_true_size LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, __u32 key_size, __u32 value_size, __u32 max_entries, - const struct bpf_map_create_opts *opts); + struct bpf_map_create_opts *opts); struct bpf_prog_load_opts { size_t sz; /* size of this struct for forward/backward compatibility */ From 6f5ca166263b28a3c67b597c4baddb152f832ce2 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Thu, 2 Oct 2025 23:48:41 +0800 Subject: [PATCH 10/10] selftests/bpf: Add cases to test map create failure log Now that the kernel can report a log when it fails to create a map, add test cases to check those logs.
Signed-off-by: Leon Hwang --- .../selftests/bpf/prog_tests/map_init.c | 140 ++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c index 14a31109dd0e0..97a632ae7688f 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_init.c +++ b/tools/testing/selftests/bpf/prog_tests/map_init.c @@ -212,3 +212,143 @@ void test_map_init(void) if (test__start_subtest("pcpu_lru_map_init")) test_pcpu_lru_map_init(); } + +#define BPF_LOG_FIXED 8 + +static void test_map_create(enum bpf_map_type map_type, const char *map_name, + struct bpf_map_create_opts *opts, const char *exp_msg) +{ + char log_buf[128]; + int fd; + + log_buf[0] = '\0'; + opts->log_buf = log_buf; + opts->log_size = sizeof(log_buf); + opts->log_level = BPF_LOG_FIXED; + fd = bpf_map_create(map_type, map_name, 4, 4, 1, opts); + if (!ASSERT_LT(fd, 0, "bpf_map_create")) { + close(fd); + return; + } + + ASSERT_STREQ(log_buf, exp_msg, "log_buf"); + ASSERT_EQ(opts->log_true_size, strlen(exp_msg) + 1, "log_true_size"); +} + +static void test_map_create_array(struct bpf_map_create_opts *opts, const char *exp_msg) +{ + test_map_create(BPF_MAP_TYPE_ARRAY, "test_map_create", opts, exp_msg); +} + +static void test_invalid_vmlinux_value_type_id_struct_ops(void) +{ + const char *msg = "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_vmlinux_value_type_id = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_vmlinux_value_type_id_kv_type_id(void) +{ + const char *msg = "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_vmlinux_value_type_id = 1, + .btf_key_type_id = 1, + ); + + test_map_create(BPF_MAP_TYPE_STRUCT_OPS, "test_map_create", &opts, msg); +} + +static void test_invalid_value_type_id(void) +{ + const char *msg = "Invalid 
btf_value_type_id.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_key_type_id = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_map_extra(void) +{ + const char *msg = "Invalid map_extra.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_extra = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_numa_node(void) +{ + const char *msg = "Invalid numa_node.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_NUMA_NODE, + .numa_node = 0xFF, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_map_type(void) +{ + const char *msg = "Invalid map_type.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts); + + test_map_create(__MAX_BPF_MAP_TYPE, "test_map_create", &opts, msg); +} + +static void test_invalid_token_fd(void) +{ + const char *msg = "Invalid map_token_fd.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_TOKEN_FD, + .token_fd = 0xFF, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_map_name(void) +{ + const char *msg = "Invalid map_name.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts); + + test_map_create(BPF_MAP_TYPE_ARRAY, "test-!@#", &opts, msg); +} + +static void test_invalid_btf_fd(void) +{ + const char *msg = "Invalid btf_fd.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_fd = -1, + .btf_key_type_id = 1, + .btf_value_type_id = 1, + ); + + test_map_create_array(&opts, msg); +} + +void test_map_create_failure(void) +{ + if (test__start_subtest("invalid_vmlinux_value_type_id_struct_ops")) + test_invalid_vmlinux_value_type_id_struct_ops(); + if (test__start_subtest("invalid_vmlinux_value_type_id_kv_type_id")) + test_invalid_vmlinux_value_type_id_kv_type_id(); + if (test__start_subtest("invalid_value_type_id")) + test_invalid_value_type_id(); + if (test__start_subtest("invalid_map_extra")) + test_invalid_map_extra(); + if (test__start_subtest("invalid_numa_node")) + test_invalid_numa_node(); + if 
(test__start_subtest("invalid_map_type")) + test_invalid_map_type(); + if (test__start_subtest("invalid_token_fd")) + test_invalid_token_fd(); + if (test__start_subtest("invalid_map_name")) + test_invalid_map_name(); + if (test__start_subtest("invalid_btf_fd")) + test_invalid_btf_fd(); +}