diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 2a7865c8e3fe..1e6de0afe65d 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -34,13 +34,31 @@ enum __bpf_usdt_arg_type { BPF_USDT_ARG_CONST, BPF_USDT_ARG_REG, BPF_USDT_ARG_REG_DEREF, + BPF_USDT_ARG_SIB, }; +/* + * To preserve overall layout and avoid growing this struct while adding SIB + * extras, we keep 4 bytes worth of space after val_off: + * + * - arg_type is stored as a single byte (values from enum below) + * - idx_packed is a 16-bit field packing idx_reg_off (high 12 bits) + * and scale shift (low 4 bits, i.e., scale = 1 << shift) + * - reserved is one spare byte for future use + * + * This keeps the offset of reg_off identical to the historical layout + * (val_off:8 + 4 bytes here), ensuring backwards/forwards compatibility for + * non-SIB modes that only rely on val_off/arg_type/reg_off/... offsets. + */ struct __bpf_usdt_arg_spec { /* u64 scalar interpreted depending on arg_type, see below */ __u64 val_off; /* arg location case, see bpf_usdt_arg() for details */ - enum __bpf_usdt_arg_type arg_type; + __u8 arg_type; + /* packed: [15:4] idx_reg_off, [3:0] scale_shift */ + __u16 idx_packed; + /* reserved for future use, keeps reg_off offset stable */ + __u8 reserved; /* offset of referenced register within struct pt_regs */ short reg_off; /* whether arg should be interpreted as signed value */ @@ -52,6 +70,10 @@ struct __bpf_usdt_arg_spec { char arg_bitshift; }; +/* Helpers to (un)pack SIB extras from idx_packed without relying on bitfields. */ +#define USDT_IDX_OFF(packed) ((packed) >> 4) +#define USDT_IDX_SCALE_SHIFT(packed) ((packed) & 0x000f) + /* should match USDT_MAX_ARG_CNT in usdt.c exactly */ #define BPF_USDT_MAX_ARG_CNT 12 struct __bpf_usdt_spec { @@ -149,8 +171,9 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) { struct __bpf_usdt_spec *spec; struct __bpf_usdt_arg_spec *arg_spec; - unsigned long val; + unsigned long val, idx; int err, spec_id; + int idx_off = 0, scale = 0; *res = 0; @@ -202,6 +225,33 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) return err; #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ val >>= arg_spec->arg_bitshift; +#endif + break; + case BPF_USDT_ARG_SIB: + /* Arg is in memory addressed by SIB (Scale-Index-Base) mode + * (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). Register + * is identified like with BPF_USDT_ARG_SIB case, the offset + * is in arg_spec->val_off, the scale factor is in arg_spec->scale. + * Firstly, we fetch the base register contents and the index + * register contents from pt_regs. Secondly, we multiply the + * index register contents by the scale factor, then add the + * base address and the offset to get the final address. Finally, + * we do another user-space probe read to fetch argument value + * itself. + */ + idx_off = USDT_IDX_OFF(arg_spec->idx_packed); + scale = 1UL << USDT_IDX_SCALE_SHIFT(arg_spec->idx_packed); + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + idx_off); + if (err) + return err; + err = bpf_probe_read_user(&val, sizeof(val), (void *)val + idx * scale + arg_spec->val_off); + if (err) + return err; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + val >>= arg_spec->arg_bitshift; #endif break; default: diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 3373b9d45ac4..0a6499f654fc 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -200,12 +200,15 @@ enum usdt_arg_type { USDT_ARG_CONST, USDT_ARG_REG, USDT_ARG_REG_DEREF, + USDT_ARG_SIB, }; /* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */ struct usdt_arg_spec { __u64 val_off; - enum usdt_arg_type arg_type; + __u8 arg_type; /* enum value stored as u8 */ + __u16 idx_packed; /* [15:4]=idx_reg_off, [3:0]=scale_shift */ + __u8 reserved; /* keep reg_off offset stable */ short reg_off; bool arg_signed; char arg_bitshift; @@ -214,6 +217,10 @@ struct usdt_arg_spec { /* should match BPF_USDT_MAX_ARG_CNT in usdt.bpf.h */ #define USDT_MAX_ARG_CNT 12 +/* Helpers to (un)pack SIB extras from idx_packed without relying on bitfields. */ +#define BPF_USDT_IDX_PACK(idx_off, scale_shift) \ + ((__u16)(((__u16)((idx_off) & 0x0fff)) << 4) | (__u16)((scale_shift) & 0x000f)) + /* should match struct __bpf_usdt_spec from usdt.bpf.h */ struct usdt_spec { struct usdt_arg_spec args[USDT_MAX_ARG_CNT]; @@ -1283,11 +1290,54 @@ static int calc_pt_regs_off(const char *reg_name) static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) { - char reg_name[16]; - int len, reg_off; - long off; + char reg_name[16] = {0}, idx_reg_name[16] = {0}; + int len, reg_off, idx_reg_off, scale = 1; + long off = 0; + __u16 scale_shift; + + if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n", + arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 || + sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n", + arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 || + sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n", + arg_sz, &off, reg_name, idx_reg_name, &len) == 4 || + sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n", + arg_sz, reg_name, idx_reg_name, &len) == 3 + ) { + /* + * Scale Index Base case: + * 1@-96(%rbp,%rax,8) + * 1@(%rbp,%rax,8) + * 1@-96(%rbp,%rax) + * 1@(%rbp,%rax) + */ + arg->arg_type = USDT_ARG_SIB; + arg->val_off = off; - if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) { + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + + idx_reg_off = calc_pt_regs_off(idx_reg_name); + if (idx_reg_off < 0) + return idx_reg_off; + /* pack idx_reg_off and scale shift (scale in {1,2,4,8}) */ + if (scale == 1) + scale_shift = 0; + else if (scale == 2) + scale_shift = 1; + else if (scale == 4) + scale_shift = 2; + else if (scale == 8) + scale_shift = 3; + else { + pr_warn("usdt: invalid SIB scale %d, expected 1,2,4,8; defaulting to 1\n", scale); + return -EINVAL; + } + arg->idx_packed = BPF_USDT_IDX_PACK(idx_reg_off, scale_shift); + } else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", + arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -4@-20(%rbp) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; @@ -1306,6 +1356,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec } else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -4@%eax */ arg->arg_type = USDT_ARG_REG; + /* register read has no memory offset */ arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 77794efc020e..d23a2f1b39ec 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -760,6 +760,7 @@ TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built) TRUNNER_BPF_CFLAGS := $(eval $(call DEFINE_TEST_RUNNER,test_maps)) + # Define test_verifier test runner. # It is much simpler than test_maps/test_progs and sufficiently different from # them (e.g., test.h is using completely pattern), that it's worth just diff --git a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c new file mode 100644 index 000000000000..e46d5743ad24 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Jiawei Zhao . */ +#include + +#include "../sdt.h" +#include "test_usdt_o2.skel.h" + +#if defined(__GNUC__) && !defined(__clang__) +__attribute__((optimize("O2"))) +#endif + +#define test_value 0xFEDCBA9876543210ULL +#define SEC(name) __attribute__((section(name), used)) + +int lets_test_this(int); +static volatile __u64 array[1] = {test_value}; + +static noinline void trigger_func(void) +{ +#if defined(__x86_64__) || defined(__i386__) + /* Base address + offset + (index * scale) */ + /* Force SIB addressing with inline assembly */ + const __u64 *base; + __u32 idx; + /* binding base to %rdx and idx to %rax */ + asm volatile("" : "=d"(base), "=a"(idx) : "0"(array), "1"((__u32)0) : "memory"); + STAP_PROBE1(test, usdt1, base[idx]); +#else + STAP_PROBE1(test, usdt1, array[0]); +#endif +} + +static void basic_sib_usdt(void) +{ + LIBBPF_OPTS(bpf_usdt_opts, opts); + struct test_usdt_o2 *skel; + struct test_usdt_o2__bss *bss; + int err; + + skel = test_usdt_o2__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bss = skel->bss; + bss->my_pid = getpid(); + + err = test_usdt_o2__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* usdt1 won't be auto-attached */ + opts.usdt_cookie = 0xcafedeadbeeffeed; + skel->links.usdt1 = bpf_program__attach_usdt(skel->progs.usdt1, + 0 /*self*/, "/proc/self/exe", + "test", "usdt1", &opts); + if (!ASSERT_OK_PTR(skel->links.usdt1, "usdt1_link")) + goto cleanup; + + trigger_func(); + + ASSERT_EQ(bss->usdt1_called, 1, "usdt1_called"); + ASSERT_EQ(bss->usdt1_cookie, 0xcafedeadbeeffeed, "usdt1_cookie"); + ASSERT_EQ(bss->usdt1_arg_cnt, 1, "usdt1_arg_cnt"); + ASSERT_EQ(bss->usdt1_arg, test_value, "usdt1_arg"); + ASSERT_EQ(bss->usdt1_arg_ret, 0, "usdt1_arg_ret"); + ASSERT_EQ(bss->usdt1_arg_size, sizeof(array[0]), "usdt1_arg_size"); + +cleanup: + test_usdt_o2__destroy(skel); +} + +void test_usdt_o2(void) +{ +#if !defined(__x86_64__) && !defined(__i386__) + test__skip(); + return; +#endif + basic_sib_usdt(); +} diff --git a/tools/testing/selftests/bpf/progs/test_usdt_o2.c b/tools/testing/selftests/bpf/progs/test_usdt_o2.c new file mode 100644 index 000000000000..14602aa54578 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_usdt_o2.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include +#include + +int my_pid; + +int usdt1_called; +u64 usdt1_cookie; +int usdt1_arg_cnt; +int usdt1_arg_ret; +u64 usdt1_arg; +int usdt1_arg_size; + +SEC("usdt") +int usdt1(struct pt_regs *ctx) +{ + long tmp; + + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&usdt1_called, 1); + + usdt1_cookie = bpf_usdt_cookie(ctx); + usdt1_arg_cnt = bpf_usdt_arg_cnt(ctx); + + usdt1_arg_ret = bpf_usdt_arg(ctx, 0, &tmp); + usdt1_arg = (u64)tmp; + usdt1_arg_size = bpf_usdt_arg_size(ctx, 0); + + return 0; +} + +char _license[] SEC("license") = "GPL";