diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 233de8677382e..d3b2fd2ae5276 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1430,6 +1430,9 @@ enum {
 /* Do not translate kernel bpf_arena pointers to user pointers */
 	BPF_F_NO_USER_CONV	= (1U << 18),
+
+/* BPF ringbuf works in overwrite mode */
+	BPF_F_OVERWRITE		= (1U << 19),
 };
 
 /* Flags for BPF_PROG_QUERY. */
@@ -6215,6 +6218,7 @@ enum {
 	BPF_RB_RING_SIZE = 1,
 	BPF_RB_CONS_POS = 2,
 	BPF_RB_PROD_POS = 3,
+	BPF_RB_OVER_POS = 4,
 };
 
 /* BPF ring buffer constants */
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 719d73299397b..6ca41d01f1873 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -13,7 +13,7 @@
 #include 
 #include 
 
-#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)
+#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE | BPF_F_OVERWRITE)
 
 /* non-mmap()'able part of bpf_ringbuf (everything up to consumer page) */
 #define RINGBUF_PGOFF \
@@ -27,7 +27,8 @@ struct bpf_ringbuf {
 	wait_queue_head_t waitq;
 	struct irq_work work;
-	u64 mask;
+	u64 mask:48;
+	u64 overwrite_mode:1;
 	struct page **pages;
 	int nr_pages;
 	rqspinlock_t spinlock ____cacheline_aligned_in_smp;
@@ -72,6 +73,7 @@ struct bpf_ringbuf {
 	 */
 	unsigned long consumer_pos __aligned(PAGE_SIZE);
 	unsigned long producer_pos __aligned(PAGE_SIZE);
+	unsigned long overwrite_pos; /* next position to be overwritten in overwrite mode */
 	unsigned long pending_pos;
 	char data[] __aligned(PAGE_SIZE);
 };
@@ -166,7 +168,8 @@ static void bpf_ringbuf_notify(struct irq_work *work)
  * considering that the maximum value of data_sz is (4GB - 1), there
  * will be no overflow, so just note the size limit in the comments.
  */
-static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
+static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node,
+					     int overwrite_mode)
 {
 	struct bpf_ringbuf *rb;
@@ -183,17 +186,25 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
 	rb->consumer_pos = 0;
 	rb->producer_pos = 0;
 	rb->pending_pos = 0;
+	rb->overwrite_mode = overwrite_mode;
 
 	return rb;
 }
 
 static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
 {
+	int overwrite_mode = 0;
 	struct bpf_ringbuf_map *rb_map;
 
 	if (attr->map_flags & ~RINGBUF_CREATE_FLAG_MASK)
 		return ERR_PTR(-EINVAL);
 
+	if (attr->map_flags & BPF_F_OVERWRITE) {
+		if (attr->map_type == BPF_MAP_TYPE_USER_RINGBUF)
+			return ERR_PTR(-EINVAL);
+		overwrite_mode = 1;
+	}
+
 	if (attr->key_size || attr->value_size ||
 	    !is_power_of_2(attr->max_entries) ||
 	    !PAGE_ALIGNED(attr->max_entries))
@@ -205,7 +216,8 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
 
 	bpf_map_init_from_attr(&rb_map->map, attr);
 
-	rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node);
+	rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node,
+				       overwrite_mode);
 	if (!rb_map->rb) {
 		bpf_map_area_free(rb_map);
 		return ERR_PTR(-ENOMEM);
@@ -295,11 +307,16 @@ static int ringbuf_map_mmap_user(struct bpf_map *map, struct vm_area_struct *vma
 
 static unsigned long ringbuf_avail_data_sz(struct bpf_ringbuf *rb)
 {
-	unsigned long cons_pos, prod_pos;
+	unsigned long cons_pos, prod_pos, over_pos;
 
 	cons_pos = smp_load_acquire(&rb->consumer_pos);
 	prod_pos = smp_load_acquire(&rb->producer_pos);
-	return prod_pos - cons_pos;
+
+	if (likely(!rb->overwrite_mode))
+		return prod_pos - cons_pos;
+
+	over_pos = READ_ONCE(rb->overwrite_pos);
+	return min(prod_pos - max(cons_pos, over_pos), rb->mask + 1);
 }
 
 static u32 ringbuf_total_data_sz(const struct bpf_ringbuf *rb)
@@ -402,11 +419,43 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr)
 	return (void*)((addr & PAGE_MASK) - off);
 }
+
+static bool bpf_ringbuf_has_space(const struct bpf_ringbuf *rb,
+				  unsigned long new_prod_pos,
+				  unsigned long cons_pos,
+				  unsigned long pend_pos)
+{
+	/* no space if the span from the oldest not-yet-committed record
+	 * to the newest record exceeds (ringbuf_size - 1)
+	 */
+	if (new_prod_pos - pend_pos > rb->mask)
+		return false;
+
+	/* ok, we have space in overwrite mode */
+	if (unlikely(rb->overwrite_mode))
+		return true;
+
+	/* no space if the producer position advances more than (ringbuf_size - 1)
+	 * ahead of the consumer position when not in overwrite mode
+	 */
+	if (new_prod_pos - cons_pos > rb->mask)
+		return false;
+
+	return true;
+}
+
+static u32 ringbuf_round_up_hdr_len(u32 hdr_len)
+{
+	hdr_len &= ~BPF_RINGBUF_DISCARD_BIT;
+	return round_up(hdr_len + BPF_RINGBUF_HDR_SZ, 8);
+}
+
 static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
 {
-	unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags;
+	unsigned long flags;
 	struct bpf_ringbuf_hdr *hdr;
-	u32 len, pg_off, tmp_size, hdr_len;
+	u32 len, pg_off, hdr_len;
+	unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, over_pos;
 
 	if (unlikely(size > RINGBUF_MAX_RECORD_SZ))
 		return NULL;
@@ -429,24 +478,39 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
 		hdr_len = READ_ONCE(hdr->len);
 		if (hdr_len & BPF_RINGBUF_BUSY_BIT)
 			break;
-		tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT;
-		tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8);
-		pend_pos += tmp_size;
+		pend_pos += ringbuf_round_up_hdr_len(hdr_len);
 	}
 	rb->pending_pos = pend_pos;
 
-	/* check for out of ringbuf space:
-	 * - by ensuring producer position doesn't advance more than
-	 *   (ringbuf_size - 1) ahead
-	 * - by ensuring oldest not yet committed record until newest
-	 *   record does not span more than (ringbuf_size - 1)
-	 */
-	if (new_prod_pos - cons_pos > rb->mask ||
-	    new_prod_pos - pend_pos > rb->mask) {
+	if (!bpf_ringbuf_has_space(rb, new_prod_pos, cons_pos, pend_pos)) {
 		raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);
 		return NULL;
 	}
 
+	/* In overwrite mode, move overwrite_pos to the next record to be
+	 * overwritten if the ring buffer is full
+	 */
+	if (unlikely(rb->overwrite_mode)) {
+		over_pos = rb->overwrite_pos;
+		while (new_prod_pos - over_pos > rb->mask) {
+			hdr = (void *)rb->data + (over_pos & rb->mask);
+			hdr_len = READ_ONCE(hdr->len);
+			/* since pending_pos is the first record with BUSY
+			 * bit set and overwrite_pos is never bigger than
+			 * pending_pos, no need to check BUSY bit here.
+			 */
+			over_pos += ringbuf_round_up_hdr_len(hdr_len);
+		}
+		/* smp_store_release(&rb->producer_pos, new_prod_pos) at
+		 * the end of the function ensures that when consumer sees
+		 * the updated rb->producer_pos, it always sees the updated
+		 * rb->overwrite_pos, so when consumer reads overwrite_pos
+		 * after smp_load_acquire(r->producer_pos), the overwrite_pos
+		 * will always be valid.
+		 */
+		WRITE_ONCE(rb->overwrite_pos, over_pos);
+	}
+
 	hdr = (void *)rb->data + (prod_pos & rb->mask);
 	pg_off = bpf_ringbuf_rec_pg_off(rb, hdr);
 	hdr->len = size | BPF_RINGBUF_BUSY_BIT;
@@ -479,7 +543,50 @@ const struct bpf_func_proto bpf_ringbuf_reserve_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
-static void bpf_ringbuf_commit(void *sample, u64 flags, bool discard)
+static __always_inline
+bool ringbuf_should_wakeup(const struct bpf_ringbuf *rb,
+			   unsigned long rec_pos,
+			   unsigned long cons_pos,
+			   u32 len, u64 flags)
+{
+	unsigned long rec_end;
+
+	if (flags & BPF_RB_FORCE_WAKEUP)
+		return true;
+
+	if (flags & BPF_RB_NO_WAKEUP)
+		return false;
+
+	/* for non-overwrite mode, if consumer caught up and is waiting for
+	 * our record, notify about new data availability
+	 */
+	if (likely(!rb->overwrite_mode))
+		return cons_pos == rec_pos;
+
+	/* for overwrite mode, to give the consumer a chance to catch up
+	 * before being overwritten, wake up the consumer every half a round
+	 * ahead.
+	 */
+	rec_end = rec_pos + ringbuf_round_up_hdr_len(len);
+
+	cons_pos &= (rb->mask >> 1);
+	rec_pos &= (rb->mask >> 1);
+	rec_end &= (rb->mask >> 1);
+
+	if (cons_pos == rec_pos)
+		return true;
+
+	if (rec_pos < cons_pos && cons_pos < rec_end)
+		return true;
+
+	if (rec_end < rec_pos && (cons_pos > rec_pos || cons_pos < rec_end))
+		return true;
+
+	return false;
+}
+
+static __always_inline
+void bpf_ringbuf_commit(void *sample, u64 flags, bool discard)
 {
 	unsigned long rec_pos, cons_pos;
 	struct bpf_ringbuf_hdr *hdr;
@@ -495,15 +602,10 @@ static void bpf_ringbuf_commit(void *sample, u64 flags, bool discard)
 	/* update record header with correct final size prefix */
 	xchg(&hdr->len, new_len);
 
-	/* if consumer caught up and is waiting for our record, notify about
-	 * new data availability
-	 */
 	rec_pos = (void *)hdr - (void *)rb->data;
 	cons_pos = smp_load_acquire(&rb->consumer_pos) & rb->mask;
 
-	if (flags & BPF_RB_FORCE_WAKEUP)
-		irq_work_queue(&rb->work);
-	else if (cons_pos == rec_pos && !(flags & BPF_RB_NO_WAKEUP))
+	if (ringbuf_should_wakeup(rb, rec_pos, cons_pos, new_len, flags))
 		irq_work_queue(&rb->work);
 }
 
@@ -576,6 +678,8 @@ BPF_CALL_2(bpf_ringbuf_query, struct bpf_map *, map, u64, flags)
 		return smp_load_acquire(&rb->consumer_pos);
 	case BPF_RB_PROD_POS:
 		return smp_load_acquire(&rb->producer_pos);
+	case BPF_RB_OVER_POS:
+		return READ_ONCE(rb->overwrite_pos);
 	default:
 		return 0;
 	}
@@ -749,6 +853,9 @@ BPF_CALL_4(bpf_user_ringbuf_drain, struct bpf_map *, map,
 
 	rb = container_of(map, struct bpf_ringbuf_map, map)->rb;
 
+	if (unlikely(rb->overwrite_mode))
+		return -EOPNOTSUPP;
+
 	/* If another consumer is already consuming a sample, wait for them to finish. */
 	if (!atomic_try_cmpxchg(&rb->busy, &busy, 1))
 		return -EBUSY;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 233de8677382e..d3b2fd2ae5276 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1430,6 +1430,9 @@ enum {
 /* Do not translate kernel bpf_arena pointers to user pointers */
 	BPF_F_NO_USER_CONV	= (1U << 18),
+
+/* BPF ringbuf works in overwrite mode */
+	BPF_F_OVERWRITE		= (1U << 19),
 };
 
 /* Flags for BPF_PROG_QUERY. */
@@ -6215,6 +6218,7 @@ enum {
 	BPF_RB_RING_SIZE = 1,
 	BPF_RB_CONS_POS = 2,
 	BPF_RB_PROD_POS = 3,
+	BPF_RB_OVER_POS = 4,
 };
 
 /* BPF ring buffer constants */
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 9702b70da444c..9c072af675ff3 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -27,10 +27,13 @@ struct ring {
 	ring_buffer_sample_fn sample_cb;
 	void *ctx;
 	void *data;
+	void *read_buffer;
 	unsigned long *consumer_pos;
 	unsigned long *producer_pos;
+	unsigned long *overwrite_pos;
 	unsigned long mask;
 	int map_fd;
+	bool overwrite_mode;
 };
 
 struct ring_buffer {
@@ -69,6 +72,9 @@ static void ringbuf_free_ring(struct ring_buffer *rb, struct ring *r)
 		r->producer_pos = NULL;
 	}
 
+	if (r->read_buffer)
+		free(r->read_buffer);
+
 	free(r);
 }
 
@@ -119,6 +125,14 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 	r->sample_cb = sample_cb;
 	r->ctx = ctx;
 	r->mask = info.max_entries - 1;
+	r->overwrite_mode = info.map_flags & BPF_F_OVERWRITE;
+	if (unlikely(r->overwrite_mode)) {
+		r->read_buffer = malloc(info.max_entries);
+		if (!r->read_buffer) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+	}
 
 	/* Map writable consumer page */
 	tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
@@ -148,6 +162,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 		goto err_out;
 	}
 	r->producer_pos = tmp;
+	r->overwrite_pos = r->producer_pos + 1; /* overwrite_pos is next to producer_pos */
 	r->data = tmp + rb->page_size;
 
 	e = &rb->events[rb->ring_cnt];
@@ -232,7 +247,7 @@ static inline int roundup_len(__u32 len)
 	return (len + 7) / 8 * 8;
 }
 
-static int64_t ringbuf_process_ring(struct ring *r, size_t n)
+static int64_t ringbuf_process_normal_ring(struct ring *r, size_t n)
 {
 	int *len_ptr, len, err;
 	/* 64-bit to avoid overflow in case of extreme application behavior */
@@ -278,6 +293,92 @@ static int64_t ringbuf_process_ring(struct ring *r, size_t n)
 	return cnt;
 }
 
+static int64_t ringbuf_process_overwrite_ring(struct ring *r, size_t n)
+{
+
+	int err;
+	uint32_t *len_ptr, len;
+	/* 64-bit to avoid overflow in case of extreme application behavior */
+	int64_t cnt = 0;
+	size_t size, offset;
+	unsigned long cons_pos, prod_pos, over_pos, tmp_pos;
+	bool got_new_data;
+	void *sample;
+	bool copied;
+
+	size = r->mask + 1;
+
+	cons_pos = smp_load_acquire(r->consumer_pos);
+	do {
+		got_new_data = false;
+
+		/* grab a copy of data */
+		prod_pos = smp_load_acquire(r->producer_pos);
+		do {
+			over_pos = READ_ONCE(*r->overwrite_pos);
+			/* prod_pos may be outdated now */
+			if (over_pos < prod_pos) {
+				tmp_pos = max(cons_pos, over_pos);
+				/* smp_load_acquire(r->producer_pos) before
+				 * READ_ONCE(*r->overwrite_pos) ensures that
+				 * over_pos + r->mask < prod_pos never occurs,
+				 * so size is never larger than r->mask
+				 */
+				size = prod_pos - tmp_pos;
+				if (!size)
+					goto done;
+				memcpy(r->read_buffer,
+				       r->data + (tmp_pos & r->mask), size);
+				copied = true;
+			} else {
+				copied = false;
+			}
+			prod_pos = smp_load_acquire(r->producer_pos);
+		/* retry if data is overwritten by producer */
+		} while (!copied || prod_pos - tmp_pos > r->mask);
+
+		cons_pos = tmp_pos;
+
+		for (offset = 0; offset < size; offset += roundup_len(len)) {
+			len_ptr = r->read_buffer + (offset & r->mask);
+			len = *len_ptr;
+
+			if (len & BPF_RINGBUF_BUSY_BIT)
+				goto done;
+
+			got_new_data = true;
+			cons_pos += roundup_len(len);
+
+			if ((len & BPF_RINGBUF_DISCARD_BIT) == 0) {
+				sample = (void *)len_ptr + BPF_RINGBUF_HDR_SZ;
+				err = r->sample_cb(r->ctx, sample, len);
+				if (err < 0) {
+					/* update consumer pos and bail out */
+					smp_store_release(r->consumer_pos,
+							  cons_pos);
+					return err;
+				}
+				cnt++;
+			}
+
+			if (cnt >= n)
+				goto done;
+		}
+	} while (got_new_data);
+
+done:
+	smp_store_release(r->consumer_pos, cons_pos);
+	return cnt;
+}
+
+static int64_t ringbuf_process_ring(struct ring *r, size_t n)
+{
+	if (likely(!r->overwrite_mode))
+		return ringbuf_process_normal_ring(r, n);
+	else
+		return ringbuf_process_overwrite_ring(r, n);
+}
+
 /* Consume available ring buffer(s) data without event polling, up to n
  * records.
  *
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 77794efc020ea..ee5df3c814388 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -499,7 +499,8 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
 LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
 	trace_printk.c trace_vprintk.c map_ptr_kern.c \
 	core_kern.c core_kern_overflow.c test_ringbuf.c \
-	test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
+	test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c \
+	test_ringbuf_overwrite.c
 
 # Generate both light skeleton and libbpf skeleton for these
 LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index e1ee979e6acc6..6fdfc61c721be 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -19,6 +19,7 @@ static struct {
 	int ringbuf_sz; /* per-ringbuf, in bytes */
 	bool ringbuf_use_output; /* use slower output API */
 	int perfbuf_sz; /* per-CPU size, in pages */
+	bool overwrite_mode;
 } args = {
 	.back2back = false,
 	.batch_cnt = 500,
@@ -27,6 +28,7 @@ static struct {
 	.ringbuf_sz = 512 * 1024,
 	.ringbuf_use_output = false,
 	.perfbuf_sz = 128,
+	.overwrite_mode = false,
 };
 
 enum {
@@ -35,6 +37,7 @@ enum {
 	ARG_RB_BATCH_CNT = 2002,
 	ARG_RB_SAMPLED = 2003,
 	ARG_RB_SAMPLE_RATE = 2004,
+	ARG_RB_OVERWRITE = 2005,
 };
 
 static const struct argp_option opts[] = {
@@ -43,6 +46,7 @@ static const struct argp_option opts[] = {
 	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
 	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
 	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+	{ "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
 	{},
 };
 
@@ -72,6 +76,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 			argp_usage(state);
 		}
 		break;
+	case ARG_RB_OVERWRITE:
+		args.overwrite_mode = true;
+		break;
 	default:
 		return ARGP_ERR_UNKNOWN;
 	}
@@ -104,6 +111,11 @@ static void bufs_validate(void)
 		fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
 		exit(1);
 	}
+
+	if (args.overwrite_mode && strcmp(env.bench_name, "rb-libbpf") != 0) {
+		fprintf(stderr, "rb-overwrite mode only supports rb-libbpf!\n");
+		exit(1);
+	}
 }
 
 static void *bufs_sample_producer(void *input)
@@ -134,6 +146,8 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
 
 static struct ringbuf_bench *ringbuf_setup_skeleton(void)
 {
+	__u32 flags;
+	struct bpf_map *ringbuf;
 	struct ringbuf_bench *skel;
 
 	setup_libbpf();
@@ -151,7 +165,13 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)
 	/* record data + header take 16 bytes */
 	skel->rodata->wakeup_data_size = args.sample_rate * 16;
 
-	bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
+	ringbuf = skel->maps.ringbuf;
+	if (args.overwrite_mode) {
+		flags = bpf_map__map_flags(ringbuf) | BPF_F_OVERWRITE;
+		bpf_map__set_map_flags(ringbuf, flags);
+	}
+
+	bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);
 
 	if (ringbuf_bench__load(skel)) {
 		fprintf(stderr, "failed to load skeleton\n");
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index 91e3567962ffb..4e758bc52b73b 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -49,3 +49,7 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
 	summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
 done
 
+header "Ringbuf, multi-producer contention, overwrite mode"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+	summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-overwrite --rb-batch-cnt 50 rb-libbpf)"
+done
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index d1e4cb28a72c6..205a51c725a75 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -17,6 +17,7 @@
 #include "test_ringbuf_n.lskel.h"
 #include "test_ringbuf_map_key.lskel.h"
 #include "test_ringbuf_write.lskel.h"
+#include "test_ringbuf_overwrite.lskel.h"
 
 #define EDONE 7777
 
@@ -497,6 +498,77 @@ static void ringbuf_map_key_subtest(void)
 	test_ringbuf_map_key_lskel__destroy(skel_map_key);
 }
 
+static void ringbuf_overwrite_mode_subtest(void)
+{
+	unsigned long size, len1, len2, len3, len4, len5;
+	unsigned long expect_avail_data, expect_prod_pos, expect_over_pos;
+	struct test_ringbuf_overwrite_lskel *skel;
+	int err;
+
+	skel = test_ringbuf_overwrite_lskel__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	size = 0x1000;
+	len1 = 0x800;
+	len2 = 0x400;
+	len3 = size - len1 - len2 - BPF_RINGBUF_HDR_SZ * 3; /* 0x3e8 */
+	len4 = len3 - 8; /* 0x3e0 */
+	len5 = len3; /* retry with len3 */
+
+	skel->maps.ringbuf.max_entries = size;
+	skel->rodata->LEN1 = len1;
+	skel->rodata->LEN2 = len2;
+	skel->rodata->LEN3 = len3;
+	skel->rodata->LEN4 = len4;
+	skel->rodata->LEN5 = len5;
+
+	skel->bss->pid = getpid();
+
+	err = test_ringbuf_overwrite_lskel__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	err = test_ringbuf_overwrite_lskel__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	syscall(__NR_getpgid);
+
+	ASSERT_EQ(skel->bss->reserve1_fail, 0, "reserve 1");
+	ASSERT_EQ(skel->bss->reserve2_fail, 0, "reserve 2");
+	ASSERT_EQ(skel->bss->reserve3_fail, 1, "reserve 3");
+	ASSERT_EQ(skel->bss->reserve4_fail, 0, "reserve 4");
+	ASSERT_EQ(skel->bss->reserve5_fail, 0, "reserve 5");
+
+	CHECK(skel->bss->ring_size != size,
+	      "check_ring_size", "exp %lu, got %lu\n",
+	      size, skel->bss->ring_size);
+
+	expect_avail_data = len2 + len4 + len5 + 3 * BPF_RINGBUF_HDR_SZ;
+	CHECK(skel->bss->avail_data != expect_avail_data,
+	      "check_avail_size", "exp %lu, got %lu\n",
+	      expect_avail_data, skel->bss->avail_data);
+
+	CHECK(skel->bss->cons_pos != 0,
+	      "check_cons_pos", "exp 0, got %lu\n",
+	      skel->bss->cons_pos);
+
+	expect_prod_pos = len1 + len2 + len4 + len5 + 4 * BPF_RINGBUF_HDR_SZ;
+	CHECK(skel->bss->prod_pos != expect_prod_pos,
+	      "check_prod_pos", "exp %lu, got %lu\n",
+	      expect_prod_pos, skel->bss->prod_pos);
+
+	expect_over_pos = len1 + BPF_RINGBUF_HDR_SZ;
+	CHECK(skel->bss->over_pos != expect_over_pos,
+	      "check_over_pos", "exp %lu, got %lu\n",
+	      (unsigned long)expect_over_pos, skel->bss->over_pos);
+
+	test_ringbuf_overwrite_lskel__detach(skel);
+cleanup:
+	test_ringbuf_overwrite_lskel__destroy(skel);
+}
+
 void test_ringbuf(void)
 {
 	if (test__start_subtest("ringbuf"))
@@ -507,4 +579,6 @@ void test_ringbuf(void)
 		ringbuf_map_key_subtest();
 	if (test__start_subtest("ringbuf_write"))
 		ringbuf_write_subtest();
+	if (test__start_subtest("ringbuf_overwrite_mode"))
+		ringbuf_overwrite_mode_subtest();
 }
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c b/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c
new file mode 100644
index 0000000000000..da89ba12a75c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+
+#include 
+#include 
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(map_flags, BPF_F_OVERWRITE);
+} ringbuf SEC(".maps");
+
+int pid;
+
+const volatile unsigned long LEN1;
+const volatile unsigned long LEN2;
+const volatile unsigned long LEN3;
+const volatile unsigned long LEN4;
+const volatile unsigned long LEN5;
+
+long reserve1_fail = 0;
+long reserve2_fail = 0;
+long reserve3_fail = 0;
+long reserve4_fail = 0;
+long reserve5_fail = 0;
+
+unsigned long avail_data = 0;
+unsigned long ring_size = 0;
+unsigned long cons_pos = 0;
+unsigned long prod_pos = 0;
+unsigned long over_pos = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_overwrite_ringbuf(void *ctx)
+{
+	char *rec1, *rec2, *rec3, *rec4, *rec5;
+	int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (cur_pid != pid)
+		return 0;
+
+	rec1 = bpf_ringbuf_reserve(&ringbuf, LEN1, 0);
+	if (!rec1) {
+		reserve1_fail = 1;
+		return 0;
+	}
+
+	rec2 = bpf_ringbuf_reserve(&ringbuf, LEN2, 0);
+	if (!rec2) {
+		bpf_ringbuf_discard(rec1, 0);
+		reserve2_fail = 1;
+		return 0;
+	}
+
+	rec3 = bpf_ringbuf_reserve(&ringbuf, LEN3, 0);
+	/* expect failure */
+	if (!rec3) {
+		reserve3_fail = 1;
+	} else {
+		bpf_ringbuf_discard(rec1, 0);
+		bpf_ringbuf_discard(rec2, 0);
+		bpf_ringbuf_discard(rec3, 0);
+		return 0;
+	}
+
+	rec4 = bpf_ringbuf_reserve(&ringbuf, LEN4, 0);
+	if (!rec4) {
+		reserve4_fail = 1;
+		bpf_ringbuf_discard(rec1, 0);
+		bpf_ringbuf_discard(rec2, 0);
+		return 0;
+	}
+
+	bpf_ringbuf_submit(rec1, 0);
+	bpf_ringbuf_submit(rec2, 0);
+	bpf_ringbuf_submit(rec4, 0);
+
+	rec5 = bpf_ringbuf_reserve(&ringbuf, LEN5, 0);
+	if (!rec5) {
+		reserve5_fail = 1;
+		return 0;
+	}
+
+	for (int i = 0; i < LEN3; i++)
+		rec5[i] = 0xdd;
+
+	bpf_ringbuf_submit(rec5, 0);
+
+	ring_size = bpf_ringbuf_query(&ringbuf, BPF_RB_RING_SIZE);
+	avail_data = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+	cons_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_CONS_POS);
+	prod_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_PROD_POS);
+	over_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_OVER_POS);
+
+	return 0;
+}
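
For reference, the expected values asserted in ringbuf_overwrite_mode_subtest() follow directly from the reservation sequence in test_ringbuf_overwrite.c. This is only a sketch of the arithmetic, assuming BPF_RINGBUF_HDR_SZ == 8 and the sizes chosen by the subtest:

/*
 * size = 0x1000, len1 = 0x800, len2 = 0x400,
 * len3 = 0x3e8, len4 = 0x3e0, len5 = 0x3e8
 *
 * rec3 fails because rec1/rec2 are still uncommitted (BUSY), so
 * pending_pos == 0 and new_prod_pos - pending_pos would exceed the mask.
 *
 * After rec1, rec2, rec4 and rec5 are submitted:
 *   prod_pos   = len1 + len2 + len4 + len5 + 4 * 8               = 0x13e8
 *   over_pos   = len1 + 8  (rec1 has been overwritten by rec5)   = 0x808
 *   avail_data = min(prod_pos - max(cons_pos, over_pos), size)
 *              = len2 + len4 + len5 + 3 * 8                      = 0xbe0
 */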
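The benchmark change above sets BPF_F_OVERWRITE on a skeleton map via bpf_map__set_map_flags(); an open-coded userspace consumer could do the same with an explicitly created map. The sketch below is illustrative only: it assumes this patch is applied (so BPF_F_OVERWRITE is visible in the updated uapi header), uses a hypothetical map name "overwrite_rb" and an arbitrary 4 KiB ring, and otherwise relies on existing libbpf APIs (bpf_map_create, ring_buffer__new, ring_buffer__consume).

#include <stdio.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int handle_sample(void *ctx, void *data, size_t size)
{
	/* In overwrite mode, records older than overwrite_pos may be dropped
	 * between two consume calls, but each callback still sees one whole,
	 * committed record.
	 */
	printf("got %zu byte record\n", size);
	return 0;
}

int main(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_OVERWRITE);
	struct ring_buffer *rb;
	int map_fd, err;

	/* ringbuf maps take no key/value; max_entries is the ring size and
	 * must be a page-aligned power of two
	 */
	map_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, "overwrite_rb", 0, 0, 4096, &opts);
	if (map_fd < 0)
		return 1;

	rb = ring_buffer__new(map_fd, handle_sample, NULL, NULL);
	if (!rb)
		return 1;

	/* a BPF program reserving/submitting into map_fd would normally be
	 * attached here; drain whatever is currently readable
	 */
	err = ring_buffer__consume(rb);

	ring_buffer__free(rb);
	return err < 0 ? 1 : 0;
}

Note that libbpf's ring_buffer__add() (shown in the diff) detects the mode from the map's flags, copies the readable region into read_buffer, and re-validates producer_pos afterwards, so the callback never observes data that the producer overwrote mid-read.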