Skip to content

Commit 028aaa1

Browse files
jfernandez authored and danielocfb committed
Add user ring buffer support to libbpf-rs
This change adds support for user ring buffers to libbpf-rs. The user ring buffer is a type of BPF map that allows user space to write data to a ring buffer that can be read by a BPF program. The motivation for this change is to improve the performance of the `scx_rustland` scheduler in the sched_ext project. We have verified the change by running scx_rustland with the new user ring buffer support. The `UserRingBuffer` struct is a wrapper around the `Map` type and provides a new set of functions for interacting with the ring buffer. The API closely resembles the libbpf API for user ring buffers. The struct was intentionally made !Send to avoid introducing locking overhead unless necessary. It is expected that the user will handle synchronization if needed. The `UserRingBufferSample` struct contains a mutable reference to the sample in the ring buffer memory. It is the interface for writing data to the ring buffer. To write to the sample, dereference with `as_mut()` to get a mutable reference to the raw byte slice. The `user_ring_buffer__discard` function was not exposed because the implementation handles this internally to simplify the API. The `user_ring_buffer__reserve_blocking` function is not yet implemented and will be added in the future. Link: https://lore.kernel.org/bpf/[email protected]/ Link: https://github.com/sched-ext/scx/tree/main/scheds/rust/scx_rustland Signed-off-by: Jose Fernandez <[email protected]> Reviewed-by: Andrea Righi <[email protected]>
1 parent 0ab1c69 commit 028aaa1

File tree

7 files changed

+317
-1
lines changed

7 files changed

+317
-1
lines changed

libbpf-rs/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ vendored = ["libbpf-sys/vendored"]
2525

2626
[dependencies]
2727
bitflags = "2.0"
28-
libbpf-sys = { version = "1.4", default-features = false }
28+
libbpf-sys = { version = "1.4.1", default-features = false }
2929
libc = "0.2"
3030
num_enum = "0.5"
3131
strum_macros = "0.24"

libbpf-rs/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ pub mod query;
9292
mod ringbuf;
9393
mod skeleton;
9494
mod tc;
95+
mod user_ringbuf;
9596
mod util;
9697
mod xdp;
9798

@@ -144,6 +145,7 @@ pub use crate::tc::TC_H_INGRESS;
144145
pub use crate::tc::TC_H_MIN_EGRESS;
145146
pub use crate::tc::TC_H_MIN_INGRESS;
146147
pub use crate::tc::TC_INGRESS;
148+
pub use crate::user_ringbuf::UserRingBuffer;
147149
pub use crate::util::num_possible_cpus;
148150
pub use crate::xdp::Xdp;
149151
pub use crate::xdp::XdpFlags;

libbpf-rs/src/map.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,7 @@ pub enum MapType {
10221022
InodeStorage,
10231023
TaskStorage,
10241024
BloomFilter,
1025+
UserRingBuf,
10251026
/// We choose to specify our own "unknown" type here b/c it's really up to the kernel
10261027
/// to decide if it wants to reject the map. If it accepts it, it just means whoever
10271028
/// using this library is a bit out of date.

libbpf-rs/src/user_ringbuf.rs

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
use libc::E2BIG;
2+
use libc::ENOSPC;
3+
use std::ops::Deref;
4+
use std::ops::DerefMut;
5+
use std::os::fd::AsFd;
6+
use std::os::fd::AsRawFd;
7+
use std::os::raw::c_uint;
8+
use std::os::raw::c_void;
9+
use std::ptr::null_mut;
10+
use std::ptr::NonNull;
11+
use std::slice::from_raw_parts;
12+
use std::slice::from_raw_parts_mut;
13+
14+
use crate::AsRawLibbpf;
15+
use crate::Error;
16+
use crate::MapHandle;
17+
use crate::MapType;
18+
use crate::Result;
19+
20+
/// A mutable reference to sample from a [`UserRingBuffer`].
21+
///
22+
/// To write to the sample, dereference with `as_mut()` to get a mutable
23+
/// reference to the raw byte slice. You may find libraries such as
24+
/// [`plain`](https://crates.io/crates/plain) helpful to convert between raw
25+
/// bytes and structs.
26+
#[derive(Debug)]
27+
pub struct UserRingBufferSample<'slf> {
28+
// A pointer to an 8-byte aligned reserved region of the user ring buffer
29+
ptr: NonNull<c_void>,
30+
31+
// The size of the sample in bytes.
32+
size: usize,
33+
34+
// Reference to the owning ring buffer. This is used to discard the sample
35+
// if it is not submitted before being dropped.
36+
rb: &'slf UserRingBuffer,
37+
38+
// Track whether the sample has been submitted.
39+
submitted: bool,
40+
}
41+
42+
impl Deref for UserRingBufferSample<'_> {
43+
type Target = [u8];
44+
45+
fn deref(&self) -> &Self::Target {
46+
unsafe { from_raw_parts(self.ptr.as_ptr() as *const u8, self.size) }
47+
}
48+
}
49+
50+
impl DerefMut for UserRingBufferSample<'_> {
51+
fn deref_mut(&mut self) -> &mut Self::Target {
52+
unsafe { from_raw_parts_mut(self.ptr.as_ptr() as *mut u8, self.size) }
53+
}
54+
}
55+
56+
impl Drop for UserRingBufferSample<'_> {
57+
fn drop(&mut self) {
58+
// If the sample has not been submitted, explicitly discard it.
59+
// This is necessary to avoid leaking ring buffer memory.
60+
if !self.submitted {
61+
unsafe {
62+
libbpf_sys::user_ring_buffer__discard(self.rb.ptr.as_ptr(), self.ptr.as_ptr());
63+
}
64+
}
65+
}
66+
}
67+
68+
/// Represents a user ring buffer. This is a special kind of map that is used to
69+
/// transfer data between user space and kernel space.
70+
#[derive(Debug)]
71+
pub struct UserRingBuffer {
72+
// A non-null pointer to the underlying user ring buffer.
73+
ptr: NonNull<libbpf_sys::user_ring_buffer>,
74+
}
75+
76+
impl UserRingBuffer {
77+
/// Create a new user ring buffer from a map.
78+
///
79+
/// # Errors
80+
/// * If the map is not a user ring buffer.
81+
/// * If the underlying libbpf function fails.
82+
pub fn new(map: &MapHandle) -> Result<Self> {
83+
if map.map_type() != MapType::UserRingBuf {
84+
return Err(Error::with_invalid_data("must use a UserRingBuf map"));
85+
}
86+
87+
let fd = map.as_fd();
88+
let raw_ptr = unsafe { libbpf_sys::user_ring_buffer__new(fd.as_raw_fd(), null_mut()) };
89+
90+
let ptr = NonNull::new(raw_ptr).ok_or_else(|| {
91+
// Safely get the last OS error after a failed call to user_ring_buffer__new
92+
let errno = unsafe { *libc::__errno_location() };
93+
Error::from_raw_os_error(errno)
94+
})?;
95+
96+
Ok(UserRingBuffer { ptr })
97+
}
98+
99+
/// Reserve a sample in the user ring buffer.
100+
///
101+
/// Returns a [`UserRingBufferSample`](UserRingBufferSample<'slf>)
102+
/// that contains a mutable reference to sample that can be written to.
103+
/// The sample must be submitted via [`UserRingBuffer::submit`] before it is
104+
/// dropped.
105+
///
106+
/// # Parameters
107+
/// * `size` - The size of the sample in bytes.
108+
///
109+
/// This function is *not* thread-safe. It is necessary to synchronize
110+
/// amongst multiple producers when invoking this function.
111+
pub fn reserve(&self, size: usize) -> Result<UserRingBufferSample<'_>> {
112+
let sample_ptr =
113+
unsafe { libbpf_sys::user_ring_buffer__reserve(self.ptr.as_ptr(), size as c_uint) };
114+
115+
let ptr = NonNull::new(sample_ptr).ok_or_else(|| {
116+
// Fetch the current value of errno to determine the type of error.
117+
let errno = unsafe { *libc::__errno_location() };
118+
match errno {
119+
E2BIG => Error::with_invalid_data("requested size is too large"),
120+
ENOSPC => Error::with_invalid_data("not enough space in the ring buffer"),
121+
_ => Error::from_raw_os_error(errno),
122+
}
123+
})?;
124+
125+
Ok(UserRingBufferSample {
126+
ptr,
127+
size,
128+
submitted: false,
129+
rb: self,
130+
})
131+
}
132+
133+
/// Submit a sample to the user ring buffer.
134+
///
135+
/// This function takes ownership of the sample and submits it to the ring
136+
/// buffer. After submission, the consumer will be able to read the sample
137+
/// from the ring buffer.
138+
///
139+
/// This function is thread-safe. It is *not* necessary to synchronize
140+
/// amongst multiple producers when invoking this function.
141+
pub fn submit(&self, mut sample: UserRingBufferSample<'_>) -> Result<()> {
142+
unsafe {
143+
libbpf_sys::user_ring_buffer__submit(self.ptr.as_ptr(), sample.ptr.as_ptr());
144+
}
145+
146+
sample.submitted = true;
147+
148+
// The libbpf API does not return an error code, so we cannot determine
149+
// if the submission was successful. Return a `Result` to enable future
150+
// validation while maintaing backwards compatibility.
151+
Ok(())
152+
}
153+
}
154+
155+
impl AsRawLibbpf for UserRingBuffer {
156+
type LibbpfType = libbpf_sys::user_ring_buffer;
157+
158+
/// Retrieve the underlying [`libbpf_sys::user_ring_buffer`].
159+
fn as_libbpf_object(&self) -> NonNull<Self::LibbpfType> {
160+
self.ptr
161+
}
162+
}
163+
164+
impl Drop for UserRingBuffer {
165+
fn drop(&mut self) {
166+
unsafe {
167+
libbpf_sys::user_ring_buffer__free(self.ptr.as_ptr());
168+
}
169+
}
170+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
// Copyright (c) 2024 Jose Fernandez
3+
4+
#include "vmlinux.h"
5+
#include <bpf/bpf_helpers.h>
6+
#include <bpf/bpf_tracing.h>
7+
8+
struct {
9+
__uint(type, BPF_MAP_TYPE_USER_RINGBUF);
10+
__uint(max_entries, 4096 /* one page */);
11+
} user_ringbuf SEC(".maps");
12+
13+
struct {
14+
__uint(type, BPF_MAP_TYPE_HASH);
15+
__type(key, u32);
16+
__type(value, u32);
17+
__uint(max_entries, 100);
18+
} samples SEC(".maps");
19+
20+
struct my_struct_t {
21+
u32 key;
22+
u32 value;
23+
};
24+
25+
static long user_ringbuf_callback(struct bpf_dynptr *dynptr, void *context)
26+
{
27+
const struct my_struct_t *data;
28+
29+
data = bpf_dynptr_data(dynptr, 0, sizeof(*data));
30+
if (!data)
31+
return 0;
32+
33+
bpf_map_update_elem(&samples, &data->key, &data->value, BPF_ANY);
34+
35+
return 0;
36+
}
37+
38+
/* Runs on entry to the getpid syscall and drains the user ring buffer,
 * passing each sample to user_ringbuf_callback(). */
SEC("tp/syscalls/sys_enter_getpid")
int handle__sys_enter_getpid(void *ctx)
{
	/* The drain result is intentionally not inspected. */
	(void)bpf_user_ringbuf_drain(&user_ringbuf, user_ringbuf_callback, NULL, 0);
	return 0;
}
45+
46+
/* License string placed in the "license" section of the object file. */
char LICENSE[] SEC("license") = "GPL";
7.4 KB
Binary file not shown.

libbpf-rs/tests/test.rs

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ use libbpf_rs::ProgramType;
3737
use libbpf_rs::TracepointOpts;
3838
use libbpf_rs::UprobeOpts;
3939
use libbpf_rs::UsdtOpts;
40+
use libbpf_rs::UserRingBuffer;
4041
use plain::Plain;
4142
use probe::probe;
4243
use scopeguard::defer;
@@ -1244,6 +1245,102 @@ fn test_sudo_object_ringbuf_with_closed_map() {
12441245
});
12451246
}
12461247

1248+
/// Submit one sample through a user ring buffer and check that the BPF
/// program stored its key/value pair in the `samples` map.
#[test]
fn test_sudo_object_user_ringbuf() {
    #[repr(C)]
    struct MyStruct {
        key: u32,
        value: u32,
    }

    unsafe impl Plain for MyStruct {}

    const KEY: u32 = 42;
    const VALUE: u32 = 1337;

    bump_rlimit_mlock();

    let mut obj = get_test_object("user_ringbuf.bpf.o");
    let _link = obj
        .prog_mut("handle__sys_enter_getpid")
        .expect("failed to find program")
        .attach()
        .expect("failed to attach prog");
    let urb_map = obj
        .map("user_ringbuf")
        .expect("failed to find user ringbuf map");
    let user_ringbuf = UserRingBuffer::new(urb_map).expect("failed to create user ringbuf");

    // Reserve room for one `MyStruct`, fill it in and submit it.
    let mut urb_sample = user_ringbuf
        .reserve(size_of::<MyStruct>())
        .expect("failed to reserve space");
    {
        let my_struct = plain::from_mut_bytes::<MyStruct>(urb_sample.as_mut())
            .expect("failed to convert bytes");
        my_struct.key = KEY;
        my_struct.value = VALUE;
    }
    user_ringbuf
        .submit(urb_sample)
        .expect("failed to submit sample");

    // Trigger BPF program.
    let _pid = unsafe { libc::getpid() };

    // By now the BPF program should have consumed the sample from the user
    // ring buffer and stored the key/value pair in the samples map.
    let samples_map = obj.map("samples").expect("failed to find map");
    let res = samples_map
        .lookup(&KEY.to_ne_bytes(), MapFlags::ANY)
        .expect("failed to lookup")
        .expect("failed to find value for key");

    // The stored value must be exactly the bytes of the value we submitted.
    assert_eq!(&res[..], &VALUE.to_ne_bytes()[..]);
}
1299+
1300+
/// Reserving far more than the ring buffer can hold must fail with the
/// "requested size is too large" error.
#[test]
fn test_sudo_object_user_ringbuf_reservation_too_big() {
    bump_rlimit_mlock();

    let mut obj = get_test_object("user_ringbuf.bpf.o");
    let _link = obj
        .prog_mut("handle__sys_enter_getpid")
        .expect("failed to find program")
        .attach()
        .expect("failed to attach prog");
    let urb_map = obj
        .map("user_ringbuf")
        .expect("failed to find user ringbuf map");
    let user_ringbuf = UserRingBuffer::new(urb_map).expect("failed to create user ringbuf");

    // Ask for 1 MiB from a ring buffer declared with 4096 entries.
    let err = user_ringbuf.reserve(1024 * 1024).unwrap_err();
    let message = err.to_string();
    assert!(message.contains("requested size is too large"), "{err:#}");
}
1319+
1320+
/// A second reservation that no longer fits next to an outstanding one must
/// fail with the "not enough space" error.
#[test]
fn test_sudo_object_user_ringbuf_not_enough_space() {
    bump_rlimit_mlock();

    let mut obj = get_test_object("user_ringbuf.bpf.o");
    let prog = obj
        .prog_mut("handle__sys_enter_getpid")
        .expect("failed to find program");
    let _link = prog.attach().expect("failed to attach prog");
    let urb_map = obj
        .map("user_ringbuf")
        .expect("failed to find user ringbuf map");
    let user_ringbuf = UserRingBuffer::new(urb_map).expect("failed to create user ringbuf");

    // Keep the first reservation alive for the rest of the test. Binding it
    // to `_` would drop (and thereby discard) it immediately, and the
    // attached program, which runs on every `getpid` syscall, could then
    // drain the discarded record and free the space before the second
    // reservation is attempted.
    let _sample = user_ringbuf
        .reserve(1024 * 3)
        .expect("failed to reserve space");

    // Two 3 KiB reservations cannot both fit into the 4096-byte ring buffer.
    let err = user_ringbuf.reserve(1024 * 3).unwrap_err();
    assert!(
        err.to_string()
            .contains("not enough space in the ring buffer"),
        "{err:#}"
    );
}
1343+
12471344
#[test]
12481345
fn test_sudo_object_task_iter() {
12491346
bump_rlimit_mlock();

0 commit comments

Comments
 (0)