Skip to content

Commit e53a34c

Browse files
committed
Use mio to replace Epoll
Epoll is Linux-specific, so we use mio, a cross-platform event notification library, to replace it. Signed-off-by: Wenyu Huang <[email protected]>
1 parent cfff91e commit e53a34c

File tree

7 files changed

+144
-84
lines changed

7 files changed

+144
-84
lines changed

vhost-user-backend/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
### Added
66
### Changed
7+
- [[316]](https://github.com/rust-vmm/vhost/pull/316) Use mio to replace Epoll. Expose event_loop::EventSet.
8+
79
### Deprecated
810
### Fixed
911

vhost-user-backend/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ xen = ["vm-memory/xen", "vhost/xen"]
1313
postcopy = ["vhost/postcopy", "userfaultfd"]
1414

1515
[dependencies]
16+
bitflags = "2.9.1"
1617
libc = "0.2.39"
1718
log = "0.4.17"
19+
mio = { version = "1.0.4", features = ["os-poll", "os-ext"] }
1820
userfaultfd = { version = "0.8.1", optional = true }
1921
vhost = { path = "../vhost", version = "0.14.0", features = ["vhost-user-backend"] }
2022
virtio-bindings = { workspace = true }

vhost-user-backend/src/backend.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,12 @@ use vhost::vhost_user::message::{
2929
};
3030
use vhost::vhost_user::Backend;
3131
use vm_memory::bitmap::Bitmap;
32-
use vmm_sys_util::epoll::EventSet;
3332
use vmm_sys_util::eventfd::EventFd;
3433

3534
use vhost::vhost_user::GpuBackend;
3635

36+
use crate::EventSet;
37+
3738
use super::vring::VringT;
3839
use super::GM;
3940

@@ -793,7 +794,7 @@ pub mod tests {
793794

794795
let vring = VringRwLock::new(mem, 0x1000).unwrap();
795796
backend
796-
.handle_event(0x1, EventSet::IN, &[vring], 0)
797+
.handle_event(0x1, EventSet::READABLE, &[vring], 0)
797798
.unwrap();
798799

799800
backend.reset_device();

vhost-user-backend/src/event_loop.rs

Lines changed: 130 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -3,52 +3,98 @@
33
//
44
// SPDX-License-Identifier: Apache-2.0
55

6+
use std::collections::HashSet;
67
use std::fmt::{Display, Formatter};
78
use std::io::{self, Result};
89
use std::marker::PhantomData;
910
use std::os::unix::io::{AsRawFd, RawFd};
11+
use std::sync::Mutex;
1012

11-
use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
13+
use mio::event::Event;
14+
use mio::unix::SourceFd;
15+
use mio::{Events, Interest, Poll, Registry, Token};
1216
use vmm_sys_util::eventfd::EventFd;
1317

1418
use super::backend::VhostUserBackend;
1519
use super::vring::VringT;
20+
use bitflags::bitflags;
1621

1722
/// Errors related to vring poll event handling.
1823
#[derive(Debug)]
19-
pub enum VringEpollError {
24+
pub enum VringPollError {
2025
/// Failed to create the poller.
21-
EpollCreateFd(io::Error),
26+
PollerCreate(io::Error),
2227
/// Failed while waiting for events.
23-
EpollWait(io::Error),
28+
PollerWait(io::Error),
2429
/// Could not register exit event
2530
RegisterExitEvent(io::Error),
2631
/// Failed to read the event from kick EventFd.
2732
HandleEventReadKick(io::Error),
2833
/// Failed to handle the event from the backend.
2934
HandleEventBackendHandling(io::Error),
35+
/// Failed to clone registry.
36+
RegistryClone(io::Error),
3037
}
3138

32-
impl Display for VringEpollError {
39+
impl Display for VringPollError {
3340
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
3441
match self {
35-
VringEpollError::EpollCreateFd(e) => write!(f, "cannot create epoll fd: {e}"),
36-
VringEpollError::EpollWait(e) => write!(f, "failed to wait for epoll event: {e}"),
37-
VringEpollError::RegisterExitEvent(e) => write!(f, "cannot register exit event: {e}"),
38-
VringEpollError::HandleEventReadKick(e) => {
42+
VringPollError::PollerCreate(e) => write!(f, "cannot create poller: {e}"),
43+
VringPollError::PollerWait(e) => write!(f, "failed to wait for poller event: {e}"),
44+
VringPollError::RegisterExitEvent(e) => write!(f, "cannot register exit event: {e}"),
45+
VringPollError::HandleEventReadKick(e) => {
3946
write!(f, "cannot read vring kick event: {e}")
4047
}
41-
VringEpollError::HandleEventBackendHandling(e) => {
42-
write!(f, "failed to handle epoll event: {e}")
48+
VringPollError::HandleEventBackendHandling(e) => {
49+
write!(f, "failed to handle poll event: {e}")
4350
}
51+
VringPollError::RegistryClone(e) => write!(f, "cannot clone poller's registry: {e}"),
4452
}
4553
}
4654
}
4755

48-
impl std::error::Error for VringEpollError {}
56+
impl std::error::Error for VringPollError {}
4957

5058
/// Result of vring epoll operations.
51-
pub type VringEpollResult<T> = std::result::Result<T, VringEpollError>;
59+
pub type VringEpollResult<T> = std::result::Result<T, VringPollError>;
60+
61+
bitflags! {
62+
#[derive(Debug, PartialEq, PartialOrd)]
63+
pub struct EventSet: u32 {
64+
const READABLE = 1u32;
65+
const WRITABLE = 2u32;
66+
}
67+
}
68+
69+
impl From<EventSet> for Interest {
70+
fn from(value: EventSet) -> Self {
71+
let mut interest = None;
72+
if value == EventSet::READABLE {
73+
interest = interest
74+
.map(|interest| Interest::READABLE | interest)
75+
.or(Some(Interest::READABLE));
76+
}
77+
if value == EventSet::WRITABLE {
78+
interest = interest
79+
.map(|interest| Interest::WRITABLE | interest)
80+
.or(Some(Interest::WRITABLE));
81+
}
82+
interest.expect("Unknown EventSet")
83+
}
84+
}
85+
86+
impl From<&Event> for EventSet {
87+
fn from(value: &Event) -> Self {
88+
let mut event_set = EventSet::empty();
89+
if value.is_readable() {
90+
event_set |= EventSet::READABLE;
91+
}
92+
if value.is_writable() {
93+
event_set |= EventSet::WRITABLE;
94+
}
95+
event_set
96+
}
97+
}
5298

5399
/// Epoll event handler to manage and process epoll events for registered file descriptor.
54100
///
@@ -57,7 +103,11 @@ pub type VringEpollResult<T> = std::result::Result<T, VringEpollError>;
57103
/// - remove registered file descriptors from the epoll fd
58104
/// - run the event loop to handle pending events on the epoll fd
59105
pub struct VringEpollHandler<T: VhostUserBackend> {
60-
epoll: Epoll,
106+
poller: Mutex<Poll>,
107+
registry: Registry,
108+
// Record the registered fd.
109+
// Because in mio, registering an already-registered source again is unspecified behavior.
110+
fd_set: Mutex<HashSet<RawFd>>,
61111
backend: T,
62112
vrings: Vec<T::Vring>,
63113
thread_id: usize,
@@ -84,22 +134,32 @@ where
84134
vrings: Vec<T::Vring>,
85135
thread_id: usize,
86136
) -> VringEpollResult<Self> {
87-
let epoll = Epoll::new().map_err(VringEpollError::EpollCreateFd)?;
137+
let poller = Poll::new().map_err(VringPollError::PollerCreate)?;
88138
let exit_event_fd = backend.exit_event(thread_id);
139+
let fd_set = Mutex::new(HashSet::new());
89140

141+
let registry = poller
142+
.registry()
143+
.try_clone()
144+
.map_err(VringPollError::RegistryClone)?;
90145
if let Some(exit_event_fd) = &exit_event_fd {
91146
let id = backend.num_queues();
92-
epoll
93-
.ctl(
94-
ControlOperation::Add,
95-
exit_event_fd.as_raw_fd(),
96-
EpollEvent::new(EventSet::IN, id as u64),
147+
148+
registry
149+
.register(
150+
&mut SourceFd(&exit_event_fd.as_raw_fd()),
151+
Token(id),
152+
Interest::READABLE,
97153
)
98-
.map_err(VringEpollError::RegisterExitEvent)?;
154+
.map_err(VringPollError::RegisterExitEvent)?;
155+
156+
fd_set.lock().unwrap().insert(exit_event_fd.as_raw_fd());
99157
}
100158

101159
Ok(VringEpollHandler {
102-
epoll,
160+
poller: Mutex::new(poller),
161+
registry,
162+
fd_set,
103163
backend,
104164
vrings,
105165
thread_id,
@@ -135,13 +195,27 @@ where
135195
}
136196

137197
pub(crate) fn register_event(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> {
138-
self.epoll
139-
.ctl(ControlOperation::Add, fd, EpollEvent::new(ev_type, data))
198+
let mut fd_set = self.fd_set.lock().unwrap();
199+
if fd_set.contains(&fd) {
200+
return Err(io::Error::from_raw_os_error(libc::EEXIST));
201+
}
202+
self.registry
203+
.register(&mut SourceFd(&fd), Token(data as usize), ev_type.into())
204+
.map_err(std::io::Error::other)?;
205+
fd_set.insert(fd);
206+
Ok(())
140207
}
141208

142-
pub(crate) fn unregister_event(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> {
143-
self.epoll
144-
.ctl(ControlOperation::Delete, fd, EpollEvent::new(ev_type, data))
209+
pub(crate) fn unregister_event(&self, fd: RawFd, _ev_type: EventSet, _data: u64) -> Result<()> {
210+
let mut fd_set = self.fd_set.lock().unwrap();
211+
if !fd_set.contains(&fd) {
212+
return Err(io::Error::from_raw_os_error(libc::ENOENT));
213+
}
214+
self.registry
215+
.deregister(&mut SourceFd(&fd))
216+
.map_err(|e| std::io::Error::other(format!("Failed to deregister fd {}: {}", fd, e)))?;
217+
fd_set.remove(&fd);
218+
Ok(())
145219
}
146220

147221
/// Run the event poll loop to handle all pending events on registered fds.
@@ -150,49 +224,28 @@ where
150224
/// associated with the backend.
151225
pub(crate) fn run(&self) -> VringEpollResult<()> {
152226
const EPOLL_EVENTS_LEN: usize = 100;
153-
let mut events = vec![EpollEvent::new(EventSet::empty(), 0); EPOLL_EVENTS_LEN];
154-
155-
'epoll: loop {
156-
let num_events = match self.epoll.wait(-1, &mut events[..]) {
157-
Ok(res) => res,
158-
Err(e) => {
159-
if e.kind() == io::ErrorKind::Interrupted {
160-
// It's well defined from the epoll_wait() syscall
161-
// documentation that the epoll loop can be interrupted
162-
// before any of the requested events occurred or the
163-
// timeout expired. In both those cases, epoll_wait()
164-
// returns an error of type EINTR, but this should not
165-
// be considered as a regular error. Instead it is more
166-
// appropriate to retry, by calling into epoll_wait().
167-
continue;
168-
}
169-
return Err(VringEpollError::EpollWait(e));
170-
}
171-
};
172-
173-
for event in events.iter().take(num_events) {
174-
let evset = match EventSet::from_bits(event.events) {
175-
Some(evset) => evset,
176-
None => {
177-
let evbits = event.events;
178-
println!("epoll: ignoring unknown event set: 0x{evbits:x}");
179-
continue;
180-
}
181-
};
182-
183-
let ev_type = event.data() as u16;
184-
185-
// handle_event() returns true if an event is received from the exit event fd.
186-
if self.handle_event(ev_type, evset)? {
187-
break 'epoll;
227+
228+
let mut events = Events::with_capacity(EPOLL_EVENTS_LEN);
229+
'poll: loop {
230+
self.poller
231+
.lock()
232+
.unwrap()
233+
.poll(&mut events, None)
234+
.map_err(VringPollError::PollerWait)?;
235+
236+
for event in events.iter() {
237+
let token = event.token();
238+
239+
if self.handle_event(token.0 as u16, event.into())? {
240+
break 'poll;
188241
}
189242
}
190243
}
191244

192245
Ok(())
193246
}
194247

195-
fn handle_event(&self, device_event: u16, evset: EventSet) -> VringEpollResult<bool> {
248+
fn handle_event(&self, device_event: u16, event: EventSet) -> VringEpollResult<bool> {
196249
if self.exit_event_fd.is_some() && device_event as usize == self.backend.num_queues() {
197250
return Ok(true);
198251
}
@@ -201,7 +254,7 @@ where
201254
let vring = &self.vrings[device_event as usize];
202255
let enabled = vring
203256
.read_kick()
204-
.map_err(VringEpollError::HandleEventReadKick)?;
257+
.map_err(VringPollError::HandleEventReadKick)?;
205258

206259
// If the vring is not enabled, it should not be processed.
207260
if !enabled {
@@ -210,16 +263,16 @@ where
210263
}
211264

212265
self.backend
213-
.handle_event(device_event, evset, &self.vrings, self.thread_id)
214-
.map_err(VringEpollError::HandleEventBackendHandling)?;
266+
.handle_event(device_event, event, &self.vrings, self.thread_id)
267+
.map_err(VringPollError::HandleEventBackendHandling)?;
215268

216269
Ok(false)
217270
}
218271
}
219272

220273
impl<T: VhostUserBackend> AsRawFd for VringEpollHandler<T> {
221274
fn as_raw_fd(&self) -> RawFd {
222-
self.epoll.as_raw_fd()
275+
self.poller.lock().unwrap().as_raw_fd()
223276
}
224277
}
225278

@@ -244,29 +297,32 @@ mod tests {
244297

245298
let eventfd = EventFd::new(0).unwrap();
246299
handler
247-
.register_listener(eventfd.as_raw_fd(), EventSet::IN, 3)
300+
.register_listener(eventfd.as_raw_fd(), EventSet::READABLE, 3)
248301
.unwrap();
249302
// Register an already registered fd.
250303
handler
251-
.register_listener(eventfd.as_raw_fd(), EventSet::IN, 3)
304+
.register_listener(eventfd.as_raw_fd(), EventSet::READABLE, 3)
252305
.unwrap_err();
253306
// Register an invalid data.
254307
handler
255-
.register_listener(eventfd.as_raw_fd(), EventSet::IN, 1)
308+
.register_listener(eventfd.as_raw_fd(), EventSet::READABLE, 1)
256309
.unwrap_err();
257310

258311
handler
259-
.unregister_listener(eventfd.as_raw_fd(), EventSet::IN, 3)
312+
.unregister_listener(eventfd.as_raw_fd(), EventSet::READABLE, 3)
260313
.unwrap();
261314
// unregister an already unregistered fd.
262315
handler
263-
.unregister_listener(eventfd.as_raw_fd(), EventSet::IN, 3)
316+
.unregister_listener(eventfd.as_raw_fd(), EventSet::READABLE, 3)
264317
.unwrap_err();
265318
// unregister an invalid data.
266319
handler
267-
.unregister_listener(eventfd.as_raw_fd(), EventSet::IN, 1)
320+
.unregister_listener(eventfd.as_raw_fd(), EventSet::READABLE, 1)
268321
.unwrap_err();
269322
// Check we retrieve the correct file descriptor
270-
assert_eq!(handler.as_raw_fd(), handler.epoll.as_raw_fd());
323+
assert_eq!(
324+
handler.as_raw_fd(),
325+
handler.poller.lock().unwrap().as_raw_fd()
326+
);
271327
}
272328
}

0 commit comments

Comments
 (0)