diff --git a/vhost-user-backend/CHANGELOG.md b/vhost-user-backend/CHANGELOG.md index 4c3e5fca..f9db1665 100644 --- a/vhost-user-backend/CHANGELOG.md +++ b/vhost-user-backend/CHANGELOG.md @@ -6,6 +6,7 @@ ### Changed - [[#308](https://github.com/rust-vmm/vhost/pull/308)] Replace Eventfd with EventNotifier/EventConsumer. - [[#321](https://github.com/rust-vmm/vhost/pull/321)] Don't take ownership of listener in `VhostUserDaemon::start`. +- [[316]](https://github.com/rust-vmm/vhost/pull/316) Use mio to replace Epoll. Expose event_loop::EventSet. ### Deprecated ### Fixed diff --git a/vhost-user-backend/Cargo.toml b/vhost-user-backend/Cargo.toml index 9e380a2d..546985ba 100644 --- a/vhost-user-backend/Cargo.toml +++ b/vhost-user-backend/Cargo.toml @@ -16,6 +16,7 @@ postcopy = ["vhost/postcopy", "userfaultfd"] libc = "0.2.39" log = "0.4.17" userfaultfd = { version = "0.9.0", optional = true } +mio = { version = "1.0.4", features = ["os-poll", "os-ext"] } vhost = { path = "../vhost", version = "0.14.0", features = ["vhost-user-backend"] } virtio-bindings = { workspace = true } virtio-queue = { workspace = true } diff --git a/vhost-user-backend/src/backend.rs b/vhost-user-backend/src/backend.rs index 20e7daf3..267b6466 100644 --- a/vhost-user-backend/src/backend.rs +++ b/vhost-user-backend/src/backend.rs @@ -29,7 +29,6 @@ use vhost::vhost_user::message::{ }; use vhost::vhost_user::Backend; use vm_memory::bitmap::Bitmap; -use vmm_sys_util::epoll::EventSet; use vmm_sys_util::event::{EventConsumer, EventNotifier}; use vhost::vhost_user::GpuBackend; @@ -37,6 +36,8 @@ use vhost::vhost_user::GpuBackend; use super::vring::VringT; use super::GM; +use crate::EventSet; + /// Trait with interior mutability for vhost user backend servers to implement concrete services. /// /// To support multi-threading and asynchronous IO, we enforce `Send + Sync` bound. @@ -144,7 +145,7 @@ pub trait VhostUserBackend: Send + Sync { /// do with events happening on custom listeners. fn handle_event( &self, - device_event: u16, + device_event: usize, evset: EventSet, vrings: &[Self::Vring], thread_id: usize, @@ -288,7 +289,7 @@ pub trait VhostUserBackendMut: Send + Sync { /// do with events happening on custom listeners. fn handle_event( &mut self, - device_event: u16, + device_event: usize, evset: EventSet, vrings: &[Self::Vring], thread_id: usize, @@ -390,7 +391,7 @@ impl VhostUserBackend for Arc { fn handle_event( &self, - device_event: u16, + device_event: usize, evset: EventSet, vrings: &[Self::Vring], thread_id: usize, @@ -479,7 +480,7 @@ impl VhostUserBackend for Mutex { fn handle_event( &self, - device_event: u16, + device_event: usize, evset: EventSet, vrings: &[Self::Vring], thread_id: usize, @@ -571,7 +572,7 @@ impl VhostUserBackend for RwLock { fn handle_event( &self, - device_event: u16, + device_event: usize, evset: EventSet, vrings: &[Self::Vring], thread_id: usize, @@ -711,7 +712,7 @@ pub mod tests { fn handle_event( &mut self, - _device_event: u16, + _device_event: usize, _evset: EventSet, _vrings: &[VringRwLock], _thread_id: usize, @@ -798,7 +799,7 @@ pub mod tests { let vring = VringRwLock::new(mem, 0x1000).unwrap(); backend - .handle_event(0x1, EventSet::IN, &[vring], 0) + .handle_event(0x1, EventSet::Readable, &[vring], 0) .unwrap(); backend.reset_device(); diff --git a/vhost-user-backend/src/event_loop.rs b/vhost-user-backend/src/event_loop.rs index 09e91042..0aeedc42 100644 --- a/vhost-user-backend/src/event_loop.rs +++ b/vhost-user-backend/src/event_loop.rs @@ -3,13 +3,17 @@ // // SPDX-License-Identifier: Apache-2.0 +use std::collections::HashSet; use std::fmt::{Display, Formatter}; use std::io::{self, Result}; use std::marker::PhantomData; use std::os::fd::IntoRawFd; use std::os::unix::io::{AsRawFd, RawFd}; +use std::sync::Mutex; -use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet}; +use mio::event::Event; +use mio::unix::SourceFd; +use mio::{Events, Interest, Poll, Registry, Token}; use vmm_sys_util::event::EventNotifier; use super::backend::VhostUserBackend; @@ -17,39 +21,72 @@ use super::vring::VringT; /// Errors related to vring epoll event handling. #[derive(Debug)] -pub enum VringEpollError { +pub enum VringPollError { /// Failed to create epoll file descriptor. - EpollCreateFd(io::Error), + PollerCreate(io::Error), /// Failed while waiting for events. - EpollWait(io::Error), + PollerWait(io::Error), /// Could not register exit event RegisterExitEvent(io::Error), /// Failed to read the event from kick EventFd. HandleEventReadKick(io::Error), /// Failed to handle the event from the backend. HandleEventBackendHandling(io::Error), + /// Failed to clone registry. + RegistryClone(io::Error), } -impl Display for VringEpollError { +impl Display for VringPollError { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { match self { - VringEpollError::EpollCreateFd(e) => write!(f, "cannot create epoll fd: {e}"), - VringEpollError::EpollWait(e) => write!(f, "failed to wait for epoll event: {e}"), - VringEpollError::RegisterExitEvent(e) => write!(f, "cannot register exit event: {e}"), - VringEpollError::HandleEventReadKick(e) => { + VringPollError::PollerCreate(e) => write!(f, "cannot create poller: {e}"), + VringPollError::PollerWait(e) => write!(f, "failed to wait for poller event: {e}"), + VringPollError::RegisterExitEvent(e) => write!(f, "cannot register exit event: {e}"), + VringPollError::HandleEventReadKick(e) => { write!(f, "cannot read vring kick event: {e}") } - VringEpollError::HandleEventBackendHandling(e) => { - write!(f, "failed to handle epoll event: {e}") + VringPollError::HandleEventBackendHandling(e) => { + write!(f, "failed to handle poll event: {e}") } + VringPollError::RegistryClone(e) => write!(f, "cannot clone poller's registry: {e}"), } } } -impl std::error::Error for VringEpollError {} +impl std::error::Error for VringPollError {} /// Result of vring epoll operations. -pub type VringEpollResult = std::result::Result; +pub type VringEpollResult = std::result::Result; + +#[derive(Debug, Clone, Copy)] +pub enum EventSet { + Readable, + Writable, + All, +} + +impl EventSet { + fn to_interest(self) -> Interest { + match self { + EventSet::Readable => Interest::READABLE, + EventSet::Writable => Interest::WRITABLE, + EventSet::All => Interest::READABLE | Interest::WRITABLE, + } + } +} + +fn event_to_event_set(evt: &Event) -> Option { + if evt.is_readable() && evt.is_writable() { + return Some(EventSet::All); + } + if evt.is_readable() { + return Some(EventSet::Readable); + } + if evt.is_writable() { + return Some(EventSet::Writable); + } + None +} /// Epoll event handler to manage and process epoll events for registered file descriptor. /// @@ -58,7 +95,11 @@ pub type VringEpollResult = std::result::Result; /// - remove registered file descriptors from the epoll fd /// - run the event loop to handle pending events on the epoll fd pub struct VringEpollHandler { - epoll: Epoll, + poller: Mutex, + registry: Registry, + // Record the registered fd. + // Because in mio, consecutive calls to register is unspecified behavior. + fd_set: Mutex>, backend: T, vrings: Vec, thread_id: usize, @@ -85,25 +126,35 @@ where vrings: Vec, thread_id: usize, ) -> VringEpollResult { - let epoll = Epoll::new().map_err(VringEpollError::EpollCreateFd)?; + let poller = Poll::new().map_err(VringPollError::PollerCreate)?; let exit_event_fd = backend.exit_event(thread_id); + let fd_set = Mutex::new(HashSet::new()); + let registry = poller + .registry() + .try_clone() + .map_err(VringPollError::RegistryClone)?; let exit_event_fd = if let Some((consumer, notifier)) = exit_event_fd { let id = backend.num_queues(); - epoll - .ctl( - ControlOperation::Add, - consumer.into_raw_fd(), - EpollEvent::new(EventSet::IN, id as u64), + + registry + .register( + &mut SourceFd(&consumer.as_raw_fd()), + Token(id), + Interest::READABLE, ) - .map_err(VringEpollError::RegisterExitEvent)?; + .map_err(VringPollError::RegisterExitEvent)?; + + fd_set.lock().unwrap().insert(consumer.into_raw_fd()); Some(notifier) } else { None }; Ok(VringEpollHandler { - epoll, + poller: Mutex::new(poller), + registry, + fd_set, backend, vrings, thread_id, @@ -116,9 +167,9 @@ where /// /// When this event is later triggered, the backend implementation of `handle_event` will be /// called. - pub fn register_listener(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> { + pub fn register_listener(&self, fd: RawFd, ev_type: EventSet, data: usize) -> Result<()> { // `data` range [0...num_queues] is reserved for queues and exit event. - if data <= self.backend.num_queues() as u64 { + if data <= self.backend.num_queues() { Err(io::Error::from_raw_os_error(libc::EINVAL)) } else { self.register_event(fd, ev_type, data) @@ -129,23 +180,37 @@ where /// /// If the event is triggered after this function has been called, the event will be silently /// dropped. - pub fn unregister_listener(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> { + pub fn unregister_listener(&self, fd: RawFd, data: usize) -> Result<()> { // `data` range [0...num_queues] is reserved for queues and exit event. - if data <= self.backend.num_queues() as u64 { + if data <= self.backend.num_queues() { Err(io::Error::from_raw_os_error(libc::EINVAL)) } else { - self.unregister_event(fd, ev_type, data) + self.unregister_event(fd) } } - pub(crate) fn register_event(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> { - self.epoll - .ctl(ControlOperation::Add, fd, EpollEvent::new(ev_type, data)) + pub(crate) fn register_event(&self, fd: RawFd, ev_type: EventSet, data: usize) -> Result<()> { + let mut fd_set = self.fd_set.lock().unwrap(); + if fd_set.contains(&fd) { + return Err(io::Error::from_raw_os_error(libc::EEXIST)); + } + self.registry + .register(&mut SourceFd(&fd), Token(data), ev_type.to_interest()) + .map_err(std::io::Error::other)?; + fd_set.insert(fd); + Ok(()) } - pub(crate) fn unregister_event(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> { - self.epoll - .ctl(ControlOperation::Delete, fd, EpollEvent::new(ev_type, data)) + pub(crate) fn unregister_event(&self, fd: RawFd) -> Result<()> { + let mut fd_set = self.fd_set.lock().unwrap(); + if !fd_set.contains(&fd) { + return Err(io::Error::from_raw_os_error(libc::ENOENT)); + } + self.registry + .deregister(&mut SourceFd(&fd)) + .map_err(|e| std::io::Error::other(format!("Failed to deregister fd {fd}: {e}")))?; + fd_set.remove(&fd); + Ok(()) } /// Run the event poll loop to handle all pending events on registered fds. @@ -154,41 +219,22 @@ where /// associated with the backend. pub(crate) fn run(&self) -> VringEpollResult<()> { const EPOLL_EVENTS_LEN: usize = 100; - let mut events = vec![EpollEvent::new(EventSet::empty(), 0); EPOLL_EVENTS_LEN]; - - 'epoll: loop { - let num_events = match self.epoll.wait(-1, &mut events[..]) { - Ok(res) => res, - Err(e) => { - if e.kind() == io::ErrorKind::Interrupted { - // It's well defined from the epoll_wait() syscall - // documentation that the epoll loop can be interrupted - // before any of the requested events occurred or the - // timeout expired. In both those cases, epoll_wait() - // returns an error of type EINTR, but this should not - // be considered as a regular error. Instead it is more - // appropriate to retry, by calling into epoll_wait(). - continue; - } - return Err(VringEpollError::EpollWait(e)); - } - }; - - for event in events.iter().take(num_events) { - let evset = match EventSet::from_bits(event.events) { - Some(evset) => evset, - None => { - let evbits = event.events; - println!("epoll: ignoring unknown event set: 0x{evbits:x}"); - continue; - } - }; - let ev_type = event.data() as u16; + let mut events = Events::with_capacity(EPOLL_EVENTS_LEN); + 'poll: loop { + self.poller + .lock() + .unwrap() + .poll(&mut events, None) + .map_err(VringPollError::PollerWait)?; - // handle_event() returns true if an event is received from the exit event fd. - if self.handle_event(ev_type, evset)? { - break 'epoll; + for event in &events { + let token = event.token(); + + if let Some(evt_set) = event_to_event_set(event) { + if self.handle_event(token.0, evt_set)? { + break 'poll; + } } } } @@ -196,16 +242,16 @@ where Ok(()) } - fn handle_event(&self, device_event: u16, evset: EventSet) -> VringEpollResult { - if self.exit_event_fd.is_some() && device_event as usize == self.backend.num_queues() { + fn handle_event(&self, device_event: usize, evset: EventSet) -> VringEpollResult { + if self.exit_event_fd.is_some() && device_event == self.backend.num_queues() { return Ok(true); } - if (device_event as usize) < self.vrings.len() { - let vring = &self.vrings[device_event as usize]; + if device_event < self.vrings.len() { + let vring = &self.vrings[device_event]; let enabled = vring .read_kick() - .map_err(VringEpollError::HandleEventReadKick)?; + .map_err(VringPollError::HandleEventReadKick)?; // If the vring is not enabled, it should not be processed. if !enabled { @@ -215,7 +261,7 @@ where self.backend .handle_event(device_event, evset, &self.vrings, self.thread_id) - .map_err(VringEpollError::HandleEventBackendHandling)?; + .map_err(VringPollError::HandleEventBackendHandling)?; Ok(false) } @@ -223,7 +269,7 @@ where impl AsRawFd for VringEpollHandler { fn as_raw_fd(&self) -> RawFd { - self.epoll.as_raw_fd() + self.poller.lock().unwrap().as_raw_fd() } } @@ -248,29 +294,32 @@ mod tests { let (consumer, _notifier) = new_event_consumer_and_notifier(EventFlag::empty()).unwrap(); handler - .register_listener(consumer.as_raw_fd(), EventSet::IN, 3) + .register_listener(consumer.as_raw_fd(), EventSet::Readable, 3) .unwrap(); // Register an already registered fd. handler - .register_listener(consumer.as_raw_fd(), EventSet::IN, 3) + .register_listener(consumer.as_raw_fd(), EventSet::Readable, 3) .unwrap_err(); // Register an invalid data. handler - .register_listener(consumer.as_raw_fd(), EventSet::IN, 1) + .register_listener(consumer.as_raw_fd(), EventSet::Readable, 1) .unwrap_err(); handler - .unregister_listener(consumer.as_raw_fd(), EventSet::IN, 3) + .unregister_listener(consumer.as_raw_fd(), 3) .unwrap(); // unregister an already unregistered fd. handler - .unregister_listener(consumer.as_raw_fd(), EventSet::IN, 3) + .unregister_listener(consumer.as_raw_fd(), 3) .unwrap_err(); // unregister an invalid data. handler - .unregister_listener(consumer.as_raw_fd(), EventSet::IN, 1) + .unregister_listener(consumer.as_raw_fd(), 1) .unwrap_err(); // Check we retrieve the correct file descriptor - assert_eq!(handler.as_raw_fd(), handler.epoll.as_raw_fd()); + assert_eq!( + handler.as_raw_fd(), + handler.poller.lock().unwrap().as_raw_fd() + ); } } diff --git a/vhost-user-backend/src/handler.rs b/vhost-user-backend/src/handler.rs index 33008756..384ad9cf 100644 --- a/vhost-user-backend/src/handler.rs +++ b/vhost-user-backend/src/handler.rs @@ -14,6 +14,7 @@ use std::sync::Arc; use std::thread; use crate::bitmap::{BitmapReplace, MemRegionBitmap, MmapLogReg}; +use crate::event_loop::EventSet; #[cfg(feature = "postcopy")] use userfaultfd::{Uffd, UffdBuilder}; use vhost::vhost_user::message::{ @@ -31,11 +32,10 @@ use virtio_bindings::bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX; use virtio_queue::{Error as VirtQueError, QueueT}; use vm_memory::mmap::NewBitmap; use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryMmap, GuestRegionMmap}; -use vmm_sys_util::epoll::EventSet; use super::backend::VhostUserBackend; use super::event_loop::VringEpollHandler; -use super::event_loop::{VringEpollError, VringEpollResult}; +use super::event_loop::{VringEpollResult, VringPollError}; use super::vring::VringT; use super::GM; @@ -50,7 +50,7 @@ pub enum VhostUserHandlerError { /// Failed to create a `Vring`. CreateVring(VirtQueError), /// Failed to create vring worker. - CreateEpollHandler(VringEpollError), + CreateEpollHandler(VringPollError), /// Failed to spawn vring worker. SpawnVringWorker(io::Error), /// Could not find the mapping from memory regions. @@ -212,7 +212,7 @@ where if shifted_queues_mask & 1u64 == 1u64 { let evt_idx = queues_mask.count_ones() - shifted_queues_mask.count_ones(); self.handlers[thread_index] - .register_event(fd.as_raw_fd(), EventSet::IN, u64::from(evt_idx)) + .register_event(fd.as_raw_fd(), EventSet::Readable, evt_idx as usize) .map_err(VhostUserError::ReqHandlerError)?; break; } @@ -434,9 +434,8 @@ where for (thread_index, queues_mask) in self.queues_per_thread.iter().enumerate() { let shifted_queues_mask = queues_mask >> index; if shifted_queues_mask & 1u64 == 1u64 { - let evt_idx = queues_mask.count_ones() - shifted_queues_mask.count_ones(); self.handlers[thread_index] - .unregister_event(fd.as_raw_fd(), EventSet::IN, u64::from(evt_idx)) + .unregister_event(fd.as_raw_fd()) .map_err(VhostUserError::ReqHandlerError)?; break; } diff --git a/vhost-user-backend/src/lib.rs b/vhost-user-backend/src/lib.rs index 7e1db9e4..c15518e0 100644 --- a/vhost-user-backend/src/lib.rs +++ b/vhost-user-backend/src/lib.rs @@ -23,7 +23,7 @@ mod backend; pub use self::backend::{VhostUserBackend, VhostUserBackendMut}; mod event_loop; -pub use self::event_loop::VringEpollHandler; +pub use self::event_loop::{EventSet, VringEpollHandler}; mod handler; pub use self::handler::VhostUserHandlerError; diff --git a/vhost-user-backend/tests/vhost-user-server.rs b/vhost-user-backend/tests/vhost-user-server.rs index 3c8205a5..a78e5a84 100644 --- a/vhost-user-backend/tests/vhost-user-server.rs +++ b/vhost-user-backend/tests/vhost-user-server.rs @@ -13,11 +13,10 @@ use vhost::vhost_user::message::{ }; use vhost::vhost_user::{Backend, Frontend, Listener, VhostUserFrontend}; use vhost::{VhostBackend, VhostUserMemoryRegionInfo, VringConfigData}; -use vhost_user_backend::{VhostUserBackendMut, VhostUserDaemon, VringRwLock}; +use vhost_user_backend::{EventSet, VhostUserBackendMut, VhostUserDaemon, VringRwLock}; use vm_memory::{ FileOffset, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic, GuestMemoryMmap, }; -use vmm_sys_util::epoll::EventSet; use vmm_sys_util::event::{ new_event_consumer_and_notifier, EventConsumer, EventFlag, EventNotifier, }; @@ -117,7 +116,7 @@ impl VhostUserBackendMut for MockVhostBackend { fn handle_event( &mut self, - _device_event: u16, + _device_event: usize, _evset: EventSet, _vrings: &[VringRwLock], _thread_id: usize,