Skip to content

Commit dfe87db

Browse files
bchaliosroypat
authored andcommitted
virtio: allow IoVecBufferMut to hold multiple DescriptorChain objects
Allow IoVecBufferMut objects to store multiple DescriptorChain objects, so that we can describe guest memory meant to be used for receiving data (for example memory used for network RX) as a single (sparse) memory region. This will allow us to always keep track all the available memory we have for performing RX and use `readv` for copying memory from the TAP device inside guest memory avoiding the extra copy. In the future, it will also facilitate the implementation of mergeable buffers for the RX path of the network device. Co-authored-by: Babis Chalios <[email protected]> Signed-off-by: Egor Lazarchuk <[email protected]>
1 parent d9b83ba commit dfe87db

File tree

8 files changed

+208
-40
lines changed

8 files changed

+208
-40
lines changed

src/vmm/src/devices/virtio/iovec.rs

Lines changed: 175 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use vm_memory::{
99
GuestMemory, GuestMemoryError, ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile,
1010
};
1111

12+
use super::iov_deque::{IovDeque, IovDequeError};
1213
use crate::devices::virtio::queue::DescriptorChain;
1314
use crate::vstate::memory::GuestMemoryMmap;
1415

@@ -20,8 +21,12 @@ pub enum IoVecError {
2021
ReadOnlyDescriptor,
2122
/// Tried to create an `IoVec` or `IoVecMut` from a descriptor chain that was too large
2223
OverflowedDescriptor,
24+
/// Tried to push to full IovDeque.
25+
IovDequeOverflow,
2326
/// Guest memory error: {0}
2427
GuestMemory(#[from] GuestMemoryError),
28+
/// Error with underlying `IovDeque`: {0}
29+
IovDeque(#[from] IovDequeError),
2530
}
2631

2732
/// This is essentially a wrapper of a `Vec<libc::iovec>` which can be passed to `libc::writev`.
@@ -205,16 +210,26 @@ impl IoVecBuffer {
205210
}
206211
}
207212

213+
#[derive(Debug)]
214+
pub struct ParsedDescriptorChain {
215+
pub head_index: u16,
216+
pub length: u32,
217+
pub nr_iovecs: u16,
218+
}
219+
208220
/// This is essentially a wrapper of a `Vec<libc::iovec>` which can be passed to `libc::readv`.
209221
///
210222
/// It describes a write-only buffer passed to us by the guest that is scattered across multiple
211223
/// memory regions. Additionally, this wrapper provides methods that allow reading arbitrary ranges
212224
/// of data from that buffer.
213-
#[derive(Debug, Default)]
225+
#[derive(Debug)]
214226
pub struct IoVecBufferMut {
215227
// container of the memory regions included in this IO vector
216-
vecs: Vec<iovec>,
228+
vecs: IovDeque,
217229
// Total length of the IoVecBufferMut
230+
// We use `u32` here because we use this type in devices which
231+
// should not give us huge buffers. In any case this
232+
// value will not overflow as we explicitly check for this case.
218233
len: u32,
219234
}
220235

@@ -223,50 +238,113 @@ pub struct IoVecBufferMut {
223238
unsafe impl Send for IoVecBufferMut {}
224239

225240
impl IoVecBufferMut {
226-
/// Create an `IoVecBuffer` from a `DescriptorChain`
241+
/// Append a `DescriptorChain` in this `IoVecBufferMut`
227242
///
228243
/// # Safety
229244
///
230245
/// The descriptor chain cannot be referencing the same memory location as another chain
231-
pub unsafe fn load_descriptor_chain(
246+
pub unsafe fn append_descriptor_chain(
232247
&mut self,
233248
mem: &GuestMemoryMmap,
234249
head: DescriptorChain,
235-
) -> Result<(), IoVecError> {
236-
self.clear();
237-
250+
) -> Result<ParsedDescriptorChain, IoVecError> {
251+
let head_index = head.index;
238252
let mut next_descriptor = Some(head);
253+
let mut length = 0u32;
254+
let mut nr_iovecs = 0u16;
239255
while let Some(desc) = next_descriptor {
240256
if !desc.is_write_only() {
257+
self.vecs.pop_back(nr_iovecs);
241258
return Err(IoVecError::ReadOnlyDescriptor);
242259
}
243260

244261
// We use get_slice instead of `get_host_address` here in order to have the whole
245262
// range of the descriptor chain checked, i.e. [addr, addr + len) is a valid memory
246263
// region in the GuestMemoryMmap.
247-
let slice = mem.get_slice(desc.addr, desc.len as usize)?;
248-
264+
let slice = mem.get_slice(desc.addr, desc.len as usize).map_err(|err| {
265+
self.vecs.pop_back(nr_iovecs);
266+
err
267+
})?;
249268
// We need to mark the area of guest memory that will be mutated through this
250269
// IoVecBufferMut as dirty ahead of time, as we loose access to all
251270
// vm-memory related information after converting down to iovecs.
252271
slice.bitmap().mark_dirty(0, desc.len as usize);
253-
254272
let iov_base = slice.ptr_guard_mut().as_ptr().cast::<c_void>();
255-
self.vecs.push(iovec {
273+
274+
if self.vecs.is_full() {
275+
self.vecs.pop_back(nr_iovecs);
276+
return Err(IoVecError::IovDequeOverflow);
277+
}
278+
279+
self.vecs.push_back(iovec {
256280
iov_base,
257281
iov_len: desc.len as size_t,
258282
});
259-
self.len = self
260-
.len
283+
284+
nr_iovecs += 1;
285+
length = length
261286
.checked_add(desc.len)
262-
.ok_or(IoVecError::OverflowedDescriptor)?;
287+
.ok_or(IoVecError::OverflowedDescriptor)
288+
.map_err(|err| {
289+
self.vecs.pop_back(nr_iovecs);
290+
err
291+
})?;
263292

264293
next_descriptor = desc.next_descriptor();
265294
}
266295

296+
self.len = self.len.checked_add(length).ok_or_else(|| {
297+
self.vecs.pop_back(nr_iovecs);
298+
IoVecError::OverflowedDescriptor
299+
})?;
300+
301+
Ok(ParsedDescriptorChain {
302+
head_index,
303+
length,
304+
nr_iovecs,
305+
})
306+
}
307+
308+
/// Create an empty `IoVecBufferMut`.
309+
pub fn new() -> Result<Self, IovDequeError> {
310+
let vecs = IovDeque::new()?;
311+
Ok(Self { vecs, len: 0 })
312+
}
313+
314+
/// Create an `IoVecBufferMut` from a `DescriptorChain`
315+
///
316+
/// This will clear any previous `iovec` objects in the buffer and load the new
317+
/// [`DescriptorChain`].
318+
///
319+
/// # Safety
320+
///
321+
/// The descriptor chain cannot be referencing the same memory location as another chain
322+
pub unsafe fn load_descriptor_chain(
323+
&mut self,
324+
mem: &GuestMemoryMmap,
325+
head: DescriptorChain,
326+
) -> Result<(), IoVecError> {
327+
self.clear();
328+
let _ = self.append_descriptor_chain(mem, head)?;
267329
Ok(())
268330
}
269331

332+
/// Drop descriptor chain from the `IoVecBufferMut` front
333+
///
334+
/// This will drop memory described by the `IoVecBufferMut` from the beginning.
335+
pub fn drop_chain_front(&mut self, parse_descriptor: &ParsedDescriptorChain) {
336+
self.vecs.pop_front(parse_descriptor.nr_iovecs);
337+
self.len -= parse_descriptor.length;
338+
}
339+
340+
/// Drop descriptor chain from the `IoVecBufferMut` back
341+
///
342+
/// This will drop memory described by the `IoVecBufferMut` from the beginning.
343+
pub fn drop_chain_back(&mut self, parse_descriptor: &ParsedDescriptorChain) {
344+
self.vecs.pop_back(parse_descriptor.nr_iovecs);
345+
self.len -= parse_descriptor.length;
346+
}
347+
270348
/// Create an `IoVecBuffer` from a `DescriptorChain`
271349
///
272350
/// # Safety
@@ -276,20 +354,32 @@ impl IoVecBufferMut {
276354
mem: &GuestMemoryMmap,
277355
head: DescriptorChain,
278356
) -> Result<Self, IoVecError> {
279-
let mut new_buffer = Self::default();
357+
let mut new_buffer = Self::new()?;
280358
new_buffer.load_descriptor_chain(mem, head)?;
281359
Ok(new_buffer)
282360
}
283361

284362
/// Get the total length of the memory regions covered by this `IoVecBuffer`
285-
pub(crate) fn len(&self) -> u32 {
363+
#[inline(always)]
364+
pub fn len(&self) -> u32 {
286365
self.len
287366
}
288367

368+
/// Returns true if buffer is empty.
369+
#[inline(always)]
370+
pub fn is_empty(&self) -> bool {
371+
self.len == 0
372+
}
373+
374+
/// Returns a pointer to the memory keeping the `iovec` structs
375+
pub fn as_iovec_mut_slice(&mut self) -> &mut [iovec] {
376+
self.vecs.as_mut_slice()
377+
}
378+
289379
/// Clears the `iovec` array
290380
pub fn clear(&mut self) {
291381
self.vecs.clear();
292-
self.len = 0u32;
382+
self.len = 0;
293383
}
294384

295385
/// Writes a number of bytes into the `IoVecBufferMut` starting at a given offset.
@@ -337,7 +427,7 @@ impl IoVecBufferMut {
337427
) -> Result<usize, VolatileMemoryError> {
338428
let mut total_bytes_read = 0;
339429

340-
for iov in &self.vecs {
430+
for iov in self.vecs.as_slice() {
341431
if len == 0 {
342432
break;
343433
}
@@ -381,11 +471,13 @@ impl IoVecBufferMut {
381471
}
382472

383473
#[cfg(test)]
474+
#[allow(clippy::cast_possible_truncation)]
384475
mod tests {
385476
use libc::{c_void, iovec};
386477
use vm_memory::VolatileMemoryError;
387478

388479
use super::{IoVecBuffer, IoVecBufferMut};
480+
use crate::devices::virtio::iov_deque::IovDeque;
389481
use crate::devices::virtio::queue::{Queue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE};
390482
use crate::devices::virtio::test_utils::VirtQueue;
391483
use crate::test_utils::multi_region_mem;
@@ -423,16 +515,36 @@ mod tests {
423515

424516
impl From<&mut [u8]> for IoVecBufferMut {
425517
fn from(buf: &mut [u8]) -> Self {
518+
let mut vecs = IovDeque::new().unwrap();
519+
vecs.push_back(iovec {
520+
iov_base: buf.as_mut_ptr().cast::<c_void>(),
521+
iov_len: buf.len(),
522+
});
523+
426524
Self {
427-
vecs: vec![iovec {
428-
iov_base: buf.as_mut_ptr().cast::<c_void>(),
429-
iov_len: buf.len(),
430-
}],
431-
len: buf.len().try_into().unwrap(),
525+
vecs,
526+
len: buf.len() as u32,
432527
}
433528
}
434529
}
435530

531+
impl From<Vec<&mut [u8]>> for IoVecBufferMut {
532+
fn from(buffer: Vec<&mut [u8]>) -> Self {
533+
let mut len = 0;
534+
let mut vecs = IovDeque::new().unwrap();
535+
for slice in buffer {
536+
len += slice.len() as u32;
537+
538+
vecs.push_back(iovec {
539+
iov_base: slice.as_ptr() as *mut c_void,
540+
iov_len: slice.len(),
541+
});
542+
}
543+
544+
Self { vecs, len }
545+
}
546+
}
547+
436548
fn default_mem() -> GuestMemoryMmap {
437549
multi_region_mem(&[
438550
(GuestAddress(0), 0x10000),
@@ -521,8 +633,19 @@ mod tests {
521633
let head = q.pop().unwrap();
522634

523635
// SAFETY: This descriptor chain is only loaded once in this test
524-
let iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
636+
let mut iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
525637
assert_eq!(iovec.len(), 4 * 64);
638+
639+
// We are creating a new queue where we can get descriptors from. Probably, this is not
640+
// something that we will ever want to do, as `IoVecBufferMut`s are typically
641+
// (concpetually) associated with a single `Queue`. We just do this here to be able to test
642+
// the appending logic.
643+
let (mut q, _) = write_only_chain(&mem);
644+
let head = q.pop().unwrap();
645+
// SAFETY: it is actually unsafe, but we just want to check the length of the
646+
// `IoVecBufferMut` after appending.
647+
let _ = unsafe { iovec.append_descriptor_chain(&mem, head).unwrap() };
648+
assert_eq!(iovec.len(), 8 * 64);
526649
}
527650

528651
#[test]
@@ -680,6 +803,7 @@ mod verification {
680803
use vm_memory::VolatileSlice;
681804

682805
use super::{IoVecBuffer, IoVecBufferMut};
806+
use crate::devices::virtio::iov_deque::IovDeque;
683807

684808
// Maximum memory size to use for our buffers. For the time being 1KB.
685809
const GUEST_MEMORY_SIZE: usize = 1 << 10;
@@ -695,10 +819,10 @@ mod verification {
695819
let mut vecs: Vec<iovec> = Vec::with_capacity(nr_descs);
696820
let mut len = 0u32;
697821
for _ in 0..nr_descs {
698-
// The `IoVecBuffer(Mut)` constructors ensure that the memory region described by every
822+
// The `IoVecBuffer` constructors ensure that the memory region described by every
699823
// `Descriptor` in the chain is a valid, i.e. it is memory with then guest's memory
700824
// mmap. The assumption, here, that the last address is within the memory object's
701-
// bound substitutes these checks that `IoVecBuffer(Mut)::new() performs.`
825+
// bound substitutes these checks that `IoVecBuffer::new() performs.`
702826
let addr: usize = kani::any();
703827
let iov_len: usize =
704828
kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size));
@@ -721,6 +845,26 @@ mod verification {
721845
}
722846
}
723847

848+
fn create_iovecs_mut(mem: *mut u8, size: usize, nr_descs: usize) -> (IovDeque, u32) {
849+
let mut vecs = IovDeque::new().unwrap();
850+
let mut len = 0u32;
851+
for _ in 0..nr_descs {
852+
// The `IoVecBufferMut` constructors ensure that the memory region described by every
853+
// `Descriptor` in the chain is a valid, i.e. it is memory with then guest's memory
854+
// mmap. The assumption, here, that the last address is within the memory object's
855+
// bound substitutes these checks that `IoVecBufferMut::new() performs.`
856+
let addr: usize = kani::any();
857+
let iov_len: usize =
858+
kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size));
859+
let iov_base = unsafe { mem.offset(addr.try_into().unwrap()) } as *mut c_void;
860+
861+
vecs.push_back(iovec { iov_base, iov_len });
862+
len += u32::try_from(iov_len).unwrap();
863+
}
864+
865+
(vecs, len)
866+
}
867+
724868
impl IoVecBufferMut {
725869
fn any_of_length(nr_descs: usize) -> Self {
726870
// We only write into `IoVecBufferMut` objects, so we can simply create a guest memory
@@ -732,8 +876,11 @@ mod verification {
732876
))
733877
};
734878

735-
let (vecs, len) = create_iovecs(mem, GUEST_MEMORY_SIZE, nr_descs);
736-
Self { vecs, len }
879+
let (vecs, len) = create_iovecs_mut(mem, GUEST_MEMORY_SIZE, nr_descs);
880+
Self {
881+
vecs,
882+
len: len.try_into().unwrap(),
883+
}
737884
}
738885
}
739886

src/vmm/src/devices/virtio/rng/device.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use super::metrics::METRICS;
1313
use super::{RNG_NUM_QUEUES, RNG_QUEUE};
1414
use crate::devices::virtio::device::{DeviceState, IrqTrigger, IrqType, VirtioDevice};
1515
use crate::devices::virtio::gen::virtio_rng::VIRTIO_F_VERSION_1;
16+
use crate::devices::virtio::iov_deque::IovDequeError;
1617
use crate::devices::virtio::iovec::IoVecBufferMut;
1718
use crate::devices::virtio::queue::{Queue, FIRECRACKER_MAX_QUEUE_SIZE};
1819
use crate::devices::virtio::{ActivateError, TYPE_RNG};
@@ -31,6 +32,8 @@ pub enum EntropyError {
3132
GuestMemory(#[from] GuestMemoryError),
3233
/// Could not get random bytes: {0}
3334
Random(#[from] aws_lc_rs::error::Unspecified),
35+
/// Underlying IovDeque error: {0}
36+
IovDeque(#[from] IovDequeError),
3437
}
3538

3639
#[derive(Debug)]
@@ -77,7 +80,7 @@ impl Entropy {
7780
queue_events,
7881
irq_trigger,
7982
rate_limiter,
80-
buffer: IoVecBufferMut::default(),
83+
buffer: IoVecBufferMut::new()?,
8184
})
8285
}
8386

@@ -111,7 +114,7 @@ impl Entropy {
111114

112115
fn handle_one(&mut self) -> Result<u32, EntropyError> {
113116
// If guest provided us with an empty buffer just return directly
114-
if self.buffer.len() == 0 {
117+
if self.buffer.is_empty() {
115118
return Ok(0);
116119
}
117120

0 commit comments

Comments
 (0)