Skip to content

Commit d5d16fb

Browse files
committed
virtio: allow IoVecBufferMut to hold multiple DescriptorChain objects
Allow IoVecBufferMut objects to store multiple DescriptorChain objects, so that we can describe guest memory meant to be used for receiving data (for example memory used for network RX) as a single (sparse) memory region. This will allow us to always keep track all the available memory we have for performing RX and use `readv` for copying memory from the TAP device inside guest memory avoiding the extra copy. In the future, it will also facilitate the implementation of mergeable buffers for the RX path of the network device. Signed-off-by: Babis Chalios <[email protected]>
1 parent 11e71f6 commit d5d16fb

File tree

4 files changed

+184
-45
lines changed

4 files changed

+184
-45
lines changed

src/vmm/src/devices/virtio/iovec.rs

Lines changed: 168 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
use std::io::ErrorKind;
55

66
use libc::{c_void, iovec, size_t};
7+
#[cfg(not(kani))]
78
use smallvec::SmallVec;
89
use vm_memory::bitmap::Bitmap;
910
use vm_memory::{
1011
GuestMemory, GuestMemoryError, ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile,
1112
};
1213

14+
use super::iov_deque::{IovDeque, IovDequeError};
1315
use crate::devices::virtio::queue::DescriptorChain;
1416
use crate::vstate::memory::GuestMemoryMmap;
1517

@@ -23,6 +25,8 @@ pub enum IoVecError {
2325
OverflowedDescriptor,
2426
/// Guest memory error: {0}
2527
GuestMemory(#[from] GuestMemoryError),
28+
/// Error with underlying `IovDeque`: {0}
29+
IovDeque(#[from] IovDequeError),
2630
}
2731

2832
// Using SmallVec in the kani proofs causes kani to use unbounded amounts of memory
@@ -214,33 +218,38 @@ impl IoVecBuffer {
214218
}
215219
}
216220

221+
#[derive(Debug)]
222+
pub struct ParsedDescriptorChain {
223+
pub head_index: u16,
224+
pub length: u32,
225+
pub nr_iovecs: u16,
226+
}
227+
217228
/// This is essentially a wrapper of a `Vec<libc::iovec>` which can be passed to `libc::readv`.
218229
///
219230
/// It describes a write-only buffer passed to us by the guest that is scattered across multiple
220231
/// memory regions. Additionally, this wrapper provides methods that allow reading arbitrary ranges
221232
/// of data from that buffer.
222-
#[derive(Debug, Default, Clone)]
233+
#[derive(Debug)]
223234
pub struct IoVecBufferMut {
224235
// container of the memory regions included in this IO vector
225-
vecs: IoVecVec,
236+
vecs: IovDeque,
226237
// Total length of the IoVecBufferMut
227-
len: u32,
238+
len: usize,
228239
}
229240

230241
impl IoVecBufferMut {
231-
/// Create an `IoVecBuffer` from a `DescriptorChain`
232-
///
233-
/// # Safety
234-
///
235-
/// The descriptor chain cannot be referencing the same memory location as another chain
236-
pub unsafe fn load_descriptor_chain(
242+
/// Parse a `DescriptorChain` object and append the memory regions it describes in the
243+
/// underlying ring buffer.
244+
fn parse_descriptor(
237245
&mut self,
238246
mem: &GuestMemoryMmap,
239247
head: DescriptorChain,
240-
) -> Result<(), IoVecError> {
241-
self.clear();
242-
248+
) -> Result<ParsedDescriptorChain, IoVecError> {
249+
let head_index = head.index;
243250
let mut next_descriptor = Some(head);
251+
let mut length = 0u32;
252+
let mut nr_iovecs = 0u16;
244253
while let Some(desc) = next_descriptor {
245254
if !desc.is_write_only() {
246255
return Err(IoVecError::ReadOnlyDescriptor);
@@ -257,21 +266,75 @@ impl IoVecBufferMut {
257266
slice.bitmap().mark_dirty(0, desc.len as usize);
258267

259268
let iov_base = slice.ptr_guard_mut().as_ptr().cast::<c_void>();
260-
self.vecs.push(iovec {
269+
self.vecs.push_back(iovec {
261270
iov_base,
262271
iov_len: desc.len as size_t,
263272
});
264-
self.len = self
265-
.len
273+
length = length
266274
.checked_add(desc.len)
267275
.ok_or(IoVecError::OverflowedDescriptor)?;
268276

277+
nr_iovecs += 1;
269278
next_descriptor = desc.next_descriptor();
270279
}
271280

281+
self.len = self
282+
.len
283+
.checked_add(length as usize)
284+
.ok_or(IoVecError::OverflowedDescriptor)?;
285+
286+
Ok(ParsedDescriptorChain {
287+
head_index,
288+
length,
289+
nr_iovecs,
290+
})
291+
}
292+
293+
/// Create an empty `IoVecBufferMut`.
294+
pub(crate) fn new() -> Result<Self, IovDequeError> {
295+
let vecs = IovDeque::new()?;
296+
Ok(Self { vecs, len: 0 })
297+
}
298+
299+
/// Create an `IoVecBufferMut` from a `DescriptorChain`
300+
///
301+
/// This will clear any previous `iovec` objects in the buffer and load the new
302+
/// [`DescriptorChain`].
303+
///
304+
/// # Safety
305+
///
306+
/// The descriptor chain cannot be referencing the same memory location as another chain
307+
pub unsafe fn load_descriptor_chain(
308+
&mut self,
309+
mem: &GuestMemoryMmap,
310+
head: DescriptorChain,
311+
) -> Result<(), IoVecError> {
312+
self.clear();
313+
let _ = self.parse_descriptor(mem, head)?;
272314
Ok(())
273315
}
274316

317+
/// Append a `DescriptorChain` in this `IoVecBufferMut`
318+
///
319+
/// # Safety
320+
///
321+
/// The descriptor chain cannot be referencing the same memory location as another chain
322+
pub unsafe fn append_descriptor_chain(
323+
&mut self,
324+
mem: &GuestMemoryMmap,
325+
head: DescriptorChain,
326+
) -> Result<ParsedDescriptorChain, IoVecError> {
327+
self.parse_descriptor(mem, head)
328+
}
329+
330+
/// Drop memory from the `IoVecBufferMut`
331+
///
332+
/// This will drop memory described by the `IoVecBufferMut` from the beginning.
333+
pub fn drop_descriptor_chain(&mut self, parse_descriptor: &ParsedDescriptorChain) {
334+
let dropped = self.vecs.pop_front(parse_descriptor.nr_iovecs);
335+
self.len -= dropped;
336+
}
337+
275338
/// Create an `IoVecBuffer` from a `DescriptorChain`
276339
///
277340
/// # Safety
@@ -281,20 +344,34 @@ impl IoVecBufferMut {
281344
mem: &GuestMemoryMmap,
282345
head: DescriptorChain,
283346
) -> Result<Self, IoVecError> {
284-
let mut new_buffer = Self::default();
347+
let mut new_buffer = Self::new()?;
285348
new_buffer.load_descriptor_chain(mem, head)?;
286349
Ok(new_buffer)
287350
}
288351

289352
/// Get the total length of the memory regions covered by this `IoVecBuffer`
290-
pub(crate) fn len(&self) -> u32 {
353+
///
354+
/// In contrast to the equivalent [`IoVecBuffer::len()`] which returns `u32`, this one returns
355+
/// `usize` since the buffer can contain multiple `DescriptorChain` objects, so we don't have
356+
/// the limit that the length of a buffer is limited by `u32`.
357+
pub(crate) fn len(&self) -> usize {
291358
self.len
292359
}
293360

361+
/// Returns a pointer to the memory keeping the `iovec` structs
362+
pub fn as_iovec_ptr(&mut self) -> *mut iovec {
363+
self.vecs.as_mut_ptr()
364+
}
365+
366+
/// Returns the length of the `iovec` array.
367+
pub fn iovec_count(&self) -> u16 {
368+
self.vecs.len()
369+
}
370+
294371
/// Clears the `iovec` array
295372
pub fn clear(&mut self) {
296373
self.vecs.clear();
297-
self.len = 0u32;
374+
self.len = 0;
298375
}
299376

300377
/// Writes a number of bytes into the `IoVecBufferMut` starting at a given offset.
@@ -313,7 +390,7 @@ impl IoVecBufferMut {
313390
mut buf: &[u8],
314391
offset: usize,
315392
) -> Result<(), VolatileMemoryError> {
316-
if offset < self.len() as usize {
393+
if offset < self.len() {
317394
let expected = buf.len();
318395
let bytes_written = self.write_volatile_at(&mut buf, offset, expected)?;
319396

@@ -342,11 +419,14 @@ impl IoVecBufferMut {
342419
) -> Result<usize, VolatileMemoryError> {
343420
let mut total_bytes_read = 0;
344421

345-
for iov in &self.vecs {
422+
for i in 0..self.vecs.len() {
346423
if len == 0 {
347424
break;
348425
}
349426

427+
// It's ok to unwrap here because `i` is within range.
428+
let iov = self.vecs.peek(i).unwrap();
429+
350430
if offset >= iov.iov_len {
351431
offset -= iov.iov_len;
352432
continue;
@@ -391,6 +471,7 @@ mod tests {
391471
use vm_memory::VolatileMemoryError;
392472

393473
use super::{IoVecBuffer, IoVecBufferMut};
474+
use crate::devices::virtio::iov_deque::IovDeque;
394475
use crate::devices::virtio::queue::{Queue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE};
395476
use crate::devices::virtio::test_utils::VirtQueue;
396477
use crate::test_utils::multi_region_mem;
@@ -429,17 +510,36 @@ mod tests {
429510

430511
impl From<&mut [u8]> for IoVecBufferMut {
431512
fn from(buf: &mut [u8]) -> Self {
513+
let mut vecs = IovDeque::new().unwrap();
514+
vecs.push_back(iovec {
515+
iov_base: buf.as_mut_ptr().cast::<c_void>(),
516+
iov_len: buf.len(),
517+
});
518+
432519
Self {
433-
vecs: vec![iovec {
434-
iov_base: buf.as_mut_ptr().cast::<c_void>(),
435-
iov_len: buf.len(),
436-
}]
437-
.into(),
438-
len: buf.len().try_into().unwrap(),
520+
vecs,
521+
len: buf.len(),
439522
}
440523
}
441524
}
442525

526+
impl From<Vec<&mut [u8]>> for IoVecBufferMut {
527+
fn from(mut buffer: Vec<&mut [u8]>) -> Self {
528+
let mut len = 0usize;
529+
let mut vecs = IovDeque::new().unwrap();
530+
buffer.drain(..).for_each(|slice| {
531+
len += slice.len();
532+
533+
vecs.push_back(iovec {
534+
iov_base: slice.as_ptr() as *mut c_void,
535+
iov_len: slice.len(),
536+
});
537+
});
538+
539+
Self { vecs, len }
540+
}
541+
}
542+
443543
fn default_mem() -> GuestMemoryMmap {
444544
multi_region_mem(&[
445545
(GuestAddress(0), 0x10000),
@@ -528,8 +628,19 @@ mod tests {
528628
let head = q.pop().unwrap();
529629

530630
// SAFETY: This descriptor chain is only loaded once in this test
531-
let iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
631+
let mut iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
532632
assert_eq!(iovec.len(), 4 * 64);
633+
634+
// We are creating a new queue where we can get descriptors from. Probably, this is not
635+
// something that we will ever want to do, as `IoVecBufferMut`s are typically
636+
// (concpetually) associated with a single `Queue`. We just do this here to be able to test
637+
// the appending logic.
638+
let (mut q, _) = write_only_chain(&mem);
639+
let head = q.pop().unwrap();
640+
// SAFETY: it is actually unsafe, but we just want to check the length of the
641+
// `IoVecBufferMut` after appending.
642+
let _ = unsafe { iovec.append_descriptor_chain(&mem, head).unwrap() };
643+
assert_eq!(iovec.len(), 8 * 64);
533644
}
534645

535646
#[test]
@@ -687,6 +798,7 @@ mod verification {
687798
use vm_memory::VolatileSlice;
688799

689800
use super::{IoVecBuffer, IoVecBufferMut, IoVecVec};
801+
use crate::devices::virtio::iov_deque::IovDeque;
690802

691803
// Maximum memory size to use for our buffers. For the time being 1KB.
692804
const GUEST_MEMORY_SIZE: usize = 1 << 10;
@@ -702,10 +814,10 @@ mod verification {
702814
let mut vecs: Vec<iovec> = Vec::with_capacity(nr_descs);
703815
let mut len = 0u32;
704816
for _ in 0..nr_descs {
705-
// The `IoVecBuffer(Mut)` constructors ensure that the memory region described by every
817+
// The `IoVecBuffer` constructors ensure that the memory region described by every
706818
// `Descriptor` in the chain is a valid, i.e. it is memory with then guest's memory
707819
// mmap. The assumption, here, that the last address is within the memory object's
708-
// bound substitutes these checks that `IoVecBuffer(Mut)::new() performs.`
820+
// bound substitutes these checks that `IoVecBuffer::new() performs.`
709821
let addr: usize = kani::any();
710822
let iov_len: usize =
711823
kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size));
@@ -728,6 +840,26 @@ mod verification {
728840
}
729841
}
730842

843+
fn create_iovecs_mut(mem: *mut u8, size: usize, nr_descs: usize) -> (IovDeque, u32) {
844+
let mut vecs = IovDeque::new().unwrap();
845+
let mut len = 0u32;
846+
for _ in 0..nr_descs {
847+
// The `IoVecBufferMut` constructors ensure that the memory region described by every
848+
// `Descriptor` in the chain is a valid, i.e. it is memory with then guest's memory
849+
// mmap. The assumption, here, that the last address is within the memory object's
850+
// bound substitutes these checks that `IoVecBufferMut::new() performs.`
851+
let addr: usize = kani::any();
852+
let iov_len: usize =
853+
kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size));
854+
let iov_base = unsafe { mem.offset(addr.try_into().unwrap()) } as *mut c_void;
855+
856+
vecs.push_back(iovec { iov_base, iov_len });
857+
len += u32::try_from(iov_len).unwrap();
858+
}
859+
860+
(vecs, len)
861+
}
862+
731863
impl IoVecBufferMut {
732864
fn any_of_length(nr_descs: usize) -> Self {
733865
// We only write into `IoVecBufferMut` objects, so we can simply create a guest memory
@@ -739,8 +871,11 @@ mod verification {
739871
))
740872
};
741873

742-
let (vecs, len) = create_iovecs(mem, GUEST_MEMORY_SIZE, nr_descs);
743-
Self { vecs, len }
874+
let (vecs, len) = create_iovecs_mut(mem, GUEST_MEMORY_SIZE, nr_descs);
875+
Self {
876+
vecs,
877+
len: len.try_into().unwrap(),
878+
}
744879
}
745880
}
746881

@@ -820,7 +955,7 @@ mod verification {
820955
let mut iov_mut = IoVecBufferMut::any_of_length(nr_descs);
821956

822957
let mut buf = kani::vec::any_vec::<u8, GUEST_MEMORY_SIZE>();
823-
let offset: u32 = kani::any();
958+
let offset: usize = kani::any();
824959

825960
// We can't really check the contents that the operation here writes into
826961
// `IoVecBufferMut`, because our `IoVecBufferMut` being completely arbitrary
@@ -835,11 +970,7 @@ mod verification {
835970
// Ok(...)
836971
assert_eq!(
837972
iov_mut
838-
.write_volatile_at(
839-
&mut KaniBuffer(&mut buf),
840-
offset as usize,
841-
GUEST_MEMORY_SIZE
842-
)
973+
.write_volatile_at(&mut KaniBuffer(&mut buf), offset, GUEST_MEMORY_SIZE)
843974
.unwrap(),
844975
buf.len().min(iov_mut.len().saturating_sub(offset) as usize)
845976
);

0 commit comments

Comments
 (0)