Skip to content

Commit 4838a39

Browse files
bchaliosShadowCurse
andcommitted
virtio: allow IoVecBufferMut to hold multiple DescriptorChain objects
Allow IoVecBufferMut objects to store multiple DescriptorChain objects, so that we can describe guest memory meant to be used for receiving data (for example memory used for network RX) as a single (sparse) memory region. This will allow us to always keep track all the available memory we have for performing RX and use `readv` for copying memory from the TAP device inside guest memory avoiding the extra copy. In the future, it will also facilitate the implementation of mergeable buffers for the RX path of the network device. Co-authored-by: Egor Lazarchuk <[email protected]> Signed-off-by: Babis Chalios <[email protected]>
1 parent baa4229 commit 4838a39

File tree

4 files changed

+191
-43
lines changed

4 files changed

+191
-43
lines changed

src/vmm/src/devices/virtio/iovec.rs

Lines changed: 175 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
use std::io::ErrorKind;
55

66
use libc::{c_void, iovec, size_t};
7+
#[cfg(not(kani))]
78
use smallvec::SmallVec;
89
use vm_memory::bitmap::Bitmap;
910
use vm_memory::{
1011
GuestMemory, GuestMemoryError, ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile,
1112
};
1213

14+
use super::iov_deque::{IovDeque, IovDequeError};
1315
use crate::devices::virtio::queue::DescriptorChain;
1416
use crate::vstate::memory::GuestMemoryMmap;
1517

@@ -23,6 +25,8 @@ pub enum IoVecError {
2325
OverflowedDescriptor,
2426
/// Guest memory error: {0}
2527
GuestMemory(#[from] GuestMemoryError),
28+
/// Error with underlying `IovDeque`: {0}
29+
IovDeque(#[from] IovDequeError),
2630
}
2731

2832
// Using SmallVec in the kani proofs causes kani to use unbounded amounts of memory
@@ -214,64 +218,135 @@ impl IoVecBuffer {
214218
}
215219
}
216220

221+
#[derive(Debug)]
222+
pub struct ParsedDescriptorChain {
223+
pub head_index: u16,
224+
pub length: u32,
225+
pub nr_iovecs: u16,
226+
}
227+
217228
/// This is essentially a wrapper of a `Vec<libc::iovec>` which can be passed to `libc::readv`.
218229
///
219230
/// It describes a write-only buffer passed to us by the guest that is scattered across multiple
220231
/// memory regions. Additionally, this wrapper provides methods that allow reading arbitrary ranges
221232
/// of data from that buffer.
222-
#[derive(Debug, Default, Clone)]
233+
#[derive(Debug)]
223234
pub struct IoVecBufferMut {
224235
// container of the memory regions included in this IO vector
225-
vecs: IoVecVec,
236+
vecs: IovDeque,
226237
// Total length of the IoVecBufferMut
227-
len: u32,
238+
len: usize,
228239
}
229240

230241
impl IoVecBufferMut {
231-
/// Create an `IoVecBuffer` from a `DescriptorChain`
242+
/// Parse a `DescriptorChain` object and append the memory regions it describes in the
243+
/// underlying ring buffer.
232244
///
233245
/// # Safety
234246
///
235247
/// The descriptor chain cannot be referencing the same memory location as another chain
236-
pub unsafe fn load_descriptor_chain(
248+
unsafe fn parse_descriptor(
237249
&mut self,
238250
mem: &GuestMemoryMmap,
239251
head: DescriptorChain,
240-
) -> Result<(), IoVecError> {
241-
self.clear();
242-
252+
) -> Result<ParsedDescriptorChain, IoVecError> {
253+
let head_index = head.index;
243254
let mut next_descriptor = Some(head);
255+
let mut length = 0u32;
256+
let mut nr_iovecs = 0u16;
244257
while let Some(desc) = next_descriptor {
245258
if !desc.is_write_only() {
259+
self.vecs.pop_front(nr_iovecs);
246260
return Err(IoVecError::ReadOnlyDescriptor);
247261
}
248262

249263
// We use get_slice instead of `get_host_address` here in order to have the whole
250264
// range of the descriptor chain checked, i.e. [addr, addr + len) is a valid memory
251265
// region in the GuestMemoryMmap.
252-
let slice = mem.get_slice(desc.addr, desc.len as usize)?;
266+
let slice = mem.get_slice(desc.addr, desc.len as usize).map_err(|err| {
267+
self.vecs.pop_front(nr_iovecs);
268+
err
269+
})?;
253270

254271
// We need to mark the area of guest memory that will be mutated through this
255272
// IoVecBufferMut as dirty ahead of time, as we loose access to all
256273
// vm-memory related information after converting down to iovecs.
257274
slice.bitmap().mark_dirty(0, desc.len as usize);
258275

259276
let iov_base = slice.ptr_guard_mut().as_ptr().cast::<c_void>();
260-
self.vecs.push(iovec {
277+
self.vecs.push_back(iovec {
261278
iov_base,
262279
iov_len: desc.len as size_t,
263280
});
264-
self.len = self
265-
.len
281+
nr_iovecs += 1;
282+
length = length
266283
.checked_add(desc.len)
267-
.ok_or(IoVecError::OverflowedDescriptor)?;
284+
.ok_or(IoVecError::OverflowedDescriptor)
285+
.map_err(|err| {
286+
self.vecs.pop_front(nr_iovecs);
287+
err
288+
})?;
268289

269290
next_descriptor = desc.next_descriptor();
270291
}
271292

293+
self.len = self.len.checked_add(length as usize).ok_or_else(|| {
294+
self.vecs.pop_front(nr_iovecs);
295+
IoVecError::OverflowedDescriptor
296+
})?;
297+
298+
Ok(ParsedDescriptorChain {
299+
head_index,
300+
length,
301+
nr_iovecs,
302+
})
303+
}
304+
305+
/// Create an empty `IoVecBufferMut`.
306+
pub fn new() -> Result<Self, IovDequeError> {
307+
let vecs = IovDeque::new()?;
308+
Ok(Self { vecs, len: 0 })
309+
}
310+
311+
/// Create an `IoVecBufferMut` from a `DescriptorChain`
312+
///
313+
/// This will clear any previous `iovec` objects in the buffer and load the new
314+
/// [`DescriptorChain`].
315+
///
316+
/// # Safety
317+
///
318+
/// The descriptor chain cannot be referencing the same memory location as another chain
319+
pub unsafe fn load_descriptor_chain(
320+
&mut self,
321+
mem: &GuestMemoryMmap,
322+
head: DescriptorChain,
323+
) -> Result<(), IoVecError> {
324+
self.clear();
325+
let _ = self.parse_descriptor(mem, head)?;
272326
Ok(())
273327
}
274328

329+
/// Append a `DescriptorChain` in this `IoVecBufferMut`
330+
///
331+
/// # Safety
332+
///
333+
/// The descriptor chain cannot be referencing the same memory location as another chain
334+
pub unsafe fn append_descriptor_chain(
335+
&mut self,
336+
mem: &GuestMemoryMmap,
337+
head: DescriptorChain,
338+
) -> Result<ParsedDescriptorChain, IoVecError> {
339+
self.parse_descriptor(mem, head)
340+
}
341+
342+
/// Drop memory from the `IoVecBufferMut`
343+
///
344+
/// This will drop memory described by the `IoVecBufferMut` from the beginning.
345+
pub fn drop_descriptor_chain(&mut self, parse_descriptor: &ParsedDescriptorChain) {
346+
let dropped = self.vecs.pop_front(parse_descriptor.nr_iovecs);
347+
self.len -= dropped;
348+
}
349+
275350
/// Create an `IoVecBuffer` from a `DescriptorChain`
276351
///
277352
/// # Safety
@@ -281,20 +356,34 @@ impl IoVecBufferMut {
281356
mem: &GuestMemoryMmap,
282357
head: DescriptorChain,
283358
) -> Result<Self, IoVecError> {
284-
let mut new_buffer = Self::default();
359+
let mut new_buffer = Self::new()?;
285360
new_buffer.load_descriptor_chain(mem, head)?;
286361
Ok(new_buffer)
287362
}
288363

289364
/// Get the total length of the memory regions covered by this `IoVecBuffer`
290-
pub(crate) fn len(&self) -> u32 {
365+
///
366+
/// In contrast to the equivalent [`IoVecBuffer::len()`] which returns `u32`, this one returns
367+
/// `usize` since the buffer can contain multiple `DescriptorChain` objects, so we don't have
368+
/// the limit that the length of a buffer is limited by `u32`.
369+
pub(crate) fn len(&self) -> usize {
291370
self.len
292371
}
293372

373+
/// Returns a pointer to the memory keeping the `iovec` structs
374+
pub fn as_iovec_mut_slice(&mut self) -> &mut [iovec] {
375+
self.vecs.as_mut_slice()
376+
}
377+
378+
/// Returns the length of the `iovec` array.
379+
pub fn iovec_count(&self) -> u16 {
380+
self.vecs.len()
381+
}
382+
294383
/// Clears the `iovec` array
295384
pub fn clear(&mut self) {
296385
self.vecs.clear();
297-
self.len = 0u32;
386+
self.len = 0;
298387
}
299388

300389
/// Writes a number of bytes into the `IoVecBufferMut` starting at a given offset.
@@ -313,7 +402,7 @@ impl IoVecBufferMut {
313402
mut buf: &[u8],
314403
offset: usize,
315404
) -> Result<(), VolatileMemoryError> {
316-
if offset < self.len() as usize {
405+
if offset < self.len() {
317406
let expected = buf.len();
318407
let bytes_written = self.write_volatile_at(&mut buf, offset, expected)?;
319408

@@ -342,7 +431,7 @@ impl IoVecBufferMut {
342431
) -> Result<usize, VolatileMemoryError> {
343432
let mut total_bytes_read = 0;
344433

345-
for iov in &self.vecs {
434+
for iov in self.vecs.as_slice() {
346435
if len == 0 {
347436
break;
348437
}
@@ -391,6 +480,7 @@ mod tests {
391480
use vm_memory::VolatileMemoryError;
392481

393482
use super::{IoVecBuffer, IoVecBufferMut};
483+
use crate::devices::virtio::iov_deque::IovDeque;
394484
use crate::devices::virtio::queue::{Queue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE};
395485
use crate::devices::virtio::test_utils::VirtQueue;
396486
use crate::test_utils::multi_region_mem;
@@ -429,17 +519,36 @@ mod tests {
429519

430520
impl From<&mut [u8]> for IoVecBufferMut {
431521
fn from(buf: &mut [u8]) -> Self {
522+
let mut vecs = IovDeque::new().unwrap();
523+
vecs.push_back(iovec {
524+
iov_base: buf.as_mut_ptr().cast::<c_void>(),
525+
iov_len: buf.len(),
526+
});
527+
432528
Self {
433-
vecs: vec![iovec {
434-
iov_base: buf.as_mut_ptr().cast::<c_void>(),
435-
iov_len: buf.len(),
436-
}]
437-
.into(),
438-
len: buf.len().try_into().unwrap(),
529+
vecs,
530+
len: buf.len(),
439531
}
440532
}
441533
}
442534

535+
impl From<Vec<&mut [u8]>> for IoVecBufferMut {
536+
fn from(buffer: Vec<&mut [u8]>) -> Self {
537+
let mut len = 0usize;
538+
let mut vecs = IovDeque::new().unwrap();
539+
for slice in buffer {
540+
len += slice.len();
541+
542+
vecs.push_back(iovec {
543+
iov_base: slice.as_ptr() as *mut c_void,
544+
iov_len: slice.len(),
545+
});
546+
}
547+
548+
Self { vecs, len }
549+
}
550+
}
551+
443552
fn default_mem() -> GuestMemoryMmap {
444553
multi_region_mem(&[
445554
(GuestAddress(0), 0x10000),
@@ -528,8 +637,19 @@ mod tests {
528637
let head = q.pop().unwrap();
529638

530639
// SAFETY: This descriptor chain is only loaded once in this test
531-
let iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
640+
let mut iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
532641
assert_eq!(iovec.len(), 4 * 64);
642+
643+
// We are creating a new queue where we can get descriptors from. Probably, this is not
644+
// something that we will ever want to do, as `IoVecBufferMut`s are typically
645+
// (concpetually) associated with a single `Queue`. We just do this here to be able to test
646+
// the appending logic.
647+
let (mut q, _) = write_only_chain(&mem);
648+
let head = q.pop().unwrap();
649+
// SAFETY: it is actually unsafe, but we just want to check the length of the
650+
// `IoVecBufferMut` after appending.
651+
let _ = unsafe { iovec.append_descriptor_chain(&mem, head).unwrap() };
652+
assert_eq!(iovec.len(), 8 * 64);
533653
}
534654

535655
#[test]
@@ -687,6 +807,7 @@ mod verification {
687807
use vm_memory::VolatileSlice;
688808

689809
use super::{IoVecBuffer, IoVecBufferMut, IoVecVec};
810+
use crate::devices::virtio::iov_deque::IovDeque;
690811

691812
// Maximum memory size to use for our buffers. For the time being 1KB.
692813
const GUEST_MEMORY_SIZE: usize = 1 << 10;
@@ -702,10 +823,10 @@ mod verification {
702823
let mut vecs: Vec<iovec> = Vec::with_capacity(nr_descs);
703824
let mut len = 0u32;
704825
for _ in 0..nr_descs {
705-
// The `IoVecBuffer(Mut)` constructors ensure that the memory region described by every
826+
// The `IoVecBuffer` constructors ensure that the memory region described by every
706827
// `Descriptor` in the chain is a valid, i.e. it is memory with then guest's memory
707828
// mmap. The assumption, here, that the last address is within the memory object's
708-
// bound substitutes these checks that `IoVecBuffer(Mut)::new() performs.`
829+
// bound substitutes these checks that `IoVecBuffer::new() performs.`
709830
let addr: usize = kani::any();
710831
let iov_len: usize =
711832
kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size));
@@ -728,6 +849,26 @@ mod verification {
728849
}
729850
}
730851

852+
fn create_iovecs_mut(mem: *mut u8, size: usize, nr_descs: usize) -> (IovDeque, u32) {
853+
let mut vecs = IovDeque::new().unwrap();
854+
let mut len = 0u32;
855+
for _ in 0..nr_descs {
856+
// The `IoVecBufferMut` constructors ensure that the memory region described by every
857+
// `Descriptor` in the chain is a valid, i.e. it is memory with then guest's memory
858+
// mmap. The assumption, here, that the last address is within the memory object's
859+
// bound substitutes these checks that `IoVecBufferMut::new() performs.`
860+
let addr: usize = kani::any();
861+
let iov_len: usize =
862+
kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size));
863+
let iov_base = unsafe { mem.offset(addr.try_into().unwrap()) } as *mut c_void;
864+
865+
vecs.push_back(iovec { iov_base, iov_len });
866+
len += u32::try_from(iov_len).unwrap();
867+
}
868+
869+
(vecs, len)
870+
}
871+
731872
impl IoVecBufferMut {
732873
fn any_of_length(nr_descs: usize) -> Self {
733874
// We only write into `IoVecBufferMut` objects, so we can simply create a guest memory
@@ -739,8 +880,11 @@ mod verification {
739880
))
740881
};
741882

742-
let (vecs, len) = create_iovecs(mem, GUEST_MEMORY_SIZE, nr_descs);
743-
Self { vecs, len }
883+
let (vecs, len) = create_iovecs_mut(mem, GUEST_MEMORY_SIZE, nr_descs);
884+
Self {
885+
vecs,
886+
len: len.try_into().unwrap(),
887+
}
744888
}
745889
}
746890

@@ -820,7 +964,7 @@ mod verification {
820964
let mut iov_mut = IoVecBufferMut::any_of_length(nr_descs);
821965

822966
let mut buf = kani::vec::any_vec::<u8, GUEST_MEMORY_SIZE>();
823-
let offset: u32 = kani::any();
967+
let offset: usize = kani::any();
824968

825969
// We can't really check the contents that the operation here writes into
826970
// `IoVecBufferMut`, because our `IoVecBufferMut` being completely arbitrary
@@ -835,11 +979,7 @@ mod verification {
835979
// Ok(...)
836980
assert_eq!(
837981
iov_mut
838-
.write_volatile_at(
839-
&mut KaniBuffer(&mut buf),
840-
offset as usize,
841-
GUEST_MEMORY_SIZE
842-
)
982+
.write_volatile_at(&mut KaniBuffer(&mut buf), offset, GUEST_MEMORY_SIZE)
843983
.unwrap(),
844984
buf.len().min(iov_mut.len().saturating_sub(offset) as usize)
845985
);

0 commit comments

Comments
 (0)