Skip to content

Commit d23a1be

Browse files
committed
feat(iovec): add support for VIRTQ_DESC_F_INDIRECT to IoVecBuffer
Now IoVecBuffer/Mut can be built from descriptor chains that use the VIRTQ_DESC_F_INDIRECT flag. Indirect descriptors work as follows: - Instead of pointing to the buffer that data is read from or written to, a descriptor in the descriptor table can point to a buffer that contains another descriptor table. The descriptors in that 'indirect' descriptor table point to the actual data buffers. - All descriptors in the 'indirect' descriptor table are processed sequentially. - The `VIRTQ_DESC_F_WRITE` flag is ignored for the main descriptor (the one from the original descriptor table). Signed-off-by: Egor Lazarchuk <[email protected]>
1 parent e9e0b27 commit d23a1be

File tree

3 files changed

+167
-50
lines changed

3 files changed

+167
-50
lines changed

src/vmm/src/devices/virtio/iovec.rs

Lines changed: 132 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@ use std::io::ErrorKind;
66
use libc::{c_void, iovec, size_t};
77
use smallvec::SmallVec;
88
use vm_memory::{
9-
GuestMemoryError, ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile,
9+
GuestAddress, GuestMemoryError, ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile,
1010
};
1111

12+
use super::queue::Descriptor;
1213
use crate::devices::virtio::queue::DescriptorChain;
13-
use crate::vstate::memory::{Bitmap, GuestMemory};
14+
use crate::vstate::memory::{Bitmap, GuestMemory, GuestMemoryMmap};
1415

1516
#[derive(Debug, thiserror::Error, displaydoc::Display)]
1617
pub enum IoVecError {
@@ -20,6 +21,8 @@ pub enum IoVecError {
2021
ReadOnlyDescriptor,
2122
/// Tried to create an `IoVec` or `IoVecMut` from a descriptor chain that was too large
2223
OverflowedDescriptor,
24+
/// Nested indirect descriptor
25+
NestedIndirectDescriptor,
2326
/// Guest memory error: {0}
2427
GuestMemory(#[from] GuestMemoryError),
2528
}
@@ -63,34 +66,76 @@ impl IoVecBuffer {
6366

6467
let mut next_descriptor = Some(head);
6568
// Walk the chain starting at `head`, turning every readable buffer into an iovec.
// A descriptor flagged VIRTQ_DESC_F_INDIRECT does not describe a data buffer itself:
// it points at a table of `Descriptor`s, each of which describes a data buffer.
while let Some(desc) = next_descriptor {
    if desc.is_indirect() {
        // We use get_slice instead of `get_host_address` here in order to have the whole
        // range of the indirect table checked, i.e. [addr, addr + len) is a valid memory
        // region in the GuestMemoryMmap.
        let indirect_desc_slice = desc
            .mem
            .get_slice(desc.addr, desc.len as usize)
            .map_err(IoVecError::GuestMemory)?;

        // SAFETY:
        // `get_slice` succeeded, so `desc.len` bytes starting at the returned pointer are
        // mapped guest memory; the element count is rounded down so the resulting slice
        // never extends past that range.
        // NOTE(review): alignment of the guest-provided address for `Descriptor` is not
        // verified here, and the guest may modify this memory concurrently — confirm this
        // is acceptable or switch to volatile/`read_obj` style accesses.
        let indirect_desc_slice: &[Descriptor] = unsafe {
            std::slice::from_raw_parts(
                indirect_desc_slice.ptr_guard().as_ptr().cast(),
                desc.len as usize / std::mem::size_of::<Descriptor>(),
            )
        };

        for d in indirect_desc_slice.iter() {
            // The VIRTQ_DESC_F_WRITE flag of the head descriptor (`desc`) is ignored for
            // indirect descriptors; the flag that matters is the one on each entry of the
            // indirect table, so the check must be done on `d`.
            if d.is_write_only() {
                return Err(IoVecError::WriteOnlyDescriptor);
            }
            // An indirect table must not point at yet another indirect table.
            if d.is_indirect() {
                return Err(IoVecError::NestedIndirectDescriptor);
            }
            self.add_descriptor(desc.mem, GuestAddress(d.addr), d.len)?;
            // Table entries are processed sequentially until one lacks the NEXT flag.
            if !d.has_next() {
                break;
            }
        }
    } else {
        if desc.is_write_only() {
            return Err(IoVecError::WriteOnlyDescriptor);
        }

        self.add_descriptor(desc.mem, desc.addr, desc.len)?;
    }

    next_descriptor = desc.next_descriptor();
}
90110

91111
Ok(())
92112
}
93113

114+
fn add_descriptor(
115+
&mut self,
116+
mem: &GuestMemoryMmap,
117+
addr: GuestAddress,
118+
len: u32,
119+
) -> Result<(), IoVecError> {
120+
// We use get_slice instead of `get_host_address` here in order to have the whole
121+
// range of the descriptor chain checked, i.e. [addr, addr + len) is a valid memory
122+
// region in the GuestMemoryMmap.
123+
let iov_base = mem
124+
.get_slice(addr, len as usize)?
125+
.ptr_guard_mut()
126+
.as_ptr()
127+
.cast::<c_void>();
128+
self.vecs.push(iovec {
129+
iov_base,
130+
iov_len: len as size_t,
131+
});
132+
self.len = self
133+
.len
134+
.checked_add(len)
135+
.ok_or(IoVecError::OverflowedDescriptor)?;
136+
Ok(())
137+
}
138+
94139
/// Create an `IoVecBuffer` from a `DescriptorChain`
95140
///
96141
/// # Safety
@@ -240,36 +285,79 @@ impl IoVecBufferMut {
240285

241286
let mut next_descriptor = Some(head);
242287
// Walk the chain starting at `head`, turning every writable buffer into an iovec.
// A descriptor flagged VIRTQ_DESC_F_INDIRECT does not describe a data buffer itself:
// it points at a table of `Descriptor`s, each of which describes a data buffer.
while let Some(desc) = next_descriptor {
    if desc.is_indirect() {
        // We use get_slice instead of `get_host_address` here in order to have the whole
        // range of the indirect table checked, i.e. [addr, addr + len) is a valid memory
        // region in the GuestMemoryMmap.
        let indirect_desc_slice = desc
            .mem
            .get_slice(desc.addr, desc.len as usize)
            .map_err(IoVecError::GuestMemory)?;

        // SAFETY:
        // `get_slice` succeeded, so `desc.len` bytes starting at the returned pointer are
        // mapped guest memory; the element count is rounded down so the resulting slice
        // never extends past that range.
        // NOTE(review): alignment of the guest-provided address for `Descriptor` is not
        // verified here, and the guest may modify this memory concurrently — confirm this
        // is acceptable or switch to volatile/`read_obj` style accesses.
        let indirect_desc_slice: &[Descriptor] = unsafe {
            std::slice::from_raw_parts(
                indirect_desc_slice.ptr_guard().as_ptr().cast(),
                desc.len as usize / std::mem::size_of::<Descriptor>(),
            )
        };

        for d in indirect_desc_slice.iter() {
            // The VIRTQ_DESC_F_WRITE flag of the head descriptor (`desc`) is ignored for
            // indirect descriptors; the flag that matters is the one on each entry of the
            // indirect table, so the check must be done on `d`.
            if !d.is_write_only() {
                return Err(IoVecError::ReadOnlyDescriptor);
            }
            // An indirect table must not point at yet another indirect table.
            if d.is_indirect() {
                return Err(IoVecError::NestedIndirectDescriptor);
            }
            self.add_descriptor(desc.mem, GuestAddress(d.addr), d.len)?;
            // Table entries are processed sequentially until one lacks the NEXT flag.
            if !d.has_next() {
                break;
            }
        }
    } else {
        if !desc.is_write_only() {
            return Err(IoVecError::ReadOnlyDescriptor);
        }

        self.add_descriptor(desc.mem, desc.addr, desc.len)?;
    }

    next_descriptor = desc.next_descriptor();
}
269329

270330
Ok(())
271331
}
272332

333+
fn add_descriptor(
334+
&mut self,
335+
mem: &GuestMemoryMmap,
336+
addr: GuestAddress,
337+
len: u32,
338+
) -> Result<(), IoVecError> {
339+
// We use get_slice instead of `get_host_address` here in order to have the whole
340+
// range of the descriptor chain checked, i.e. [addr, addr + len) is a valid memory
341+
// region in the GuestMemoryMmap.
342+
let slice = mem.get_slice(addr, len as usize)?;
343+
344+
// We need to mark the area of guest memory that will be mutated through this
345+
// IoVecBufferMut as dirty ahead of time, as we loose access to all
346+
// vm-memory related information after converting down to iovecs.
347+
slice.bitmap().mark_dirty(0, len as usize);
348+
349+
let iov_base = slice.ptr_guard_mut().as_ptr().cast::<c_void>();
350+
self.vecs.push(iovec {
351+
iov_base,
352+
iov_len: len as size_t,
353+
});
354+
self.len = self
355+
.len
356+
.checked_add(len)
357+
.ok_or(IoVecError::OverflowedDescriptor)?;
358+
Ok(())
359+
}
360+
273361
/// Create an `IoVecBuffer` from a `DescriptorChain`
274362
///
275363
/// # Safety

src/vmm/src/devices/virtio/queue.rs

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use crate::vstate::memory::{
1818

1919
pub(super) const VIRTQ_DESC_F_NEXT: u16 = 0x1;
2020
pub(super) const VIRTQ_DESC_F_WRITE: u16 = 0x2;
21+
pub(super) const VIRTQ_DESC_F_INDIRECT: u16 = 0x4;
2122

2223
/// Max size of virtio queues offered by firecracker's virtio devices.
2324
pub(super) const FIRECRACKER_MAX_QUEUE_SIZE: u16 = 256;
@@ -43,12 +44,32 @@ pub enum QueueError {
4344
/// https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-430008
4445
/// 2.6.5 The Virtqueue Descriptor Table
4546
#[repr(C)]
46-
#[derive(Default, Clone, Copy)]
47-
struct Descriptor {
48-
addr: u64,
49-
len: u32,
50-
flags: u16,
51-
next: u16,
47+
#[derive(Debug, Default, Clone, Copy)]
48+
pub struct Descriptor {
49+
pub addr: u64,
50+
pub len: u32,
51+
pub flags: u16,
52+
pub next: u16,
53+
}
54+
55+
impl Descriptor {
56+
/// Gets if this descriptor chain has another descriptor chain linked after it.
57+
pub fn has_next(&self) -> bool {
58+
self.flags & VIRTQ_DESC_F_NEXT != 0
59+
}
60+
61+
/// If the driver designated this as a write only descriptor.
62+
///
63+
/// If this is false, this descriptor is read only.
64+
/// Write only means the emulated device can write and the driver can read.
65+
pub fn is_write_only(&self) -> bool {
66+
self.flags & VIRTQ_DESC_F_WRITE != 0
67+
}
68+
69+
/// If the driver designated this as a indirect descriptor.
70+
pub fn is_indirect(&self) -> bool {
71+
self.flags & VIRTQ_DESC_F_INDIRECT != 0
72+
}
5273
}
5374

5475
// SAFETY: `Descriptor` is a POD and contains no padding.
@@ -159,6 +180,11 @@ impl<'a, M: GuestMemory> DescriptorChain<'a, M> {
159180
self.flags & VIRTQ_DESC_F_WRITE != 0
160181
}
161182

183+
/// If the driver designated this as a indirect descriptor.
184+
pub fn is_indirect(&self) -> bool {
185+
self.flags & VIRTQ_DESC_F_INDIRECT != 0
186+
}
187+
162188
/// Gets the next descriptor in this descriptor chain, if there is one.
163189
///
164190
/// Note that this is distinct from the next descriptor chain returned by `AvailIter`, which is

src/vmm/src/devices/virtio/vsock/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ pub enum VsockError {
124124
DescChainTooShortForHeader(usize),
125125
/// The descriptor chain length was greater than the max ([u32::MAX])
126126
DescChainOverflow,
127+
/// Nested indirect descriptor
128+
NestedIndirectDescriptor,
127129
/// The vsock header `len` field holds an invalid value: {0}
128130
InvalidPktLen(u32),
129131
/// A data fetch was attempted when no data was available.
@@ -147,6 +149,7 @@ impl From<IoVecError> for VsockError {
147149
IoVecError::ReadOnlyDescriptor => VsockError::UnwritableDescriptor,
148150
IoVecError::GuestMemory(err) => VsockError::GuestMemoryMmap(err),
149151
IoVecError::OverflowedDescriptor => VsockError::DescChainOverflow,
152+
IoVecError::NestedIndirectDescriptor => VsockError::NestedIndirectDescriptor,
150153
}
151154
}
152155
}

0 commit comments

Comments
 (0)