
Commit c247c94

feat(net): add MRG_RXBUF support to virtio-net device
The virtio-net device now supports the VIRTIO_NET_F_MRG_RXBUF feature, which allows it to write a single packet into multiple descriptor chains. The number of descriptor chains used (also known as heads) is written into the `virtio_net_hdr_v1` structure, which is located at the very beginning of the packet.

Signed-off-by: Egor Lazarchuk <[email protected]>
1 parent 74fbe4e commit c247c94
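For context, here is a minimal sketch of the `virtio_net_hdr_v1` layout the commit message refers to. The struct and helper below are illustrative assumptions (the field order follows the VirtIO 1.0 network header), not code from this commit:

// Illustration of the 12-byte header placed at the start of every received frame.
#[repr(C)]
struct VirtioNetHdrV1 {
    flags: u8,
    gso_type: u8,
    hdr_len: u16,
    gso_size: u16,
    csum_start: u16,
    csum_offset: u16,
    // Number of descriptor chains ("heads") the packet was scattered across.
    num_buffers: u16,
}

// Hypothetical helper: stamp the number of used heads into the header bytes at
// the very beginning of the frame.
fn set_num_buffers(frame_start: &mut [u8], used_heads: u16) {
    // num_buffers occupies the last two bytes (offset 10) of the 12-byte header
    // and is little-endian per the VirtIO spec.
    frame_start[10..12].copy_from_slice(&used_heads.to_le_bytes());
}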

3 files changed: +120 -69 lines

src/vmm/src/devices/virtio/iovec.rs

Lines changed: 6 additions & 1 deletion
@@ -367,10 +367,15 @@ impl IoVecBufferMut {
     /// In contrast to the equivalent [`IoVecBuffer::len()`] which returns `u32`, this one returns
     /// `usize` since the buffer can contain multiple `DescriptorChain` objects, so we don't have
     /// the limit that the length of a buffer is limited by `u32`.
-    pub(crate) fn len(&self) -> usize {
+    pub fn len(&self) -> usize {
         self.len
     }
 
+    /// Returns `true` if the buffer is empty.
+    pub fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
     /// Returns a pointer to the memory keeping the `iovec` structs
     pub fn as_iovec_mut_slice(&mut self) -> &mut [iovec] {
         self.vecs.as_mut_slice()

src/vmm/src/devices/virtio/net/device.rs

Lines changed: 110 additions & 62 deletions
@@ -6,11 +6,12 @@
 // found in the THIRD-PARTY file.
 
 use std::collections::VecDeque;
-use std::mem;
+use std::mem::{self};
 use std::net::Ipv4Addr;
+use std::num::Wrapping;
 use std::sync::{Arc, Mutex};
 
-use libc::EAGAIN;
+use libc::{iovec, EAGAIN};
 use log::error;
 use vmm_sys_util::eventfd::EventFd;
 
@@ -19,7 +20,7 @@ use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
 use crate::devices::virtio::gen::virtio_net::{
     virtio_net_hdr_v1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4,
     VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4,
-    VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC,
+    VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF,
 };
 use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
 use crate::devices::virtio::iovec::{
@@ -108,7 +109,8 @@ pub struct RxBuffers {
     // A map of which part of the memory belongs to which `DescriptorChain` object
     pub parsed_descriptors: VecDeque<ParsedDescriptorChain>,
     // Buffers that we have used and they are ready to be given back to the guest.
-    pub used_descriptor: Option<ParsedDescriptorChain>,
+    pub used_descriptors: u16,
+    pub used_bytes: u32,
 }
 
 impl RxBuffers {
@@ -118,7 +120,8 @@ impl RxBuffers {
             min_buffer_size: 0,
             iovec: IoVecBufferMut::new()?,
             parsed_descriptors: VecDeque::with_capacity(FIRECRACKER_MAX_QUEUE_SIZE.into()),
-            used_descriptor: None,
+            used_descriptors: 0,
+            used_bytes: 0,
         })
     }
 
@@ -141,16 +144,16 @@
         Ok(())
     }
 
-    /// Returns the number of available `iovec` objects.
+    /// Returns the total size of available space in the buffer.
     #[inline(always)]
-    fn len(&self) -> usize {
+    fn capacity(&self) -> usize {
         self.iovec.len()
     }
 
     /// Returns `true` if there aren't any available `iovec` objects.
     #[inline(always)]
     fn is_empty(&self) -> bool {
-        self.len() == 0
+        self.capacity() == 0
     }
 
     /// Mark the first `size` bytes of available memory as used.
@@ -160,18 +163,48 @@
     /// * The `RxBuffers` should include at least one parsed `DescriptorChain`.
     /// * `size` needs to be smaller or equal to total length of the first `DescriptorChain` stored
     ///   in the `RxBuffers`.
-    unsafe fn mark_used(&mut self, size: u32) {
-        // Since we were able to write a frame in guest memory, we should have at least one
-        // descriptor chain here. If not, we have a bug, so fail fast, since the device is
+    unsafe fn mark_used(&mut self, mut bytes_written: u32, rx_queue: &mut Queue) {
+        // Since we were able to write a frame in guest memory, we should have some capacity for it.
+        // If not, we have a bug, so fail fast, since the device is
         // fundamentally broken.
-        let mut parsed_dc = self.parsed_descriptors.pop_front().expect(
-            "net: internal bug. Mismatch between written frame size and available descriptors",
-        );
+        debug_assert!(!self.is_empty());
+
+        self.used_bytes = bytes_written;
+
+        let mut used_heads: u16 = 0;
+        let mut write_used = |head_index: u16, bytes_written: u32, rx_queue: &mut Queue| {
+            if let Err(err) = rx_queue.write_used_element(
+                (rx_queue.next_used + Wrapping(self.used_descriptors)).0,
+                head_index,
+                bytes_written,
+            ) {
+                error!(
+                    "net: Failed to add used descriptor {} of length {} to RX queue: {err}",
+                    head_index, bytes_written
+                );
+            }
+            used_heads += 1;
+            self.used_descriptors += 1;
+        };
 
-        self.header_set_num_buffers(1);
-        self.iovec.drop_descriptor_chain(&parsed_dc);
-        parsed_dc.length = size;
-        self.used_descriptor = Some(parsed_dc);
+        for parsed_dc in self.parsed_descriptors.iter() {
+            if bytes_written <= parsed_dc.length {
+                write_used(parsed_dc.head_index, bytes_written, rx_queue);
+                break;
+            } else {
+                write_used(parsed_dc.head_index, parsed_dc.length, rx_queue);
+                bytes_written -= parsed_dc.length;
+            }
+        }
+
+        self.header_set_num_buffers(used_heads);
+        for _ in 0..used_heads {
+            let parsed_dc = self
+                .parsed_descriptors
+                .pop_front()
+                .expect("This should never happen if the write to the buffer succeeded.");
+            self.iovec.drop_descriptor_chain(&parsed_dc);
+        }
     }
 
     /// Write the number of descriptors used in VirtIO header
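As a reading aid, the distribution logic in `mark_used` above boils down to the following self-contained sketch (the function name and test are illustrative, not part of the commit): each head is credited with up to its own capacity, the final head receives the remainder, and the number of entries produced is what ends up in `num_buffers`.

/// Sketch: given the capacities of the parsed descriptor chains (in order) and the
/// number of bytes written for one frame, return the bytes credited to each head.
fn split_frame(chain_capacities: &[u32], mut bytes_written: u32) -> Vec<u32> {
    let mut used = Vec::new();
    for &capacity in chain_capacities {
        if bytes_written <= capacity {
            // The last head only gets the leftover bytes of the frame.
            used.push(bytes_written);
            break;
        }
        used.push(capacity);
        bytes_written -= capacity;
    }
    used
}

#[cfg(test)]
mod split_frame_tests {
    use super::split_frame;

    #[test]
    fn frame_spans_two_chains() {
        // A 5000-byte frame written over 4096-byte chains uses two heads: 4096 + 904 bytes.
        assert_eq!(split_frame(&[4096, 4096, 4096], 5000), vec![4096, 904]);
    }
}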
@@ -191,25 +224,23 @@
     /// This will let the guest know that about all the `DescriptorChain` object that has been
     /// used to receive a frame from the TAP.
     fn finish_frame(&mut self, rx_queue: &mut Queue) {
-        if let Some(used_dc) = self.used_descriptor.take() {
-            // It is fine to `.unrap()` here. The only reason why `add_used` can fail is if the
-            // `head_index` is not a valid descriptor id. `head_index` here is a valid
-            // `DescriptorChain` index. We got it from `queue.pop_or_enable_notification()` which
-            // checks for its validity. In other words, if this unwrap() fails there's a bug in our
-            // emulation logic which, most likely, we can't recover from. So, let's crash here
-            // instead of logging an error and continuing.
-            rx_queue
-                .add_used(used_dc.head_index, used_dc.length)
-                .unwrap();
-        }
+        rx_queue.advance_used_ring(self.used_descriptors);
+        self.used_descriptors = 0;
+        self.used_bytes = 0;
     }
 
-    /// Returns the number of bytes that have been used from the buffer
-    fn used_bytes(&self) -> u32 {
-        match self.used_descriptor {
-            Some(ref dc) => dc.length,
-            None => 0,
-        }
+    /// Return a slice of iovecs for the first descriptor chain in the buffer.
+    ///
+    /// # Safety
+    /// Buffer needs to have at least one descriptor chain parsed.
+    unsafe fn single_chain_slice_mut(&mut self) -> &mut [iovec] {
+        let nr_iovecs = self.parsed_descriptors[0].nr_iovecs as usize;
+        &mut self.iovec.as_iovec_mut_slice()[..nr_iovecs]
+    }
+
+    /// Return a slice of iovecs for all descriptor chains in the buffer.
+    fn all_chains_slice_mut(&mut self) -> &mut [iovec] {
+        self.iovec.as_iovec_mut_slice()
     }
 }
@@ -272,6 +303,7 @@ impl Net {
             | 1 << VIRTIO_NET_F_HOST_TSO6
             | 1 << VIRTIO_NET_F_HOST_UFO
             | 1 << VIRTIO_F_VERSION_1
+            | 1 << VIRTIO_NET_F_MRG_RXBUF
             | 1 << VIRTIO_RING_F_EVENT_IDX;
 
         let mut config_space = ConfigSpace::default();
@@ -433,13 +465,21 @@
     /// Returns the minimum size of buffer we expect the guest to provide us depending on the
     /// features we have negotiated with it
    fn minimum_rx_buffer_size(&self) -> u32 {
-        if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64)
-            || self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64)
-            || self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64)
-        {
-            65562
+        if !self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) {
+            if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64)
+                || self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64)
+                || self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64)
+            {
+                65562
+            } else {
+                1526
+            }
         } else {
-            1526
+            // header is 12 bytes long
+            #[allow(clippy::cast_possible_truncation)]
+            {
+                vnet_hdr_len() as u32
+            }
         }
     }
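The constants above are not explained in the diff; a plausible breakdown (my assumption, shown for orientation) is the 12-byte virtio-net header plus a 14-byte Ethernet header plus the largest payload a single chain must be able to hold:

// Assumed derivation of the minimum buffer sizes used in minimum_rx_buffer_size().
const VNET_HDR_LEN: u32 = 12; // virtio_net_hdr_v1
const ETH_HDR_LEN: u32 = 14; // dst MAC + src MAC + ethertype
const MTU: u32 = 1500;
const MAX_OFFLOAD_PAYLOAD: u32 = 65536; // TSO/UFO super-frame payload

// Without guest offloads each chain must fit one standard Ethernet frame.
const MIN_WITHOUT_OFFLOADS: u32 = VNET_HDR_LEN + ETH_HDR_LEN + MTU; // 1526
// With guest TSO/UFO (and no MRG_RXBUF) each chain must fit a full 64K frame.
const MIN_WITH_OFFLOADS: u32 = VNET_HDR_LEN + ETH_HDR_LEN + MAX_OFFLOAD_PAYLOAD; // 65562
// With MRG_RXBUF a chain only needs to hold the header itself; the rest of the
// packet can spill over into further chains.
const MIN_WITH_MRG_RXBUF: u32 = VNET_HDR_LEN; // 12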

@@ -454,6 +494,9 @@
             if let Err(err) = unsafe { self.rx_buffer.add_buffer(mem, head) } {
                 self.metrics.rx_fails.inc();
                 error!("net: Could not parse an RX descriptor: {err}");
+                // Notify the queue about any frames that are already ready. We need
+                // this to bring the queue into an up-to-date state.
+                self.rx_buffer.finish_frame(queue);
                 // Try to add the bad descriptor to the used ring.
                 if let Err(err) = queue.add_used(index, 0) {
                     error!(
@@ -541,15 +584,14 @@
 
     // We currently prioritize packets from the MMDS over regular network packets.
     fn read_from_mmds_or_tap(&mut self) -> Result<usize, NetError> {
-        // If we don't have any buffers available try to parse more from the RX queue. There might
-        // be some buffers we didn't get the chance to process, because we got to handle the TAP
-        // event before the RX queue event.
-        if self.rx_buffer.is_empty() {
+        // We only want to read from the TAP (or MMDS) if we have at least 64K of available
+        // capacity, as this is the maximum size of a single packet.
+        if self.rx_buffer.capacity() < u16::MAX as usize {
             self.parse_rx_descriptors();
 
-            // If after parsing the RX queue we still don't have any buffers stop processing RX
+            // If after parsing the RX queue we still don't have enough capacity, stop processing RX
             // frames.
-            if self.rx_buffer.is_empty() {
+            if self.rx_buffer.capacity() < u16::MAX as usize {
                 return Ok(0);
             }
         }
@@ -570,8 +612,10 @@
             // * `rx_frame_buf` has size of `MAX_BUFFER_SIZE` and all `DescriptorChain` objects
             //   are at least that big.
             unsafe {
-                self.rx_buffer
-                    .mark_used((vnet_hdr_len() + len).try_into().unwrap());
+                self.rx_buffer.mark_used(
+                    (vnet_hdr_len() + len).try_into().unwrap(),
+                    &mut self.queues[RX_INDEX],
+                );
             }
             return Ok(vnet_hdr_len() + len);
         }
@@ -586,7 +630,8 @@
         // * `read_tap` passes the first `DescriptorChain` to `readv` so we can't have read more
         //   bytes than its capacity.
         unsafe {
-            self.rx_buffer.mark_used(len.try_into().unwrap());
+            self.rx_buffer
+                .mark_used(len.try_into().unwrap(), &mut self.queues[RX_INDEX]);
         }
         Ok(len)
     }
@@ -630,13 +675,14 @@
     }
 
     fn has_deferred_frame(&self) -> bool {
-        self.rx_buffer.used_descriptor.is_some()
+        self.rx_buffer.used_descriptors != 0
     }
 
     // Process the deferred frame first, then continue reading from tap.
     fn handle_deferred_frame(&mut self) -> Result<(), DeviceError> {
-        let used_bytes = self.rx_buffer.used_bytes();
-        if self.rate_limited_rx_single_frame(used_bytes as usize) {
+        if self.rate_limited_rx_single_frame(self.rx_buffer.used_bytes as usize) {
+            // Finish with the rate limited packet.
+            self.rx_buffer.finish_frame(&mut self.queues[RX_INDEX]);
             // process_rx() was interrupted possibly before consuming all
             // packets in the tap; try continuing now.
             return self.process_rx();
@@ -797,9 +843,13 @@
     ///
     /// `self.rx_buffer` needs to have at least one descriptor chain parsed
     pub unsafe fn read_tap(&mut self) -> std::io::Result<usize> {
-        let nr_iovecs = self.rx_buffer.parsed_descriptors[0].nr_iovecs as usize;
-        self.tap
-            .read_iovec(&mut self.rx_buffer.iovec.as_iovec_mut_slice()[..nr_iovecs])
+        let slice = if self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) {
+            self.rx_buffer.all_chains_slice_mut()
+        } else {
+            // SAFETY: we only call this if `rx_buffer` is not empty.
+            unsafe { self.rx_buffer.single_chain_slice_mut() }
+        };
+        self.tap.read_iovec(slice)
     }
 
     fn write_tap(tap: &mut Tap, buf: &IoVecBuffer) -> std::io::Result<usize> {
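For readers unfamiliar with scatter reads, what `read_tap` relies on is the kernel filling the supplied iovecs in order; a minimal sketch using plain libc (not the device's Tap API) looks like this:

use std::io;
use std::os::unix::io::RawFd;

// Sketch: readv() fills the iovecs in order, so a frame larger than the first
// guest buffer spills into the following descriptor chains automatically.
fn read_scattered(fd: RawFd, iovecs: &mut [libc::iovec]) -> io::Result<usize> {
    // SAFETY: every iovec is assumed to point to valid, writable guest memory
    // of the advertised length for the duration of the call.
    let ret = unsafe { libc::readv(fd, iovecs.as_ptr(), iovecs.len() as libc::c_int) };
    if ret < 0 {
        return Err(io::Error::last_os_error());
    }
    Ok(ret as usize)
}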
@@ -1846,11 +1896,8 @@ pub mod tests {
         unsafe { libc::close(th.net.lock().unwrap().tap.as_raw_fd()) };
 
         // The RX queue is empty and there is a deferred frame.
-        th.net().rx_buffer.used_descriptor = Some(ParsedDescriptorChain {
-            head_index: 1,
-            length: 100,
-            nr_iovecs: 1,
-        });
+        th.net().rx_buffer.used_descriptors = 1;
+        th.net().rx_buffer.used_bytes = 100;
         check_metric_after_block!(
             th.net().metrics.no_rx_avail_buffer,
             1,
@@ -1860,7 +1907,8 @@
         // We need to set this here to false, otherwise the device will try to
         // handle a deferred frame, it will fail and will never try to read from
         // the tap.
-        th.net().rx_buffer.used_descriptor = None;
+        th.net().rx_buffer.used_descriptors = 0;
+        th.net().rx_buffer.used_bytes = 0;
 
         th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]);
         check_metric_after_block!(

src/vmm/src/devices/virtio/net/persist.rs

Lines changed: 4 additions & 6 deletions
@@ -12,7 +12,6 @@ use serde::{Deserialize, Serialize};
 use super::device::{Net, RxBuffers};
 use super::{TapError, NET_NUM_QUEUES, RX_INDEX};
 use crate::devices::virtio::device::DeviceState;
-use crate::devices::virtio::iovec::ParsedDescriptorChain;
 use crate::devices::virtio::persist::{PersistError as VirtioStateError, VirtioDeviceState};
 use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE;
 use crate::devices::virtio::TYPE_NET;
@@ -37,14 +36,15 @@ pub struct NetConfigSpaceState {
 pub struct RxBufferState {
     // Number of iovecs we have parsed from the guest
     parsed_descriptor_chains_nr: u16,
-    used_descriptor: Option<ParsedDescriptorChain>,
+    // Number of deferred descriptors
+    deferred_descriptors: u16,
 }
 
 impl RxBufferState {
     fn from_rx_buffers(rx_buffer: &RxBuffers) -> Self {
         RxBufferState {
             parsed_descriptor_chains_nr: rx_buffer.parsed_descriptors.len().try_into().unwrap(),
-            used_descriptor: rx_buffer.used_descriptor.clone(),
+            deferred_descriptors: rx_buffer.used_descriptors,
         }
     }
 }
@@ -162,9 +162,7 @@ impl Persist<'_> for Net {
             // rolling back `next_avail` in the RX queue and call `parse_rx_descriptors`.
             net.queues[RX_INDEX].next_avail -= state.rx_buffers_state.parsed_descriptor_chains_nr;
             net.parse_rx_descriptors();
-            net.rx_buffer
-                .used_descriptor
-                .clone_from(&state.rx_buffers_state.used_descriptor);
+            net.rx_buffer.used_descriptors = state.rx_buffers_state.deferred_descriptors;
         }
 
         Ok(net)
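To summarize the restore path touched above, here is a rough sketch of the bookkeeping (illustrative Rust with assumed names, not the actual `Persist` implementation): the RX queue's avail index is rolled back by the number of chains that had been parsed but not yet used, those chains are re-parsed from guest memory, and only the count of already-used (deferred) descriptors is carried over from the snapshot.

// Sketch of the restore-time bookkeeping, with assumed names.
struct RxRestore {
    next_avail: u16,       // RX queue avail index
    used_descriptors: u16, // deferred (already used, not yet published) heads
}

fn apply_snapshot(state: &mut RxRestore, parsed_chains: u16, deferred: u16, reparse: impl FnOnce()) {
    // Chains that were parsed but unused are simply re-read from guest memory,
    // so rewind the avail index by that many heads and parse again.
    state.next_avail = state.next_avail.wrapping_sub(parsed_chains);
    reparse();
    // Only the count of deferred descriptors survives the snapshot; the used
    // ring is advanced for them once the deferred frame is finished after restore.
    state.used_descriptors = deferred;
}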
