Skip to content

Commit 7f2ed1c

Browse files
committed
feat(net): add MRG_RXBUF support to virtio-net device
Now virtio-net device VIRTIO_NET_F_MRG_RXBUF feature which allows it to write single packet into multiple descriptor chains. The amount of descriptor chains (also known as heads) is written into the `virtio_net_hdr_v1` structure which is located at the very begging of the packet. Signed-off-by: Egor Lazarchuk <[email protected]>
1 parent 74fbe4e commit 7f2ed1c

File tree

3 files changed

+118
-68
lines changed

3 files changed

+118
-68
lines changed

src/vmm/src/devices/virtio/iovec.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,10 +367,15 @@ impl IoVecBufferMut {
367367
/// In contrast to the equivalent [`IoVecBuffer::len()`] which returns `u32`, this one returns
368368
/// `usize` since the buffer can contain multiple `DescriptorChain` objects, so we don't have
369369
/// the limit that the length of a buffer is limited by `u32`.
370-
pub(crate) fn len(&self) -> usize {
370+
pub fn len(&self) -> usize {
371371
self.len
372372
}
373373

374+
/// Returns true if there is buffer is empty.
375+
pub fn is_empty(&self) -> bool {
376+
self.len == 0
377+
}
378+
374379
/// Returns a pointer to the memory keeping the `iovec` structs
375380
pub fn as_iovec_mut_slice(&mut self) -> &mut [iovec] {
376381
self.vecs.as_mut_slice()

src/vmm/src/devices/virtio/net/device.rs

Lines changed: 108 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
// found in the THIRD-PARTY file.
77

88
use std::collections::VecDeque;
9-
use std::mem;
9+
use std::mem::{self, offset_of};
1010
use std::net::Ipv4Addr;
11+
use std::num::Wrapping;
1112
use std::sync::{Arc, Mutex};
1213

13-
use libc::EAGAIN;
14+
use libc::{iovec, EAGAIN};
1415
use log::error;
1516
use vmm_sys_util::eventfd::EventFd;
1617

@@ -19,7 +20,7 @@ use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
1920
use crate::devices::virtio::gen::virtio_net::{
2021
virtio_net_hdr_v1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4,
2122
VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4,
22-
VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC,
23+
VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF,
2324
};
2425
use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
2526
use crate::devices::virtio::iovec::{
@@ -108,7 +109,8 @@ pub struct RxBuffers {
108109
// A map of which part of the memory belongs to which `DescriptorChain` object
109110
pub parsed_descriptors: VecDeque<ParsedDescriptorChain>,
110111
// Buffers that we have used and they are ready to be given back to the guest.
111-
pub used_descriptor: Option<ParsedDescriptorChain>,
112+
pub used_descriptors: u16,
113+
pub used_bytes: u32,
112114
}
113115

114116
impl RxBuffers {
@@ -118,7 +120,8 @@ impl RxBuffers {
118120
min_buffer_size: 0,
119121
iovec: IoVecBufferMut::new()?,
120122
parsed_descriptors: VecDeque::with_capacity(FIRECRACKER_MAX_QUEUE_SIZE.into()),
121-
used_descriptor: None,
123+
used_descriptors: 0,
124+
used_bytes: 0,
122125
})
123126
}
124127

@@ -141,37 +144,61 @@ impl RxBuffers {
141144
Ok(())
142145
}
143146

144-
/// Returns the number of available `iovec` objects.
147+
/// Returns the total size of available space in the buffer.
145148
#[inline(always)]
146-
fn len(&self) -> usize {
149+
fn capacity(&self) -> usize {
147150
self.iovec.len()
148151
}
149152

150-
/// Returns `true` if there aren't any available `iovec` objects.
151-
#[inline(always)]
152-
fn is_empty(&self) -> bool {
153-
self.len() == 0
154-
}
155-
156153
/// Mark the first `size` bytes of available memory as used.
157154
///
158155
/// # Safety:
159156
///
160157
/// * The `RxBuffers` should include at least one parsed `DescriptorChain`.
161158
/// * `size` needs to be smaller or equal to total length of the first `DescriptorChain` stored
162159
/// in the `RxBuffers`.
163-
unsafe fn mark_used(&mut self, size: u32) {
160+
unsafe fn mark_used(&mut self, mut bytes_written: u32, rx_queue: &mut Queue) {
164161
// Since we were able to write a frame in guest memory, we should have at least one
165162
// descriptor chain here. If not, we have a bug, so fail fast, since the device is
166163
// fundamentally broken.
167-
let mut parsed_dc = self.parsed_descriptors.pop_front().expect(
168-
"net: internal bug. Mismatch between written frame size and available descriptors",
169-
);
164+
debug_assert!(!self.iovec.is_empty());
165+
166+
self.used_bytes = bytes_written;
170167

171-
self.header_set_num_buffers(1);
172-
self.iovec.drop_descriptor_chain(&parsed_dc);
173-
parsed_dc.length = size;
174-
self.used_descriptor = Some(parsed_dc);
168+
let mut used_heads: u16 = 0;
169+
let mut write_used = |head_index: u16, bytes_written: u32, rx_queue: &mut Queue| {
170+
if let Err(err) = rx_queue.write_used_element(
171+
(rx_queue.next_used + Wrapping(self.used_descriptors)).0,
172+
head_index,
173+
bytes_written,
174+
) {
175+
error!(
176+
"net: Failed to add used descriptor {} of length {} to RX queue: {err}",
177+
head_index, bytes_written
178+
);
179+
}
180+
used_heads += 1;
181+
self.used_descriptors += 1;
182+
};
183+
184+
for parsed_dc in self.parsed_descriptors.iter() {
185+
if bytes_written <= parsed_dc.length {
186+
write_used(parsed_dc.head_index, bytes_written, rx_queue);
187+
break;
188+
} else {
189+
write_used(parsed_dc.head_index, parsed_dc.length, rx_queue);
190+
bytes_written -= parsed_dc.length;
191+
}
192+
}
193+
194+
self.header_set_num_buffers(used_heads);
195+
for _ in 0..used_heads {
196+
let parsed_dc = self
197+
.parsed_descriptors
198+
.pop_front()
199+
.expect("This should never happen if write to the buffer succeded.");
200+
self.iovec.drop_descriptor_chain(&parsed_dc);
201+
}
175202
}
176203

177204
/// Write the number of descriptors used in VirtIO header
@@ -191,25 +218,28 @@ impl RxBuffers {
191218
/// This will let the guest know that about all the `DescriptorChain` object that has been
192219
/// used to receive a frame from the TAP.
193220
fn finish_frame(&mut self, rx_queue: &mut Queue) {
194-
if let Some(used_dc) = self.used_descriptor.take() {
195-
// It is fine to `.unrap()` here. The only reason why `add_used` can fail is if the
196-
// `head_index` is not a valid descriptor id. `head_index` here is a valid
197-
// `DescriptorChain` index. We got it from `queue.pop_or_enable_notification()` which
198-
// checks for its validity. In other words, if this unwrap() fails there's a bug in our
199-
// emulation logic which, most likely, we can't recover from. So, let's crash here
200-
// instead of logging an error and continuing.
201-
rx_queue
202-
.add_used(used_dc.head_index, used_dc.length)
203-
.unwrap();
204-
}
221+
rx_queue.advance_used_ring(self.used_descriptors);
222+
self.used_descriptors = 0;
223+
self.used_bytes = 0;
205224
}
206225

207226
/// Returns the number of bytes that have been used from the buffer
208227
fn used_bytes(&self) -> u32 {
209-
match self.used_descriptor {
210-
Some(ref dc) => dc.length,
211-
None => 0,
212-
}
228+
self.used_bytes
229+
}
230+
231+
/// Return a slice of iovecs for the first slice in the buffer.
232+
///
233+
/// # Safety
234+
/// Buffer needs to have at least one descriptor chain parsed.
235+
unsafe fn single_chain_slice_mut(&mut self) -> &mut [iovec] {
236+
let nr_iovecs = self.parsed_descriptors[0].nr_iovecs as usize;
237+
&mut self.iovec.as_iovec_mut_slice()[..nr_iovecs]
238+
}
239+
240+
/// Return a slice of iovecs for all descriptor chains in the buffer.
241+
fn all_chains_slice_mut(&mut self) -> &mut [iovec] {
242+
self.iovec.as_iovec_mut_slice()
213243
}
214244
}
215245

@@ -272,6 +302,7 @@ impl Net {
272302
| 1 << VIRTIO_NET_F_HOST_TSO6
273303
| 1 << VIRTIO_NET_F_HOST_UFO
274304
| 1 << VIRTIO_F_VERSION_1
305+
| 1 << VIRTIO_NET_F_MRG_RXBUF
275306
| 1 << VIRTIO_RING_F_EVENT_IDX;
276307

277308
let mut config_space = ConfigSpace::default();
@@ -433,13 +464,21 @@ impl Net {
433464
/// Returns the minimum size of buffer we expect the guest to provide us depending on the
434465
/// features we have negotiated with it
435466
fn minimum_rx_buffer_size(&self) -> u32 {
436-
if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64)
437-
|| self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64)
438-
|| self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64)
439-
{
440-
65562
467+
if !self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) {
468+
if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64)
469+
|| self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64)
470+
|| self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64)
471+
{
472+
65562
473+
} else {
474+
1526
475+
}
441476
} else {
442-
1526
477+
// header is 12 bytes long
478+
#[allow(clippy::cast_possible_truncation)]
479+
{
480+
vnet_hdr_len() as u32
481+
}
443482
}
444483
}
445484

@@ -454,6 +493,9 @@ impl Net {
454493
if let Err(err) = unsafe { self.rx_buffer.add_buffer(mem, head) } {
455494
self.metrics.rx_fails.inc();
456495
error!("net: Could not parse an RX descriptor: {err}");
496+
// Notify queue about ready frames. We need this
497+
// to bring queue into up to date state.
498+
self.rx_buffer.finish_frame(queue);
457499
// Try to add the bad descriptor to the used ring.
458500
if let Err(err) = queue.add_used(index, 0) {
459501
error!(
@@ -541,15 +583,14 @@ impl Net {
541583

542584
// We currently prioritize packets from the MMDS over regular network packets.
543585
fn read_from_mmds_or_tap(&mut self) -> Result<usize, NetError> {
544-
// If we don't have any buffers available try to parse more from the RX queue. There might
545-
// be some buffers we didn't get the chance to process, because we got to handle the TAP
546-
// event before the RX queue event.
547-
if self.rx_buffer.is_empty() {
586+
// We only want to read from TAP (or mmds) if we have at least 64K of available capacity as
587+
// this is the max size of 1 packet.
588+
if self.rx_buffer.capacity() < u16::MAX as usize {
548589
self.parse_rx_descriptors();
549590

550-
// If after parsing the RX queue we still don't have any buffers stop processing RX
591+
// If after parsing the RX queue we still don't have enough capacity, stop processing RX
551592
// frames.
552-
if self.rx_buffer.is_empty() {
593+
if self.rx_buffer.capacity() < u16::MAX as usize {
553594
return Ok(0);
554595
}
555596
}
@@ -570,8 +611,10 @@ impl Net {
570611
// * `rx_frame_buf` has size of `MAX_BUFFER_SIZE` and all `DescriptorChain` objects
571612
// are at least that big.
572613
unsafe {
573-
self.rx_buffer
574-
.mark_used((vnet_hdr_len() + len).try_into().unwrap());
614+
self.rx_buffer.mark_used(
615+
(vnet_hdr_len() + len).try_into().unwrap(),
616+
&mut self.queues[RX_INDEX],
617+
);
575618
}
576619
return Ok(vnet_hdr_len() + len);
577620
}
@@ -586,7 +629,8 @@ impl Net {
586629
// * `read_tap` passes the first `DescriptorChain` to `readv` so we can't have read more
587630
// bytes than its capacity.
588631
unsafe {
589-
self.rx_buffer.mark_used(len.try_into().unwrap());
632+
self.rx_buffer
633+
.mark_used(len.try_into().unwrap(), &mut self.queues[RX_INDEX]);
590634
}
591635
Ok(len)
592636
}
@@ -630,13 +674,15 @@ impl Net {
630674
}
631675

632676
fn has_deferred_frame(&self) -> bool {
633-
self.rx_buffer.used_descriptor.is_some()
677+
self.rx_buffer.used_descriptors != 0
634678
}
635679

636680
// Process the deferred frame first, then continue reading from tap.
637681
fn handle_deferred_frame(&mut self) -> Result<(), DeviceError> {
638682
let used_bytes = self.rx_buffer.used_bytes();
639683
if self.rate_limited_rx_single_frame(used_bytes as usize) {
684+
// Finish with rate limitted packet.
685+
self.rx_buffer.finish_frame(&mut self.queues[RX_INDEX]);
640686
// process_rx() was interrupted possibly before consuming all
641687
// packets in the tap; try continuing now.
642688
return self.process_rx();
@@ -797,9 +843,12 @@ impl Net {
797843
///
798844
/// `self.rx_buffer` needs to have at least one descriptor chain parsed
799845
pub unsafe fn read_tap(&mut self) -> std::io::Result<usize> {
800-
let nr_iovecs = self.rx_buffer.parsed_descriptors[0].nr_iovecs as usize;
801-
self.tap
802-
.read_iovec(&mut self.rx_buffer.iovec.as_iovec_mut_slice()[..nr_iovecs])
846+
if self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) {
847+
self.tap.read_iovec(self.rx_buffer.all_chains_slice_mut())
848+
} else {
849+
// SAFETY: we only call this if `rx_buffer` is not empty.
850+
unsafe { self.tap.read_iovec(self.rx_buffer.single_chain_slice_mut()) }
851+
}
803852
}
804853

805854
fn write_tap(tap: &mut Tap, buf: &IoVecBuffer) -> std::io::Result<usize> {
@@ -1846,11 +1895,8 @@ pub mod tests {
18461895
unsafe { libc::close(th.net.lock().unwrap().tap.as_raw_fd()) };
18471896

18481897
// The RX queue is empty and there is a deferred frame.
1849-
th.net().rx_buffer.used_descriptor = Some(ParsedDescriptorChain {
1850-
head_index: 1,
1851-
length: 100,
1852-
nr_iovecs: 1,
1853-
});
1898+
th.net().rx_buffer.used_descriptors = 1;
1899+
th.net().rx_buffer.used_bytes = 100;
18541900
check_metric_after_block!(
18551901
th.net().metrics.no_rx_avail_buffer,
18561902
1,
@@ -1860,7 +1906,8 @@ pub mod tests {
18601906
// We need to set this here to false, otherwise the device will try to
18611907
// handle a deferred frame, it will fail and will never try to read from
18621908
// the tap.
1863-
th.net().rx_buffer.used_descriptor = None;
1909+
th.net().rx_buffer.used_descriptors = 0;
1910+
th.net().rx_buffer.used_bytes = 0;
18641911

18651912
th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]);
18661913
check_metric_after_block!(

src/vmm/src/devices/virtio/net/persist.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ use serde::{Deserialize, Serialize};
1212
use super::device::{Net, RxBuffers};
1313
use super::{TapError, NET_NUM_QUEUES, RX_INDEX};
1414
use crate::devices::virtio::device::DeviceState;
15-
use crate::devices::virtio::iovec::ParsedDescriptorChain;
1615
use crate::devices::virtio::persist::{PersistError as VirtioStateError, VirtioDeviceState};
1716
use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE;
1817
use crate::devices::virtio::TYPE_NET;
@@ -37,14 +36,15 @@ pub struct NetConfigSpaceState {
3736
pub struct RxBufferState {
3837
// Number of iovecs we have parsed from the guest
3938
parsed_descriptor_chains_nr: u16,
40-
used_descriptor: Option<ParsedDescriptorChain>,
39+
// Number of deferred_descriptors
40+
deferred_descriptors: u16,
4141
}
4242

4343
impl RxBufferState {
4444
fn from_rx_buffers(rx_buffer: &RxBuffers) -> Self {
4545
RxBufferState {
4646
parsed_descriptor_chains_nr: rx_buffer.parsed_descriptors.len().try_into().unwrap(),
47-
used_descriptor: rx_buffer.used_descriptor.clone(),
47+
deferred_descriptors: rx_buffer.used_descriptors,
4848
}
4949
}
5050
}
@@ -162,9 +162,7 @@ impl Persist<'_> for Net {
162162
// rolling back `next_avail` in the RX queue and call `parse_rx_descriptors`.
163163
net.queues[RX_INDEX].next_avail -= state.rx_buffers_state.parsed_descriptor_chains_nr;
164164
net.parse_rx_descriptors();
165-
net.rx_buffer
166-
.used_descriptor
167-
.clone_from(&state.rx_buffers_state.used_descriptor);
165+
net.rx_buffer.used_descriptors = state.rx_buffers_state.deferred_descriptors;
168166
}
169167

170168
Ok(net)

0 commit comments

Comments
 (0)