Skip to content

Commit 9ecef97

Browse files
committed
feat(net): add MRG_RXBUF support to virtio-net device
Now the virtio-net device supports the VIRTIO_NET_F_MRG_RXBUF feature, which allows it to write a single packet into multiple descriptor chains. The number of descriptor chains (also known as heads) is written into the `virtio_net_hdr_v1` structure, which is located at the very beginning of the packet. Signed-off-by: Egor Lazarchuk <[email protected]>
1 parent 1525139 commit 9ecef97

File tree

3 files changed

+140
-79
lines changed

3 files changed

+140
-79
lines changed

src/vmm/src/devices/virtio/iovec.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,10 +367,15 @@ impl IoVecBufferMut {
367367
/// In contrast to the equivalent [`IoVecBuffer::len()`] which returns `u32`, this one returns
368368
/// `usize` since the buffer can contain multiple `DescriptorChain` objects, so we don't have
369369
/// the limit that the length of a buffer is limited by `u32`.
370-
pub(crate) fn len(&self) -> usize {
370+
pub fn len(&self) -> usize {
371371
self.len
372372
}
373373

374+
/// Returns true if the buffer is empty.
375+
pub fn is_empty(&self) -> bool {
376+
self.len == 0
377+
}
378+
374379
/// Returns a pointer to the memory keeping the `iovec` structs
375380
pub fn as_iovec_mut_slice(&mut self) -> &mut [iovec] {
376381
self.vecs.as_mut_slice()

src/vmm/src/devices/virtio/net/device.rs

Lines changed: 130 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
// found in the THIRD-PARTY file.
77

88
use std::collections::VecDeque;
9-
use std::mem;
9+
use std::mem::{self};
1010
use std::net::Ipv4Addr;
11+
use std::num::{NonZeroU32, Wrapping};
1112
use std::sync::{Arc, Mutex};
1213

13-
use libc::EAGAIN;
14+
use libc::{iovec, EAGAIN};
1415
use log::error;
1516
use vmm_sys_util::eventfd::EventFd;
1617

@@ -19,7 +20,7 @@ use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
1920
use crate::devices::virtio::gen::virtio_net::{
2021
virtio_net_hdr_v1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4,
2122
VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4,
22-
VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC,
23+
VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF,
2324
};
2425
use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
2526
use crate::devices::virtio::iovec::{
@@ -108,7 +109,8 @@ pub struct RxBuffers {
108109
// A map of which part of the memory belongs to which `DescriptorChain` object
109110
pub parsed_descriptors: VecDeque<ParsedDescriptorChain>,
110111
// Buffers that we have used and they are ready to be given back to the guest.
111-
pub deferred_descriptor: Option<ParsedDescriptorChain>,
112+
pub used_descriptors: u16,
113+
pub used_bytes: u32,
112114
}
113115

114116
impl RxBuffers {
@@ -118,7 +120,8 @@ impl RxBuffers {
118120
min_buffer_size: 0,
119121
iovec: IoVecBufferMut::new()?,
120122
parsed_descriptors: VecDeque::with_capacity(FIRECRACKER_MAX_QUEUE_SIZE.into()),
121-
deferred_descriptor: None,
123+
used_descriptors: 0,
124+
used_bytes: 0,
122125
})
123126
}
124127

@@ -141,37 +144,56 @@ impl RxBuffers {
141144
Ok(())
142145
}
143146

144-
/// Returns the number of available `iovec` objects.
147+
/// Returns the total size of available space in the buffer.
145148
#[inline(always)]
146-
fn len(&self) -> usize {
149+
fn capacity(&self) -> usize {
147150
self.iovec.len()
148151
}
149152

150-
/// Returns `true` if there aren't any available `iovec` objects.
151-
#[inline(always)]
152-
fn is_empty(&self) -> bool {
153-
self.len() == 0
154-
}
155-
156153
/// Mark the first `size` bytes of available memory as used.
157154
///
158155
/// # Safety:
159156
///
160157
/// * The `RxBuffers` should include at least one parsed `DescriptorChain`.
161158
/// * `size` needs to be smaller or equal to total length of the first `DescriptorChain` stored
162159
/// in the `RxBuffers`.
163-
unsafe fn mark_used(&mut self, size: u32) -> ParsedDescriptorChain {
164-
// Since we were able to write a frame in guest memory, we should have at least one
165-
// descriptor chain here. If not, we have a bug, so fail fast, since the device is
166-
// fundamentally broken.
167-
let mut parsed_dc = self.parsed_descriptors.pop_front().expect(
168-
"net: internal bug. Mismatch between written frame size and available descriptors",
169-
);
160+
unsafe fn mark_used(&mut self, mut bytes_written: u32, rx_queue: &mut Queue) {
161+
self.used_bytes = bytes_written;
162+
163+
let mut used_heads: u16 = 0;
164+
let mut write_used = |head_index: u16, bytes_written: u32, rx_queue: &mut Queue| {
165+
if let Err(err) = rx_queue.write_used_element(
166+
(rx_queue.next_used + Wrapping(self.used_descriptors)).0,
167+
head_index,
168+
bytes_written,
169+
) {
170+
error!(
171+
"net: Failed to add used descriptor {} of length {} to RX queue: {err}",
172+
head_index, bytes_written
173+
);
174+
}
175+
used_heads += 1;
176+
self.used_descriptors += 1;
177+
};
170178

171-
self.header_set_num_buffers(1);
172-
self.iovec.drop_descriptor_chain(&parsed_dc);
173-
parsed_dc.length = size;
174-
parsed_dc
179+
for parsed_dc in self.parsed_descriptors.iter() {
180+
if bytes_written <= parsed_dc.length {
181+
write_used(parsed_dc.head_index, bytes_written, rx_queue);
182+
break;
183+
} else {
184+
write_used(parsed_dc.head_index, parsed_dc.length, rx_queue);
185+
bytes_written -= parsed_dc.length;
186+
}
187+
}
188+
189+
self.header_set_num_buffers(used_heads);
190+
for _ in 0..used_heads {
191+
let parsed_dc = self
192+
.parsed_descriptors
193+
.pop_front()
194+
.expect("This should never happen if write to the buffer succeeded.");
195+
self.iovec.drop_descriptor_chain(&parsed_dc);
196+
}
175197
}
176198

177199
/// Write the number of descriptors used in VirtIO header
@@ -190,14 +212,24 @@ impl RxBuffers {
190212

191213
/// This will let the guest know about all the `DescriptorChain` objects that have been
192214
/// used to receive a frame from the TAP.
193-
fn finish_frame(&mut self, dc: &ParsedDescriptorChain, rx_queue: &mut Queue) {
194-
// It is fine to `.unwrap()` here. The only reason why `add_used` can fail is if the
195-
// `head_index` is not a valid descriptor id. `head_index` here is a valid
196-
// `DescriptorChain` index. We got it from `queue.pop_or_enable_notification()` which
197-
// checks for its validity. In other words, if this unwrap() fails there's a bug in our
198-
// emulation logic which, most likely, we can't recover from. So, let's crash here
199-
// instead of logging an error and continuing.
200-
rx_queue.add_used(dc.head_index, dc.length).unwrap();
215+
fn finish_frame(&mut self, rx_queue: &mut Queue) {
216+
rx_queue.advance_used_ring(self.used_descriptors);
217+
self.used_descriptors = 0;
218+
self.used_bytes = 0;
219+
}
220+
221+
/// Return a slice of iovecs for the first descriptor chain in the buffer.
222+
///
223+
/// # Safety
224+
/// Buffer needs to have at least one descriptor chain parsed.
225+
unsafe fn single_chain_slice_mut(&mut self) -> &mut [iovec] {
226+
let nr_iovecs = self.parsed_descriptors[0].nr_iovecs as usize;
227+
&mut self.iovec.as_iovec_mut_slice()[..nr_iovecs]
228+
}
229+
230+
/// Return a slice of iovecs for all descriptor chains in the buffer.
231+
fn all_chains_slice_mut(&mut self) -> &mut [iovec] {
232+
self.iovec.as_iovec_mut_slice()
201233
}
202234
}
203235

@@ -260,6 +292,7 @@ impl Net {
260292
| 1 << VIRTIO_NET_F_HOST_TSO6
261293
| 1 << VIRTIO_NET_F_HOST_UFO
262294
| 1 << VIRTIO_F_VERSION_1
295+
| 1 << VIRTIO_NET_F_MRG_RXBUF
263296
| 1 << VIRTIO_RING_F_EVENT_IDX;
264297

265298
let mut config_space = ConfigSpace::default();
@@ -407,27 +440,35 @@ impl Net {
407440
// Attempts to copy a single frame into the guest if there is enough
408441
// rate limiting budget.
409442
// Returns true on successful frame delivery.
410-
fn rate_limited_rx_single_frame(&mut self, dc: &ParsedDescriptorChain) -> bool {
443+
fn rate_limited_rx_single_frame(&mut self, frame_size: u32) -> bool {
411444
let rx_queue = &mut self.queues[RX_INDEX];
412-
if !Self::rate_limiter_consume_op(&mut self.rx_rate_limiter, dc.length as u64) {
445+
if !Self::rate_limiter_consume_op(&mut self.rx_rate_limiter, frame_size as u64) {
413446
self.metrics.rx_rate_limiter_throttled.inc();
414447
return false;
415448
}
416449

417-
self.rx_buffer.finish_frame(dc, rx_queue);
450+
self.rx_buffer.finish_frame(rx_queue);
418451
true
419452
}
420453

421454
/// Returns the minimum size of buffer we expect the guest to provide us depending on the
422455
/// features we have negotiated with it
423456
fn minimum_rx_buffer_size(&self) -> u32 {
424-
if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64)
425-
|| self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64)
426-
|| self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64)
427-
{
428-
65562
457+
if !self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) {
458+
if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64)
459+
|| self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64)
460+
|| self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64)
461+
{
462+
65562
463+
} else {
464+
1526
465+
}
429466
} else {
430-
1526
467+
// header is 12 bytes long
468+
#[allow(clippy::cast_possible_truncation)]
469+
{
470+
vnet_hdr_len() as u32
471+
}
431472
}
432473
}
433474

@@ -442,6 +483,9 @@ impl Net {
442483
if let Err(err) = unsafe { self.rx_buffer.add_buffer(mem, head) } {
443484
self.metrics.rx_fails.inc();
444485
error!("net: Could not parse an RX descriptor: {err}");
486+
// Notify the queue about ready frames. We need this
487+
// to bring the queue into an up-to-date state.
488+
self.rx_buffer.finish_frame(queue);
445489
// Try to add the bad descriptor to the used ring.
446490
if let Err(err) = queue.add_used(index, 0) {
447491
error!(
@@ -528,16 +572,15 @@ impl Net {
528572
}
529573

530574
// We currently prioritize packets from the MMDS over regular network packets.
531-
fn read_from_mmds_or_tap(&mut self) -> Result<Option<ParsedDescriptorChain>, NetError> {
532-
// If we don't have any buffers available try to parse more from the RX queue. There might
533-
// be some buffers we didn't get the chance to process, because we got to handle the TAP
534-
// event before the RX queue event.
535-
if self.rx_buffer.is_empty() {
575+
fn read_from_mmds_or_tap(&mut self) -> Result<Option<NonZeroU32>, NetError> {
576+
// We only want to read from TAP (or mmds) if we have at least 64K of available capacity as
577+
// this is the max size of 1 packet.
578+
if self.rx_buffer.capacity() < u16::MAX as usize {
536579
self.parse_rx_descriptors();
537580

538-
// If after parsing the RX queue we still don't have any buffers stop processing RX
581+
// If after parsing the RX queue we still don't have enough capacity, stop processing RX
539582
// frames.
540-
if self.rx_buffer.is_empty() {
583+
if self.rx_buffer.capacity() < u16::MAX as usize {
541584
return Ok(None);
542585
}
543586
}
@@ -553,29 +596,44 @@ impl Net {
553596
self.rx_buffer
554597
.iovec
555598
.write_all_volatile_at(&self.rx_frame_buf[..vnet_hdr_len() + len], 0)?;
599+
600+
// SAFETY:
601+
// * len is never 0
602+
// * Maximum size of the packet is 65562 which fits into u32
603+
#[allow(clippy::cast_possible_truncation)]
604+
let len = unsafe { NonZeroU32::new_unchecked((vnet_hdr_len() + len) as u32) };
605+
556606
// SAFETY: This is safe:
557607
// * We checked that `rx_buffer` includes at least one `DescriptorChain`
558608
// * `rx_frame_buf` has size of `MAX_BUFFER_SIZE` and all `DescriptorChain` objects
559609
// are at least that big.
560-
let dc = unsafe {
610+
unsafe {
561611
self.rx_buffer
562-
.mark_used((vnet_hdr_len() + len).try_into().unwrap())
563-
};
564-
565-
return Ok(Some(dc));
612+
.mark_used(len.get(), &mut self.queues[RX_INDEX]);
613+
}
614+
return Ok(Some(len));
566615
}
567616
}
568617

569618
// SAFETY: this is safe because we ensured that `self.rx_buffer` has at least one
570619
// DescriptorChain parsed in it.
571620
let len = unsafe { self.read_tap().map_err(NetError::IO) }?;
572621

622+
// SAFETY:
623+
// * len is never 0
624+
// * Maximum size of the packet is 65562 which fits into u32
625+
#[allow(clippy::cast_possible_truncation)]
626+
let len = unsafe { NonZeroU32::new_unchecked(len as u32) };
627+
573628
// SAFETY: This is safe,
574629
// * `rx_buffer` has at least one `DescriptorChain`
575630
// * `read_tap` passes the first `DescriptorChain` to `readv` so we can't have read more
576631
// bytes than its capacity.
577-
let dc = unsafe { self.rx_buffer.mark_used(len.try_into().unwrap()) };
578-
Ok(Some(dc))
632+
unsafe {
633+
self.rx_buffer
634+
.mark_used(len.get(), &mut self.queues[RX_INDEX]);
635+
}
636+
Ok(Some(len))
579637
}
580638

581639
/// Read as many frames as possible.
@@ -586,12 +644,11 @@ impl Net {
586644
self.metrics.no_rx_avail_buffer.inc();
587645
break;
588646
}
589-
Ok(Some(dc)) => {
647+
Ok(Some(bytes)) => {
590648
self.metrics.rx_count.inc();
591-
self.metrics.rx_bytes_count.add(dc.length as u64);
649+
self.metrics.rx_bytes_count.add(bytes.get() as u64);
592650
self.metrics.rx_packets_count.inc();
593-
if !self.rate_limited_rx_single_frame(&dc) {
594-
self.rx_buffer.deferred_descriptor = Some(dc);
651+
if !self.rate_limited_rx_single_frame(bytes.get()) {
595652
break;
596653
}
597654
}
@@ -619,11 +676,10 @@ impl Net {
619676

620677
fn resume_rx(&mut self) -> Result<(), DeviceError> {
621678
// First try to handle any deferred frame
622-
if let Some(deferred_descriptor) = self.rx_buffer.deferred_descriptor.take() {
679+
if self.rx_buffer.used_bytes != 0 {
623680
// If can't finish sending this frame, re-set it as deferred and return; we can't
624681
// process any more frames from the TAP.
625-
if !self.rate_limited_rx_single_frame(&deferred_descriptor) {
626-
self.rx_buffer.deferred_descriptor = Some(deferred_descriptor);
682+
if !self.rate_limited_rx_single_frame(self.rx_buffer.used_bytes) {
627683
return Ok(());
628684
}
629685
}
@@ -689,7 +745,7 @@ impl Net {
689745
&self.metrics,
690746
)
691747
.unwrap_or(false);
692-
if frame_consumed_by_mmds && self.rx_buffer.deferred_descriptor.is_none() {
748+
if frame_consumed_by_mmds && self.rx_buffer.used_bytes == 0 {
693749
// MMDS consumed this frame/request, let's also try to process the response.
694750
process_rx_for_mmds = true;
695751
}
@@ -774,9 +830,13 @@ impl Net {
774830
///
775831
/// `self.rx_buffer` needs to have at least one descriptor chain parsed
776832
pub unsafe fn read_tap(&mut self) -> std::io::Result<usize> {
777-
let nr_iovecs = self.rx_buffer.parsed_descriptors[0].nr_iovecs as usize;
778-
self.tap
779-
.read_iovec(&mut self.rx_buffer.iovec.as_iovec_mut_slice()[..nr_iovecs])
833+
let slice = if self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) {
834+
self.rx_buffer.all_chains_slice_mut()
835+
} else {
836+
// SAFETY: we only call this if `rx_buffer` is not empty.
837+
unsafe { self.rx_buffer.single_chain_slice_mut() }
838+
};
839+
self.tap.read_iovec(slice)
780840
}
781841

782842
fn write_tap(tap: &mut Tap, buf: &IoVecBuffer) -> std::io::Result<usize> {
@@ -1815,11 +1875,8 @@ pub mod tests {
18151875
unsafe { libc::close(th.net.lock().unwrap().tap.as_raw_fd()) };
18161876

18171877
// The RX queue is empty and there is a deferred frame.
1818-
th.net().rx_buffer.deferred_descriptor = Some(ParsedDescriptorChain {
1819-
head_index: 1,
1820-
length: 100,
1821-
nr_iovecs: 1,
1822-
});
1878+
th.net().rx_buffer.used_descriptors = 1;
1879+
th.net().rx_buffer.used_bytes = 100;
18231880
check_metric_after_block!(
18241881
th.net().metrics.no_rx_avail_buffer,
18251882
1,
@@ -1829,7 +1886,8 @@ pub mod tests {
18291886
// We need to set this here to false, otherwise the device will try to
18301887
// handle a deferred frame, it will fail and will never try to read from
18311888
// the tap.
1832-
th.net().rx_buffer.deferred_descriptor = None;
1889+
th.net().rx_buffer.used_descriptors = 0;
1890+
th.net().rx_buffer.used_bytes = 0;
18331891

18341892
th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]);
18351893
check_metric_after_block!(

0 commit comments

Comments
 (0)