Skip to content

Commit 7a64d6f

Browse files
committed
feat(net): add MRG_RXBUF support to virtio-net device
Now the virtio-net device supports the VIRTIO_NET_F_MRG_RXBUF feature, which allows it to write a single packet into multiple descriptor chains. The number of descriptor chains (also known as heads) used is written into the `virtio_net_hdr_v1` structure which is located at the very beginning of the packet. Signed-off-by: Egor Lazarchuk <[email protected]>
1 parent 1525139 commit 7a64d6f

File tree

3 files changed

+146
-72
lines changed

3 files changed

+146
-72
lines changed

src/vmm/src/devices/virtio/iovec.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,10 +367,15 @@ impl IoVecBufferMut {
367367
/// In contrast to the equivalent [`IoVecBuffer::len()`] which returns `u32`, this one returns
368368
/// `usize` since the buffer can contain multiple `DescriptorChain` objects, so we don't have
369369
/// the limit that the length of a buffer is limited by `u32`.
370-
pub(crate) fn len(&self) -> usize {
370+
pub fn len(&self) -> usize {
371371
self.len
372372
}
373373

374+
/// Returns true if the buffer is empty.
375+
pub fn is_empty(&self) -> bool {
376+
self.len == 0
377+
}
378+
374379
/// Returns a pointer to the memory keeping the `iovec` structs
375380
pub fn as_iovec_mut_slice(&mut self) -> &mut [iovec] {
376381
self.vecs.as_mut_slice()

src/vmm/src/devices/virtio/net/device.rs

Lines changed: 136 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
// found in the THIRD-PARTY file.
77

88
use std::collections::VecDeque;
9-
use std::mem;
9+
use std::mem::{self};
1010
use std::net::Ipv4Addr;
11+
use std::num::{NonZeroU32, Wrapping};
1112
use std::sync::{Arc, Mutex};
1213

13-
use libc::EAGAIN;
14+
use libc::{iovec, EAGAIN};
1415
use log::error;
1516
use vmm_sys_util::eventfd::EventFd;
1617

@@ -19,7 +20,7 @@ use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
1920
use crate::devices::virtio::gen::virtio_net::{
2021
virtio_net_hdr_v1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4,
2122
VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4,
22-
VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC,
23+
VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF,
2324
};
2425
use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
2526
use crate::devices::virtio::iovec::{
@@ -108,7 +109,8 @@ pub struct RxBuffers {
108109
// A map of which part of the memory belongs to which `DescriptorChain` object
109110
pub parsed_descriptors: VecDeque<ParsedDescriptorChain>,
110111
// Buffers that we have used and they are ready to be given back to the guest.
111-
pub deferred_descriptor: Option<ParsedDescriptorChain>,
112+
pub used_descriptors: u16,
113+
pub used_bytes: u32,
112114
}
113115

114116
impl RxBuffers {
@@ -118,7 +120,8 @@ impl RxBuffers {
118120
min_buffer_size: 0,
119121
iovec: IoVecBufferMut::new()?,
120122
parsed_descriptors: VecDeque::with_capacity(FIRECRACKER_MAX_QUEUE_SIZE.into()),
121-
deferred_descriptor: None,
123+
used_descriptors: 0,
124+
used_bytes: 0,
122125
})
123126
}
124127

@@ -141,16 +144,16 @@ impl RxBuffers {
141144
Ok(())
142145
}
143146

144-
/// Returns the number of available `iovec` objects.
147+
/// Returns the total size of available space in the buffer.
145148
#[inline(always)]
146-
fn len(&self) -> usize {
149+
fn capacity(&self) -> usize {
147150
self.iovec.len()
148151
}
149152

150153
/// Returns `true` if there aren't any available `iovec` objects.
151154
#[inline(always)]
152155
fn is_empty(&self) -> bool {
153-
self.len() == 0
156+
self.capacity() == 0
154157
}
155158

156159
/// Mark the first `size` bytes of available memory as used.
@@ -160,18 +163,48 @@ impl RxBuffers {
160163
/// * The `RxBuffers` should include at least one parsed `DescriptorChain`.
161164
/// * `size` needs to be smaller or equal to total length of the first `DescriptorChain` stored
162165
/// in the `RxBuffers`.
163-
unsafe fn mark_used(&mut self, size: u32) -> ParsedDescriptorChain {
164-
// Since we were able to write a frame in guest memory, we should have at least one
165-
// descriptor chain here. If not, we have a bug, so fail fast, since the device is
166+
unsafe fn mark_used(&mut self, mut bytes_written: u32, rx_queue: &mut Queue) {
167+
// Since we were able to write a frame in guest memory, we should have some capacity for it.
168+
// If not, we have a bug, so fail fast, since the device is
166169
// fundamentally broken.
167-
let mut parsed_dc = self.parsed_descriptors.pop_front().expect(
168-
"net: internal bug. Mismatch between written frame size and available descriptors",
169-
);
170+
debug_assert!(!self.is_empty());
171+
172+
self.used_bytes = bytes_written;
170173

171-
self.header_set_num_buffers(1);
172-
self.iovec.drop_descriptor_chain(&parsed_dc);
173-
parsed_dc.length = size;
174-
parsed_dc
174+
let mut used_heads: u16 = 0;
175+
let mut write_used = |head_index: u16, bytes_written: u32, rx_queue: &mut Queue| {
176+
if let Err(err) = rx_queue.write_used_element(
177+
(rx_queue.next_used + Wrapping(self.used_descriptors)).0,
178+
head_index,
179+
bytes_written,
180+
) {
181+
error!(
182+
"net: Failed to add used descriptor {} of length {} to RX queue: {err}",
183+
head_index, bytes_written
184+
);
185+
}
186+
used_heads += 1;
187+
self.used_descriptors += 1;
188+
};
189+
190+
for parsed_dc in self.parsed_descriptors.iter() {
191+
if bytes_written <= parsed_dc.length {
192+
write_used(parsed_dc.head_index, bytes_written, rx_queue);
193+
break;
194+
} else {
195+
write_used(parsed_dc.head_index, parsed_dc.length, rx_queue);
196+
bytes_written -= parsed_dc.length;
197+
}
198+
}
199+
200+
self.header_set_num_buffers(used_heads);
201+
for _ in 0..used_heads {
202+
let parsed_dc = self
203+
.parsed_descriptors
204+
.pop_front()
205+
.expect("This should never happen if write to the buffer succeded.");
206+
self.iovec.drop_descriptor_chain(&parsed_dc);
207+
}
175208
}
176209

177210
/// Write the number of descriptors used in VirtIO header
@@ -190,14 +223,24 @@ impl RxBuffers {
190223

191224
/// This will let the guest know that about all the `DescriptorChain` object that has been
192225
/// used to receive a frame from the TAP.
193-
fn finish_frame(&mut self, dc: &ParsedDescriptorChain, rx_queue: &mut Queue) {
194-
// It is fine to `.unrap()` here. The only reason why `add_used` can fail is if the
195-
// `head_index` is not a valid descriptor id. `head_index` here is a valid
196-
// `DescriptorChain` index. We got it from `queue.pop_or_enable_notification()` which
197-
// checks for its validity. In other words, if this unwrap() fails there's a bug in our
198-
// emulation logic which, most likely, we can't recover from. So, let's crash here
199-
// instead of logging an error and continuing.
200-
rx_queue.add_used(dc.head_index, dc.length).unwrap();
226+
fn finish_frame(&mut self, rx_queue: &mut Queue) {
227+
rx_queue.advance_used_ring(self.used_descriptors);
228+
self.used_descriptors = 0;
229+
self.used_bytes = 0;
230+
}
231+
232+
/// Return a slice of iovecs for the first slice in the buffer.
233+
///
234+
/// # Safety
235+
/// Buffer needs to have at least one descriptor chain parsed.
236+
unsafe fn single_chain_slice_mut(&mut self) -> &mut [iovec] {
237+
let nr_iovecs = self.parsed_descriptors[0].nr_iovecs as usize;
238+
&mut self.iovec.as_iovec_mut_slice()[..nr_iovecs]
239+
}
240+
241+
/// Return a slice of iovecs for all descriptor chains in the buffer.
242+
fn all_chains_slice_mut(&mut self) -> &mut [iovec] {
243+
self.iovec.as_iovec_mut_slice()
201244
}
202245
}
203246

@@ -260,6 +303,7 @@ impl Net {
260303
| 1 << VIRTIO_NET_F_HOST_TSO6
261304
| 1 << VIRTIO_NET_F_HOST_UFO
262305
| 1 << VIRTIO_F_VERSION_1
306+
| 1 << VIRTIO_NET_F_MRG_RXBUF
263307
| 1 << VIRTIO_RING_F_EVENT_IDX;
264308

265309
let mut config_space = ConfigSpace::default();
@@ -407,27 +451,35 @@ impl Net {
407451
// Attempts to copy a single frame into the guest if there is enough
408452
// rate limiting budget.
409453
// Returns true on successful frame delivery.
410-
fn rate_limited_rx_single_frame(&mut self, dc: &ParsedDescriptorChain) -> bool {
454+
fn rate_limited_rx_single_frame(&mut self, frame_size: u32) -> bool {
411455
let rx_queue = &mut self.queues[RX_INDEX];
412-
if !Self::rate_limiter_consume_op(&mut self.rx_rate_limiter, dc.length as u64) {
456+
if !Self::rate_limiter_consume_op(&mut self.rx_rate_limiter, frame_size as u64) {
413457
self.metrics.rx_rate_limiter_throttled.inc();
414458
return false;
415459
}
416460

417-
self.rx_buffer.finish_frame(dc, rx_queue);
461+
self.rx_buffer.finish_frame(rx_queue);
418462
true
419463
}
420464

421465
/// Returns the minimum size of buffer we expect the guest to provide us depending on the
422466
/// features we have negotiated with it
423467
fn minimum_rx_buffer_size(&self) -> u32 {
424-
if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64)
425-
|| self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64)
426-
|| self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64)
427-
{
428-
65562
468+
if !self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) {
469+
if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64)
470+
|| self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64)
471+
|| self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64)
472+
{
473+
65562
474+
} else {
475+
1526
476+
}
429477
} else {
430-
1526
478+
// header is 12 bytes long
479+
#[allow(clippy::cast_possible_truncation)]
480+
{
481+
vnet_hdr_len() as u32
482+
}
431483
}
432484
}
433485

@@ -442,6 +494,9 @@ impl Net {
442494
if let Err(err) = unsafe { self.rx_buffer.add_buffer(mem, head) } {
443495
self.metrics.rx_fails.inc();
444496
error!("net: Could not parse an RX descriptor: {err}");
497+
// Notify queue about ready frames. We need this
498+
// to bring queue into up to date state.
499+
self.rx_buffer.finish_frame(queue);
445500
// Try to add the bad descriptor to the used ring.
446501
if let Err(err) = queue.add_used(index, 0) {
447502
error!(
@@ -528,16 +583,15 @@ impl Net {
528583
}
529584

530585
// We currently prioritize packets from the MMDS over regular network packets.
531-
fn read_from_mmds_or_tap(&mut self) -> Result<Option<ParsedDescriptorChain>, NetError> {
532-
// If we don't have any buffers available try to parse more from the RX queue. There might
533-
// be some buffers we didn't get the chance to process, because we got to handle the TAP
534-
// event before the RX queue event.
535-
if self.rx_buffer.is_empty() {
586+
fn read_from_mmds_or_tap(&mut self) -> Result<Option<NonZeroU32>, NetError> {
587+
// We only want to read from TAP (or mmds) if we have at least 64K of available capacity as
588+
// this is the max size of 1 packet.
589+
if self.rx_buffer.capacity() < u16::MAX as usize {
536590
self.parse_rx_descriptors();
537591

538-
// If after parsing the RX queue we still don't have any buffers stop processing RX
592+
// If after parsing the RX queue we still don't have enough capacity, stop processing RX
539593
// frames.
540-
if self.rx_buffer.is_empty() {
594+
if self.rx_buffer.capacity() < u16::MAX as usize {
541595
return Ok(None);
542596
}
543597
}
@@ -553,29 +607,46 @@ impl Net {
553607
self.rx_buffer
554608
.iovec
555609
.write_all_volatile_at(&self.rx_frame_buf[..vnet_hdr_len() + len], 0)?;
610+
611+
// SAFETY:
612+
// * len is never 0
613+
// * Maximum size of the packet is 65562 which fits into u32
614+
#[allow(clippy::cast_possible_truncation)]
615+
let len = unsafe { NonZeroU32::new_unchecked((vnet_hdr_len() + len) as u32) };
616+
556617
// SAFETY: This is safe:
557618
// * We checked that `rx_buffer` includes at least one `DescriptorChain`
558619
// * `rx_frame_buf` has size of `MAX_BUFFER_SIZE` and all `DescriptorChain` objects
559620
// are at least that big.
560-
let dc = unsafe {
621+
unsafe {
561622
self.rx_buffer
562-
.mark_used((vnet_hdr_len() + len).try_into().unwrap())
563-
};
564-
565-
return Ok(Some(dc));
623+
.mark_used(len.get(), &mut self.queues[RX_INDEX]);
624+
}
625+
return Ok(Some(len));
566626
}
567627
}
568628

569629
// SAFETY: this is safe because we ensured that `self.rx_buffer` has at least one
570630
// DescriptorChain parsed in it.
571631
let len = unsafe { self.read_tap().map_err(NetError::IO) }?;
572632

633+
// SAFETY:
634+
// * len is never 0
635+
// * Maximum size of the packet is 65562 which fits into u32
636+
#[allow(clippy::cast_possible_truncation)]
637+
let len = unsafe { NonZeroU32::new_unchecked((vnet_hdr_len() + len) as u32) };
638+
573639
// SAFETY: This is safe,
574640
// * `rx_buffer` has at least one `DescriptorChain`
575641
// * `read_tap` passes the first `DescriptorChain` to `readv` so we can't have read more
576642
// bytes than its capacity.
577-
let dc = unsafe { self.rx_buffer.mark_used(len.try_into().unwrap()) };
578-
Ok(Some(dc))
643+
// * Maximum size of the packet is 65562 which fits into u32
644+
#[allow(clippy::cast_possible_truncation)]
645+
unsafe {
646+
self.rx_buffer
647+
.mark_used(len.get(), &mut self.queues[RX_INDEX]);
648+
}
649+
Ok(Some(len))
579650
}
580651

581652
/// Read as many frames as possible.
@@ -586,12 +657,11 @@ impl Net {
586657
self.metrics.no_rx_avail_buffer.inc();
587658
break;
588659
}
589-
Ok(Some(dc)) => {
660+
Ok(Some(bytes)) => {
590661
self.metrics.rx_count.inc();
591-
self.metrics.rx_bytes_count.add(dc.length as u64);
662+
self.metrics.rx_bytes_count.add(bytes.get() as u64);
592663
self.metrics.rx_packets_count.inc();
593-
if !self.rate_limited_rx_single_frame(&dc) {
594-
self.rx_buffer.deferred_descriptor = Some(dc);
664+
if !self.rate_limited_rx_single_frame(bytes.get()) {
595665
break;
596666
}
597667
}
@@ -619,11 +689,10 @@ impl Net {
619689

620690
fn resume_rx(&mut self) -> Result<(), DeviceError> {
621691
// First try to handle any deferred frame
622-
if let Some(deferred_descriptor) = self.rx_buffer.deferred_descriptor.take() {
692+
if self.rx_buffer.used_bytes != 0 {
623693
// If can't finish sending this frame, re-set it as deferred and return; we can't
624694
// process any more frames from the TAP.
625-
if !self.rate_limited_rx_single_frame(&deferred_descriptor) {
626-
self.rx_buffer.deferred_descriptor = Some(deferred_descriptor);
695+
if !self.rate_limited_rx_single_frame(self.rx_buffer.used_bytes) {
627696
return Ok(());
628697
}
629698
}
@@ -774,9 +843,13 @@ impl Net {
774843
///
775844
/// `self.rx_buffer` needs to have at least one descriptor chain parsed
776845
pub unsafe fn read_tap(&mut self) -> std::io::Result<usize> {
777-
let nr_iovecs = self.rx_buffer.parsed_descriptors[0].nr_iovecs as usize;
778-
self.tap
779-
.read_iovec(&mut self.rx_buffer.iovec.as_iovec_mut_slice()[..nr_iovecs])
846+
let slice = if self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) {
847+
self.rx_buffer.all_chains_slice_mut()
848+
} else {
849+
// SAFETY: we only call this if `rx_buffer` is not empty.
850+
unsafe { self.rx_buffer.single_chain_slice_mut() }
851+
};
852+
self.tap.read_iovec(slice)
780853
}
781854

782855
fn write_tap(tap: &mut Tap, buf: &IoVecBuffer) -> std::io::Result<usize> {
@@ -1815,11 +1888,8 @@ pub mod tests {
18151888
unsafe { libc::close(th.net.lock().unwrap().tap.as_raw_fd()) };
18161889

18171890
// The RX queue is empty and there is a deferred frame.
1818-
th.net().rx_buffer.deferred_descriptor = Some(ParsedDescriptorChain {
1819-
head_index: 1,
1820-
length: 100,
1821-
nr_iovecs: 1,
1822-
});
1891+
th.net().rx_buffer.used_descriptors = 1;
1892+
th.net().rx_buffer.used_bytes = 100;
18231893
check_metric_after_block!(
18241894
th.net().metrics.no_rx_avail_buffer,
18251895
1,
@@ -1829,7 +1899,8 @@ pub mod tests {
18291899
// We need to set this here to false, otherwise the device will try to
18301900
// handle a deferred frame, it will fail and will never try to read from
18311901
// the tap.
1832-
th.net().rx_buffer.deferred_descriptor = None;
1902+
th.net().rx_buffer.used_descriptors = 0;
1903+
th.net().rx_buffer.used_bytes = 0;
18331904

18341905
th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]);
18351906
check_metric_after_block!(

0 commit comments

Comments
 (0)