Commit 6b80c11
feat: add support for VIRTIO_NET_F_MRG_RXBUF to virtio-net
Now the virtio-net device can split incoming packets across multiple descriptor chains if VIRTIO_NET_F_MRG_RXBUF is enabled by the guest. The number of descriptor chains (also known as heads) used is written into the `virtio_net_hdr_v1` structure, which is located at the very beginning of the packet. The virtio spec states that the number of heads used should always be correct:

- 1 - if VIRTIO_NET_F_MRG_RXBUF is not negotiated
- N - if VIRTIO_NET_F_MRG_RXBUF is negotiated

Prior to this commit Firecracker never set the number of used heads to 1, but Linux was fine with it. Now we always set the correct number of heads. Because of this, some changes were made to the unit test code that generates test frames.

Signed-off-by: Egor Lazarchuk <[email protected]>
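To make the rule concrete, here is a minimal Rust sketch (illustration only, not code from this commit; the helper name is invented):

    /// Hypothetical helper showing the spec rule above: `num_buffers` in
    /// `virtio_net_hdr_v1` must equal the number of descriptor chains (heads)
    /// the packet occupies, and must be exactly 1 when VIRTIO_NET_F_MRG_RXBUF
    /// was not negotiated.
    fn num_buffers_for_packet(mrg_rxbuf_negotiated: bool, heads_used: u16) -> u16 {
        if mrg_rxbuf_negotiated {
            // The device may split the packet across N chains; report N.
            heads_used
        } else {
            // Splitting is not allowed; the header must say exactly 1.
            1
        }
    }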
1 parent 44fef9f commit 6b80c11

File tree

2 files changed (+221, -101 lines)

src/vmm/src/devices/virtio/net/device.rs

Lines changed: 202 additions & 93 deletions
@@ -9,21 +9,22 @@
 use std::io::Read;
 use std::mem;
 use std::net::Ipv4Addr;
+use std::num::Wrapping;
 use std::sync::{Arc, Mutex};
 
 use libc::EAGAIN;
-use log::{error, warn};
+use log::error;
 use utils::eventfd::EventFd;
 use utils::net::mac::MacAddr;
-use utils::u64_to_usize;
-use vm_memory::GuestMemoryError;
+use utils::{u64_to_usize, usize_to_u64};
+use vm_memory::{GuestAddress, GuestMemory, GuestMemoryError};
 
 use crate::devices::virtio::device::{DeviceState, IrqTrigger, IrqType, VirtioDevice};
 use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
 use crate::devices::virtio::gen::virtio_net::{
     virtio_net_hdr_v1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4,
     VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4,
-    VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC,
+    VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF,
 };
 use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
 use crate::devices::virtio::iovec::IoVecBuffer;
@@ -32,7 +33,7 @@ use crate::devices::virtio::net::tap::Tap;
 use crate::devices::virtio::net::{
     gen, NetError, NetQueue, MAX_BUFFER_SIZE, NET_QUEUE_SIZES, RX_INDEX, TX_INDEX,
 };
-use crate::devices::virtio::queue::{DescriptorChain, Queue};
+use crate::devices::virtio::queue::{Queue, UsedElement};
 use crate::devices::virtio::{ActivateError, TYPE_NET};
 use crate::devices::{report_net_event_fail, DeviceError};
 use crate::dumbo::pdu::arp::ETH_IPV4_FRAME_LEN;
@@ -47,14 +48,14 @@ const FRAME_HEADER_MAX_LEN: usize = PAYLOAD_OFFSET + ETH_IPV4_FRAME_LEN;
 
 #[derive(Debug, thiserror::Error, displaydoc::Display)]
 enum FrontendError {
-    /// Add used.
-    AddUsed,
-    /// Descriptor chain too small.
-    DescriptorChainTooSmall,
     /// Empty queue.
     EmptyQueue,
     /// Guest memory error: {0}
     GuestMemory(GuestMemoryError),
+    /// Attempt to write an empty packet.
+    AttemptToWriteEmptyPacket,
+    /// Attempt to use more descriptor chains (heads) than is allowed.
+    MaxHeadsUsed,
     /// Read only descriptor.
     ReadOnlyDescriptor,
 }
@@ -103,6 +104,20 @@ pub struct ConfigSpace {
 // SAFETY: `ConfigSpace` contains only PODs in `repr(C)` or `repr(transparent)`, without padding.
 unsafe impl ByteValued for ConfigSpace {}
 
+// This struct contains information about a partially
+// written packet.
+#[derive(Debug)]
+struct PartialWrite {
+    // Number of bytes written so far.
+    bytes_written: usize,
+    // Number of descriptor heads used for the packet.
+    used_heads: u16,
+    // Guest address of the first buffer used for the packet.
+    // This will be used to set the number of descriptor heads used
+    // to store the whole packet.
+    packet_start_addr: GuestAddress,
+}
+
 /// VirtIO network device.
 ///
 /// It emulates a network device able to exchange L2 frames between the guest
@@ -127,6 +142,7 @@ pub struct Net {
 
     rx_bytes_read: usize,
     rx_frame_buf: [u8; MAX_BUFFER_SIZE],
+    rx_partial_write: Option<PartialWrite>,
 
     tx_frame_headers: [u8; frame_hdr_len()],
 
@@ -161,6 +177,7 @@ impl Net {
             | 1 << VIRTIO_NET_F_HOST_TSO4
             | 1 << VIRTIO_NET_F_HOST_TSO6
            | 1 << VIRTIO_NET_F_HOST_UFO
+            | 1 << VIRTIO_NET_F_MRG_RXBUF
             | 1 << VIRTIO_F_VERSION_1
             | 1 << VIRTIO_RING_F_EVENT_IDX;
 
@@ -191,6 +208,7 @@
             rx_deferred_frame: false,
             rx_bytes_read: 0,
             rx_frame_buf: [0u8; MAX_BUFFER_SIZE],
+            rx_partial_write: None,
             tx_frame_headers: [0u8; frame_hdr_len()],
             irq_trigger: IrqTrigger::new().map_err(NetError::EventFd)?,
             config_space,
@@ -319,7 +337,17 @@
         }
 
         // Attempt frame delivery.
-        let success = self.write_frame_to_guest();
+        let success = loop {
+            // We retry writing the frame if there were internal errors.
+            // Each new write will use new descriptor chains, up to the
+            // point of consuming all available descriptors if they are
+            // all bad.
+            match self.write_frame_to_guest() {
+                Ok(()) => break true,
+                Err(FrontendError::EmptyQueue) => break false,
+                _ => (),
+            };
+        };
 
         // Undo the tokens consumption if guest delivery failed.
         if !success {
@@ -330,108 +358,188 @@
         success
     }
 
-    /// Write a slice in a descriptor chain
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if the descriptor chain is too short or
-    /// an inappropriate (read only) descriptor is found in the chain
-    fn write_to_descriptor_chain(
-        mem: &GuestMemoryMmap,
-        data: &[u8],
-        head: DescriptorChain,
-        net_metrics: &NetDeviceMetrics,
-    ) -> Result<(), FrontendError> {
-        let mut chunk = data;
-        let mut next_descriptor = Some(head);
+    /// Write the packet contained in the internal buffer into guest-provided
+    /// descriptor chains.
+    fn write_frame_to_guest(&mut self) -> Result<(), FrontendError> {
+        // This is safe since we checked in the event handler that the device is activated.
+        let mem = self.device_state.mem().unwrap();
+
+        if self.queues[RX_INDEX].is_empty(mem) {
+            self.metrics.no_rx_avail_buffer.inc();
+            return Err(FrontendError::EmptyQueue);
+        }
+
+        let next_used = self.queues[RX_INDEX].next_used;
+        let actual_size = self.queues[RX_INDEX].actual_size();
+
+        let (mut slice, mut packet_start_addr, mut used_heads) =
+            if let Some(pw) = &self.rx_partial_write {
+                (
+                    &self.rx_frame_buf[pw.bytes_written..self.rx_bytes_read],
+                    Some(pw.packet_start_addr),
+                    pw.used_heads,
+                )
+            } else {
+                (&self.rx_frame_buf[..self.rx_bytes_read], None, 0)
+            };
 
-        while let Some(descriptor) = &next_descriptor {
-            if !descriptor.is_write_only() {
-                return Err(FrontendError::ReadOnlyDescriptor);
+        let max_used_heads = if self.has_feature(u64::from(VIRTIO_NET_F_MRG_RXBUF)) {
+            // There is no real limit on how many heads we can use, but we will
+            // never use more than the queue has.
+            u16::MAX
+        } else {
+            // Without VIRTIO_NET_F_MRG_RXBUF only 1 head can be used for the packet.
+            1
+        };
+
+        let mut error = None;
+        while !slice.is_empty() && error.is_none() {
+            if used_heads == max_used_heads {
+                error = Some(FrontendError::MaxHeadsUsed);
+                break;
+            }
+
+            let Some(head_desc) = self.queues[RX_INDEX].pop(mem) else {
+                break;
+            };
+
+            let head_desc_index = head_desc.index;
+            let mut desc_len = 0;
+
+            // If this is the first head of the packet, save it for later.
+            if packet_start_addr.is_none() {
+                packet_start_addr = Some(head_desc.addr);
             }
 
-            let len = std::cmp::min(chunk.len(), descriptor.len as usize);
-            match mem.write_slice(&chunk[..len], descriptor.addr) {
-                Ok(()) => {
-                    net_metrics.rx_count.inc();
-                    chunk = &chunk[len..];
+            // Write to the descriptor chain as much as possible.
+            let mut desc = Some(head_desc);
+            while !slice.is_empty() && desc.is_some() {
+                let d = desc.unwrap();
+
+                if !d.is_write_only() {
+                    error = Some(FrontendError::ReadOnlyDescriptor);
+                    break;
                 }
-                Err(err) => {
-                    error!("Failed to write slice: {:?}", err);
+
+                let len = slice.len().min(d.len as usize);
+                if let Err(err) = mem.write_slice(&slice[..len], d.addr) {
                     if let GuestMemoryError::PartialBuffer { .. } = err {
-                        net_metrics.rx_partial_writes.inc();
+                        self.metrics.rx_partial_writes.inc();
                     }
-                    return Err(FrontendError::GuestMemory(err));
+                    error = Some(FrontendError::GuestMemory(err));
+                    break;
+                } else {
+                    desc_len += len;
+                    slice = &slice[len..];
                 }
-            }
 
-            // If chunk is empty we are done here.
-            if chunk.is_empty() {
-                let len = data.len() as u64;
-                net_metrics.rx_bytes_count.add(len);
-                net_metrics.rx_packets_count.inc();
-                return Ok(());
+                desc = d.next_descriptor();
             }
 
-            next_descriptor = descriptor.next_descriptor();
+            // At this point the descriptor chain has been processed.
+            // We add it to the used ring.
+            let next_used_index = (next_used + Wrapping(used_heads)).0 % actual_size;
+            let used_element = UsedElement {
+                id: u32::from(head_desc_index),
+                len: u32::try_from(desc_len).unwrap(),
+            };
+            // SAFETY:
+            // This should never panic as we provide an index within
+            // correct bounds.
+            self.queues[RX_INDEX]
+                .write_used_ring(mem, next_used_index, used_element)
+                .unwrap();
+
+            used_heads += 1;
         }
 
-        warn!("Receiving buffer is too small to hold frame of current size");
-        Err(FrontendError::DescriptorChainTooSmall)
-    }
+        let packet_start_addr =
+            packet_start_addr.ok_or(FrontendError::AttemptToWriteEmptyPacket)?;
+
+        let mut end_packet_processing = || {
+            // We only update queue internals when packet processing has
+            // finished. This is done to prevent giving the guest information
+            // about descriptor heads used for partially written packets.
+            // Otherwise the guest would see that we used those descriptors and
+            // would try to process them.
+            self.queues[RX_INDEX].next_used += Wrapping(used_heads);
+            self.queues[RX_INDEX].num_added += Wrapping(used_heads);
+
+            // Update the used ring with what we used to process the packet.
+            self.queues[RX_INDEX].set_used_ring_idx((next_used + Wrapping(used_heads)).0, mem);
+            let next_avail = self.queues[RX_INDEX].next_avail.0;
+            self.queues[RX_INDEX].set_used_ring_avail_event(next_avail, mem);
+
+            // Clear partial write info if there was one.
+            self.rx_partial_write = None;
+        };
 
-    // Copies a single frame from `self.rx_frame_buf` into the guest.
-    fn do_write_frame_to_guest(&mut self) -> Result<(), FrontendError> {
-        // This is safe since we checked in the event handler that the device is activated.
-        let mem = self.device_state.mem().unwrap();
+        if let Some(err) = error {
+            // There was an error during writing.
+            end_packet_processing();
 
-        let queue = &mut self.queues[RX_INDEX];
-        let head_descriptor = queue.pop_or_enable_notification(mem).ok_or_else(|| {
-            self.metrics.no_rx_avail_buffer.inc();
-            FrontendError::EmptyQueue
-        })?;
-        let head_index = head_descriptor.index;
-
-        let result = Self::write_to_descriptor_chain(
-            mem,
-            &self.rx_frame_buf[..self.rx_bytes_read],
-            head_descriptor,
-            &self.metrics,
-        );
-        // Mark the descriptor chain as used. If an error occurred, skip the descriptor chain.
-        let used_len = if result.is_err() {
             self.metrics.rx_fails.inc();
-            0
-        } else {
-            // Safe to unwrap because a frame must be smaller than 2^16 bytes.
-            u32::try_from(self.rx_bytes_read).unwrap()
-        };
-        queue.add_used(mem, head_index, used_len).map_err(|err| {
-            error!("Failed to add available descriptor {}: {}", head_index, err);
-            FrontendError::AddUsed
-        })?;
 
-        result
-    }
+            // `next_used` is pointing at the first descriptor used to process the packet.
+            // We used `used_heads` descriptors to process the packet. Go over all of them
+            // and overwrite them with 0 len to discard them.
+            for i in 0..used_heads {
+                let next_used_index = (next_used + Wrapping(i)).0 % actual_size;
+
+                // SAFETY:
+                // This should never panic as we provide an index within
+                // correct bounds.
+                let mut used_element = self.queues[RX_INDEX]
+                    .read_used_ring(mem, next_used_index)
+                    .unwrap();
+                used_element.len = 0;
+                self.queues[RX_INDEX]
+                    .write_used_ring(mem, next_used_index, used_element)
+                    .unwrap();
+            }
 
-    // Copies a single frame from `self.rx_frame_buf` into the guest. In case of an error retries
-    // the operation if possible. Returns true if the operation was successful.
-    fn write_frame_to_guest(&mut self) -> bool {
-        let max_iterations = self.queues[RX_INDEX].actual_size();
-        for _ in 0..max_iterations {
-            match self.do_write_frame_to_guest() {
-                Ok(()) => return true,
-                Err(FrontendError::EmptyQueue) | Err(FrontendError::AddUsed) => {
-                    return false;
-                }
-                Err(_) => {
-                    // retry
-                    continue;
-                }
+            Err(err)
+        } else if slice.is_empty() {
+            // Packet was fully written.
+            end_packet_processing();
+
+            self.metrics
+                .rx_bytes_count
+                .add(usize_to_u64(self.rx_bytes_read));
+            self.metrics.rx_packets_count.inc();
+
+            // Update the number of descriptor heads used to store the packet.
+            // SAFETY:
+            // The packet_start_addr is a valid guest address and we check
+            // memory boundaries.
+            #[allow(clippy::transmute_ptr_to_ref)]
+            let header: &mut virtio_net_hdr_v1 = unsafe {
+                let header_slice = mem
+                    .get_slice(packet_start_addr, std::mem::size_of::<virtio_net_hdr_v1>())
+                    .map_err(FrontendError::GuestMemory)?;
+                std::mem::transmute(header_slice.ptr_guard_mut().as_ptr())
+            };
+            header.num_buffers = used_heads;
+
+            Ok(())
+        } else {
+            // The packet could not be fully written to the guest.
+            // Save the necessary info to use during the next invocation.
+            self.metrics.rx_partial_writes.inc();
+
+            if let Some(pw) = &mut self.rx_partial_write {
+                pw.bytes_written = self.rx_bytes_read - slice.len();
+                pw.used_heads = used_heads;
+            } else {
+                let pw = PartialWrite {
+                    bytes_written: self.rx_bytes_read - slice.len(),
+                    used_heads,
+                    packet_start_addr,
+                };
+                self.rx_partial_write = Some(pw);
             }
-        }
 
-        false
+            Err(FrontendError::EmptyQueue)
+        }
     }
 
     // Tries to detour the frame to MMDS and if MMDS doesn't accept it, sends it on the host TAP.
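The ordering at the heart of the new `write_frame_to_guest` above: used elements are written into the used ring as each head is consumed, but the guest-visible `idx` is only advanced in `end_packet_processing`, once the packet is complete or discarded, so the guest never observes a half-written packet. A minimal sketch of that staging idea, using simplified stand-in types rather than Firecracker's actual `Queue`:

    // Simplified stand-in for a split-virtqueue used ring (illustration only).
    struct UsedRing {
        ring: Vec<(u32, u32)>, // (descriptor head id, bytes written) per slot
        idx: u16,              // guest-visible count of published entries
    }

    impl UsedRing {
        // Stage an entry; the guest does not see it yet because `idx` is untouched.
        fn stage(&mut self, next_used: u16, slot_offset: u16, id: u32, len: u32) {
            let slot = usize::from(next_used.wrapping_add(slot_offset)) % self.ring.len();
            self.ring[slot] = (id, len);
        }

        // Publish all staged entries at once; only now may the guest process them.
        fn publish(&mut self, used_heads: u16) {
            self.idx = self.idx.wrapping_add(used_heads);
        }
    }

Because entries are only staged until `publish`, the error path is free to rewrite them first (zeroing their lengths), which is exactly what the `read_used_ring`/`write_used_ring` loop above does before bumping the index.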
@@ -1027,6 +1135,7 @@ pub mod tests {
             | 1 << VIRTIO_NET_F_HOST_TSO4
             | 1 << VIRTIO_NET_F_HOST_TSO6
             | 1 << VIRTIO_NET_F_HOST_UFO
+            | 1 << VIRTIO_NET_F_MRG_RXBUF
             | 1 << VIRTIO_F_VERSION_1
             | 1 << VIRTIO_RING_F_EVENT_IDX;
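The unit-test changes mentioned in the commit message follow from the same rule: an expected guest-visible frame now begins with the 12-byte `virtio_net_hdr_v1`, whose `num_buffers` field the test framing code has to fill in. A sketch of such a frame builder (an assumed helper for illustration, not the actual test code; the field offset follows the standard `virtio_net_hdr_v1` layout):

    // Hypothetical test helper (not from this commit): prepend a
    // `virtio_net_hdr_v1` with `num_buffers` filled in to a payload.
    fn expected_frame(payload: &[u8], num_buffers: u16) -> Vec<u8> {
        const VNET_HDR_LEN: usize = 12; // size_of::<virtio_net_hdr_v1>()
        let mut frame = vec![0u8; VNET_HDR_LEN];
        // `num_buffers` is the last u16 of the header, at offset 10, little-endian.
        frame[10..12].copy_from_slice(&num_buffers.to_le_bytes());
        frame.extend_from_slice(payload);
        frame
    }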