66// found in the THIRD-PARTY file.
77
88use std:: collections:: VecDeque ;
9- use std:: mem;
9+ use std:: mem:: { self , offset_of } ;
1010use std:: net:: Ipv4Addr ;
11+ use std:: num:: Wrapping ;
1112use std:: sync:: { Arc , Mutex } ;
1213
13- use libc:: EAGAIN ;
14+ use libc:: { iovec , EAGAIN } ;
1415use log:: error;
1516use vmm_sys_util:: eventfd:: EventFd ;
1617
@@ -19,7 +20,7 @@ use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
1920use crate :: devices:: virtio:: gen:: virtio_net:: {
2021 virtio_net_hdr_v1, VIRTIO_NET_F_CSUM , VIRTIO_NET_F_GUEST_CSUM , VIRTIO_NET_F_GUEST_TSO4 ,
2122 VIRTIO_NET_F_GUEST_TSO6 , VIRTIO_NET_F_GUEST_UFO , VIRTIO_NET_F_HOST_TSO4 ,
22- VIRTIO_NET_F_HOST_TSO6 , VIRTIO_NET_F_HOST_UFO , VIRTIO_NET_F_MAC ,
23+ VIRTIO_NET_F_HOST_TSO6 , VIRTIO_NET_F_HOST_UFO , VIRTIO_NET_F_MAC , VIRTIO_NET_F_MRG_RXBUF ,
2324} ;
2425use crate :: devices:: virtio:: gen:: virtio_ring:: VIRTIO_RING_F_EVENT_IDX ;
2526use crate :: devices:: virtio:: iovec:: {
@@ -108,7 +109,8 @@ pub struct RxBuffers {
108109 // A map of which part of the memory belongs to which `DescriptorChain` object
109110 pub parsed_descriptors : VecDeque < ParsedDescriptorChain > ,
110111 // Buffers that we have used and they are ready to be given back to the guest.
111- pub used_descriptor : Option < ParsedDescriptorChain > ,
112+ pub used_descriptors : u16 ,
113+ pub used_bytes : u32 ,
112114}
113115
114116impl RxBuffers {
@@ -118,7 +120,8 @@ impl RxBuffers {
118120 min_buffer_size : 0 ,
119121 iovec : IoVecBufferMut :: new ( ) ?,
120122 parsed_descriptors : VecDeque :: with_capacity ( FIRECRACKER_MAX_QUEUE_SIZE . into ( ) ) ,
121- used_descriptor : None ,
123+ used_descriptors : 0 ,
124+ used_bytes : 0 ,
122125 } )
123126 }
124127
@@ -141,75 +144,114 @@ impl RxBuffers {
141144 Ok ( ( ) )
142145 }
143146
144- /// Returns the number of available `iovec` objects .
147+ /// Returns the total size of available space in the buffer.
145148 #[ inline( always) ]
146- fn len ( & self ) -> usize {
149+ fn capacity ( & self ) -> usize {
147150 self . iovec . len ( )
148151 }
149152
150- /// Returns `true` if there aren't any available `iovec` objects.
151- #[ inline( always) ]
152- fn is_empty ( & self ) -> bool {
153- self . len ( ) == 0
154- }
155-
156153 /// Mark the first `size` bytes of available memory as used.
157154 ///
158155 /// # Safety:
159156 ///
160157 /// * The `RxBuffers` should include at least one parsed `DescriptorChain`.
161158 /// * `size` needs to be smaller or equal to total length of the first `DescriptorChain` stored
162159 /// in the `RxBuffers`.
163- unsafe fn mark_used ( & mut self , size : u32 ) {
160+ unsafe fn mark_used ( & mut self , mut bytes_written : u32 , rx_queue : & mut Queue ) {
164161 // Since we were able to write a frame in guest memory, we should have at least one
165162 // descriptor chain here. If not, we have a bug, so fail fast, since the device is
166163 // fundamentally broken.
167- let mut parsed_dc = self . parsed_descriptors . pop_front ( ) . expect (
168- "net: internal bug. Mismatch between written frame size and available descriptors" ,
164+ debug_assert ! ( !self . iovec. is_empty( ) ) ;
165+ let header_ptr: * mut virtio_net_hdr_v1 = self . iovec . vecs . as_mut_slice ( ) [ 0 ] . iov_base . cast ( ) ;
166+ let header_buff_len = self . iovec . vecs . as_mut_slice ( ) [ 0 ] . iov_len ;
167+ assert ! (
168+ vnet_hdr_len( ) <= header_buff_len,
169+ "Network buffer should be big enough for virtio_net_hdr_v1 object"
169170 ) ;
170171
171- self . header_set_num_buffers ( 1 ) ;
172- self . iovec . drop_descriptor_chain ( & parsed_dc) ;
173- parsed_dc. length = size;
174- self . used_descriptor = Some ( parsed_dc) ;
172+ self . used_bytes = bytes_written;
173+
174+ let mut used_heads: u16 = 0 ;
175+ let mut write_used = |head_index : u16 , bytes_written : u32 , rx_queue : & mut Queue | {
176+ if let Err ( err) = rx_queue. write_used_element (
177+ ( rx_queue. next_used + Wrapping ( self . used_descriptors ) ) . 0 ,
178+ head_index,
179+ bytes_written,
180+ ) {
181+ error ! (
182+ "net: Failed to add used descriptor {} of length {} to RX queue: {err}" ,
183+ head_index, bytes_written
184+ ) ;
185+ }
186+ used_heads += 1 ;
187+ self . used_descriptors += 1 ;
188+ } ;
189+
190+ loop {
191+ let parsed_dc = self
192+ . parsed_descriptors
193+ . pop_front ( )
194+ . expect ( "This should never happen if write to the buffer succeded." ) ;
195+ self . iovec . drop_descriptor_chain ( & parsed_dc) ;
196+
197+ if bytes_written <= parsed_dc. length {
198+ write_used ( parsed_dc. head_index , bytes_written, rx_queue) ;
199+ break ;
200+ } else {
201+ write_used ( parsed_dc. head_index , parsed_dc. length , rx_queue) ;
202+ bytes_written -= parsed_dc. length ;
203+ }
204+ }
205+ // SAFETY: The user space pointer was verified at the point of creation and
206+ // we verified the alignment and header buffer size.
207+ unsafe {
208+ Self :: header_set_num_buffers ( header_ptr, used_heads) ;
209+ }
175210 }
176211
177- /// Write the number of descriptors used in VirtIO header
178- fn header_set_num_buffers ( & mut self , nr_descs : u16 ) {
179- // We can unwrap here, because we have checked before that the `IoVecBufferMut` holds at
180- // least one buffer with the proper size, depending on the feature negotiation. In any
181- // case, the buffer holds memory of at least `std::mem::size_of::<virtio_net_hdr_v1>()`
182- // bytes.
183- self . iovec
184- . write_all_volatile_at (
185- & nr_descs. to_le_bytes ( ) ,
186- std:: mem:: offset_of!( virtio_net_hdr_v1, num_buffers) ,
187- )
188- . unwrap ( )
212+ /// Writes the number of buffers to the `num_buffers` field of the `virtio_net_hdr_v1` struct
213+ /// pointed to by `ptr`.
214+ ///
215+ /// # Safety
216+ /// Memory area needs to be big enough for virtio_net_hdr_v1 to fit.
217+ unsafe fn header_set_num_buffers ( ptr : * mut virtio_net_hdr_v1 , num_buffers : u16 ) {
218+ debug_assert ! (
219+ ptr. is_aligned( ) ,
220+ "Pointer should have at least 0x2 aligment"
221+ ) ;
222+
223+ let ptr: * mut u8 = ptr. cast ( ) ;
224+ let ptr = ptr. add ( offset_of ! ( virtio_net_hdr_v1, num_buffers) ) ;
225+ let bytes = num_buffers. to_le_bytes ( ) ;
226+ let ptr: * mut [ u8 ; 2 ] = ptr. cast ( ) ;
227+ ptr. write_volatile ( bytes) ;
189228 }
190229
191230 /// This will let the guest know about all the `DescriptorChain` objects that have been
192231 /// used to receive a frame from the TAP.
193232 fn finish_frame ( & mut self , rx_queue : & mut Queue ) {
194- if let Some ( used_dc) = self . used_descriptor . take ( ) {
195- // It is fine to `.unrap()` here. The only reason why `add_used` can fail is if the
196- // `head_index` is not a valid descriptor id. `head_index` here is a valid
197- // `DescriptorChain` index. We got it from `queue.pop_or_enable_notification()` which
198- // checks for its validity. In other words, if this unwrap() fails there's a bug in our
199- // emulation logic which, most likely, we can't recover from. So, let's crash here
200- // instead of logging an error and continuing.
201- rx_queue
202- . add_used ( used_dc. head_index , used_dc. length )
203- . unwrap ( ) ;
204- }
233+ rx_queue. advance_used_ring ( self . used_descriptors ) ;
234+ self . used_descriptors = 0 ;
235+ self . used_bytes = 0 ;
205236 }
206237
207238 /// Returns the number of bytes that have been used from the buffer
208239 fn used_bytes ( & self ) -> u32 {
209- match self . used_descriptor {
210- Some ( ref dc) => dc. length ,
211- None => 0 ,
212- }
240+ self . used_bytes
241+ }
242+
243+ /// Return a slice of iovecs for the first descriptor chain in the buffer.
244+ ///
245+ /// # Safety
246+ /// Buffer needs to have at least one descriptor chain parsed.
247+ unsafe fn single_chain_slice_mut ( & mut self ) -> & mut [ iovec ] {
248+ let nr_iovecs = self . parsed_descriptors [ 0 ] . nr_iovecs as usize ;
249+ & mut self . iovec . as_iovec_mut_slice ( ) [ ..nr_iovecs]
250+ }
251+
252+ /// Return a slice of iovecs for all descriptor chains in the buffer.
253+ fn all_chains_slice_mut ( & mut self ) -> & mut [ iovec ] {
254+ self . iovec . as_iovec_mut_slice ( )
213255 }
214256}
215257
@@ -272,6 +314,7 @@ impl Net {
272314 | 1 << VIRTIO_NET_F_HOST_TSO6
273315 | 1 << VIRTIO_NET_F_HOST_UFO
274316 | 1 << VIRTIO_F_VERSION_1
317+ | 1 << VIRTIO_NET_F_MRG_RXBUF
275318 | 1 << VIRTIO_RING_F_EVENT_IDX ;
276319
277320 let mut config_space = ConfigSpace :: default ( ) ;
@@ -433,13 +476,21 @@ impl Net {
433476 /// Returns the minimum size of buffer we expect the guest to provide us depending on the
434477 /// features we have negotiated with it
435478 fn minimum_rx_buffer_size ( & self ) -> u32 {
436- if self . has_feature ( VIRTIO_NET_F_GUEST_TSO4 as u64 )
437- || self . has_feature ( VIRTIO_NET_F_GUEST_TSO6 as u64 )
438- || self . has_feature ( VIRTIO_NET_F_GUEST_UFO as u64 )
439- {
440- 65562
479+ if !self . has_feature ( VIRTIO_NET_F_MRG_RXBUF as u64 ) {
480+ if self . has_feature ( VIRTIO_NET_F_GUEST_TSO4 as u64 )
481+ || self . has_feature ( VIRTIO_NET_F_GUEST_TSO6 as u64 )
482+ || self . has_feature ( VIRTIO_NET_F_GUEST_UFO as u64 )
483+ {
484+ 65562
485+ } else {
486+ 1526
487+ }
441488 } else {
442- 1526
489+ // header is 12 bytes long
490+ #[ allow( clippy:: cast_possible_truncation) ]
491+ {
492+ vnet_hdr_len ( ) as u32
493+ }
443494 }
444495 }
445496
@@ -454,6 +505,9 @@ impl Net {
454505 if let Err ( err) = unsafe { self . rx_buffer . add_buffer ( mem, head) } {
455506 self . metrics . rx_fails . inc ( ) ;
456507 error ! ( "net: Could not parse an RX descriptor: {err}" ) ;
508+ // Notify queue about ready frames. We need this
509+ // to bring the queue into an up-to-date state.
510+ self . rx_buffer . finish_frame ( queue) ;
457511 // Try to add the bad descriptor to the used ring.
458512 if let Err ( err) = queue. add_used ( index, 0 ) {
459513 error ! (
@@ -541,15 +595,14 @@ impl Net {
541595
542596 // We currently prioritize packets from the MMDS over regular network packets.
543597 fn read_from_mmds_or_tap ( & mut self ) -> Result < usize , NetError > {
544- // If we don't have any buffers available try to parse more from the RX queue. There might
545- // be some buffers we didn't get the chance to process, because we got to handle the TAP
546- // event before the RX queue event.
547- if self . rx_buffer . is_empty ( ) {
598+ // We only want to read from TAP (or mmds) if we have at least 64K of available capacity as
599+ // this is the max size of 1 packet.
600+ if self . rx_buffer . capacity ( ) < u16:: MAX as usize {
548601 self . parse_rx_descriptors ( ) ;
549602
550- // If after parsing the RX queue we still don't have any buffers stop processing RX
603+ // If after parsing the RX queue we still don't have enough capacity, stop processing RX
551604 // frames.
552- if self . rx_buffer . is_empty ( ) {
605+ if self . rx_buffer . capacity ( ) < u16 :: MAX as usize {
553606 return Ok ( 0 ) ;
554607 }
555608 }
@@ -570,8 +623,10 @@ impl Net {
570623 // * `rx_frame_buf` has size of `MAX_BUFFER_SIZE` and all `DescriptorChain` objects
571624 // are at least that big.
572625 unsafe {
573- self . rx_buffer
574- . mark_used ( ( vnet_hdr_len ( ) + len) . try_into ( ) . unwrap ( ) ) ;
626+ self . rx_buffer . mark_used (
627+ ( vnet_hdr_len ( ) + len) . try_into ( ) . unwrap ( ) ,
628+ & mut self . queues [ RX_INDEX ] ,
629+ ) ;
575630 }
576631 return Ok ( vnet_hdr_len ( ) + len) ;
577632 }
@@ -586,7 +641,8 @@ impl Net {
586641 // * `read_tap` passes the first `DescriptorChain` to `readv` so we can't have read more
587642 // bytes than its capacity.
588643 unsafe {
589- self . rx_buffer . mark_used ( len. try_into ( ) . unwrap ( ) ) ;
644+ self . rx_buffer
645+ . mark_used ( len. try_into ( ) . unwrap ( ) , & mut self . queues [ RX_INDEX ] ) ;
590646 }
591647 Ok ( len)
592648 }
@@ -630,13 +686,15 @@ impl Net {
630686 }
631687
632688 fn has_deferred_frame ( & self ) -> bool {
633- self . rx_buffer . used_descriptor . is_some ( )
689+ self . rx_buffer . used_descriptors != 0
634690 }
635691
636692 // Process the deferred frame first, then continue reading from tap.
637693 fn handle_deferred_frame ( & mut self ) -> Result < ( ) , DeviceError > {
638694 let used_bytes = self . rx_buffer . used_bytes ( ) ;
639695 if self . rate_limited_rx_single_frame ( used_bytes as usize ) {
696+ // Finish with the rate-limited packet.
697+ self . rx_buffer . finish_frame ( & mut self . queues [ RX_INDEX ] ) ;
640698 // process_rx() was interrupted possibly before consuming all
641699 // packets in the tap; try continuing now.
642700 return self . process_rx ( ) ;
@@ -797,9 +855,12 @@ impl Net {
797855 ///
798856 /// `self.rx_buffer` needs to have at least one descriptor chain parsed
799857 pub unsafe fn read_tap ( & mut self ) -> std:: io:: Result < usize > {
800- let nr_iovecs = self . rx_buffer . parsed_descriptors [ 0 ] . nr_iovecs as usize ;
801- self . tap
802- . read_iovec ( & mut self . rx_buffer . iovec . as_iovec_mut_slice ( ) [ ..nr_iovecs] )
858+ if self . has_feature ( VIRTIO_NET_F_MRG_RXBUF as u64 ) {
859+ self . tap . read_iovec ( self . rx_buffer . all_chains_slice_mut ( ) )
860+ } else {
861+ // SAFETY: we only call this if `rx_buffer` is not empty.
862+ unsafe { self . tap . read_iovec ( self . rx_buffer . single_chain_slice_mut ( ) ) }
863+ }
803864 }
804865
805866 fn write_tap ( tap : & mut Tap , buf : & IoVecBuffer ) -> std:: io:: Result < usize > {
@@ -1846,11 +1907,8 @@ pub mod tests {
18461907 unsafe { libc:: close ( th. net . lock ( ) . unwrap ( ) . tap . as_raw_fd ( ) ) } ;
18471908
18481909 // The RX queue is empty and there is a deferred frame.
1849- th. net ( ) . rx_buffer . used_descriptor = Some ( ParsedDescriptorChain {
1850- head_index : 1 ,
1851- length : 100 ,
1852- nr_iovecs : 1 ,
1853- } ) ;
1910+ th. net ( ) . rx_buffer . used_descriptors = 1 ;
1911+ th. net ( ) . rx_buffer . used_bytes = 100 ;
18541912 check_metric_after_block ! (
18551913 th. net( ) . metrics. no_rx_avail_buffer,
18561914 1 ,
@@ -1860,7 +1918,8 @@ pub mod tests {
18601918 // We need to set this here to false, otherwise the device will try to
18611919 // handle a deferred frame, it will fail and will never try to read from
18621920 // the tap.
1863- th. net ( ) . rx_buffer . used_descriptor = None ;
1921+ th. net ( ) . rx_buffer . used_descriptors = 0 ;
1922+ th. net ( ) . rx_buffer . used_bytes = 0 ;
18641923
18651924 th. add_desc_chain ( NetQueue :: Rx , 0 , & [ ( 0 , 4096 , VIRTQ_DESC_F_WRITE ) ] ) ;
18661925 check_metric_after_block ! (
0 commit comments