66// found in the THIRD-PARTY file.
77
88use std:: collections:: VecDeque ;
9- use std:: mem;
9+ use std:: mem:: { self , offset_of } ;
1010use std:: net:: Ipv4Addr ;
11+ use std:: num:: Wrapping ;
1112use std:: sync:: { Arc , Mutex } ;
1213
13- use libc:: EAGAIN ;
14+ use libc:: { iovec , EAGAIN } ;
1415use log:: error;
1516use vmm_sys_util:: eventfd:: EventFd ;
1617
@@ -19,7 +20,7 @@ use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
1920use crate :: devices:: virtio:: gen:: virtio_net:: {
2021 virtio_net_hdr_v1, VIRTIO_NET_F_CSUM , VIRTIO_NET_F_GUEST_CSUM , VIRTIO_NET_F_GUEST_TSO4 ,
2122 VIRTIO_NET_F_GUEST_TSO6 , VIRTIO_NET_F_GUEST_UFO , VIRTIO_NET_F_HOST_TSO4 ,
22- VIRTIO_NET_F_HOST_TSO6 , VIRTIO_NET_F_HOST_UFO , VIRTIO_NET_F_MAC ,
23+ VIRTIO_NET_F_HOST_TSO6 , VIRTIO_NET_F_HOST_UFO , VIRTIO_NET_F_MAC , VIRTIO_NET_F_MRG_RXBUF ,
2324} ;
2425use crate :: devices:: virtio:: gen:: virtio_ring:: VIRTIO_RING_F_EVENT_IDX ;
2526use crate :: devices:: virtio:: iovec:: {
@@ -108,7 +109,8 @@ pub struct RxBuffers {
108109 // A map of which part of the memory belongs to which `DescriptorChain` object
109110 pub parsed_descriptors : VecDeque < ParsedDescriptorChain > ,
110111 // Buffers that we have used and they are ready to be given back to the guest.
111- pub used_descriptor : Option < ParsedDescriptorChain > ,
112+ pub used_descriptors : u16 ,
113+ pub used_bytes : u32 ,
112114}
113115
114116impl RxBuffers {
@@ -118,7 +120,8 @@ impl RxBuffers {
118120 min_buffer_size : 0 ,
119121 iovec : IoVecBufferMut :: new ( ) ?,
120122 parsed_descriptors : VecDeque :: with_capacity ( FIRECRACKER_MAX_QUEUE_SIZE . into ( ) ) ,
121- used_descriptor : None ,
123+ used_descriptors : 0 ,
124+ used_bytes : 0 ,
122125 } )
123126 }
124127
@@ -141,37 +144,61 @@ impl RxBuffers {
141144 Ok ( ( ) )
142145 }
143146
144- /// Returns the number of available `iovec` objects .
147+ /// Returns the total size of available space in the buffer .
145148 #[ inline( always) ]
146- fn len ( & self ) -> usize {
149+ fn capacity ( & self ) -> usize {
147150 self . iovec . len ( )
148151 }
149152
150- /// Returns `true` if there aren't any available `iovec` objects.
151- #[ inline( always) ]
152- fn is_empty ( & self ) -> bool {
153- self . len ( ) == 0
154- }
155-
156153 /// Mark the first `size` bytes of available memory as used.
157154 ///
158155 /// # Safety:
159156 ///
160157 /// * The `RxBuffers` should include at least one parsed `DescriptorChain`.
161158 /// * `size` needs to be smaller or equal to total length of the first `DescriptorChain` stored
162159 /// in the `RxBuffers`.
163- unsafe fn mark_used ( & mut self , size : u32 ) {
160+ unsafe fn mark_used ( & mut self , mut bytes_written : u32 , rx_queue : & mut Queue ) {
164161 // Since we were able to write a frame in guest memory, we should have at least one
165162 // descriptor chain here. If not, we have a bug, so fail fast, since the device is
166163 // fundamentally broken.
167- let mut parsed_dc = self . parsed_descriptors . pop_front ( ) . expect (
168- "net: internal bug. Mismatch between written frame size and available descriptors" ,
169- ) ;
164+ debug_assert ! ( ! self . iovec . is_empty ( ) ) ;
165+
166+ self . used_bytes = bytes_written ;
170167
171- self . header_set_num_buffers ( 1 ) ;
172- self . iovec . drop_descriptor_chain ( & parsed_dc) ;
173- parsed_dc. length = size;
174- self . used_descriptor = Some ( parsed_dc) ;
168+ let mut used_heads: u16 = 0 ;
169+ let mut write_used = |head_index : u16 , bytes_written : u32 , rx_queue : & mut Queue | {
170+ if let Err ( err) = rx_queue. write_used_element (
171+ ( rx_queue. next_used + Wrapping ( self . used_descriptors ) ) . 0 ,
172+ head_index,
173+ bytes_written,
174+ ) {
175+ error ! (
176+ "net: Failed to add used descriptor {} of length {} to RX queue: {err}" ,
177+ head_index, bytes_written
178+ ) ;
179+ }
180+ used_heads += 1 ;
181+ self . used_descriptors += 1 ;
182+ } ;
183+
184+ for parsed_dc in self . parsed_descriptors . iter ( ) {
185+ if bytes_written <= parsed_dc. length {
186+ write_used ( parsed_dc. head_index , bytes_written, rx_queue) ;
187+ break ;
188+ } else {
189+ write_used ( parsed_dc. head_index , parsed_dc. length , rx_queue) ;
190+ bytes_written -= parsed_dc. length ;
191+ }
192+ }
193+
194+ self . header_set_num_buffers ( used_heads) ;
195+ for _ in 0 ..used_heads {
196+ let parsed_dc = self
197+ . parsed_descriptors
198+ . pop_front ( )
199+ . expect ( "This should never happen if write to the buffer succeded." ) ;
200+ self . iovec . drop_descriptor_chain ( & parsed_dc) ;
201+ }
175202 }
176203
177204 /// Write the number of descriptors used in VirtIO header
@@ -191,25 +218,28 @@ impl RxBuffers {
191218 /// This will let the guest know that about all the `DescriptorChain` object that has been
192219 /// used to receive a frame from the TAP.
193220 fn finish_frame ( & mut self , rx_queue : & mut Queue ) {
194- if let Some ( used_dc) = self . used_descriptor . take ( ) {
195- // It is fine to `.unrap()` here. The only reason why `add_used` can fail is if the
196- // `head_index` is not a valid descriptor id. `head_index` here is a valid
197- // `DescriptorChain` index. We got it from `queue.pop_or_enable_notification()` which
198- // checks for its validity. In other words, if this unwrap() fails there's a bug in our
199- // emulation logic which, most likely, we can't recover from. So, let's crash here
200- // instead of logging an error and continuing.
201- rx_queue
202- . add_used ( used_dc. head_index , used_dc. length )
203- . unwrap ( ) ;
204- }
221+ rx_queue. advance_used_ring ( self . used_descriptors ) ;
222+ self . used_descriptors = 0 ;
223+ self . used_bytes = 0 ;
205224 }
206225
207226 /// Returns the number of bytes that have been used from the buffer
208227 fn used_bytes ( & self ) -> u32 {
209- match self . used_descriptor {
210- Some ( ref dc) => dc. length ,
211- None => 0 ,
212- }
228+ self . used_bytes
229+ }
230+
231+ /// Return a slice of iovecs for the first slice in the buffer.
232+ ///
233+ /// # Safety
234+ /// Buffer needs to have at least one descriptor chain parsed.
235+ unsafe fn single_chain_slice_mut ( & mut self ) -> & mut [ iovec ] {
236+ let nr_iovecs = self . parsed_descriptors [ 0 ] . nr_iovecs as usize ;
237+ & mut self . iovec . as_iovec_mut_slice ( ) [ ..nr_iovecs]
238+ }
239+
240+ /// Return a slice of iovecs for all descriptor chains in the buffer.
241+ fn all_chains_slice_mut ( & mut self ) -> & mut [ iovec ] {
242+ self . iovec . as_iovec_mut_slice ( )
213243 }
214244}
215245
@@ -272,6 +302,7 @@ impl Net {
272302 | 1 << VIRTIO_NET_F_HOST_TSO6
273303 | 1 << VIRTIO_NET_F_HOST_UFO
274304 | 1 << VIRTIO_F_VERSION_1
305+ | 1 << VIRTIO_NET_F_MRG_RXBUF
275306 | 1 << VIRTIO_RING_F_EVENT_IDX ;
276307
277308 let mut config_space = ConfigSpace :: default ( ) ;
@@ -433,13 +464,21 @@ impl Net {
433464 /// Returns the minimum size of buffer we expect the guest to provide us depending on the
434465 /// features we have negotiated with it
435466 fn minimum_rx_buffer_size ( & self ) -> u32 {
436- if self . has_feature ( VIRTIO_NET_F_GUEST_TSO4 as u64 )
437- || self . has_feature ( VIRTIO_NET_F_GUEST_TSO6 as u64 )
438- || self . has_feature ( VIRTIO_NET_F_GUEST_UFO as u64 )
439- {
440- 65562
467+ if !self . has_feature ( VIRTIO_NET_F_MRG_RXBUF as u64 ) {
468+ if self . has_feature ( VIRTIO_NET_F_GUEST_TSO4 as u64 )
469+ || self . has_feature ( VIRTIO_NET_F_GUEST_TSO6 as u64 )
470+ || self . has_feature ( VIRTIO_NET_F_GUEST_UFO as u64 )
471+ {
472+ 65562
473+ } else {
474+ 1526
475+ }
441476 } else {
442- 1526
477+ // header is 12 bytes long
478+ #[ allow( clippy:: cast_possible_truncation) ]
479+ {
480+ vnet_hdr_len ( ) as u32
481+ }
443482 }
444483 }
445484
@@ -454,6 +493,9 @@ impl Net {
454493 if let Err ( err) = unsafe { self . rx_buffer . add_buffer ( mem, head) } {
455494 self . metrics . rx_fails . inc ( ) ;
456495 error ! ( "net: Could not parse an RX descriptor: {err}" ) ;
496+ // Notify queue about ready frames. We need this
497+ // to bring queue into up to date state.
498+ self . rx_buffer . finish_frame ( queue) ;
457499 // Try to add the bad descriptor to the used ring.
458500 if let Err ( err) = queue. add_used ( index, 0 ) {
459501 error ! (
@@ -541,15 +583,14 @@ impl Net {
541583
542584 // We currently prioritize packets from the MMDS over regular network packets.
543585 fn read_from_mmds_or_tap ( & mut self ) -> Result < usize , NetError > {
544- // If we don't have any buffers available try to parse more from the RX queue. There might
545- // be some buffers we didn't get the chance to process, because we got to handle the TAP
546- // event before the RX queue event.
547- if self . rx_buffer . is_empty ( ) {
586+ // We only want to read from TAP (or mmds) if we have at least 64K of available capacity as
587+ // this is the max size of 1 packet.
588+ if self . rx_buffer . capacity ( ) < u16:: MAX as usize {
548589 self . parse_rx_descriptors ( ) ;
549590
550- // If after parsing the RX queue we still don't have any buffers stop processing RX
591+ // If after parsing the RX queue we still don't have enough capacity, stop processing RX
551592 // frames.
552- if self . rx_buffer . is_empty ( ) {
593+ if self . rx_buffer . capacity ( ) < u16 :: MAX as usize {
553594 return Ok ( 0 ) ;
554595 }
555596 }
@@ -570,8 +611,10 @@ impl Net {
570611 // * `rx_frame_buf` has size of `MAX_BUFFER_SIZE` and all `DescriptorChain` objects
571612 // are at least that big.
572613 unsafe {
573- self . rx_buffer
574- . mark_used ( ( vnet_hdr_len ( ) + len) . try_into ( ) . unwrap ( ) ) ;
614+ self . rx_buffer . mark_used (
615+ ( vnet_hdr_len ( ) + len) . try_into ( ) . unwrap ( ) ,
616+ & mut self . queues [ RX_INDEX ] ,
617+ ) ;
575618 }
576619 return Ok ( vnet_hdr_len ( ) + len) ;
577620 }
@@ -586,7 +629,8 @@ impl Net {
586629 // * `read_tap` passes the first `DescriptorChain` to `readv` so we can't have read more
587630 // bytes than its capacity.
588631 unsafe {
589- self . rx_buffer . mark_used ( len. try_into ( ) . unwrap ( ) ) ;
632+ self . rx_buffer
633+ . mark_used ( len. try_into ( ) . unwrap ( ) , & mut self . queues [ RX_INDEX ] ) ;
590634 }
591635 Ok ( len)
592636 }
@@ -630,13 +674,15 @@ impl Net {
630674 }
631675
632676 fn has_deferred_frame ( & self ) -> bool {
633- self . rx_buffer . used_descriptor . is_some ( )
677+ self . rx_buffer . used_descriptors != 0
634678 }
635679
636680 // Process the deferred frame first, then continue reading from tap.
637681 fn handle_deferred_frame ( & mut self ) -> Result < ( ) , DeviceError > {
638682 let used_bytes = self . rx_buffer . used_bytes ( ) ;
639683 if self . rate_limited_rx_single_frame ( used_bytes as usize ) {
684+ // Finish with rate limitted packet.
685+ self . rx_buffer . finish_frame ( & mut self . queues [ RX_INDEX ] ) ;
640686 // process_rx() was interrupted possibly before consuming all
641687 // packets in the tap; try continuing now.
642688 return self . process_rx ( ) ;
@@ -797,9 +843,12 @@ impl Net {
797843 ///
798844 /// `self.rx_buffer` needs to have at least one descriptor chain parsed
799845 pub unsafe fn read_tap ( & mut self ) -> std:: io:: Result < usize > {
800- let nr_iovecs = self . rx_buffer . parsed_descriptors [ 0 ] . nr_iovecs as usize ;
801- self . tap
802- . read_iovec ( & mut self . rx_buffer . iovec . as_iovec_mut_slice ( ) [ ..nr_iovecs] )
846+ if self . has_feature ( VIRTIO_NET_F_MRG_RXBUF as u64 ) {
847+ self . tap . read_iovec ( self . rx_buffer . all_chains_slice_mut ( ) )
848+ } else {
849+ // SAFETY: we only call this if `rx_buffer` is not empty.
850+ unsafe { self . tap . read_iovec ( self . rx_buffer . single_chain_slice_mut ( ) ) }
851+ }
803852 }
804853
805854 fn write_tap ( tap : & mut Tap , buf : & IoVecBuffer ) -> std:: io:: Result < usize > {
@@ -1846,11 +1895,8 @@ pub mod tests {
18461895 unsafe { libc:: close ( th. net . lock ( ) . unwrap ( ) . tap . as_raw_fd ( ) ) } ;
18471896
18481897 // The RX queue is empty and there is a deferred frame.
1849- th. net ( ) . rx_buffer . used_descriptor = Some ( ParsedDescriptorChain {
1850- head_index : 1 ,
1851- length : 100 ,
1852- nr_iovecs : 1 ,
1853- } ) ;
1898+ th. net ( ) . rx_buffer . used_descriptors = 1 ;
1899+ th. net ( ) . rx_buffer . used_bytes = 100 ;
18541900 check_metric_after_block ! (
18551901 th. net( ) . metrics. no_rx_avail_buffer,
18561902 1 ,
@@ -1860,7 +1906,8 @@ pub mod tests {
18601906 // We need to set this here to false, otherwise the device will try to
18611907 // handle a deferred frame, it will fail and will never try to read from
18621908 // the tap.
1863- th. net ( ) . rx_buffer . used_descriptor = None ;
1909+ th. net ( ) . rx_buffer . used_descriptors = 0 ;
1910+ th. net ( ) . rx_buffer . used_bytes = 0 ;
18641911
18651912 th. add_desc_chain ( NetQueue :: Rx , 0 , & [ ( 0 , 4096 , VIRTQ_DESC_F_WRITE ) ] ) ;
18661913 check_metric_after_block ! (
0 commit comments