66// found in the THIRD-PARTY file.
77
88use std:: collections:: VecDeque ;
9- use std:: mem;
9+ use std:: mem:: { self } ;
1010use std:: net:: Ipv4Addr ;
11+ use std:: num:: { NonZeroU32 , Wrapping } ;
1112use std:: sync:: { Arc , Mutex } ;
1213
13- use libc:: EAGAIN ;
14+ use libc:: { iovec , EAGAIN } ;
1415use log:: error;
1516use vmm_sys_util:: eventfd:: EventFd ;
1617
@@ -19,7 +20,7 @@ use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
1920use crate :: devices:: virtio:: gen:: virtio_net:: {
2021 virtio_net_hdr_v1, VIRTIO_NET_F_CSUM , VIRTIO_NET_F_GUEST_CSUM , VIRTIO_NET_F_GUEST_TSO4 ,
2122 VIRTIO_NET_F_GUEST_TSO6 , VIRTIO_NET_F_GUEST_UFO , VIRTIO_NET_F_HOST_TSO4 ,
22- VIRTIO_NET_F_HOST_TSO6 , VIRTIO_NET_F_HOST_UFO , VIRTIO_NET_F_MAC ,
23+ VIRTIO_NET_F_HOST_TSO6 , VIRTIO_NET_F_HOST_UFO , VIRTIO_NET_F_MAC , VIRTIO_NET_F_MRG_RXBUF ,
2324} ;
2425use crate :: devices:: virtio:: gen:: virtio_ring:: VIRTIO_RING_F_EVENT_IDX ;
2526use crate :: devices:: virtio:: iovec:: {
@@ -108,7 +109,8 @@ pub struct RxBuffers {
108109 // A map of which part of the memory belongs to which `DescriptorChain` object
109110 pub parsed_descriptors : VecDeque < ParsedDescriptorChain > ,
110111 // Buffers that we have used and they are ready to be given back to the guest.
111- pub deferred_descriptor : Option < ParsedDescriptorChain > ,
112+ pub used_descriptors : u16 ,
113+ pub used_bytes : u32 ,
112114}
113115
114116impl RxBuffers {
@@ -118,7 +120,8 @@ impl RxBuffers {
118120 min_buffer_size : 0 ,
119121 iovec : IoVecBufferMut :: new ( ) ?,
120122 parsed_descriptors : VecDeque :: with_capacity ( FIRECRACKER_MAX_QUEUE_SIZE . into ( ) ) ,
121- deferred_descriptor : None ,
123+ used_descriptors : 0 ,
124+ used_bytes : 0 ,
122125 } )
123126 }
124127
@@ -141,37 +144,56 @@ impl RxBuffers {
141144 Ok ( ( ) )
142145 }
143146
144- /// Returns the number of available `iovec` objects .
147+ /// Returns the total size of available space in the buffer .
145148 #[ inline( always) ]
146- fn len ( & self ) -> usize {
149+ fn capacity ( & self ) -> usize {
147150 self . iovec . len ( )
148151 }
149152
150- /// Returns `true` if there aren't any available `iovec` objects.
151- #[ inline( always) ]
152- fn is_empty ( & self ) -> bool {
153- self . len ( ) == 0
154- }
155-
156153 /// Mark the first `size` bytes of available memory as used.
157154 ///
158155 /// # Safety:
159156 ///
160157 /// * The `RxBuffers` should include at least one parsed `DescriptorChain`.
161158 /// * `size` needs to be smaller or equal to total length of the first `DescriptorChain` stored
162159 /// in the `RxBuffers`.
163- unsafe fn mark_used ( & mut self , size : u32 ) -> ParsedDescriptorChain {
164- // Since we were able to write a frame in guest memory, we should have at least one
165- // descriptor chain here. If not, we have a bug, so fail fast, since the device is
166- // fundamentally broken.
167- let mut parsed_dc = self . parsed_descriptors . pop_front ( ) . expect (
168- "net: internal bug. Mismatch between written frame size and available descriptors" ,
169- ) ;
160+ unsafe fn mark_used ( & mut self , mut bytes_written : u32 , rx_queue : & mut Queue ) {
161+ self . used_bytes = bytes_written;
162+
163+ let mut used_heads: u16 = 0 ;
164+ let mut write_used = |head_index : u16 , bytes_written : u32 , rx_queue : & mut Queue | {
165+ if let Err ( err) = rx_queue. write_used_element (
166+ ( rx_queue. next_used + Wrapping ( self . used_descriptors ) ) . 0 ,
167+ head_index,
168+ bytes_written,
169+ ) {
170+ error ! (
171+ "net: Failed to add used descriptor {} of length {} to RX queue: {err}" ,
172+ head_index, bytes_written
173+ ) ;
174+ }
175+ used_heads += 1 ;
176+ self . used_descriptors += 1 ;
177+ } ;
170178
171- self . header_set_num_buffers ( 1 ) ;
172- self . iovec . drop_descriptor_chain ( & parsed_dc) ;
173- parsed_dc. length = size;
174- parsed_dc
179+ for parsed_dc in self . parsed_descriptors . iter ( ) {
180+ if bytes_written <= parsed_dc. length {
181+ write_used ( parsed_dc. head_index , bytes_written, rx_queue) ;
182+ break ;
183+ } else {
184+ write_used ( parsed_dc. head_index , parsed_dc. length , rx_queue) ;
185+ bytes_written -= parsed_dc. length ;
186+ }
187+ }
188+
189+ self . header_set_num_buffers ( used_heads) ;
190+ for _ in 0 ..used_heads {
191+ let parsed_dc = self
192+ . parsed_descriptors
193+ . pop_front ( )
194+ . expect ( "This should never happen if write to the buffer succeded." ) ;
195+ self . iovec . drop_descriptor_chain ( & parsed_dc) ;
196+ }
175197 }
176198
177199 /// Write the number of descriptors used in VirtIO header
@@ -190,14 +212,24 @@ impl RxBuffers {
190212
191213 /// This will let the guest know that about all the `DescriptorChain` object that has been
192214 /// used to receive a frame from the TAP.
193- fn finish_frame ( & mut self , dc : & ParsedDescriptorChain , rx_queue : & mut Queue ) {
194- // It is fine to `.unrap()` here. The only reason why `add_used` can fail is if the
195- // `head_index` is not a valid descriptor id. `head_index` here is a valid
196- // `DescriptorChain` index. We got it from `queue.pop_or_enable_notification()` which
197- // checks for its validity. In other words, if this unwrap() fails there's a bug in our
198- // emulation logic which, most likely, we can't recover from. So, let's crash here
199- // instead of logging an error and continuing.
200- rx_queue. add_used ( dc. head_index , dc. length ) . unwrap ( ) ;
215+ fn finish_frame ( & mut self , rx_queue : & mut Queue ) {
216+ rx_queue. advance_used_ring ( self . used_descriptors ) ;
217+ self . used_descriptors = 0 ;
218+ self . used_bytes = 0 ;
219+ }
220+
221+ /// Return a slice of iovecs for the first slice in the buffer.
222+ ///
223+ /// # Safety
224+ /// Buffer needs to have at least one descriptor chain parsed.
225+ unsafe fn single_chain_slice_mut ( & mut self ) -> & mut [ iovec ] {
226+ let nr_iovecs = self . parsed_descriptors [ 0 ] . nr_iovecs as usize ;
227+ & mut self . iovec . as_iovec_mut_slice ( ) [ ..nr_iovecs]
228+ }
229+
230+ /// Return a slice of iovecs for all descriptor chains in the buffer.
231+ fn all_chains_slice_mut ( & mut self ) -> & mut [ iovec ] {
232+ self . iovec . as_iovec_mut_slice ( )
201233 }
202234}
203235
@@ -260,6 +292,7 @@ impl Net {
260292 | 1 << VIRTIO_NET_F_HOST_TSO6
261293 | 1 << VIRTIO_NET_F_HOST_UFO
262294 | 1 << VIRTIO_F_VERSION_1
295+ | 1 << VIRTIO_NET_F_MRG_RXBUF
263296 | 1 << VIRTIO_RING_F_EVENT_IDX ;
264297
265298 let mut config_space = ConfigSpace :: default ( ) ;
@@ -407,27 +440,35 @@ impl Net {
407440 // Attempts to copy a single frame into the guest if there is enough
408441 // rate limiting budget.
409442 // Returns true on successful frame delivery.
410- fn rate_limited_rx_single_frame ( & mut self , dc : & ParsedDescriptorChain ) -> bool {
443+ fn rate_limited_rx_single_frame ( & mut self , frame_size : u32 ) -> bool {
411444 let rx_queue = & mut self . queues [ RX_INDEX ] ;
412- if !Self :: rate_limiter_consume_op ( & mut self . rx_rate_limiter , dc . length as u64 ) {
445+ if !Self :: rate_limiter_consume_op ( & mut self . rx_rate_limiter , frame_size as u64 ) {
413446 self . metrics . rx_rate_limiter_throttled . inc ( ) ;
414447 return false ;
415448 }
416449
417- self . rx_buffer . finish_frame ( dc , rx_queue) ;
450+ self . rx_buffer . finish_frame ( rx_queue) ;
418451 true
419452 }
420453
421454 /// Returns the minimum size of buffer we expect the guest to provide us depending on the
422455 /// features we have negotiated with it
423456 fn minimum_rx_buffer_size ( & self ) -> u32 {
424- if self . has_feature ( VIRTIO_NET_F_GUEST_TSO4 as u64 )
425- || self . has_feature ( VIRTIO_NET_F_GUEST_TSO6 as u64 )
426- || self . has_feature ( VIRTIO_NET_F_GUEST_UFO as u64 )
427- {
428- 65562
457+ if !self . has_feature ( VIRTIO_NET_F_MRG_RXBUF as u64 ) {
458+ if self . has_feature ( VIRTIO_NET_F_GUEST_TSO4 as u64 )
459+ || self . has_feature ( VIRTIO_NET_F_GUEST_TSO6 as u64 )
460+ || self . has_feature ( VIRTIO_NET_F_GUEST_UFO as u64 )
461+ {
462+ 65562
463+ } else {
464+ 1526
465+ }
429466 } else {
430- 1526
467+ // header is 12 bytes long
468+ #[ allow( clippy:: cast_possible_truncation) ]
469+ {
470+ vnet_hdr_len ( ) as u32
471+ }
431472 }
432473 }
433474
@@ -442,6 +483,9 @@ impl Net {
442483 if let Err ( err) = unsafe { self . rx_buffer . add_buffer ( mem, head) } {
443484 self . metrics . rx_fails . inc ( ) ;
444485 error ! ( "net: Could not parse an RX descriptor: {err}" ) ;
486+ // Notify queue about ready frames. We need this
487+ // to bring queue into up to date state.
488+ self . rx_buffer . finish_frame ( queue) ;
445489 // Try to add the bad descriptor to the used ring.
446490 if let Err ( err) = queue. add_used ( index, 0 ) {
447491 error ! (
@@ -528,16 +572,15 @@ impl Net {
528572 }
529573
530574 // We currently prioritize packets from the MMDS over regular network packets.
531- fn read_from_mmds_or_tap ( & mut self ) -> Result < Option < ParsedDescriptorChain > , NetError > {
532- // If we don't have any buffers available try to parse more from the RX queue. There might
533- // be some buffers we didn't get the chance to process, because we got to handle the TAP
534- // event before the RX queue event.
535- if self . rx_buffer . is_empty ( ) {
575+ fn read_from_mmds_or_tap ( & mut self ) -> Result < Option < NonZeroU32 > , NetError > {
576+ // We only want to read from TAP (or mmds) if we have at least 64K of available capacity as
577+ // this is the max size of 1 packet.
578+ if self . rx_buffer . capacity ( ) < u16:: MAX as usize {
536579 self . parse_rx_descriptors ( ) ;
537580
538- // If after parsing the RX queue we still don't have any buffers stop processing RX
581+ // If after parsing the RX queue we still don't have enough capacity, stop processing RX
539582 // frames.
540- if self . rx_buffer . is_empty ( ) {
583+ if self . rx_buffer . capacity ( ) < u16 :: MAX as usize {
541584 return Ok ( None ) ;
542585 }
543586 }
@@ -553,29 +596,44 @@ impl Net {
553596 self . rx_buffer
554597 . iovec
555598 . write_all_volatile_at ( & self . rx_frame_buf [ ..vnet_hdr_len ( ) + len] , 0 ) ?;
599+
600+ // SAFETY:
601+ // * len is never 0
602+ // * Maximum size of the packet is 65562 which fits into u32
603+ #[ allow( clippy:: cast_possible_truncation) ]
604+ let len = unsafe { NonZeroU32 :: new_unchecked ( ( vnet_hdr_len ( ) + len) as u32 ) } ;
605+
556606 // SAFETY: This is safe:
557607 // * We checked that `rx_buffer` includes at least one `DescriptorChain`
558608 // * `rx_frame_buf` has size of `MAX_BUFFER_SIZE` and all `DescriptorChain` objects
559609 // are at least that big.
560- let dc = unsafe {
610+ unsafe {
561611 self . rx_buffer
562- . mark_used ( ( vnet_hdr_len ( ) + len) . try_into ( ) . unwrap ( ) )
563- } ;
564-
565- return Ok ( Some ( dc) ) ;
612+ . mark_used ( len. get ( ) , & mut self . queues [ RX_INDEX ] ) ;
613+ }
614+ return Ok ( Some ( len) ) ;
566615 }
567616 }
568617
569618 // SAFETY: this is safe because we ensured that `self.rx_buffer` has at least one
570619 // DescriptorChain parsed in it.
571620 let len = unsafe { self . read_tap ( ) . map_err ( NetError :: IO ) } ?;
572621
622+ // SAFETY:
623+ // * len is never 0
624+ // * Maximum size of the packet is 65562 which fits into u32
625+ #[ allow( clippy:: cast_possible_truncation) ]
626+ let len = unsafe { NonZeroU32 :: new_unchecked ( len as u32 ) } ;
627+
573628 // SAFETY: This is safe,
574629 // * `rx_buffer` has at least one `DescriptorChain`
575630 // * `read_tap` passes the first `DescriptorChain` to `readv` so we can't have read more
576631 // bytes than its capacity.
577- let dc = unsafe { self . rx_buffer . mark_used ( len. try_into ( ) . unwrap ( ) ) } ;
578- Ok ( Some ( dc) )
632+ unsafe {
633+ self . rx_buffer
634+ . mark_used ( len. get ( ) , & mut self . queues [ RX_INDEX ] ) ;
635+ }
636+ Ok ( Some ( len) )
579637 }
580638
581639 /// Read as many frames as possible.
@@ -586,12 +644,11 @@ impl Net {
586644 self . metrics . no_rx_avail_buffer . inc ( ) ;
587645 break ;
588646 }
589- Ok ( Some ( dc ) ) => {
647+ Ok ( Some ( bytes ) ) => {
590648 self . metrics . rx_count . inc ( ) ;
591- self . metrics . rx_bytes_count . add ( dc . length as u64 ) ;
649+ self . metrics . rx_bytes_count . add ( bytes . get ( ) as u64 ) ;
592650 self . metrics . rx_packets_count . inc ( ) ;
593- if !self . rate_limited_rx_single_frame ( & dc) {
594- self . rx_buffer . deferred_descriptor = Some ( dc) ;
651+ if !self . rate_limited_rx_single_frame ( bytes. get ( ) ) {
595652 break ;
596653 }
597654 }
@@ -619,11 +676,10 @@ impl Net {
619676
620677 fn resume_rx ( & mut self ) -> Result < ( ) , DeviceError > {
621678 // First try to handle any deferred frame
622- if let Some ( deferred_descriptor ) = self . rx_buffer . deferred_descriptor . take ( ) {
679+ if self . rx_buffer . used_bytes != 0 {
623680 // If can't finish sending this frame, re-set it as deferred and return; we can't
624681 // process any more frames from the TAP.
625- if !self . rate_limited_rx_single_frame ( & deferred_descriptor) {
626- self . rx_buffer . deferred_descriptor = Some ( deferred_descriptor) ;
682+ if !self . rate_limited_rx_single_frame ( self . rx_buffer . used_bytes ) {
627683 return Ok ( ( ) ) ;
628684 }
629685 }
@@ -689,7 +745,7 @@ impl Net {
689745 & self . metrics ,
690746 )
691747 . unwrap_or ( false ) ;
692- if frame_consumed_by_mmds && self . rx_buffer . deferred_descriptor . is_none ( ) {
748+ if frame_consumed_by_mmds && self . rx_buffer . used_bytes == 0 {
693749 // MMDS consumed this frame/request, let's also try to process the response.
694750 process_rx_for_mmds = true ;
695751 }
@@ -774,9 +830,13 @@ impl Net {
774830 ///
775831 /// `self.rx_buffer` needs to have at least one descriptor chain parsed
776832 pub unsafe fn read_tap ( & mut self ) -> std:: io:: Result < usize > {
777- let nr_iovecs = self . rx_buffer . parsed_descriptors [ 0 ] . nr_iovecs as usize ;
778- self . tap
779- . read_iovec ( & mut self . rx_buffer . iovec . as_iovec_mut_slice ( ) [ ..nr_iovecs] )
833+ let slice = if self . has_feature ( VIRTIO_NET_F_MRG_RXBUF as u64 ) {
834+ self . rx_buffer . all_chains_slice_mut ( )
835+ } else {
836+ // SAFETY: we only call this if `rx_buffer` is not empty.
837+ unsafe { self . rx_buffer . single_chain_slice_mut ( ) }
838+ } ;
839+ self . tap . read_iovec ( slice)
780840 }
781841
782842 fn write_tap ( tap : & mut Tap , buf : & IoVecBuffer ) -> std:: io:: Result < usize > {
@@ -1815,11 +1875,8 @@ pub mod tests {
18151875 unsafe { libc:: close ( th. net . lock ( ) . unwrap ( ) . tap . as_raw_fd ( ) ) } ;
18161876
18171877 // The RX queue is empty and there is a deferred frame.
1818- th. net ( ) . rx_buffer . deferred_descriptor = Some ( ParsedDescriptorChain {
1819- head_index : 1 ,
1820- length : 100 ,
1821- nr_iovecs : 1 ,
1822- } ) ;
1878+ th. net ( ) . rx_buffer . used_descriptors = 1 ;
1879+ th. net ( ) . rx_buffer . used_bytes = 100 ;
18231880 check_metric_after_block ! (
18241881 th. net( ) . metrics. no_rx_avail_buffer,
18251882 1 ,
@@ -1829,7 +1886,8 @@ pub mod tests {
18291886 // We need to set this here to false, otherwise the device will try to
18301887 // handle a deferred frame, it will fail and will never try to read from
18311888 // the tap.
1832- th. net ( ) . rx_buffer . deferred_descriptor = None ;
1889+ th. net ( ) . rx_buffer . used_descriptors = 0 ;
1890+ th. net ( ) . rx_buffer . used_bytes = 0 ;
18331891
18341892 th. add_desc_chain ( NetQueue :: Rx , 0 , & [ ( 0 , 4096 , VIRTQ_DESC_F_WRITE ) ] ) ;
18351893 check_metric_after_block ! (
0 commit comments