// found in the THIRD-PARTY file.

use std::collections::VecDeque;
use std::mem;
use std::net::Ipv4Addr;
use std::num::{NonZeroU32, Wrapping};
use std::sync::{Arc, Mutex};

use libc::{iovec, EAGAIN};
use log::error;
use vmm_sys_util::eventfd::EventFd;
@@ -19,7 +20,7 @@ use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
1920use crate :: devices:: virtio:: gen:: virtio_net:: {
2021 virtio_net_hdr_v1, VIRTIO_NET_F_CSUM , VIRTIO_NET_F_GUEST_CSUM , VIRTIO_NET_F_GUEST_TSO4 ,
2122 VIRTIO_NET_F_GUEST_TSO6 , VIRTIO_NET_F_GUEST_UFO , VIRTIO_NET_F_HOST_TSO4 ,
22- VIRTIO_NET_F_HOST_TSO6 , VIRTIO_NET_F_HOST_UFO , VIRTIO_NET_F_MAC ,
23+ VIRTIO_NET_F_HOST_TSO6 , VIRTIO_NET_F_HOST_UFO , VIRTIO_NET_F_MAC , VIRTIO_NET_F_MRG_RXBUF ,
2324} ;
2425use crate :: devices:: virtio:: gen:: virtio_ring:: VIRTIO_RING_F_EVENT_IDX ;
2526use crate :: devices:: virtio:: iovec:: {
@@ -108,7 +109,8 @@ pub struct RxBuffers {
108109 // A map of which part of the memory belongs to which `DescriptorChain` object
109110 pub parsed_descriptors : VecDeque < ParsedDescriptorChain > ,
110111 // Buffers that we have used and they are ready to be given back to the guest.
111- pub deferred_descriptor : Option < ParsedDescriptorChain > ,
112+ pub used_descriptors : u16 ,
113+ pub used_bytes : u32 ,
112114}
113115
114116impl RxBuffers {
@@ -118,7 +120,8 @@ impl RxBuffers {
118120 min_buffer_size : 0 ,
119121 iovec : IoVecBufferMut :: new ( ) ?,
120122 parsed_descriptors : VecDeque :: with_capacity ( FIRECRACKER_MAX_QUEUE_SIZE . into ( ) ) ,
121- deferred_descriptor : None ,
123+ used_descriptors : 0 ,
124+ used_bytes : 0 ,
122125 } )
123126 }
124127
@@ -141,16 +144,16 @@ impl RxBuffers {
141144 Ok ( ( ) )
142145 }
143146
144- /// Returns the number of available `iovec` objects .
147+ /// Returns the total size of available space in the buffer .
145148 #[ inline( always) ]
146- fn len ( & self ) -> usize {
149+ fn capacity ( & self ) -> usize {
147150 self . iovec . len ( )
148151 }
149152
150153 /// Returns `true` if there aren't any available `iovec` objects.
151154 #[ inline( always) ]
152155 fn is_empty ( & self ) -> bool {
153- self . len ( ) == 0
156+ self . capacity ( ) == 0
154157 }
155158
156159 /// Mark the first `size` bytes of available memory as used.
@@ -160,18 +163,48 @@ impl RxBuffers {
160163 /// * The `RxBuffers` should include at least one parsed `DescriptorChain`.
161164 /// * `size` needs to be smaller or equal to total length of the first `DescriptorChain` stored
162165 /// in the `RxBuffers`.
163- unsafe fn mark_used ( & mut self , size : u32 ) -> ParsedDescriptorChain {
164- // Since we were able to write a frame in guest memory, we should have at least one
165- // descriptor chain here. If not, we have a bug, so fail fast, since the device is
166+ unsafe fn mark_used ( & mut self , mut bytes_written : u32 , rx_queue : & mut Queue ) {
167+ // Since we were able to write a frame in guest memory, we should have some capacity for it.
168+ // If not, we have a bug, so fail fast, since the device is
166169 // fundamentally broken.
167- let mut parsed_dc = self . parsed_descriptors . pop_front ( ) . expect (
168- "net: internal bug. Mismatch between written frame size and available descriptors" ,
169- ) ;
170+ debug_assert ! ( ! self . is_empty ( ) ) ;
171+
172+ self . used_bytes = bytes_written ;
170173
171- self . header_set_num_buffers ( 1 ) ;
172- self . iovec . drop_descriptor_chain ( & parsed_dc) ;
173- parsed_dc. length = size;
174- parsed_dc
174+ let mut used_heads: u16 = 0 ;
175+ let mut write_used = |head_index : u16 , bytes_written : u32 , rx_queue : & mut Queue | {
176+ if let Err ( err) = rx_queue. write_used_element (
177+ ( rx_queue. next_used + Wrapping ( self . used_descriptors ) ) . 0 ,
178+ head_index,
179+ bytes_written,
180+ ) {
181+ error ! (
182+ "net: Failed to add used descriptor {} of length {} to RX queue: {err}" ,
183+ head_index, bytes_written
184+ ) ;
185+ }
186+ used_heads += 1 ;
187+ self . used_descriptors += 1 ;
188+ } ;
189+
190+ for parsed_dc in self . parsed_descriptors . iter ( ) {
191+ if bytes_written <= parsed_dc. length {
192+ write_used ( parsed_dc. head_index , bytes_written, rx_queue) ;
193+ break ;
194+ } else {
195+ write_used ( parsed_dc. head_index , parsed_dc. length , rx_queue) ;
196+ bytes_written -= parsed_dc. length ;
197+ }
198+ }
199+
200+ self . header_set_num_buffers ( used_heads) ;
201+ for _ in 0 ..used_heads {
202+ let parsed_dc = self
203+ . parsed_descriptors
204+ . pop_front ( )
205+ . expect ( "This should never happen if write to the buffer succeded." ) ;
206+ self . iovec . drop_descriptor_chain ( & parsed_dc) ;
207+ }
175208 }
176209
177210 /// Write the number of descriptors used in VirtIO header
@@ -190,14 +223,24 @@ impl RxBuffers {
190223
191224 /// This will let the guest know that about all the `DescriptorChain` object that has been
192225 /// used to receive a frame from the TAP.
193- fn finish_frame ( & mut self , dc : & ParsedDescriptorChain , rx_queue : & mut Queue ) {
194- // It is fine to `.unrap()` here. The only reason why `add_used` can fail is if the
195- // `head_index` is not a valid descriptor id. `head_index` here is a valid
196- // `DescriptorChain` index. We got it from `queue.pop_or_enable_notification()` which
197- // checks for its validity. In other words, if this unwrap() fails there's a bug in our
198- // emulation logic which, most likely, we can't recover from. So, let's crash here
199- // instead of logging an error and continuing.
200- rx_queue. add_used ( dc. head_index , dc. length ) . unwrap ( ) ;
226+ fn finish_frame ( & mut self , rx_queue : & mut Queue ) {
227+ rx_queue. advance_used_ring ( self . used_descriptors ) ;
228+ self . used_descriptors = 0 ;
229+ self . used_bytes = 0 ;
230+ }
231+
232+ /// Return a slice of iovecs for the first slice in the buffer.
233+ ///
234+ /// # Safety
235+ /// Buffer needs to have at least one descriptor chain parsed.
236+ unsafe fn single_chain_slice_mut ( & mut self ) -> & mut [ iovec ] {
237+ let nr_iovecs = self . parsed_descriptors [ 0 ] . nr_iovecs as usize ;
238+ & mut self . iovec . as_iovec_mut_slice ( ) [ ..nr_iovecs]
239+ }
240+
241+ /// Return a slice of iovecs for all descriptor chains in the buffer.
242+ fn all_chains_slice_mut ( & mut self ) -> & mut [ iovec ] {
243+ self . iovec . as_iovec_mut_slice ( )
201244 }
202245}
203246
@@ -260,6 +303,7 @@ impl Net {
260303 | 1 << VIRTIO_NET_F_HOST_TSO6
261304 | 1 << VIRTIO_NET_F_HOST_UFO
262305 | 1 << VIRTIO_F_VERSION_1
306+ | 1 << VIRTIO_NET_F_MRG_RXBUF
263307 | 1 << VIRTIO_RING_F_EVENT_IDX ;
264308
265309 let mut config_space = ConfigSpace :: default ( ) ;
@@ -407,27 +451,35 @@ impl Net {
407451 // Attempts to copy a single frame into the guest if there is enough
408452 // rate limiting budget.
409453 // Returns true on successful frame delivery.
410- fn rate_limited_rx_single_frame ( & mut self , dc : & ParsedDescriptorChain ) -> bool {
454+ fn rate_limited_rx_single_frame ( & mut self , frame_size : u32 ) -> bool {
411455 let rx_queue = & mut self . queues [ RX_INDEX ] ;
412- if !Self :: rate_limiter_consume_op ( & mut self . rx_rate_limiter , dc . length as u64 ) {
456+ if !Self :: rate_limiter_consume_op ( & mut self . rx_rate_limiter , frame_size as u64 ) {
413457 self . metrics . rx_rate_limiter_throttled . inc ( ) ;
414458 return false ;
415459 }
416460
417- self . rx_buffer . finish_frame ( dc , rx_queue) ;
461+ self . rx_buffer . finish_frame ( rx_queue) ;
418462 true
419463 }
420464
421465 /// Returns the minimum size of buffer we expect the guest to provide us depending on the
422466 /// features we have negotiated with it
423467 fn minimum_rx_buffer_size ( & self ) -> u32 {
424- if self . has_feature ( VIRTIO_NET_F_GUEST_TSO4 as u64 )
425- || self . has_feature ( VIRTIO_NET_F_GUEST_TSO6 as u64 )
426- || self . has_feature ( VIRTIO_NET_F_GUEST_UFO as u64 )
427- {
428- 65562
468+ if !self . has_feature ( VIRTIO_NET_F_MRG_RXBUF as u64 ) {
469+ if self . has_feature ( VIRTIO_NET_F_GUEST_TSO4 as u64 )
470+ || self . has_feature ( VIRTIO_NET_F_GUEST_TSO6 as u64 )
471+ || self . has_feature ( VIRTIO_NET_F_GUEST_UFO as u64 )
472+ {
473+ 65562
474+ } else {
475+ 1526
476+ }
429477 } else {
430- 1526
478+ // header is 12 bytes long
479+ #[ allow( clippy:: cast_possible_truncation) ]
480+ {
481+ vnet_hdr_len ( ) as u32
482+ }
431483 }
432484 }
433485
@@ -442,6 +494,9 @@ impl Net {
442494 if let Err ( err) = unsafe { self . rx_buffer . add_buffer ( mem, head) } {
443495 self . metrics . rx_fails . inc ( ) ;
444496 error ! ( "net: Could not parse an RX descriptor: {err}" ) ;
497+ // Notify queue about ready frames. We need this
498+ // to bring queue into up to date state.
499+ self . rx_buffer . finish_frame ( queue) ;
445500 // Try to add the bad descriptor to the used ring.
446501 if let Err ( err) = queue. add_used ( index, 0 ) {
447502 error ! (
@@ -528,16 +583,15 @@ impl Net {
528583 }
529584
530585 // We currently prioritize packets from the MMDS over regular network packets.
531- fn read_from_mmds_or_tap ( & mut self ) -> Result < Option < ParsedDescriptorChain > , NetError > {
532- // If we don't have any buffers available try to parse more from the RX queue. There might
533- // be some buffers we didn't get the chance to process, because we got to handle the TAP
534- // event before the RX queue event.
535- if self . rx_buffer . is_empty ( ) {
586+ fn read_from_mmds_or_tap ( & mut self ) -> Result < Option < NonZeroU32 > , NetError > {
587+ // We only want to read from TAP (or mmds) if we have at least 64K of available capacity as
588+ // this is the max size of 1 packet.
589+ if self . rx_buffer . capacity ( ) < u16:: MAX as usize {
536590 self . parse_rx_descriptors ( ) ;
537591
538- // If after parsing the RX queue we still don't have any buffers stop processing RX
592+ // If after parsing the RX queue we still don't have enough capacity, stop processing RX
539593 // frames.
540- if self . rx_buffer . is_empty ( ) {
594+ if self . rx_buffer . capacity ( ) < u16 :: MAX as usize {
541595 return Ok ( None ) ;
542596 }
543597 }
@@ -553,29 +607,46 @@ impl Net {
553607 self . rx_buffer
554608 . iovec
555609 . write_all_volatile_at ( & self . rx_frame_buf [ ..vnet_hdr_len ( ) + len] , 0 ) ?;
610+
611+ // SAFETY:
612+ // * len is never 0
613+ // * Maximum size of the packet is 65562 which fits into u32
614+ #[ allow( clippy:: cast_possible_truncation) ]
615+ let len = unsafe { NonZeroU32 :: new_unchecked ( ( vnet_hdr_len ( ) + len) as u32 ) } ;
616+
556617 // SAFETY: This is safe:
557618 // * We checked that `rx_buffer` includes at least one `DescriptorChain`
558619 // * `rx_frame_buf` has size of `MAX_BUFFER_SIZE` and all `DescriptorChain` objects
559620 // are at least that big.
560- let dc = unsafe {
621+ unsafe {
561622 self . rx_buffer
562- . mark_used ( ( vnet_hdr_len ( ) + len) . try_into ( ) . unwrap ( ) )
563- } ;
564-
565- return Ok ( Some ( dc) ) ;
623+ . mark_used ( len. get ( ) , & mut self . queues [ RX_INDEX ] ) ;
624+ }
625+ return Ok ( Some ( len) ) ;
566626 }
567627 }
568628
569629 // SAFETY: this is safe because we ensured that `self.rx_buffer` has at least one
570630 // DescriptorChain parsed in it.
571631 let len = unsafe { self . read_tap ( ) . map_err ( NetError :: IO ) } ?;
572632
633+ // SAFETY:
634+ // * len is never 0
635+ // * Maximum size of the packet is 65562 which fits into u32
636+ #[ allow( clippy:: cast_possible_truncation) ]
637+ let len = unsafe { NonZeroU32 :: new_unchecked ( ( vnet_hdr_len ( ) + len) as u32 ) } ;
638+
573639 // SAFETY: This is safe,
574640 // * `rx_buffer` has at least one `DescriptorChain`
575641 // * `read_tap` passes the first `DescriptorChain` to `readv` so we can't have read more
576642 // bytes than its capacity.
577- let dc = unsafe { self . rx_buffer . mark_used ( len. try_into ( ) . unwrap ( ) ) } ;
578- Ok ( Some ( dc) )
643+ // * Maximum size of the packet is 65562 which fits into u32
644+ #[ allow( clippy:: cast_possible_truncation) ]
645+ unsafe {
646+ self . rx_buffer
647+ . mark_used ( len. get ( ) , & mut self . queues [ RX_INDEX ] ) ;
648+ }
649+ Ok ( Some ( len) )
579650 }
580651
581652 /// Read as many frames as possible.
@@ -586,12 +657,11 @@ impl Net {
586657 self . metrics . no_rx_avail_buffer . inc ( ) ;
587658 break ;
588659 }
589- Ok ( Some ( dc ) ) => {
660+ Ok ( Some ( bytes ) ) => {
590661 self . metrics . rx_count . inc ( ) ;
591- self . metrics . rx_bytes_count . add ( dc . length as u64 ) ;
662+ self . metrics . rx_bytes_count . add ( bytes . get ( ) as u64 ) ;
592663 self . metrics . rx_packets_count . inc ( ) ;
593- if !self . rate_limited_rx_single_frame ( & dc) {
594- self . rx_buffer . deferred_descriptor = Some ( dc) ;
664+ if !self . rate_limited_rx_single_frame ( bytes. get ( ) ) {
595665 break ;
596666 }
597667 }
@@ -619,11 +689,10 @@ impl Net {
619689
620690 fn resume_rx ( & mut self ) -> Result < ( ) , DeviceError > {
621691 // First try to handle any deferred frame
622- if let Some ( deferred_descriptor ) = self . rx_buffer . deferred_descriptor . take ( ) {
692+ if self . rx_buffer . used_bytes != 0 {
623693 // If can't finish sending this frame, re-set it as deferred and return; we can't
624694 // process any more frames from the TAP.
625- if !self . rate_limited_rx_single_frame ( & deferred_descriptor) {
626- self . rx_buffer . deferred_descriptor = Some ( deferred_descriptor) ;
695+ if !self . rate_limited_rx_single_frame ( self . rx_buffer . used_bytes ) {
627696 return Ok ( ( ) ) ;
628697 }
629698 }
@@ -774,9 +843,13 @@ impl Net {
774843 ///
775844 /// `self.rx_buffer` needs to have at least one descriptor chain parsed
776845 pub unsafe fn read_tap ( & mut self ) -> std:: io:: Result < usize > {
777- let nr_iovecs = self . rx_buffer . parsed_descriptors [ 0 ] . nr_iovecs as usize ;
778- self . tap
779- . read_iovec ( & mut self . rx_buffer . iovec . as_iovec_mut_slice ( ) [ ..nr_iovecs] )
846+ let slice = if self . has_feature ( VIRTIO_NET_F_MRG_RXBUF as u64 ) {
847+ self . rx_buffer . all_chains_slice_mut ( )
848+ } else {
849+ // SAFETY: we only call this if `rx_buffer` is not empty.
850+ unsafe { self . rx_buffer . single_chain_slice_mut ( ) }
851+ } ;
852+ self . tap . read_iovec ( slice)
780853 }
781854
782855 fn write_tap ( tap : & mut Tap , buf : & IoVecBuffer ) -> std:: io:: Result < usize > {
@@ -1815,11 +1888,8 @@ pub mod tests {
18151888 unsafe { libc:: close ( th. net . lock ( ) . unwrap ( ) . tap . as_raw_fd ( ) ) } ;
18161889
18171890 // The RX queue is empty and there is a deferred frame.
1818- th. net ( ) . rx_buffer . deferred_descriptor = Some ( ParsedDescriptorChain {
1819- head_index : 1 ,
1820- length : 100 ,
1821- nr_iovecs : 1 ,
1822- } ) ;
1891+ th. net ( ) . rx_buffer . used_descriptors = 1 ;
1892+ th. net ( ) . rx_buffer . used_bytes = 100 ;
18231893 check_metric_after_block ! (
18241894 th. net( ) . metrics. no_rx_avail_buffer,
18251895 1 ,
@@ -1829,7 +1899,8 @@ pub mod tests {
18291899 // We need to set this here to false, otherwise the device will try to
18301900 // handle a deferred frame, it will fail and will never try to read from
18311901 // the tap.
1832- th. net ( ) . rx_buffer . deferred_descriptor = None ;
1902+ th. net ( ) . rx_buffer . used_descriptors = 0 ;
1903+ th. net ( ) . rx_buffer . used_bytes = 0 ;
18331904
18341905 th. add_desc_chain ( NetQueue :: Rx , 0 , & [ ( 0 , 4096 , VIRTQ_DESC_F_WRITE ) ] ) ;
18351906 check_metric_after_block ! (
0 commit comments