@@ -229,6 +229,8 @@ use std::borrow::Cow;
229229#[ cfg( feature = "overlapped-lists" ) ]
230230use std:: collections:: VecDeque ;
231231use std:: io:: BufRead ;
232+ #[ cfg( feature = "overlapped-lists" ) ]
233+ use std:: num:: NonZeroUsize ;
232234
233235pub ( crate ) const INNER_VALUE : & str = "$value" ;
234236pub ( crate ) const UNFLATTEN_PREFIX : & str = "$unflatten=" ;
@@ -277,6 +279,12 @@ where
277279 /// moved from this to [`Self::read`].
278280 #[ cfg( feature = "overlapped-lists" ) ]
279281 write : VecDeque < DeEvent < ' de > > ,
282+ /// Maximum number of events that can be skipped when processing sequences
283+ /// that occur out-of-order. This field is used to prevent potential
284+ /// denial-of-service (DoS) attacks which could cause infinite memory
285+ /// consumption when parsing a very large amount of XML into a sequence field.
286+ #[ cfg( feature = "overlapped-lists" ) ]
287+ limit : Option < NonZeroUsize > ,
280288
281289 #[ cfg( not( feature = "overlapped-lists" ) ) ]
282290 peek : Option < DeEvent < ' de > > ,
@@ -375,6 +383,8 @@ where
375383 read : VecDeque :: new ( ) ,
376384 #[ cfg( feature = "overlapped-lists" ) ]
377385 write : VecDeque :: new ( ) ,
386+ #[ cfg( feature = "overlapped-lists" ) ]
387+ limit : None ,
378388
379389 #[ cfg( not( feature = "overlapped-lists" ) ) ]
380390 peek : None ,
@@ -387,6 +397,71 @@ where
387397 Self :: new ( reader)
388398 }
389399
400+ /// Set the maximum number of events that could be skipped during deserialization
401+ /// of sequences.
402+ ///
403+ /// If `<element>` contains more than specified nested elements, `#text` or
404+ /// CDATA nodes, then [`DeError::TooManyEvents`] will be returned during
405+ /// deserialization of sequence field (any type that uses [`deserialize_seq`]
406+ /// for the deserialization, for example, `Vec<T>`).
407+ ///
408+ /// This method can be used to prevent a [DoS] attack and infinite memory
409+ /// consumption when parsing a very large XML to a sequence field.
410+ ///
411+ /// It is strongly recommended to set limit to some value when you parse data
412+ /// from untrusted sources. You should choose a value that your typical XMLs
413+ /// can have _between_ different elements that corresponds to the same sequence.
414+ ///
415+ /// # Examples
416+ ///
417+ /// Let's imagine, that we deserialize such structure:
418+ /// ```
419+ /// struct List {
420+ /// item: Vec<()>,
421+ /// }
422+ /// ```
423+ ///
424+ /// The XML that we try to parse look like this:
425+ /// ```xml
426+ /// <any-name>
427+ /// <item/>
428+ /// <!-- Bufferization starts at this point -->
429+ /// <another-item>
430+ /// <some-element>with text</some-element>
431+ /// <yet-another-element/>
432+ /// </another-item>
433+ /// <!-- Buffer will be emptied at this point; 7 events were buffered -->
434+ /// <item/>
435+ /// <!-- There is nothing to buffer, because elements follows each other -->
436+ /// <item/>
437+ /// </any-name>
438+ /// ```
439+ ///
440+ /// There, when we deserialize the `item` field, we need to buffer 7 events,
441+ /// before we can deserialize the second `<item/>`:
442+ ///
443+ /// - `<another-item>`
444+ /// - `<some-element>`
445+ /// - `#text(with text)`
446+ /// - `</some-element>`
447+ /// - `<yet-another-element/>` (virtual start event)
448+ /// - `<yet-another-element/>` (vitrual end event)
449+ /// - `</another-item>`
450+ ///
451+ /// Note, that `<yet-another-element/>` internally represented as 2 events:
452+ /// one for the start tag and one for the end tag. In the future this can be
453+ /// eliminated, but for now we use [auto-expanding feature] of a reader,
454+ /// because this simplifies deserializer code.
455+ ///
456+ /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
457+ /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
458+ /// [auto-expanding feature]: Reader::expand_empty_elements
459+ #[ cfg( feature = "overlapped-lists" ) ]
460+ pub fn event_buffer_size ( & mut self , limit : Option < NonZeroUsize > ) -> & mut Self {
461+ self . limit = limit;
462+ self
463+ }
464+
390465 #[ cfg( feature = "overlapped-lists" ) ]
391466 fn peek ( & mut self ) -> Result < & DeEvent < ' de > , DeError > {
392467 if self . read . is_empty ( ) {
@@ -435,7 +510,7 @@ where
435510 #[ cfg( feature = "overlapped-lists" ) ]
436511 fn skip ( & mut self ) -> Result < ( ) , DeError > {
437512 let event = self . next ( ) ?;
438- self . write . push_back ( event) ;
513+ self . skip_event ( event) ? ;
439514 match self . write . back ( ) {
440515 // Skip all subtree, if we skip a start event
441516 Some ( DeEvent :: Start ( e) ) => {
@@ -445,24 +520,36 @@ where
445520 let event = self . next ( ) ?;
446521 match event {
447522 DeEvent :: Start ( ref e) if e. name ( ) == end => {
448- self . write . push_back ( event) ;
523+ self . skip_event ( event) ? ;
449524 depth += 1 ;
450525 }
451526 DeEvent :: End ( ref e) if e. name ( ) == end => {
452- self . write . push_back ( event) ;
527+ self . skip_event ( event) ? ;
453528 if depth == 0 {
454529 return Ok ( ( ) ) ;
455530 }
456531 depth -= 1 ;
457532 }
458- _ => self . write . push_back ( event) ,
533+ _ => self . skip_event ( event) ? ,
459534 }
460535 }
461536 }
462537 _ => Ok ( ( ) ) ,
463538 }
464539 }
465540
541+ #[ cfg( feature = "overlapped-lists" ) ]
542+ #[ inline]
543+ fn skip_event ( & mut self , event : DeEvent < ' de > ) -> Result < ( ) , DeError > {
544+ if let Some ( max) = self . limit {
545+ if self . write . len ( ) >= max. get ( ) {
546+ return Err ( DeError :: TooManyEvents ( max) ) ;
547+ }
548+ }
549+ self . write . push_back ( event) ;
550+ Ok ( ( ) )
551+ }
552+
466553 /// Moves all buffered events to the end of [`Self::write`] buffer and swaps
467554 /// read and write buffers.
468555 ///
@@ -1169,6 +1256,38 @@ mod tests {
11691256
11701257 assert_eq ! ( de. next( ) . unwrap( ) , End ( BytesEnd :: borrowed( b"root" ) ) ) ;
11711258 }
1259+
1260+ /// Checks that limiting buffer size works correctly
1261+ #[ test]
1262+ fn limit ( ) {
1263+ use serde:: Deserialize ;
1264+
1265+ #[ derive( Debug , Deserialize ) ]
1266+ #[ allow( unused) ]
1267+ struct List {
1268+ item : Vec < ( ) > ,
1269+ }
1270+
1271+ let mut de = Deserializer :: from_slice (
1272+ br#"
1273+ <any-name>
1274+ <item/>
1275+ <another-item>
1276+ <some-element>with text</some-element>
1277+ <yet-another-element/>
1278+ </another-item>
1279+ <item/>
1280+ <item/>
1281+ </any-name>
1282+ "# ,
1283+ ) ;
1284+ de. event_buffer_size ( NonZeroUsize :: new ( 3 ) ) ;
1285+
1286+ match List :: deserialize ( & mut de) {
1287+ Err ( DeError :: TooManyEvents ( count) ) => assert_eq ! ( count. get( ) , 3 ) ,
1288+ e => panic ! ( "Expected `Err(TooManyEvents(3))`, but found {:?}" , e) ,
1289+ }
1290+ }
11721291 }
11731292
11741293 #[ test]
0 commit comments