7
7
// option. This file may not be copied, modified, or distributed
8
8
// except according to those terms.
9
9
10
+ //! The [`BufferQueue`] struct and helper types.
11
+ //!
12
+ //! This type is designed for the efficient parsing of string data, especially where many
13
+ //! significant characters are from the ASCII range 0-63. This includes, for example, important
14
+ //! characters in XML/HTML parsing.
15
+ //!
16
+ //! Good and predictable performance is achieved by avoiding allocation where possible (a.k.a. zero
17
+ //! copy).
18
+ //!
19
+ //! [`BufferQueue`]: struct.BufferQueue.html
20
+
10
21
11
22
use std:: collections:: VecDeque ;
12
23
@@ -15,39 +26,53 @@ use tendril::StrTendril;
15
26
pub use self :: SetResult :: { FromSet , NotFromSet } ;
16
27
use util:: smallcharset:: SmallCharSet ;
17
28
18
- /// Result from `pop_except_from`.
29
+ /// Result from [`pop_except_from`] containing either a character from a [`SmallCharSet`], or a
30
+ /// string buffer of characters not from the set.
31
+ ///
32
+ /// [`pop_except_from`]: struct.BufferQueue.html#method.pop_except_from
33
+ /// [`SmallCharSet`]: ../struct.SmallCharSet.html
19
34
#[ derive( PartialEq , Eq , Debug ) ]
20
35
pub enum SetResult {
36
+ /// A character from the `SmallCharSet`.
21
37
FromSet ( char ) ,
38
+ /// A string buffer containing no characters from the `SmallCharSet`.
22
39
NotFromSet ( StrTendril ) ,
23
40
}
24
41
25
- /// A queue of owned string buffers, which supports incrementally
26
- /// consuming characters.
42
+ /// A queue of owned string buffers, which supports incrementally consuming characters.
43
+ ///
44
+ /// Internally it uses [`VecDeque`] and has the same complexity properties.
45
+ ///
46
+ /// [`VecDeque`]: https://doc.rust-lang.org/std/collections/struct.VecDeque.html
27
47
pub struct BufferQueue {
28
48
/// Buffers to process.
29
49
buffers : VecDeque < StrTendril > ,
30
50
}
31
51
32
52
impl BufferQueue {
33
53
/// Create an empty BufferQueue.
54
+ #[ inline]
34
55
pub fn new ( ) -> BufferQueue {
35
56
BufferQueue {
36
57
buffers : VecDeque :: with_capacity ( 16 ) ,
37
58
}
38
59
}
39
60
40
61
/// Returns whether the queue is empty.
62
+ #[ inline]
41
63
pub fn is_empty ( & self ) -> bool {
42
64
self . buffers . is_empty ( )
43
65
}
44
66
45
- /// Get the tendril at the beginning of the queue.
67
+ /// Get the buffer at the beginning of the queue.
68
+ #[ inline]
46
69
pub fn pop_front ( & mut self ) -> Option < StrTendril > {
47
70
self . buffers . pop_front ( )
48
71
}
49
72
50
73
/// Add a buffer to the beginning of the queue.
74
+ ///
75
+ /// If the buffer is empty, it will be skipped.
51
76
pub fn push_front ( & mut self , buf : StrTendril ) {
52
77
if buf. len32 ( ) == 0 {
53
78
return ;
@@ -56,20 +81,25 @@ impl BufferQueue {
56
81
}
57
82
58
83
/// Add a buffer to the end of the queue.
84
+ ///
85
+ /// If the buffer is empty, it will be skipped.
59
86
pub fn push_back ( & mut self , buf : StrTendril ) {
60
87
if buf. len32 ( ) == 0 {
61
88
return ;
62
89
}
63
90
self . buffers . push_back ( buf) ;
64
91
}
65
92
66
- /// Look at the next available character, if any .
93
+ /// Look at the next available character without removing it, if the queue is not empty.
67
94
pub fn peek ( & self ) -> Option < char > {
68
- // Invariant: all buffers in the queue are non-empty.
95
+ debug_assert ! ( self . buffers. iter( ) . skip_while( |el| el. len32( ) != 0 ) . next( ) . is_none( ) ,
96
+ "invariant \" all buffers in the queue are non-empty\" failed" ) ;
69
97
self . buffers . front ( ) . map ( |b| b. chars ( ) . next ( ) . unwrap ( ) )
70
98
}
71
99
72
- /// Get the next character, if one is available.
100
+ /// Get the next character if one is available, removing it from the queue.
101
+ ///
102
+ /// This function manages the buffers, removing them as they become empty.
73
103
pub fn next ( & mut self ) -> Option < char > {
74
104
let ( result, now_empty) = match self . buffers . front_mut ( ) {
75
105
None => ( None , false ) ,
@@ -87,9 +117,32 @@ impl BufferQueue {
87
117
}
88
118
89
119
/// Pops and returns either a single character from the given set, or
90
- /// a `StrTendril` of characters none of which are in the set. The set
91
- /// is represented as a bitmask and so can only contain the first 64
92
- /// ASCII characters.
120
+ /// a buffer of characters none of which are in the set.
121
+ ///
122
+ /// # Examples
123
+ ///
124
+ /// ```
125
+ /// # #[macro_use] extern crate markup5ever;
126
+ /// # #[macro_use] extern crate tendril;
127
+ /// # fn main() {
128
+ /// use markup5ever::buffer_queue::{BufferQueue, SetResult};
129
+ ///
130
+ /// let mut queue = BufferQueue::new();
131
+ /// queue.push_back(format_tendril!(r#"<some_tag attr="text">SomeText</some_tag>"#));
132
+ /// let set = small_char_set!(b'<' b'>' b' ' b'=' b'"' b'/');
133
+ /// let tag = format_tendril!("some_tag");
134
+ /// let attr = format_tendril!("attr");
135
+ /// let attr_val = format_tendril!("text");
136
+ /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('<')));
137
+ /// assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(tag)));
138
+ /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet(' ')));
139
+ /// assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(attr)));
140
+ /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('=')));
141
+ /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('"')));
142
+ /// assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(attr_val)));
143
+ /// // ...
144
+ /// # }
145
+ /// ```
93
146
pub fn pop_except_from ( & mut self , set : SmallCharSet ) -> Option < SetResult > {
94
147
let ( result, now_empty) = match self . buffers . front_mut ( ) {
95
148
None => ( None , false ) ,
@@ -117,12 +170,29 @@ impl BufferQueue {
117
170
result
118
171
}
119
172
120
- // Check if the next characters are an ASCII case-insensitive match for
121
- // `pat`, which must be non-empty.
122
- //
123
- // If so, consume them and return Some(true).
124
- // If they do not match, return Some(false).
125
- // If not enough characters are available to know, return None.
173
+ /// Consume bytes matching the pattern, using a custom comparison function `eq`.
174
+ ///
175
+ /// Returns `Some(true)` if there is a match, `Some(false)` if there is no match, or `None` if
176
+ /// it wasn't possible to know (more data is needed).
177
+ ///
178
+ /// The custom comparison function is used elsewhere to compare ASCII case-insensitively.
179
+ ///
180
+ /// # Examples
181
+ ///
182
+ /// ```
183
+ /// # extern crate markup5ever;
184
+ /// # #[macro_use] extern crate tendril;
185
+ /// # fn main() {
186
+ /// use markup5ever::buffer_queue::{BufferQueue};
187
+ ///
188
+ /// let mut queue = BufferQueue::new();
189
+ /// queue.push_back(format_tendril!("testtext"));
190
+ /// let test_str = "test";
191
+ /// assert_eq!(queue.eat("test", |&a, &b| a == b), Some(true));
192
+ /// assert_eq!(queue.eat("text", |&a, &b| a == b), Some(true));
193
+ /// assert!(queue.is_empty());
194
+ /// # }
195
+ /// ```
126
196
pub fn eat < F : Fn ( & u8 , & u8 ) -> bool > ( & mut self , pat : & str , eq : F ) -> Option < bool > {
127
197
let mut buffers_exhausted = 0 ;
128
198
let mut consumed_from_last = 0 ;
0 commit comments