@@ -83,86 +83,6 @@ pub(super) struct ReaderState {
8383}
8484
8585impl ReaderState {
86- /// reads `BytesElement` starting with a `!`,
87- /// return `Comment`, `CData` or `DocType` event
88- pub fn emit_bang < ' b > ( & mut self , bang_type : BangType , buf : & ' b [ u8 ] ) -> Result < Event < ' b > > {
89- let uncased_starts_with = |string : & [ u8 ] , prefix : & [ u8 ] | {
90- string. len ( ) >= prefix. len ( ) && string[ ..prefix. len ( ) ] . eq_ignore_ascii_case ( prefix)
91- } ;
92-
93- let len = buf. len ( ) ;
94- match bang_type {
95- BangType :: Comment if buf. starts_with ( b"!--" ) => {
96- debug_assert ! ( buf. ends_with( b"--" ) ) ;
97- if self . config . check_comments {
98- // search if '--' not in comments
99- let mut haystack = & buf[ 3 ..len - 2 ] ;
100- let mut off = 0 ;
101- while let Some ( p) = memchr:: memchr ( b'-' , haystack) {
102- off += p + 1 ;
103- // if next byte after `-` is also `-`, return an error
104- if buf[ 3 + off] == b'-' {
105- // Explanation of the magic:
106- //
107- // - `self.offset`` just after `>`,
108- // - `buf` contains `!-- con--tent --`
109- // - `p` is counted from byte after `<!--`
110- //
111- // <!-- con--tent -->:
112- // ~~~~~~~~~~~~~~~~ : - buf
113- // : =========== : - zone of search (possible values of `p`)
114- // : |---p : - p is counted from | (| is 0)
115- // : : : ^ - self.offset
116- // ^ : : - self.offset - len
117- // ^ : - self.offset - len + 2
118- // ^ - self.offset - len + 2 + p
119- self . last_error_offset = self . offset - len + 2 + p;
120- return Err ( Error :: IllFormed ( IllFormedError :: DoubleHyphenInComment ) ) ;
121- }
122- // Continue search after single `-` (+1 to skip it)
123- haystack = & haystack[ p + 1 ..] ;
124- }
125- }
126- Ok ( Event :: Comment ( BytesText :: wrap (
127- // Cut of `!--` and `--` from start and end
128- & buf[ 3 ..len - 2 ] ,
129- self . decoder ( ) ,
130- ) ) )
131- }
132- BangType :: CData if uncased_starts_with ( buf, b"![CDATA[" ) => {
133- debug_assert ! ( buf. ends_with( b"]]" ) ) ;
134- Ok ( Event :: CData ( BytesCData :: wrap (
135- // Cut of `![CDATA[` and `]]` from start and end
136- & buf[ 8 ..len - 2 ] ,
137- self . decoder ( ) ,
138- ) ) )
139- }
140- BangType :: DocType if uncased_starts_with ( buf, b"!DOCTYPE" ) => {
141- match buf[ 8 ..] . iter ( ) . position ( |& b| !is_whitespace ( b) ) {
142- Some ( start) => Ok ( Event :: DocType ( BytesText :: wrap (
143- // Cut of `!DOCTYPE` and any number of spaces from start
144- & buf[ 8 + start..] ,
145- self . decoder ( ) ,
146- ) ) ) ,
147- None => {
148- // Because we here, we at least read `<!DOCTYPE>` and offset after `>`.
149- // We want report error at place where name is expected - this is just
150- // before `>`
151- self . last_error_offset = self . offset - 1 ;
152- return Err ( Error :: IllFormed ( IllFormedError :: MissingDoctypeName ) ) ;
153- }
154- }
155- }
156- _ => {
157- // <!....>
158- // ^^^^^ - `buf` does not contain `<` and `>`, but `self.offset` is after `>`.
159- // ^------- We report error at that position, so we need to subtract 2 and buf len
160- self . last_error_offset = self . offset - len - 2 ;
161- Err ( bang_type. to_err ( ) )
162- }
163- }
164- }
165-
16686 /// Wraps content of `buf` into the [`Event::End`] event. Does the check that
16787 /// end name matches the last opened start name if `self.config.check_end_names` is set.
16888 pub fn emit_end < ' b > ( & mut self , buf : & ' b [ u8 ] ) -> Result < Event < ' b > > {
@@ -417,7 +337,57 @@ impl ReaderState {
417337 debug_assert ! ( content. starts_with( b"<!--" ) , "{:?}" , Bytes ( content) ) ;
418338 debug_assert ! ( content. ends_with( b"-->" ) , "{:?}" , Bytes ( content) ) ;
419339
420- self . emit_bang ( BangType :: Comment , & content[ 1 ..content. len ( ) - 1 ] )
340+ let bang_type = BangType :: Comment ;
341+ let buf = & content[ 1 ..content. len ( ) - 1 ] ;
342+ let uncased_starts_with = |string : & [ u8 ] , prefix : & [ u8 ] | {
343+ string. len ( ) >= prefix. len ( ) && string[ ..prefix. len ( ) ] . eq_ignore_ascii_case ( prefix)
344+ } ;
345+
346+ let len = buf. len ( ) ;
347+ match bang_type {
348+ BangType :: Comment if buf. starts_with ( b"!--" ) => {
349+ debug_assert ! ( buf. ends_with( b"--" ) ) ;
350+ if self . config . check_comments {
351+ // search if '--' not in comments
352+ let mut haystack = & buf[ 3 ..len - 2 ] ;
353+ let mut off = 0 ;
354+ while let Some ( p) = memchr:: memchr ( b'-' , haystack) {
355+ off += p + 1 ;
356+ // if next byte after `-` is also `-`, return an error
357+ if buf[ 3 + off] == b'-' {
358+ // Explanation of the magic:
359+ //
360+ // - `self.offset`` just after `>`,
361+ // - `buf` contains `!-- con--tent --`
362+ // - `p` is counted from byte after `<!--`
363+ //
364+ // <!-- con--tent -->:
365+ // ~~~~~~~~~~~~~~~~ : - buf
366+ // : =========== : - zone of search (possible values of `p`)
367+ // : |---p : - p is counted from | (| is 0)
368+ // : : : ^ - self.offset
369+ // ^ : : - self.offset - len
370+ // ^ : - self.offset - len + 2
371+ // ^ - self.offset - len + 2 + p
372+ self . last_error_offset = self . offset - len + 2 + p;
373+ return Err ( Error :: IllFormed ( IllFormedError :: DoubleHyphenInComment ) ) ;
374+ }
375+ haystack = & haystack[ p + 1 ..] ;
376+ }
377+ }
378+ Ok ( Event :: Comment ( BytesText :: wrap (
379+ & buf[ 3 ..len - 2 ] ,
380+ self . decoder ( ) ,
381+ ) ) )
382+ }
383+ _ => {
384+ // <!....>
385+ // ^^^^^ - `buf` does not contain `<` and `>`, but `self.offset` is after `>`.
386+ // ^------- We report error at that position, so we need to subtract 2 and buf len
387+ self . last_error_offset = self . offset - len - 2 ;
388+ Err ( bang_type. to_err ( ) )
389+ }
390+ }
421391 }
422392 FeedResult :: EmitDoctype ( _) => {
423393 debug_assert ! ( content. len( ) > 9 , "{:?}" , Bytes ( content) ) ;
@@ -428,7 +398,37 @@ impl ReaderState {
428398 ) ;
429399 debug_assert ! ( content. ends_with( b">" ) , "{:?}" , Bytes ( content) ) ;
430400
431- self . emit_bang ( BangType :: DocType , & content[ 1 ..content. len ( ) - 1 ] )
401+ let bang_type = BangType :: DocType ;
402+ let buf = & content[ 1 ..content. len ( ) - 1 ] ;
403+ let uncased_starts_with = |string : & [ u8 ] , prefix : & [ u8 ] | {
404+ string. len ( ) >= prefix. len ( ) && string[ ..prefix. len ( ) ] . eq_ignore_ascii_case ( prefix)
405+ } ;
406+
407+ let len = buf. len ( ) ;
408+ match bang_type {
409+ BangType :: DocType if uncased_starts_with ( buf, b"!DOCTYPE" ) => {
410+ match buf[ 8 ..] . iter ( ) . position ( |& b| !is_whitespace ( b) ) {
411+ Some ( start) => Ok ( Event :: DocType ( BytesText :: wrap (
412+ & buf[ 8 + start..] ,
413+ self . decoder ( ) ,
414+ ) ) ) ,
415+ None => {
416+ // Because we here, we at least read `<!DOCTYPE>` and offset after `>`.
417+ // We want report error at place where name is expected - this is just
418+ // before `>`
419+ self . last_error_offset = self . offset - 1 ;
420+ return Err ( Error :: IllFormed ( IllFormedError :: MissingDoctypeName ) ) ;
421+ }
422+ }
423+ }
424+ _ => {
425+ // <!....>
426+ // ^^^^^ - `buf` does not contain `<` and `>`, but `self.offset` is after `>`.
427+ // ^------- We report error at that position, so we need to subtract 2 and buf len
428+ self . last_error_offset = self . offset - len - 2 ;
429+ Err ( bang_type. to_err ( ) )
430+ }
431+ }
432432 }
433433 FeedResult :: EmitPI ( _) => {
434434 debug_assert ! ( content. starts_with( b"<?" ) , "{:?}" , Bytes ( content) ) ;
0 commit comments