11use crate :: base:: SharedEncoding ;
22use crate :: rewriter:: RewritingError ;
3- use encoding_rs:: { CoderResult , Decoder , Encoding } ;
3+ use encoding_rs:: { CoderResult , Decoder , Encoding , UTF_8 } ;
44
55pub ( crate ) struct TextDecoder {
66 encoding : SharedEncoding ,
@@ -27,7 +27,6 @@ impl TextDecoder {
2727 ) -> Result < ( ) , RewritingError > {
2828 if self . pending_text_streaming_decoder . is_some ( ) {
2929 self . feed_text ( & [ ] , true , output_handler) ?;
30- self . pending_text_streaming_decoder = None ;
3130 }
3231 Ok ( ( ) )
3332 }
@@ -40,13 +39,25 @@ impl TextDecoder {
4039 output_handler : & mut dyn FnMut ( & str , bool , & ' static Encoding ) -> Result < ( ) , RewritingError > ,
4140 ) -> Result < ( ) , RewritingError > {
4241 let encoding = self . encoding . get ( ) ;
43- let buffer = self . text_buffer . as_mut_str ( ) ;
42+
43+ if let Some ( ( utf8_text, rest) ) = self . split_utf8_start ( raw_input, encoding) {
44+ raw_input = rest;
45+ let really_last = last_in_text_node && rest. is_empty ( ) ;
46+
47+ ( output_handler) ( utf8_text, really_last, encoding) ?;
48+
49+ if really_last {
50+ debug_assert ! ( self . pending_text_streaming_decoder. is_none( ) ) ;
51+ return Ok ( ( ) ) ;
52+ }
53+ } ;
4454
4555 let decoder = self
4656 . pending_text_streaming_decoder
4757 . get_or_insert_with ( || encoding. new_decoder_without_bom_handling ( ) ) ;
4858
4959 loop {
60+ let buffer = self . text_buffer . as_mut_str ( ) ;
5061 let ( status, read, written, ..) =
5162 decoder. decode_to_str ( raw_input, buffer, last_in_text_node) ;
5263
@@ -60,9 +71,49 @@ impl TextDecoder {
6071 }
6172
6273 if finished_decoding {
74+ if last_in_text_node {
75+ self . pending_text_streaming_decoder = None ;
76+ }
6377 return Ok ( ( ) ) ;
6478 }
6579 raw_input = & raw_input[ read..] ;
6680 }
6781 }
82+
83+ /// Fast path for UTF-8 or ASCII prefix
84+ ///
85+ /// Returns UTF-8 text to emit + remaining bytes, or `None` if the fast path is not available
86+ #[ inline]
87+ fn split_utf8_start < ' i > (
88+ & self ,
89+ raw_input : & ' i [ u8 ] ,
90+ encoding : & ' static Encoding ,
91+ ) -> Option < ( & ' i str , & ' i [ u8 ] ) > {
92+ // Can't use the fast path if the decoder may have buffered some bytes
93+ if self . pending_text_streaming_decoder . is_some ( ) {
94+ return None ;
95+ }
96+
97+ let text_or_len = if encoding == UTF_8 {
98+ std:: str:: from_utf8 ( raw_input) . map_err ( |err| err. valid_up_to ( ) )
99+ } else {
100+ debug_assert ! ( encoding. is_ascii_compatible( ) ) ;
101+ Err ( Encoding :: ascii_valid_up_to ( raw_input) )
102+ } ;
103+
104+ match text_or_len {
105+ Ok ( utf8_text) => Some ( ( utf8_text, & [ ] [ ..] ) ) ,
106+ Err ( valid_up_to) => {
107+ // The slow path buffers 1KB, and even though this shouldn't matter,
108+ // it is an observable behavior, and it makes bugs worse for text handlers
109+ // that assume they'll get only a single chunk.
110+ if valid_up_to != raw_input. len ( ) && valid_up_to < self . text_buffer . len ( ) {
111+ return None ;
112+ }
113+
114+ let ( text, rest) = raw_input. split_at_checked ( valid_up_to) ?;
115+ Some ( ( std:: str:: from_utf8 ( text) . ok ( ) ?, rest) )
116+ }
117+ }
118+ }
68119}
0 commit comments