11//! Utilities for validating string and char literals and turning them into
22//! values they represent.
33
4+ use std:: iter:: { Peekable , from_fn} ;
45use std:: ops:: Range ;
5- use std:: str:: Chars ;
6+ use std:: str:: CharIndices ;
67
78use Mode :: * ;
89
@@ -231,7 +232,7 @@ impl Mode {
231232}
232233
233234fn scan_escape < T : From < char > + From < u8 > > (
234- chars : & mut Chars < ' _ > ,
235+ chars : & mut impl Iterator < Item = char > ,
235236 mode : Mode ,
236237) -> Result < T , EscapeError > {
237238 // Previous character was '\\', unescape what follows.
@@ -268,7 +269,10 @@ fn scan_escape<T: From<char> + From<u8>>(
268269 Ok ( T :: from ( res) )
269270}
270271
271- fn scan_unicode ( chars : & mut Chars < ' _ > , allow_unicode_escapes : bool ) -> Result < char , EscapeError > {
272+ fn scan_unicode (
273+ chars : & mut impl Iterator < Item = char > ,
274+ allow_unicode_escapes : bool ,
275+ ) -> Result < char , EscapeError > {
272276 // We've parsed '\u', now we have to parse '{..}'.
273277
274278 if chars. next ( ) != Some ( '{' ) {
@@ -326,7 +330,10 @@ fn ascii_check(c: char, allow_unicode_chars: bool) -> Result<char, EscapeError>
326330 if allow_unicode_chars || c. is_ascii ( ) { Ok ( c) } else { Err ( EscapeError :: NonAsciiCharInByte ) }
327331}
328332
329- fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
333+ fn unescape_char_or_byte (
334+ chars : & mut impl Iterator < Item = char > ,
335+ mode : Mode ,
336+ ) -> Result < char , EscapeError > {
330337 let c = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
331338 let res = match c {
332339 '\\' => scan_escape ( chars, mode) ,
@@ -346,63 +353,52 @@ fn unescape_non_raw_common<F, T: From<char> + From<u8>>(src: &str, mode: Mode, c
346353where
347354 F : FnMut ( Range < usize > , Result < T , EscapeError > ) ,
348355{
349- let mut chars = src. chars ( ) ;
350356 let allow_unicode_chars = mode. allow_unicode_chars ( ) ; // get this outside the loop
351357
352- // The `start` and `end` computation here is complicated because
353- // `skip_ascii_whitespace` makes us to skip over chars without counting
354- // them in the range computation.
355- while let Some ( c) = chars. next ( ) {
356- let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
358+ let mut chars = src. char_indices ( ) . peekable ( ) ;
359+ while let Some ( ( start, c) ) = chars. next ( ) {
357360 let res = match c {
358- '\\' => {
359- match chars. clone ( ) . next ( ) {
360- Some ( '\n' ) => {
361- // Rust language specification requires us to skip whitespaces
362- // if unescaped '\' character is followed by '\n'.
363- // For details see [Rust language reference]
364- // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
365- skip_ascii_whitespace ( & mut chars, start, & mut |range, err| {
366- callback ( range, Err ( err) )
367- } ) ;
368- continue ;
369- }
370- _ => scan_escape :: < T > ( & mut chars, mode) ,
371- }
361+ // skip whitespace for backslash newline, see [Rust language reference]
362+ // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
363+ '\\' if chars. next_if ( |& ( _, c) | c == '\n' ) . is_some ( ) => {
364+ let mut callback_err = |range, err| callback ( range, Err ( err) ) ;
365+ skip_ascii_whitespace ( & mut chars, start, & mut callback_err) ;
366+ continue ;
372367 }
368+ '\\' => scan_escape :: < T > ( & mut from_fn ( || chars. next ( ) . map ( |i| i. 1 ) ) , mode) ,
373369 '"' => Err ( EscapeError :: EscapeOnlyChar ) ,
374370 '\r' => Err ( EscapeError :: BareCarriageReturn ) ,
375371 _ => ascii_check ( c, allow_unicode_chars) . map ( T :: from) ,
376372 } ;
377- let end = src . len ( ) - chars . as_str ( ) . len ( ) ;
373+ let end = chars . peek ( ) . map ( | & ( end , _ ) | end ) . unwrap_or ( src . len ( ) ) ;
378374 callback ( start..end, res) ;
379375 }
380376}
381377
382- fn skip_ascii_whitespace < F > ( chars : & mut Chars < ' _ > , start : usize , callback : & mut F )
378+ /// Skip ASCII whitespace, except for the formfeed character
379+ /// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
380+ /// Warns when the skipped run contains a further newline, and when the first
/// character left unskipped is itself whitespace according to `char::is_whitespace`
/// (e.g. formfeed or a non-ASCII whitespace character).
///
/// `chars` is expected to be positioned just past the escaping backslash and the
/// newline that follows it, and `start` to be the byte offset of that backslash
/// (this is what the `start + 2` below relies on). Both warnings are reported through
/// `callback` with a span that begins at `start`.
381+ fn skip_ascii_whitespace < F > ( chars : & mut Peekable < CharIndices < ' _ > > , start : usize , callback : & mut F )
383382where
384383 F : FnMut ( Range < usize > , EscapeError ) ,
385384{
386- let tail = chars. as_str ( ) ;
387- let first_non_space = tail
388- . bytes ( )
389- . position ( |b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r' )
390- . unwrap_or ( tail. len ( ) ) ;
391- if tail[ 1 ..first_non_space] . contains ( '\n' ) {
392- // The +1 accounts for the escaping slash.
393- let end = start + first_non_space + 1 ;
385+ // the escaping slash and newline characters add 2 bytes
386+ let mut end = start + 2 ;
387+ let mut contains_nl = false ;
// Every character accepted by the predicate is ASCII, so each one advances `end` by
// exactly one byte. `contains_nl` records whether any of them is another newline.
388+ while let Some ( ( _, c) ) = chars. next_if ( |& ( _, c) | c. is_ascii_whitespace ( ) && c != '\x0c' ) {
389+ end += 1 ;
390+ contains_nl = contains_nl || c == '\n' ;
391+ }
392+
393+ if contains_nl {
394394 callback ( start..end, EscapeError :: MultipleSkippedLinesWarning ) ;
395395 }
396- let tail = & tail[ first_non_space..] ;
397- if let Some ( c) = tail. chars ( ) . next ( ) {
// `peek` does not consume: the unskipped character stays in the iterator for the
// caller, and only the reported span is widened to cover it.
396+ if let Some ( ( _, c) ) = chars. peek ( ) {
398397 if c. is_whitespace ( ) {
399- // For error reporting, we would like the span to contain the character that was not
400- // skipped. The +1 is necessary to account for the leading \ that started the escape.
401- let end = start + first_non_space + c. len_utf8 ( ) + 1 ;
402- callback ( start..end, EscapeError :: UnskippedWhitespaceWarning ) ;
398+ // for error reporting, include the character that was not skipped in the span
399+ callback ( start..end + c. len_utf8 ( ) , EscapeError :: UnskippedWhitespaceWarning ) ;
403400 }
404401 }
405- * chars = tail. chars ( ) ;
406402}
407403
408404/// Takes a contents of a string literal (without quotes) and produces a
0 commit comments