|
| 1 | +use std::str::Chars; |
| 2 | + |
/// Peekable iterator over a char sequence.
///
/// The next character can be peeked via the `first` method,
/// and the position can be shifted forward via the `bump` method.
///
/// Based on:
/// - <https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/cursor.rs>
/// - <https://github.com/astral-sh/ruff/blob/d1079680bb29f6b797b5df15327195300f635f3c/crates/ruff_python_parser/src/lexer/cursor.rs>
pub(crate) struct Cursor<'a> {
    /// Iterator over chars. Slightly faster than a &str.
    chars: Chars<'a>,
    /// Byte length of the unconsumed input as recorded by `new` or by the
    /// most recent `reset_pos_within_token` call. `pos_within_token` is
    /// computed relative to this value.
    len_remaining: usize,
}

/// Sentinel returned by `Cursor::first` when there is no character left to peek.
pub(crate) const EOF_CHAR: char = '\0';

impl<'a> Cursor<'a> {
    /// Creates a cursor positioned at the start of `input`.
    pub(crate) fn new(input: &'a str) -> Cursor<'a> {
        Cursor {
            len_remaining: input.len(),
            chars: input.chars(),
        }
    }

    /// Peeks the next symbol from the input stream without consuming it.
    /// If requested position doesn't exist, `EOF_CHAR` is returned.
    /// However, getting `EOF_CHAR` doesn't always mean actual end of file
    /// (the input itself may contain a `'\0'`), so the end of input should be
    /// checked with the `is_eof` method.
    pub(crate) fn first(&self) -> char {
        // `.next()` optimizes better than `.nth(0)`
        self.chars.clone().next().unwrap_or(EOF_CHAR)
    }

    /// Checks if there is nothing more to consume.
    pub(crate) fn is_eof(&self) -> bool {
        self.chars.as_str().is_empty()
    }

    /// Returns the number of bytes (not chars) consumed since the cursor was
    /// created or since the last call to `reset_pos_within_token`.
    pub(crate) fn pos_within_token(&self) -> u32 {
        // Both operands are UTF-8 byte lengths. The cast truncates if more than
        // `u32::MAX` bytes have been consumed within a single token.
        (self.len_remaining - self.chars.as_str().len()) as u32
    }

    /// Resets the number of bytes consumed to 0.
    pub(crate) fn reset_pos_within_token(&mut self) {
        self.len_remaining = self.chars.as_str().len();
    }

    /// Moves to the next character, returning the character that was consumed,
    /// or `None` if the input is already exhausted.
    pub(crate) fn bump(&mut self) -> Option<char> {
        self.chars.next()
    }

    /// Eats symbols while predicate returns true or until the end of file is reached.
    ///
    /// The predicate is only ever invoked with characters that are actually
    /// present in the input; it is never called with the `EOF_CHAR` sentinel
    /// produced by `first` at the end of input.
    pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
        // It was tried making optimized version of this for eg. line comments, but
        // LLVM can inline all of this and compile it down to fast iteration over bytes.
        //
        // The end-of-input check comes first so that `&&` short-circuits before a
        // (possibly stateful) predicate can observe the synthetic `EOF_CHAR`.
        while !self.is_eof() && predicate(self.first()) {
            self.bump();
        }
    }
}
0 commit comments