diff --git a/crates/djls-templates/src/ast.rs b/crates/djls-templates/src/ast.rs index bc69b5e1..08210aef 100644 --- a/crates/djls-templates/src/ast.rs +++ b/crates/djls-templates/src/ast.rs @@ -153,7 +153,7 @@ impl Span { #[must_use] pub fn from_token(token: &Token<'_>, db: &dyn TemplateDb) -> Self { - let start = token.start().unwrap_or(0); + let start = token.offset().unwrap_or(0); let length = token.length(db); Span::new(start, length) } diff --git a/crates/djls-templates/src/lexer.rs b/crates/djls-templates/src/lexer.rs index feb5083c..e417c739 100644 --- a/crates/djls-templates/src/lexer.rs +++ b/crates/djls-templates/src/lexer.rs @@ -2,10 +2,16 @@ use crate::db::Db as TemplateDb; use crate::tokens::Token; use crate::tokens::TokenContent; +const BLOCK_TAG_START: &str = "{%"; +const BLOCK_TAG_END: &str = "%}"; +const VARIABLE_TAG_START: &str = "{{"; +const VARIABLE_TAG_END: &str = "}}"; +const COMMENT_TAG_START: &str = "{#"; +const COMMENT_TAG_END: &str = "#}"; + pub struct Lexer<'db> { db: &'db dyn TemplateDb, source: String, - chars: Vec<char>, start: usize, current: usize, line: usize, @@ -17,7 +23,6 @@ impl<'db> Lexer<'db> { Lexer { db, source: String::from(source), - chars: source.chars().collect(), start: 0, current: 0, line: 1, @@ -32,22 +37,14 @@ impl<'db> Lexer<'db> { let token = match self.peek() { '{' => match self.peek_next() { - '%' => self.lex_django_construct("%}", |content, line, start| Token::Block { - content, - line, - start, + '%' => self.lex_django_construct(BLOCK_TAG_END, |content, offset| { + Token::Block { content, offset } }), - '{' => { - self.lex_django_construct("}}", |content, line, start| Token::Variable { - content, - line, - start, - }) - } - '#' => self.lex_django_construct("#}", |content, line, start| Token::Comment { - content, - line, - start, + '{' => self.lex_django_construct(VARIABLE_TAG_END, |content, offset| { + Token::Variable { content, offset } + }), + '#' => self.lex_django_construct(COMMENT_TAG_END, |content, offset| { + 
Token::Comment { content, offset } }), _ => self.lex_text(), }, @@ -69,7 +66,7 @@ impl<'db> Lexer<'db> { tokens.push(token); } - tokens.push(Token::Eof { line: self.line }); + tokens.push(Token::Eof); tokens } @@ -77,10 +74,9 @@ impl<'db> Lexer<'db> { fn lex_django_construct( &mut self, end: &str, - token_fn: impl FnOnce(TokenContent<'db>, usize, usize) -> Token<'db>, + token_fn: impl FnOnce(TokenContent<'db>, usize) -> Token<'db>, ) -> Token<'db> { - let line = self.line; - let start = self.start + 3; + let offset = self.start + 3; self.consume_n(2); @@ -88,30 +84,25 @@ impl<'db> Lexer<'db> { Ok(text) => { self.consume_n(2); let content = TokenContent::new(self.db, text); - token_fn(content, line, start) + token_fn(content, offset) } Err(err_text) => { self.synchronize(); let content = TokenContent::new(self.db, err_text); - Token::Error { - content, - line, - start, - } + Token::Error { content, offset } } } } fn lex_whitespace(&mut self, c: char) -> Token<'db> { - let line = self.line; - let start = self.start; + let offset = self.start; if c == '\n' || c == '\r' { self.consume(); // \r or \n if c == '\r' && self.peek() == '\n' { self.consume(); // \n of \r\n } - Token::Newline { line, start } + Token::Newline { offset } } else { self.consume(); // Consume the first whitespace while !self.is_at_end() && self.peek().is_whitespace() { @@ -121,67 +112,64 @@ impl<'db> Lexer<'db> { self.consume(); } let count = self.current - self.start; - Token::Whitespace { count, line, start } + Token::Whitespace { count, offset } } } fn lex_text(&mut self) -> Token<'db> { - let line = self.line; - let start = self.start; + let text_start = self.current; - let mut text = String::new(); while !self.is_at_end() { - let c = self.peek(); - - if c == '{' { - let next = self.peek_next(); - if next == '%' || next == '{' || next == '#' { - break; - } - } else if c == '\n' { + if self.source[self.current..].starts_with(BLOCK_TAG_START) + || 
self.source[self.current..].starts_with(VARIABLE_TAG_START) + || self.source[self.current..].starts_with(COMMENT_TAG_START) + || self.source[self.current..].starts_with('\n') + { break; } - - text.push(c); self.consume(); } - let content = TokenContent::new(self.db, text); + let text = &self.source[text_start..self.current]; + let content = TokenContent::new(self.db, text.to_string()); Token::Text { content, - line, - start, + offset: self.start, } } + #[inline] fn peek(&self) -> char { - self.peek_at(0) + self.source[self.current..].chars().next().unwrap_or('\0') } fn peek_next(&self) -> char { - self.peek_at(1) + let mut chars = self.source[self.current..].chars(); + chars.next(); // Skip current + chars.next().unwrap_or('\0') } fn peek_previous(&self) -> char { - self.peek_at(-1) - } - - fn peek_at(&self, offset: isize) -> char { - let Some(index) = self.current.checked_add_signed(offset) else { + if self.current == 0 { return '\0'; - }; - self.chars.get(index).copied().unwrap_or('\0') + } + let mut pos = self.current - 1; + while !self.source.is_char_boundary(pos) && pos > 0 { + pos -= 1; + } + self.source[pos..].chars().next().unwrap_or('\0') } + #[inline] fn is_at_end(&self) -> bool { self.current >= self.source.len() } + #[inline] fn consume(&mut self) { - if self.is_at_end() { - return; + if let Some(ch) = self.source[self.current..].chars().next() { + self.current += ch.len_utf8(); } - self.current += 1; } fn consume_n(&mut self, count: usize) { @@ -190,25 +178,24 @@ impl<'db> Lexer<'db> { } } - fn consume_until(&mut self, s: &str) -> Result<String, String> { - let start = self.current; - while !self.is_at_end() { - if self.chars[self.current..self.chars.len()] - .starts_with(s.chars().collect::<Vec<_>>().as_slice()) - { - return Ok(self.source[start..self.current].trim().to_string()); + fn consume_until(&mut self, delimiter: &str) -> Result<String, String> { + let offset = self.current; + + while self.current < self.source.len() { + if self.source[self.current..].starts_with(delimiter) { + 
return Ok(self.source[offset..self.current].trim().to_string()); } self.consume(); } - Err(self.source[start..self.current].trim().to_string()) + + Err(self.source[offset..self.current].trim().to_string()) } fn synchronize(&mut self) { - let sync_chars = &['{', '\n', '\r']; + const SYNC_POINTS: &[u8] = b"{\n\r"; while !self.is_at_end() { - let current_char = self.peek(); - if sync_chars.contains(&current_char) { + if SYNC_POINTS.contains(&self.source.as_bytes()[self.current]) { return; } self.consume(); diff --git a/crates/djls-templates/src/parser.rs b/crates/djls-templates/src/parser.rs index a3cb70b7..dd8ee121 100644 --- a/crates/djls-templates/src/parser.rs +++ b/crates/djls-templates/src/parser.rs @@ -37,9 +37,9 @@ impl<'db> Parser<'db> { let tokens = self.tokens.stream(self.db); for token in tokens { if matches!(token, Token::Newline { .. }) { - let start = token.start(); - if let Some(start) = start { - line_offsets.add_line(start + 1); + let offset = token.offset(); + if let Some(offset) = offset { + line_offsets.add_line(offset + 1); } } } @@ -91,16 +91,13 @@ impl<'db> Parser<'db> { let token = self.peek_previous()?; if let Token::Error { - content, - line: _, - start, - .. + content, offset, .. } = token { let error_text = content.text(self.db).clone(); Err(ParserError::MalformedConstruct { - position: start, + position: offset, content: error_text, }) } else { @@ -152,8 +149,8 @@ impl<'db> Parser<'db> { return self.next_node(); } - let start = first_token.start().unwrap_or(0); - let mut end_position = start + first_token.length(self.db); + let offset = first_token.offset().unwrap_or(0); + let mut end_position = offset + first_token.length(self.db); while let Ok(token) = self.peek() { match token { @@ -164,16 +161,16 @@ impl<'db> Parser<'db> { | Token::Eof { .. } => break, // Stop at Django constructs Token::Text { .. } | Token::Whitespace { .. } | Token::Newline { .. 
} => { // Update end position - let token_start = token.start().unwrap_or(end_position); + let token_offset = token.offset().unwrap_or(end_position); let token_length = token.length(self.db); - end_position = token_start + token_length; + end_position = token_offset + token_length; self.consume()?; } } } - let length = end_position - start; - let span = Span::new(start, length); + let length = end_position - offset; + let span = Span::new(offset, length); Ok(Node::Text { span }) } diff --git a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_comments.snap b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_comments.snap index 25222cd7..f53e193f 100644 --- a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_comments.snap +++ b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_comments.snap @@ -4,86 +4,62 @@ expression: snapshot --- - Text: content: "" - line: 1 - start: 0 + offset: 0 - Newline: - line: 1 - start: 21 + offset: 21 - Comment: content: Django comment - line: 2 - start: 25 + offset: 25 - Newline: - line: 2 - start: 42 + offset: 42 - Text: content: "" - line: 7 - start: 121 + offset: 121 - Newline: - line: 7 - start: 130 + offset: 130 - Text: content: "" - line: 10 - start: 161 -- Eof: - line: 10 + offset: 161 +- Eof diff --git a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_django_block.snap b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_django_block.snap index b075d5aa..040679fb 100644 --- a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_django_block.snap +++ b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_django_block.snap @@ -4,23 +4,17 @@ expression: snapshot --- - Block: content: if user.is_staff - line: 1 - start: 3 + offset: 3 - Text: content: Admin - line: 1 - start: 22 + offset: 22 - Block: content: else - line: 1 - start: 30 + 
offset: 30 - Text: content: User - line: 1 - start: 37 + offset: 37 - Block: content: endif - line: 1 - start: 44 -- Eof: - line: 1 + offset: 44 +- Eof diff --git a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_django_variable.snap b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_django_variable.snap index 15f61af8..ee95d81a 100644 --- a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_django_variable.snap +++ b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_django_variable.snap @@ -4,7 +4,5 @@ expression: snapshot --- - Variable: content: "user.name|default:\"Anonymous\"|title" - line: 1 - start: 3 -- Eof: - line: 1 + offset: 3 +- Eof diff --git a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_everything.snap b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_everything.snap index f12ef527..8de91f4b 100644 --- a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_everything.snap +++ b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_everything.snap @@ -4,310 +4,225 @@ expression: snapshot --- - Text: content: "" - line: 1 - start: 0 + offset: 0 - Newline: - line: 1 - start: 15 + offset: 15 - Text: content: "" - line: 2 - start: 16 + offset: 16 - Newline: - line: 2 - start: 22 + offset: 22 - Text: content: "" - line: 3 - start: 23 + offset: 23 - Newline: - line: 3 - start: 29 + offset: 29 - Whitespace: count: 4 - line: 4 - start: 30 + offset: 30 - Text: content: "" - line: 7 - start: 122 + offset: 122 - Newline: - line: 7 - start: 130 + offset: 130 - Whitespace: count: 4 - line: 8 - start: 131 + offset: 131 - Text: content: "" - line: 14 - start: 273 + offset: 273 - Newline: - line: 14 - start: 282 + offset: 282 - Text: content: "" - line: 15 - start: 283 + offset: 283 - Newline: - line: 15 - start: 290 + offset: 290 - Text: content: "" - line: 16 - 
start: 291 + offset: 291 - Newline: - line: 16 - start: 297 + offset: 297 - Whitespace: count: 4 - line: 17 - start: 298 + offset: 298 - Text: content: "" - line: 17 - start: 302 + offset: 302 - Newline: - line: 17 - start: 325 + offset: 325 - Whitespace: count: 4 - line: 18 - start: 326 + offset: 326 - Text: content: "
" - line: 18 - start: 330 + offset: 330 - Newline: - line: 18 - start: 386 + offset: 386 - Whitespace: count: 8 - line: 19 - start: 387 + offset: 387 - Block: content: if user.is_authenticated - line: 19 - start: 398 + offset: 398 - Newline: - line: 19 - start: 425 + offset: 425 - Whitespace: count: 12 - line: 20 - start: 426 + offset: 426 - Comment: content: Welcome message - line: 20 - start: 441 + offset: 441 - Newline: - line: 20 - start: 459 + offset: 459 - Whitespace: count: 12 - line: 21 - start: 460 + offset: 460 - Text: content: "

Welcome, " - line: 21 - start: 472 + offset: 472 - Variable: content: "user.name|default:\"Guest\"|title" - line: 21 - start: 488 + offset: 488 - Text: content: "!

" - line: 21 - start: 522 + offset: 522 - Newline: - line: 21 - start: 528 + offset: 528 - Whitespace: count: 12 - line: 22 - start: 529 + offset: 529 - Block: content: if user.is_staff - line: 22 - start: 544 + offset: 544 - Newline: - line: 22 - start: 563 + offset: 563 - Whitespace: count: 16 - line: 23 - start: 564 + offset: 564 - Text: content: "Admin" - line: 23 - start: 580 + offset: 580 - Newline: - line: 23 - start: 598 + offset: 598 - Whitespace: count: 12 - line: 24 - start: 599 + offset: 599 - Block: content: else - line: 24 - start: 614 + offset: 614 - Newline: - line: 24 - start: 621 + offset: 621 - Whitespace: count: 16 - line: 25 - start: 622 + offset: 622 - Text: content: "User" - line: 25 - start: 638 + offset: 638 - Newline: - line: 25 - start: 655 + offset: 655 - Whitespace: count: 12 - line: 26 - start: 656 + offset: 656 - Block: content: endif - line: 26 - start: 671 + offset: 671 - Newline: - line: 26 - start: 679 + offset: 679 - Whitespace: count: 8 - line: 27 - start: 680 + offset: 680 - Block: content: endif - line: 27 - start: 691 + offset: 691 - Newline: - line: 27 - start: 699 + offset: 699 - Whitespace: count: 4 - line: 28 - start: 700 + offset: 700 - Text: content: "
" - line: 28 - start: 704 + offset: 704 - Newline: - line: 28 - start: 710 + offset: 710 - Text: content: "" - line: 29 - start: 711 + offset: 711 - Newline: - line: 29 - start: 718 + offset: 718 - Text: content: "" - line: 30 - start: 719 -- Eof: - line: 30 + offset: 719 +- Eof diff --git a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_html.snap b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_html.snap index cda99520..080753c3 100644 --- a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_html.snap +++ b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_html.snap @@ -4,7 +4,5 @@ expression: snapshot --- - Text: content: "
" - line: 1 - start: 0 -- Eof: - line: 1 + offset: 0 +- Eof diff --git a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_nested_delimiters.snap b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_nested_delimiters.snap index aae6d228..30fe3d0a 100644 --- a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_nested_delimiters.snap +++ b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_nested_delimiters.snap @@ -4,35 +4,25 @@ expression: snapshot --- - Variable: content: user.name - line: 1 - start: 3 + offset: 3 - Newline: - line: 1 - start: 15 + offset: 15 - Block: content: if true - line: 2 - start: 19 + offset: 19 - Newline: - line: 2 - start: 29 + offset: 29 - Comment: content: comment - line: 3 - start: 33 + offset: 33 - Newline: - line: 3 - start: 43 + offset: 43 - Text: content: "" - line: 4 - start: 44 + offset: 44 - Newline: - line: 4 - start: 65 + offset: 65 - Text: content: "
text
" - line: 5 - start: 66 -- Eof: - line: 5 + offset: 66 +- Eof diff --git a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_script.snap b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_script.snap index 1a5fe4ca..ac6fbf48 100644 --- a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_script.snap +++ b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_script.snap @@ -4,69 +4,50 @@ expression: snapshot --- - Text: content: "" - line: 7 - start: 132 -- Eof: - line: 7 + offset: 132 +- Eof diff --git a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_style.snap b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_style.snap index 41507043..1fd19d23 100644 --- a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_style.snap +++ b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_style.snap @@ -4,58 +4,42 @@ expression: snapshot --- - Text: content: "" - line: 6 - start: 89 -- Eof: - line: 6 + offset: 89 +- Eof diff --git a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_unclosed_style.snap b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_unclosed_style.snap index 7c5a1703..ecad33de 100644 --- a/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_unclosed_style.snap +++ b/crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_unclosed_style.snap @@ -4,7 +4,5 @@ expression: snapshot --- - Text: content: "