Skip to content

Commit d9920f5

Browse files
wip
1 parent 472a040 commit d9920f5

13 files changed

+267
-514
lines changed

crates/djls-templates/src/ast.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,7 @@ impl Span {
153153

154154
#[must_use]
155155
pub fn from_token(token: &Token<'_>, db: &dyn TemplateDb) -> Self {
156-
let start = token.start().unwrap_or(0);
156+
let start = token.offset().unwrap_or(0);
157157
let length = token.length(db);
158158
Span::new(start, length)
159159
}

crates/djls-templates/src/lexer.rs

Lines changed: 57 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,16 @@ use crate::db::Db as TemplateDb;
22
use crate::tokens::Token;
33
use crate::tokens::TokenContent;
44

5+
const BLOCK_TAG_START: &str = "{%";
6+
const BLOCK_TAG_END: &str = "%}";
7+
const VARIABLE_TAG_START: &str = "{{";
8+
const VARIABLE_TAG_END: &str = "}}";
9+
const COMMENT_TAG_START: &str = "{#";
10+
const COMMENT_TAG_END: &str = "#}";
11+
512
pub struct Lexer<'db> {
613
db: &'db dyn TemplateDb,
714
source: String,
8-
chars: Vec<char>,
915
start: usize,
1016
current: usize,
1117
line: usize,
@@ -17,7 +23,6 @@ impl<'db> Lexer<'db> {
1723
Lexer {
1824
db,
1925
source: String::from(source),
20-
chars: source.chars().collect(),
2126
start: 0,
2227
current: 0,
2328
line: 1,
@@ -32,22 +37,14 @@ impl<'db> Lexer<'db> {
3237

3338
let token = match self.peek() {
3439
'{' => match self.peek_next() {
35-
'%' => self.lex_django_construct("%}", |content, line, start| Token::Block {
36-
content,
37-
line,
38-
start,
40+
'%' => self.lex_django_construct(BLOCK_TAG_END, |content, offset| {
41+
Token::Block { content, offset }
3942
}),
40-
'{' => {
41-
self.lex_django_construct("}}", |content, line, start| Token::Variable {
42-
content,
43-
line,
44-
start,
45-
})
46-
}
47-
'#' => self.lex_django_construct("#}", |content, line, start| Token::Comment {
48-
content,
49-
line,
50-
start,
43+
'{' => self.lex_django_construct(VARIABLE_TAG_END, |content, offset| {
44+
Token::Variable { content, offset }
45+
}),
46+
'#' => self.lex_django_construct(COMMENT_TAG_END, |content, offset| {
47+
Token::Comment { content, offset }
5148
}),
5249
_ => self.lex_text(),
5350
},
@@ -69,49 +66,43 @@ impl<'db> Lexer<'db> {
6966
tokens.push(token);
7067
}
7168

72-
tokens.push(Token::Eof { line: self.line });
69+
tokens.push(Token::Eof);
7370

7471
tokens
7572
}
7673

7774
fn lex_django_construct(
7875
&mut self,
7976
end: &str,
80-
token_fn: impl FnOnce(TokenContent<'db>, usize, usize) -> Token<'db>,
77+
token_fn: impl FnOnce(TokenContent<'db>, usize) -> Token<'db>,
8178
) -> Token<'db> {
82-
let line = self.line;
83-
let start = self.start + 3;
79+
let offset = self.start + 3;
8480

8581
self.consume_n(2);
8682

8783
match self.consume_until(end) {
8884
Ok(text) => {
8985
self.consume_n(2);
9086
let content = TokenContent::new(self.db, text);
91-
token_fn(content, line, start)
87+
token_fn(content, offset)
9288
}
9389
Err(err_text) => {
9490
self.synchronize();
9591
let content = TokenContent::new(self.db, err_text);
96-
Token::Error {
97-
content,
98-
line,
99-
start,
100-
}
92+
Token::Error { content, offset }
10193
}
10294
}
10395
}
10496

10597
fn lex_whitespace(&mut self, c: char) -> Token<'db> {
106-
let line = self.line;
107-
let start = self.start;
98+
let offset = self.start;
10899

109100
if c == '\n' || c == '\r' {
110101
self.consume(); // \r or \n
111102
if c == '\r' && self.peek() == '\n' {
112103
self.consume(); // \n of \r\n
113104
}
114-
Token::Newline { line, start }
105+
Token::Newline { offset }
115106
} else {
116107
self.consume(); // Consume the first whitespace
117108
while !self.is_at_end() && self.peek().is_whitespace() {
@@ -121,67 +112,64 @@ impl<'db> Lexer<'db> {
121112
self.consume();
122113
}
123114
let count = self.current - self.start;
124-
Token::Whitespace { count, line, start }
115+
Token::Whitespace { count, offset }
125116
}
126117
}
127118

128119
fn lex_text(&mut self) -> Token<'db> {
129-
let line = self.line;
130-
let start = self.start;
120+
let text_start = self.current;
131121

132-
let mut text = String::new();
133122
while !self.is_at_end() {
134-
let c = self.peek();
135-
136-
if c == '{' {
137-
let next = self.peek_next();
138-
if next == '%' || next == '{' || next == '#' {
139-
break;
140-
}
141-
} else if c == '\n' {
123+
if self.source[self.current..].starts_with(BLOCK_TAG_START)
124+
|| self.source[self.current..].starts_with(VARIABLE_TAG_START)
125+
|| self.source[self.current..].starts_with(COMMENT_TAG_START)
126+
|| self.source[self.current..].starts_with('\n')
127+
{
142128
break;
143129
}
144-
145-
text.push(c);
146130
self.consume();
147131
}
148132

149-
let content = TokenContent::new(self.db, text);
133+
let text = &self.source[text_start..self.current];
134+
let content = TokenContent::new(self.db, text.to_string());
150135
Token::Text {
151136
content,
152-
line,
153-
start,
137+
offset: self.start,
154138
}
155139
}
156140

141+
#[inline]
157142
fn peek(&self) -> char {
158-
self.peek_at(0)
143+
self.source[self.current..].chars().next().unwrap_or('\0')
159144
}
160145

161146
fn peek_next(&self) -> char {
162-
self.peek_at(1)
147+
let mut chars = self.source[self.current..].chars();
148+
chars.next(); // Skip current
149+
chars.next().unwrap_or('\0')
163150
}
164151

165152
fn peek_previous(&self) -> char {
166-
self.peek_at(-1)
167-
}
168-
169-
fn peek_at(&self, offset: isize) -> char {
170-
let Some(index) = self.current.checked_add_signed(offset) else {
153+
if self.current == 0 {
171154
return '\0';
172-
};
173-
self.chars.get(index).copied().unwrap_or('\0')
155+
}
156+
let mut pos = self.current - 1;
157+
while !self.source.is_char_boundary(pos) && pos > 0 {
158+
pos -= 1;
159+
}
160+
self.source[pos..].chars().next().unwrap_or('\0')
174161
}
175162

163+
#[inline]
176164
fn is_at_end(&self) -> bool {
177165
self.current >= self.source.len()
178166
}
179167

168+
#[inline]
180169
fn consume(&mut self) {
181-
if self.is_at_end() {
182-
return;
170+
if let Some(ch) = self.source[self.current..].chars().next() {
171+
self.current += ch.len_utf8();
183172
}
184-
self.current += 1;
185173
}
186174

187175
fn consume_n(&mut self, count: usize) {
@@ -190,25 +178,24 @@ impl<'db> Lexer<'db> {
190178
}
191179
}
192180

193-
fn consume_until(&mut self, s: &str) -> Result<String, String> {
194-
let start = self.current;
195-
while !self.is_at_end() {
196-
if self.chars[self.current..self.chars.len()]
197-
.starts_with(s.chars().collect::<Vec<_>>().as_slice())
198-
{
199-
return Ok(self.source[start..self.current].trim().to_string());
181+
fn consume_until(&mut self, delimiter: &str) -> Result<String, String> {
182+
let offset = self.current;
183+
184+
while self.current < self.source.len() {
185+
if self.source[self.current..].starts_with(delimiter) {
186+
return Ok(self.source[offset..self.current].trim().to_string());
200187
}
201188
self.consume();
202189
}
203-
Err(self.source[start..self.current].trim().to_string())
190+
191+
Err(self.source[offset..self.current].trim().to_string())
204192
}
205193

206194
fn synchronize(&mut self) {
207-
let sync_chars = &['{', '\n', '\r'];
195+
const SYNC_POINTS: &[u8] = b"{\n\r";
208196

209197
while !self.is_at_end() {
210-
let current_char = self.peek();
211-
if sync_chars.contains(&current_char) {
198+
if SYNC_POINTS.contains(&self.source.as_bytes()[self.current]) {
212199
return;
213200
}
214201
self.consume();

crates/djls-templates/src/parser.rs

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ impl<'db> Parser<'db> {
3737
let tokens = self.tokens.stream(self.db);
3838
for token in tokens {
3939
if matches!(token, Token::Newline { .. }) {
40-
let start = token.start();
40+
let start = token.offset();
4141
if let Some(start) = start {
4242
line_offsets.add_line(start + 1);
4343
}
@@ -92,8 +92,7 @@ impl<'db> Parser<'db> {
9292

9393
if let Token::Error {
9494
content,
95-
line: _,
96-
start,
95+
offset: start,
9796
..
9897
} = token
9998
{
@@ -152,7 +151,7 @@ impl<'db> Parser<'db> {
152151
return self.next_node();
153152
}
154153

155-
let start = first_token.start().unwrap_or(0);
154+
let start = first_token.offset().unwrap_or(0);
156155
let mut end_position = start + first_token.length(self.db);
157156

158157
while let Ok(token) = self.peek() {
@@ -164,7 +163,7 @@ impl<'db> Parser<'db> {
164163
| Token::Eof { .. } => break, // Stop at Django constructs
165164
Token::Text { .. } | Token::Whitespace { .. } | Token::Newline { .. } => {
166165
// Update end position
167-
let token_start = token.start().unwrap_or(end_position);
166+
let token_start = token.offset().unwrap_or(end_position);
168167
let token_length = token.length(self.db);
169168
end_position = token_start + token_length;
170169
self.consume()?;

crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_comments.snap

Lines changed: 24 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -4,86 +4,62 @@ expression: snapshot
44
---
55
- Text:
66
content: "<!-- HTML comment -->"
7-
line: 1
8-
start: 0
7+
offset: 0
98
- Newline:
10-
line: 1
11-
start: 21
9+
offset: 21
1210
- Comment:
1311
content: Django comment
14-
line: 2
15-
start: 25
12+
offset: 25
1613
- Newline:
17-
line: 2
18-
start: 42
14+
offset: 42
1915
- Text:
2016
content: "<script>"
21-
line: 3
22-
start: 43
17+
offset: 43
2318
- Newline:
24-
line: 3
25-
start: 51
19+
offset: 51
2620
- Whitespace:
2721
count: 4
28-
line: 4
29-
start: 52
22+
offset: 52
3023
- Text:
3124
content: // JS single line comment
32-
line: 4
33-
start: 56
25+
offset: 56
3426
- Newline:
35-
line: 4
36-
start: 81
27+
offset: 81
3728
- Whitespace:
3829
count: 4
39-
line: 5
40-
start: 82
30+
offset: 82
4131
- Text:
4232
content: /* JS multi-line
43-
line: 5
44-
start: 86
33+
offset: 86
4534
- Newline:
46-
line: 5
47-
start: 102
35+
offset: 102
4836
- Whitespace:
4937
count: 7
50-
line: 6
51-
start: 103
38+
offset: 103
5239
- Text:
5340
content: comment */
54-
line: 6
55-
start: 110
41+
offset: 110
5642
- Newline:
57-
line: 6
58-
start: 120
43+
offset: 120
5944
- Text:
6045
content: "</script>"
61-
line: 7
62-
start: 121
46+
offset: 121
6347
- Newline:
64-
line: 7
65-
start: 130
48+
offset: 130
6649
- Text:
6750
content: "<style>"
68-
line: 8
69-
start: 131
51+
offset: 131
7052
- Newline:
71-
line: 8
72-
start: 138
53+
offset: 138
7354
- Whitespace:
7455
count: 4
75-
line: 9
76-
start: 139
56+
offset: 139
7757
- Text:
7858
content: /* CSS comment */
79-
line: 9
80-
start: 143
59+
offset: 143
8160
- Newline:
82-
line: 9
83-
start: 160
61+
offset: 160
8462
- Text:
8563
content: "</style>"
86-
line: 10
87-
start: 161
88-
- Eof:
89-
line: 10
64+
offset: 161
65+
- Eof

0 commit comments

Comments
 (0)