
Commit 6c44e6d

Optimize parser w/TagBit interning and simplified token access (#221)
1 parent 6bcc692 commit 6c44e6d

6 files changed: +119 −130 lines


crates/djls-templates/src/ast.rs

Lines changed: 6 additions & 1 deletion
@@ -59,7 +59,7 @@ impl Default for LineOffsets {
 pub enum Node<'db> {
     Tag {
         name: TagName<'db>,
-        bits: Vec<String>,
+        bits: Vec<TagBit<'db>>,
         span: Span,
     },
     Comment {
@@ -129,6 +129,11 @@ pub struct TagName<'db> {
     pub text: String,
 }
 
+#[salsa::interned(debug)]
+pub struct TagBit<'db> {
+    pub text: String,
+}
+
 #[salsa::interned(debug)]
 pub struct VariableName<'db> {
     pub text: String,
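
For context, a minimal sketch of how the parser side might build a `Node::Tag` with the new interned bits. The helper name `build_tag_node`, the `&dyn Db` parameter, and the pre-split `rest` slice are assumptions for illustration; only the `Node::Tag` shape and the Salsa-generated `TagName::new` / `TagBit::new` constructors follow from the diff above.

```rust
// Illustrative sketch only: assumes the tag body has already been split on
// whitespace, e.g. `{% url 'detail' object.pk as item_url %}`.
fn build_tag_node<'db>(db: &'db dyn Db, name: &str, rest: &[&str], span: Span) -> Node<'db> {
    let name = TagName::new(db, name.to_string());
    // Interning deduplicates repeated bit strings ("as", "with", variable paths
    // reused across a template), so each distinct string is stored once and
    // bits compare as cheap interned IDs rather than owned `String`s.
    let bits: Vec<TagBit<'db>> = rest
        .iter()
        .map(|bit| TagBit::new(db, (*bit).to_string()))
        .collect();
    Node::Tag { name, bits, span }
}
```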

crates/djls-templates/src/lexer.rs

Lines changed: 15 additions & 15 deletions
@@ -1,3 +1,4 @@
+use crate::ast::LineOffsets;
 use crate::db::Db as TemplateDb;
 use crate::tokens::Token;
 use crate::tokens::TokenContent;
@@ -14,7 +15,6 @@ pub struct Lexer<'db> {
     source: String,
     start: usize,
     current: usize,
-    line: usize,
 }
 
 impl<'db> Lexer<'db> {
@@ -25,12 +25,12 @@ impl<'db> Lexer<'db> {
             source: String::from(source),
             start: 0,
             current: 0,
-            line: 1,
         }
     }
 
-    pub fn tokenize(&mut self) -> Vec<Token<'db>> {
+    pub fn tokenize(&mut self) -> (Vec<Token<'db>>, LineOffsets) {
         let mut tokens = Vec::new();
+        let mut line_offsets = LineOffsets::default();
 
         while !self.is_at_end() {
             self.start = self.current;
@@ -53,9 +53,9 @@ impl<'db> Lexer<'db> {
         };
 
         match self.peek_previous() {
-            '\n' => self.line += 1,
+            '\n' => line_offsets.add_line(u32::try_from(self.current).unwrap_or(u32::MAX)),
             '\r' => {
-                self.line += 1;
+                line_offsets.add_line(u32::try_from(self.current).unwrap_or(u32::MAX));
                 if self.peek() == '\n' {
                     self.current += 1;
                 }
@@ -68,7 +68,7 @@ impl<'db> Lexer<'db> {
 
         tokens.push(Token::Eof);
 
-        tokens
+        (tokens, line_offsets)
     }
 
     fn lex_django_construct(
@@ -255,7 +255,7 @@ mod tests {
         let db = TestDatabase::new();
         let source = r#"<div class="container" id="main" disabled></div>"#;
         let mut lexer = Lexer::new(&db, source);
-        let tokens = lexer.tokenize();
+        let (tokens, _) = lexer.tokenize();
         let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
         insta::assert_yaml_snapshot!(snapshot);
     }
@@ -265,7 +265,7 @@ mod tests {
         let db = TestDatabase::new();
         let source = "{{ user.name|default:\"Anonymous\"|title }}";
         let mut lexer = Lexer::new(&db, source);
-        let tokens = lexer.tokenize();
+        let (tokens, _) = lexer.tokenize();
         let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
         insta::assert_yaml_snapshot!(snapshot);
     }
@@ -275,7 +275,7 @@ mod tests {
         let db = TestDatabase::new();
         let source = "{% if user.is_staff %}Admin{% else %}User{% endif %}";
         let mut lexer = Lexer::new(&db, source);
-        let tokens = lexer.tokenize();
+        let (tokens, _) = lexer.tokenize();
         let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
         insta::assert_yaml_snapshot!(snapshot);
     }
@@ -294,7 +294,7 @@ mod tests {
             /* CSS comment */
         </style>";
         let mut lexer = Lexer::new(&db, source);
-        let tokens = lexer.tokenize();
+        let (tokens, _) = lexer.tokenize();
         let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
         insta::assert_yaml_snapshot!(snapshot);
     }
@@ -310,7 +310,7 @@ mod tests {
             console.log(x);
         </script>"#;
         let mut lexer = Lexer::new(&db, source);
-        let tokens = lexer.tokenize();
+        let (tokens, _) = lexer.tokenize();
         let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
         insta::assert_yaml_snapshot!(snapshot);
     }
@@ -325,7 +325,7 @@ mod tests {
             }
         </style>"#;
         let mut lexer = Lexer::new(&db, source);
-        let tokens = lexer.tokenize();
+        let (tokens, _) = lexer.tokenize();
         let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
         insta::assert_yaml_snapshot!(snapshot);
     }
@@ -339,7 +339,7 @@ mod tests {
         <!-- html comment -->
         <div>text</div>";
         let mut lexer = Lexer::new(&db, source);
-        let tokens = lexer.tokenize();
+        let (tokens, _) = lexer.tokenize();
         let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
         insta::assert_yaml_snapshot!(snapshot);
     }
@@ -378,7 +378,7 @@ mod tests {
         </body>
         </html>"#;
         let mut lexer = Lexer::new(&db, source);
-        let tokens = lexer.tokenize();
+        let (tokens, _) = lexer.tokenize();
         let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
         insta::assert_yaml_snapshot!(snapshot);
     }
@@ -388,7 +388,7 @@ mod tests {
        let db = TestDatabase::new();
        let source = "<style>body { color: blue; ";
        let mut lexer = Lexer::new(&db, source);
-       let tokens = lexer.tokenize();
+       let (tokens, _) = lexer.tokenize();
        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
        insta::assert_yaml_snapshot!(snapshot);
     }
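
The lexer drops its running `line` counter in favor of recording each line-start byte offset into `LineOffsets` and returning it alongside the tokens. Note that `add_line` is passed `self.current`, which at that point appears to sit just past the consumed newline (the first byte of the next line), and `u32::try_from(...).unwrap_or(u32::MAX)` clamps rather than panics on sources longer than `u32::MAX` bytes. A test-shaped sketch of the new call signature, using only API visible in this diff; the multi-line source string is made up:

```rust
#[test]
fn test_tokenize_returns_line_offsets() {
    let db = TestDatabase::new();
    // Hypothetical multi-line template; the existing tests above use single sources.
    let source = "{% if user.is_staff %}\nAdmin\n{% endif %}";
    let mut lexer = Lexer::new(&db, source);
    // `tokenize` now hands back the collected line-start offsets along with the
    // tokens, so callers no longer need to rescan the source for line numbers.
    let (tokens, line_offsets) = lexer.tokenize();
    assert!(!tokens.is_empty()); // always at least Token::Eof
    // `LineOffsets` accessors beyond `add_line`/`Default` aren't shown in this
    // diff, so no assertion on `line_offsets` itself here.
    let _ = line_offsets;
}
```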

crates/djls-templates/src/lib.rs

Lines changed: 3 additions & 3 deletions
@@ -75,14 +75,14 @@ use validation::TagValidator;
 #[salsa::tracked]
 fn lex_template(db: &dyn Db, file: SourceFile) -> TokenStream<'_> {
     if file.kind(db) != FileKind::Template {
-        return TokenStream::new(db, vec![]);
+        return TokenStream::new(db, vec![], LineOffsets::default());
     }
 
     let text_arc = djls_workspace::db::source_text(db, file);
     let text = text_arc.as_ref();
 
-    let tokens = Lexer::new(db, text).tokenize();
-    TokenStream::new(db, tokens)
+    let (tokens, line_offsets) = Lexer::new(db, text).tokenize();
+    TokenStream::new(db, tokens, line_offsets)
 }
 
 /// Parse tokens into an AST.
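
With the offsets stored in the Salsa-tracked `TokenStream`, downstream stages (diagnostics, LSP positions) can translate span byte offsets to lines without touching the source again. For intuition, here is a generic sketch of that lookup as a binary search over line starts; it is not the crate's `LineOffsets` implementation, whose methods beyond `add_line` and `Default` aren't shown in this commit.

```rust
/// Generic sketch, not the crate's API: given the byte offsets at which lines
/// 2, 3, ... start (line 1 implicitly starts at offset 0), map a byte offset
/// to a zero-based (line, column) pair.
fn offset_to_position(line_starts: &[u32], offset: u32) -> (usize, u32) {
    // Count how many recorded line starts are at or before `offset`;
    // that count is the zero-based line index.
    let line = line_starts.partition_point(|&start| start <= offset);
    let line_start = if line == 0 { 0 } else { line_starts[line - 1] };
    (line, offset - line_start)
}

// e.g. for "ab\ncd" the lexer records a single line start at offset 3, and
// offset_to_position(&[3], 4) == (1, 1): 'd' sits on line 2 (zero-based 1), column 1.
```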
