Skip to content

Commit 767166f

Browse files
fix
1 parent 64ed17b commit 767166f

17 files changed

+278
-306
lines changed

crates/djls-templates/src/lexer.rs

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,16 @@ impl Lexer {
103103
let mut text = String::new();
104104
while !self.is_at_end() {
105105
let c = self.peek();
106-
if c == '{' || c == '\n' {
106+
107+
if c == '{' {
108+
let next = self.peek_next();
109+
if next == '%' || next == '{' || next == '#' {
110+
break;
111+
}
112+
} else if c == '\n' {
107113
break;
108114
}
115+
109116
text.push(c);
110117
self.consume();
111118
}
@@ -125,23 +132,10 @@ impl Lexer {
125132
}
126133

127134
fn peek_at(&self, offset: isize) -> char {
128-
let index = if offset < 0 {
129-
match self.current.checked_sub(offset.unsigned_abs()) {
130-
Some(idx) => idx,
131-
None => return '\0',
132-
}
133-
} else {
134-
match self.current.checked_add(offset as usize) {
135-
Some(idx) => idx,
136-
None => return '\0',
137-
}
135+
let Some(index) = self.current.checked_add_signed(offset) else {
136+
return '\0';
138137
};
139-
140-
if index >= self.chars.len() {
141-
'\0'
142-
} else {
143-
self.chars[index]
144-
}
138+
self.chars.get(index).copied().unwrap_or('\0')
145139
}
146140

147141
fn is_at_end(&self) -> bool {
@@ -307,4 +301,12 @@ mod tests {
307301
let tokens = lexer.tokenize();
308302
insta::assert_yaml_snapshot!(tokens);
309303
}
304+
305+
#[test]
306+
fn test_tokenize_unclosed_style() {
307+
let source = "<style>body { color: blue; ";
308+
let mut lexer = Lexer::new(source);
309+
let tokens = lexer.tokenize();
310+
insta::assert_yaml_snapshot!(tokens);
311+
}
310312
}

crates/djls-templates/src/parser.rs

Lines changed: 74 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use crate::ast::TextNode;
1313
use crate::ast::VariableName;
1414
use crate::ast::VariableNode;
1515
use crate::db::Db as TemplateDb;
16-
use crate::lexer::LexerError;
16+
1717
use crate::tokens::Token;
1818
use crate::tokens::TokenStream;
1919
use crate::tokens::TokenType;
@@ -75,29 +75,18 @@ impl<'db> Parser<'db> {
7575
let token = self.consume()?;
7676

7777
match token.token_type() {
78-
TokenType::Comment(_, open, _) => self.parse_comment(open),
78+
TokenType::Comment(_) => self.parse_comment(),
7979
TokenType::Eof => Err(ParserError::stream_error(StreamError::AtEnd)),
8080
TokenType::Block(_) => self.parse_django_block(),
8181
TokenType::Variable(_) => self.parse_django_variable(),
82-
TokenType::HtmlTagClose(_)
83-
| TokenType::HtmlTagOpen(_)
84-
| TokenType::HtmlTagVoid(_)
85-
| TokenType::Newline
86-
| TokenType::ScriptTagClose(_)
87-
| TokenType::ScriptTagOpen(_)
88-
| TokenType::StyleTagClose(_)
89-
| TokenType::StyleTagOpen(_)
82+
TokenType::Error(_) => self.parse_error(),
83+
TokenType::Newline
9084
| TokenType::Text(_)
9185
| TokenType::Whitespace(_) => self.parse_text(),
9286
}
9387
}
9488

95-
fn parse_comment(&mut self, open: &str) -> Result<Node<'db>, ParserError> {
96-
// Only treat Django comments as Comment nodes
97-
if open != "{#" {
98-
return self.parse_text();
99-
}
100-
89+
fn parse_comment(&mut self) -> Result<Node<'db>, ParserError> {
10190
let token = self.peek_previous()?;
10291

10392
Ok(Node::Comment(CommentNode {
@@ -106,6 +95,23 @@ impl<'db> Parser<'db> {
10695
}))
10796
}
10897

98+
fn parse_error(&mut self) -> Result<Node<'db>, ParserError> {
99+
let token = self.peek_previous()?;
100+
101+
if let TokenType::Error(content) = token.token_type() {
102+
let position = token.start().unwrap_or(0) as usize;
103+
104+
Err(ParserError::MalformedConstruct {
105+
position,
106+
content: content.clone(),
107+
})
108+
} else {
109+
Err(ParserError::InvalidSyntax {
110+
context: "Expected Error token".to_string(),
111+
})
112+
}
113+
}
114+
109115
pub fn parse_django_block(&mut self) -> Result<Node<'db>, ParserError> {
110116
let token = self.peek_previous()?;
111117

@@ -144,41 +150,60 @@ impl<'db> Parser<'db> {
144150
}
145151

146152
fn parse_text(&mut self) -> Result<Node<'db>, ParserError> {
147-
let token = self.peek_previous()?;
148-
149-
if token.token_type() == &TokenType::Newline {
150-
return self.next_node();
151-
}
152-
153-
let mut text = token.lexeme();
154-
155-
while let Ok(token) = self.peek() {
156-
match token.token_type() {
157-
TokenType::Block(_)
158-
| TokenType::Variable(_)
159-
| TokenType::Comment(_, _, _)
160-
| TokenType::Newline
161-
| TokenType::Eof => break,
162-
_ => {
163-
let token_text = token.lexeme();
164-
text.push_str(&token_text);
153+
// Keep looping until we find non-empty text or hit a significant token
154+
loop {
155+
let token = self.peek_previous()?;
156+
157+
// Skip newlines directly without recursion
158+
if token.token_type() == &TokenType::Newline {
159+
if !self.is_at_end() {
165160
self.consume()?;
161+
continue;
162+
} else {
163+
return Err(ParserError::stream_error(StreamError::AtEnd));
166164
}
167165
}
168-
}
169166

170-
let content = match text.trim() {
171-
"" => return self.next_node(),
172-
trimmed => trimmed.to_string(),
173-
};
167+
let mut text = token.lexeme();
168+
169+
while let Ok(token) = self.peek() {
170+
match token.token_type() {
171+
TokenType::Block(_)
172+
| TokenType::Variable(_)
173+
| TokenType::Comment(_)
174+
| TokenType::Error(_)
175+
| TokenType::Newline
176+
| TokenType::Eof => break,
177+
_ => {
178+
let token_text = token.lexeme();
179+
text.push_str(&token_text);
180+
self.consume()?;
181+
}
182+
}
183+
}
174184

175-
let start = token.start().unwrap_or(0);
176-
let offset = u32::try_from(text.find(content.as_str()).unwrap_or(0))
177-
.expect("Offset should fit in u32");
178-
let length = u32::try_from(content.len()).expect("Content length should fit in u32");
179-
let span = Span::new(start + offset, length);
185+
let content = match text.trim() {
186+
"" => {
187+
// Instead of recursing, continue the loop
188+
if !self.is_at_end() {
189+
self.consume()?;
190+
continue;
191+
} else {
192+
return Err(ParserError::stream_error(StreamError::AtEnd));
193+
}
194+
}
195+
trimmed => trimmed.to_string(),
196+
};
197+
198+
// We have non-empty content, create the text node
199+
let start = token.start().unwrap_or(0);
200+
let offset = u32::try_from(text.find(content.as_str()).unwrap_or(0))
201+
.expect("Offset should fit in u32");
202+
let length = u32::try_from(content.len()).expect("Content length should fit in u32");
203+
let span = Span::new(start + offset, length);
180204

181-
Ok(Node::Text(TextNode { content, span }))
205+
return Ok(Node::Text(TextNode { content, span }));
206+
}
182207
}
183208

184209
fn peek(&self) -> Result<Token, ParserError> {
@@ -255,7 +280,7 @@ impl<'db> Parser<'db> {
255280
let sync_types = &[
256281
TokenType::Block(String::new()),
257282
TokenType::Variable(String::new()),
258-
TokenType::Comment(String::new(), String::from("{#"), Some(String::from("#}"))),
283+
TokenType::Comment(String::new()),
259284
TokenType::Eof,
260285
];
261286

@@ -314,8 +339,8 @@ pub enum ParseError {
314339
#[error("Empty tag")]
315340
EmptyTag,
316341

317-
#[error("Lexer error: {0}")]
318-
Lexer(#[from] LexerError),
342+
#[error("Malformed Django construct at position {position}: {content}")]
343+
MalformedConstruct { position: usize, content: String },
319344

320345
#[error("Stream error: {kind:?}")]
321346
StreamError { kind: StreamError },
@@ -392,7 +417,7 @@ mod tests {
392417
#[salsa::tracked]
393418
fn parse_test_template(db: &dyn TemplateDb, template: TestTemplate) -> NodeList<'_> {
394419
let source = template.source(db);
395-
let tokens = Lexer::new(source).tokenize().unwrap();
420+
let tokens = Lexer::new(source).tokenize();
396421
let token_stream = TokenStream::new(db, tokens);
397422
let mut parser = Parser::new(db, token_stream);
398423
let (ast, _) = parser.parse().unwrap();

crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_comments.snap

Lines changed: 29 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,21 @@ source: crates/djls-templates/src/lexer.rs
33
expression: tokens
44
---
55
- token_type:
6-
Comment:
7-
- HTML comment
8-
- "<!--"
9-
- "-->"
6+
Text: "<!-- HTML comment -->"
107
line: 1
118
start: 0
129
- token_type: Newline
1310
line: 1
1411
start: 21
1512
- token_type:
16-
Comment:
17-
- Django comment
18-
- "{#"
19-
- "#}"
13+
Comment: Django comment
2014
line: 2
2115
start: 22
2216
- token_type: Newline
2317
line: 2
2418
start: 42
2519
- token_type:
26-
ScriptTagOpen: script
20+
Text: "<script>"
2721
line: 3
2822
start: 43
2923
- token_type: Newline
@@ -34,10 +28,7 @@ expression: tokens
3428
line: 4
3529
start: 52
3630
- token_type:
37-
Comment:
38-
- JS single line comment
39-
- //
40-
- ~
31+
Text: // JS single line comment
4132
line: 4
4233
start: 56
4334
- token_type: Newline
@@ -48,47 +39,52 @@ expression: tokens
4839
line: 5
4940
start: 82
5041
- token_type:
51-
Comment:
52-
- "JS multi-line\n comment"
53-
- /*
54-
- "*/"
42+
Text: /* JS multi-line
5543
line: 5
5644
start: 86
5745
- token_type: Newline
5846
line: 5
59-
start: 120
47+
start: 102
6048
- token_type:
61-
HtmlTagClose: script
49+
Whitespace: 7
6250
line: 6
63-
start: 121
51+
start: 103
52+
- token_type:
53+
Text: comment */
54+
line: 6
55+
start: 110
6456
- token_type: Newline
6557
line: 6
66-
start: 130
58+
start: 120
6759
- token_type:
68-
StyleTagOpen: style
60+
Text: "</script>"
6961
line: 7
70-
start: 131
62+
start: 121
7163
- token_type: Newline
7264
line: 7
65+
start: 130
66+
- token_type:
67+
Text: "<style>"
68+
line: 8
69+
start: 131
70+
- token_type: Newline
71+
line: 8
7372
start: 138
7473
- token_type:
7574
Whitespace: 4
76-
line: 8
75+
line: 9
7776
start: 139
7877
- token_type:
79-
Comment:
80-
- CSS comment
81-
- /*
82-
- "*/"
83-
line: 8
78+
Text: /* CSS comment */
79+
line: 9
8480
start: 143
8581
- token_type: Newline
86-
line: 8
82+
line: 9
8783
start: 160
8884
- token_type:
89-
HtmlTagClose: style
90-
line: 9
85+
Text: "</style>"
86+
line: 10
9187
start: 161
9288
- token_type: Eof
93-
line: 9
89+
line: 10
9490
start: ~

crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_django_block.snap

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,23 @@ source: crates/djls-templates/src/lexer.rs
33
expression: tokens
44
---
55
- token_type:
6-
DjangoBlock: if user.is_staff
6+
Block: if user.is_staff
77
line: 1
88
start: 0
99
- token_type:
1010
Text: Admin
1111
line: 1
1212
start: 22
1313
- token_type:
14-
DjangoBlock: else
14+
Block: else
1515
line: 1
1616
start: 27
1717
- token_type:
1818
Text: User
1919
line: 1
2020
start: 37
2121
- token_type:
22-
DjangoBlock: endif
22+
Block: endif
2323
line: 1
2424
start: 41
2525
- token_type: Eof

crates/djls-templates/src/snapshots/djls_templates__lexer__tests__tokenize_django_variable.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ source: crates/djls-templates/src/lexer.rs
33
expression: tokens
44
---
55
- token_type:
6-
DjangoVariable: "user.name|default:\"Anonymous\"|title"
6+
Variable: "user.name|default:\"Anonymous\"|title"
77
line: 1
88
start: 0
99
- token_type: Eof

0 commit comments

Comments
 (0)