Skip to content

Commit bc54814

Browse files
revert changes to lexer
1 parent 725a603 commit bc54814

17 files changed

+437
-466
lines changed

crates/djls-template-ast/src/lexer.rs

Lines changed: 94 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ impl Lexer {
4545
self.consume_n(2)?; // {{
4646
let content = self.consume_until("}}")?;
4747
self.consume_n(2)?; // }}
48-
TokenType::DjangoVariable(content.trim().to_string())
48+
TokenType::DjangoVariable(content)
4949
}
5050
'#' => {
5151
self.consume_n(2)?; // {#
@@ -54,42 +54,106 @@ impl Lexer {
5454
TokenType::Comment(content, "{#".to_string(), Some("#}".to_string()))
5555
}
5656
_ => {
57-
self.consume()?;
58-
TokenType::Text("{".to_string())
57+
self.consume()?; // {
58+
TokenType::Text(String::from("{"))
5959
}
6060
},
61-
'\n' => {
62-
self.consume()?;
63-
let token = TokenType::Newline;
64-
self.line += 1;
65-
token
66-
}
67-
' ' | '\t' | '\r' => {
68-
let mut count = 1;
69-
self.consume()?;
70-
while let Ok(c) = self.peek() {
71-
if c != ' ' && c != '\t' && c != '\r' {
72-
break;
61+
62+
'<' => match self.peek_next()? {
63+
'/' => {
64+
self.consume_n(2)?; // </
65+
let tag = self.consume_until(">")?;
66+
self.consume()?; // >
67+
TokenType::HtmlTagClose(tag)
68+
}
69+
'!' if self.matches("<!--")? => {
70+
self.consume_n(4)?; // <!--
71+
let content = self.consume_until("-->")?;
72+
self.consume_n(3)?; // -->
73+
TokenType::Comment(content, "<!--".to_string(), Some("-->".to_string()))
74+
}
75+
_ => {
76+
self.consume()?; // consume <
77+
let tag = self.consume_until(">")?;
78+
self.consume()?; // consume >
79+
if tag.starts_with("script") {
80+
TokenType::ScriptTagOpen(tag)
81+
} else if tag.starts_with("style") {
82+
TokenType::StyleTagOpen(tag)
83+
} else if tag.ends_with("/") {
84+
TokenType::HtmlTagVoid(tag.trim_end_matches("/").to_string())
85+
} else {
86+
TokenType::HtmlTagOpen(tag)
7387
}
88+
}
89+
},
90+
91+
'/' => match self.peek_next()? {
92+
'/' => {
93+
self.consume_n(2)?; // //
94+
let content = self.consume_until("\n")?;
95+
TokenType::Comment(content, "//".to_string(), None)
96+
}
97+
'*' => {
98+
self.consume_n(2)?; // /*
99+
let content = self.consume_until("*/")?;
100+
self.consume_n(2)?; // */
101+
TokenType::Comment(content, "/*".to_string(), Some("*/".to_string()))
102+
}
103+
_ => {
74104
self.consume()?;
75-
count += 1;
105+
TokenType::Text("/".to_string())
106+
}
107+
},
108+
109+
c if c.is_whitespace() => {
110+
if c == '\n' || c == '\r' {
111+
self.consume()?; // \r or \n
112+
if c == '\r' && self.peek()? == '\n' {
113+
self.consume()?; // \n of \r\n
114+
}
115+
TokenType::Newline
116+
} else {
117+
self.consume()?; // Consume the first whitespace
118+
while !self.is_at_end() && self.peek()?.is_whitespace() {
119+
if self.peek()? == '\n' || self.peek()? == '\r' {
120+
break;
121+
}
122+
self.consume()?;
123+
}
124+
let whitespace_count = self.current - self.start;
125+
TokenType::Whitespace(whitespace_count)
76126
}
77-
TokenType::Whitespace(count)
78127
}
128+
79129
_ => {
80130
let mut text = String::new();
81131
while !self.is_at_end() {
82132
let c = self.peek()?;
83-
if c == '{' || c == '\n' || c == ' ' || c == '\t' || c == '\r' {
133+
if c == '{' || c == '<' || c == '\n' {
84134
break;
85135
}
86-
text.push(self.consume()?);
136+
text.push(c);
137+
self.consume()?;
87138
}
88139
TokenType::Text(text)
89140
}
90141
};
91142

92-
Ok(Token::new(token_type, self.line, Some(self.start)))
143+
let token = Token::new(token_type, self.line, Some(self.start));
144+
145+
match self.peek_previous()? {
146+
'\n' => self.line += 1,
147+
'\r' => {
148+
self.line += 1;
149+
if self.peek()? == '\n' {
150+
self.current += 1;
151+
}
152+
}
153+
_ => {}
154+
}
155+
156+
Ok(token)
93157
}
94158

95159
fn peek(&self) -> Result<char, LexerError> {
@@ -246,7 +310,15 @@ mod tests {
246310
#[test]
247311
fn test_tokenize_comments() {
248312
let source = r#"<!-- HTML comment -->
249-
{# Django comment #}"#;
313+
{# Django comment #}
314+
<script>
315+
// JS single line comment
316+
/* JS multi-line
317+
comment */
318+
</script>
319+
<style>
320+
/* CSS comment */
321+
</style>"#;
250322
let mut lexer = Lexer::new(source);
251323
let tokens = lexer.tokenize().unwrap();
252324
insta::assert_yaml_snapshot!(tokens);
@@ -285,7 +357,7 @@ mod tests {
285357
assert!(Lexer::new("{{ user.name").tokenize().is_err()); // No closing }}
286358
assert!(Lexer::new("{% if").tokenize().is_err()); // No closing %}
287359
assert!(Lexer::new("{#").tokenize().is_err()); // No closing #}
288-
assert!(Lexer::new("<div").tokenize().is_ok()); // No closing >, but HTML is treated as text
360+
assert!(Lexer::new("<div").tokenize().is_err()); // No closing >
289361

290362
// Invalid characters or syntax within tokens
291363
assert!(Lexer::new("{{}}").tokenize().is_ok()); // Empty but valid

crates/djls-template-ast/src/snapshots/djls_template_ast__lexer__tests__tokenize_comments.snap

Lines changed: 76 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -3,35 +3,14 @@ source: crates/djls-template-ast/src/lexer.rs
33
expression: tokens
44
---
55
- token_type:
6-
Text: "<!--"
6+
Comment:
7+
- HTML comment
8+
- "<!--"
9+
- "-->"
710
line: 1
811
start: 0
9-
- token_type:
10-
Whitespace: 1
11-
line: 1
12-
start: 4
13-
- token_type:
14-
Text: HTML
15-
line: 1
16-
start: 5
17-
- token_type:
18-
Whitespace: 1
19-
line: 1
20-
start: 9
21-
- token_type:
22-
Text: comment
23-
line: 1
24-
start: 10
25-
- token_type:
26-
Whitespace: 1
27-
line: 1
28-
start: 17
29-
- token_type:
30-
Text: "-->"
31-
line: 1
32-
start: 18
3312
- token_type: Newline
34-
line: 2
13+
line: 1
3514
start: 21
3615
- token_type:
3716
Comment:
@@ -40,6 +19,76 @@ expression: tokens
4019
- "#}"
4120
line: 2
4221
start: 22
43-
- token_type: Eof
22+
- token_type: Newline
4423
line: 2
24+
start: 42
25+
- token_type:
26+
ScriptTagOpen: script
27+
line: 3
28+
start: 43
29+
- token_type: Newline
30+
line: 3
31+
start: 51
32+
- token_type:
33+
Whitespace: 4
34+
line: 4
35+
start: 52
36+
- token_type:
37+
Comment:
38+
- JS single line comment
39+
- //
40+
- ~
41+
line: 4
42+
start: 56
43+
- token_type: Newline
44+
line: 4
45+
start: 81
46+
- token_type:
47+
Whitespace: 4
48+
line: 5
49+
start: 82
50+
- token_type:
51+
Comment:
52+
- "JS multi-line\n comment"
53+
- /*
54+
- "*/"
55+
line: 5
56+
start: 86
57+
- token_type: Newline
58+
line: 5
59+
start: 120
60+
- token_type:
61+
HtmlTagClose: script
62+
line: 6
63+
start: 121
64+
- token_type: Newline
65+
line: 6
66+
start: 130
67+
- token_type:
68+
StyleTagOpen: style
69+
line: 7
70+
start: 131
71+
- token_type: Newline
72+
line: 7
73+
start: 138
74+
- token_type:
75+
Whitespace: 4
76+
line: 8
77+
start: 139
78+
- token_type:
79+
Comment:
80+
- CSS comment
81+
- /*
82+
- "*/"
83+
line: 8
84+
start: 143
85+
- token_type: Newline
86+
line: 8
87+
start: 160
88+
- token_type:
89+
HtmlTagClose: style
90+
line: 9
91+
start: 161
92+
- token_type: Eof
93+
line: 9
4594
start: ~

0 commit comments

Comments
 (0)