@@ -45,7 +45,7 @@ impl Lexer {
                     self.consume_n(2)?; // {{
                     let content = self.consume_until("}}")?;
                     self.consume_n(2)?; // }}
-                    TokenType::DjangoVariable(content.trim().to_string())
+                    TokenType::DjangoVariable(content)
                 }
                 '#' => {
                     self.consume_n(2)?; // {#
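With the `.trim()` call removed, `DjangoVariable` now carries the raw inner text of the tag, surrounding whitespace included. A quick illustration of the behavioral change, assuming the `Lexer`/`TokenType` API used throughout this diff (the snippet itself is not part of the change):

```rust
// Hypothetical check, not a test from this diff.
let mut lexer = Lexer::new("{{ user.name }}");
let tokens = lexer.tokenize().unwrap();
// Before this change: TokenType::DjangoVariable("user.name")
// After this change:  TokenType::DjangoVariable(" user.name ")
// Consumers that relied on the lexer trimming now have to trim themselves.
```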
@@ -54,42 +54,106 @@ impl Lexer {
                     TokenType::Comment(content, "{#".to_string(), Some("#}".to_string()))
                 }
                 _ => {
-                    self.consume()?;
-                    TokenType::Text("{".to_string())
+                    self.consume()?; // {
+                    TokenType::Text(String::from("{"))
                 }
             },
-            '\n' => {
-                self.consume()?;
-                let token = TokenType::Newline;
-                self.line += 1;
-                token
-            }
-            ' ' | '\t' | '\r' => {
-                let mut count = 1;
-                self.consume()?;
-                while let Ok(c) = self.peek() {
-                    if c != ' ' && c != '\t' && c != '\r' {
-                        break;
+
+            '<' => match self.peek_next()? {
+                '/' => {
+                    self.consume_n(2)?; // </
+                    let tag = self.consume_until(">")?;
+                    self.consume()?; // >
+                    TokenType::HtmlTagClose(tag)
+                }
+                '!' if self.matches("<!--")? => {
+                    self.consume_n(4)?; // <!--
+                    let content = self.consume_until("-->")?;
+                    self.consume_n(3)?; // -->
+                    TokenType::Comment(content, "<!--".to_string(), Some("-->".to_string()))
+                }
+                _ => {
+                    self.consume()?; // consume <
+                    let tag = self.consume_until(">")?;
+                    self.consume()?; // consume >
+                    if tag.starts_with("script") {
+                        TokenType::ScriptTagOpen(tag)
+                    } else if tag.starts_with("style") {
+                        TokenType::StyleTagOpen(tag)
+                    } else if tag.ends_with("/") {
+                        TokenType::HtmlTagVoid(tag.trim_end_matches("/").to_string())
+                    } else {
+                        TokenType::HtmlTagOpen(tag)
                     }
+                }
+            },
+
+            '/' => match self.peek_next()? {
+                '/' => {
+                    self.consume_n(2)?; // //
+                    let content = self.consume_until("\n")?;
+                    TokenType::Comment(content, "//".to_string(), None)
+                }
+                '*' => {
+                    self.consume_n(2)?; // /*
+                    let content = self.consume_until("*/")?;
+                    self.consume_n(2)?; // */
+                    TokenType::Comment(content, "/*".to_string(), Some("*/".to_string()))
+                }
+                _ => {
                     self.consume()?;
-                    count += 1;
+                    TokenType::Text("/".to_string())
+                }
+            },
+
+            c if c.is_whitespace() => {
+                if c == '\n' || c == '\r' {
+                    self.consume()?; // \r or \n
+                    if c == '\r' && self.peek()? == '\n' {
+                        self.consume()?; // \n of \r\n
+                    }
+                    TokenType::Newline
+                } else {
+                    self.consume()?; // Consume the first whitespace
+                    while !self.is_at_end() && self.peek()?.is_whitespace() {
+                        if self.peek()? == '\n' || self.peek()? == '\r' {
+                            break;
+                        }
+                        self.consume()?;
+                    }
+                    let whitespace_count = self.current - self.start;
+                    TokenType::Whitespace(whitespace_count)
                 }
-                TokenType::Whitespace(count)
             }
+
             _ => {
                 let mut text = String::new();
                 while !self.is_at_end() {
                     let c = self.peek()?;
-                    if c == '{' || c == '\n' || c == ' ' || c == '\t' || c == '\r' {
+                    if c == '{' || c == '<' || c == '\n' {
                         break;
                     }
-                    text.push(self.consume()?);
+                    text.push(c);
+                    self.consume()?;
                 }
                 TokenType::Text(text)
             }
         };

-        Ok(Token::new(token_type, self.line, Some(self.start)))
+        let token = Token::new(token_type, self.line, Some(self.start));
+
+        match self.peek_previous()? {
+            '\n' => self.line += 1,
+            '\r' => {
+                self.line += 1;
+                if self.peek()? == '\n' {
+                    self.current += 1;
+                }
+            }
+            _ => {}
+        }
+
+        Ok(token)
     }

     fn peek(&self) -> Result<char, LexerError> {
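The new `'<'` arm means plain HTML now lexes into structured tokens instead of falling through to `Text`. A rough walkthrough, again assuming the API shown in this diff:

```rust
// Hypothetical walkthrough, not a test from this diff.
let mut lexer = Lexer::new("<div class=\"card\">hi</div><br/>");
let tokens = lexer.tokenize().unwrap();
// Expected token shape, roughly:
//   HtmlTagOpen("div class=\"card\"")  (tag name and attributes, unparsed)
//   Text("hi")
//   HtmlTagClose("div")
//   HtmlTagVoid("br")                  (trailing "/" stripped)
```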
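These arms lean on helpers (`peek`, `consume`, `consume_until`, `matches`, `peek_next`, `peek_previous`) defined elsewhere in the file. A minimal sketch of the contract they appear to follow, with the struct fields and error type assumed from the call sites rather than taken from the crate:

```rust
// Sketch only: names and fields here are assumptions inferred from usage.
struct Lexer {
    source: Vec<char>,
    start: usize,   // offset where the current token began
    current: usize, // offset of the next unconsumed char
    line: usize,
}

struct LexerError; // stand-in for the crate's real error type

impl Lexer {
    // Returns the char at `current` without advancing; errors at EOF.
    fn peek(&self) -> Result<char, LexerError> {
        self.source.get(self.current).copied().ok_or(LexerError)
    }

    // Advances past the current char and returns it.
    fn consume(&mut self) -> Result<char, LexerError> {
        let c = self.peek()?;
        self.current += 1;
        Ok(c)
    }

    // True if the unconsumed input starts with `s`.
    fn matches(&self, s: &str) -> Result<bool, LexerError> {
        let rest: String = self.source[self.current..].iter().collect();
        Ok(rest.starts_with(s))
    }

    // Collects chars up to (but not including) `end`. Because `consume`
    // errors at EOF, an unterminated "{{ user.name" or "<div" bubbles up
    // as a LexerError, which is what the updated tests below assert.
    fn consume_until(&mut self, end: &str) -> Result<String, LexerError> {
        let mut out = String::new();
        while !self.matches(end)? {
            out.push(self.consume()?);
        }
        Ok(out)
    }
}
```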
@@ -246,7 +310,15 @@ mod tests {
     #[test]
     fn test_tokenize_comments() {
         let source = r#"<!-- HTML comment -->
-{# Django comment #}"#;
+{# Django comment #}
+<script>
+// JS single line comment
+/* JS multi-line
+comment */
+</script>
+<style>
+/* CSS comment */
+</style>"#;
         let mut lexer = Lexer::new(source);
         let tokens = lexer.tokenize().unwrap();
         insta::assert_yaml_snapshot!(tokens);
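Since the fixture grew, the stored YAML snapshot has to be re-recorded (for example with `cargo insta review`). A companion case exercising the `starts_with("script")` / `starts_with("style")` branches when attributes are present could look like this (hypothetical, not part of this diff):

```rust
#[test]
fn test_tokenize_script_style_with_attrs() {
    // Hypothetical: tags with attributes should still be recognized,
    // since the lexer matches starts_with("script") / starts_with("style").
    let source = r#"<script type="module">var x = 1;</script>
<style media="print">p { color: black; }</style>"#;
    let mut lexer = Lexer::new(source);
    let tokens = lexer.tokenize().unwrap();
    insta::assert_yaml_snapshot!(tokens);
}
```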
@@ -285,7 +357,7 @@ mod tests {
         assert!(Lexer::new("{{ user.name").tokenize().is_err()); // No closing }}
         assert!(Lexer::new("{% if").tokenize().is_err()); // No closing %}
         assert!(Lexer::new("{#").tokenize().is_err()); // No closing #}
-        assert!(Lexer::new("<div").tokenize().is_ok()); // No closing >, but HTML is treated as text
+        assert!(Lexer::new("<div").tokenize().is_err()); // No closing >

         // Invalid characters or syntax within tokens
         assert!(Lexer::new("{{}}").tokenize().is_ok()); // Empty but valid
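One more case worth pinning down is the `\r\n` handling, which now lives in two places (the `Newline` arm and the `peek_previous` fixup after token construction). A test along these lines (hypothetical) would catch a line counter that double-increments on CRLF input:

```rust
#[test]
fn test_tokenize_crlf_newline() {
    // Hypothetical: "\r\n" should yield a single Newline token and bump
    // `line` exactly once, matching the behavior of "\n"-only input.
    let mut lexer = Lexer::new("a\r\nb");
    let tokens = lexer.tokenize().unwrap();
    insta::assert_yaml_snapshot!(tokens);
}
```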