@@ -3,13 +3,13 @@ use djls_source::Span;
 use crate::db::Db as TemplateDb;
 use crate::tokens::Token;
 use crate::tokens::TokenContent;
-
-const BLOCK_TAG_START: &str = "{%";
-const BLOCK_TAG_END: &str = "%}";
-const VARIABLE_TAG_START: &str = "{{";
-const VARIABLE_TAG_END: &str = "}}";
-const COMMENT_TAG_START: &str = "{#";
-const COMMENT_TAG_END: &str = "#}";
+use crate::tokens::BLOCK_TAG_END;
+use crate::tokens::BLOCK_TAG_START;
+use crate::tokens::COMMENT_TAG_END;
+use crate::tokens::COMMENT_TAG_START;
+use crate::tokens::DJANGO_TAG_LEN;
+use crate::tokens::VARIABLE_TAG_END;
+use crate::tokens::VARIABLE_TAG_START;
 
 pub struct Lexer<'db> {
     db: &'db dyn TemplateDb,
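
Note: the six delimiter constants move out of the lexer and into crate::tokens, together with a shared DJANGO_TAG_LEN. A minimal sketch of what that module presumably exports (the string values come from the removed lines above; the u32 type for DJANGO_TAG_LEN is an assumption inferred from the `as usize` casts later in this diff):

    pub const BLOCK_TAG_START: &str = "{%";
    pub const BLOCK_TAG_END: &str = "%}";
    pub const VARIABLE_TAG_START: &str = "{{";
    pub const VARIABLE_TAG_END: &str = "}}";
    pub const COMMENT_TAG_START: &str = "{#";
    pub const COMMENT_TAG_END: &str = "#}";
    // Assumed type and value: every opening delimiter is two bytes long.
    pub const DJANGO_TAG_LEN: u32 = 2;
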
@@ -35,23 +35,24 @@ impl<'db> Lexer<'db> {
         while !self.is_at_end() {
             self.start = self.current;
 
-            let token = match self.peek() {
-                '{' => match self.peek_next() {
-                    '%' => self.lex_django_construct(BLOCK_TAG_END, |content, span| Token::Block {
-                        content,
-                        span,
-                    }),
-                    '{' => self.lex_django_construct(VARIABLE_TAG_END, |content, span| {
-                        Token::Variable { content, span }
-                    }),
-                    '#' => self.lex_django_construct(COMMENT_TAG_END, |content, span| {
-                        Token::Comment { content, span }
-                    }),
+            let token =
+                match self.peek() {
+                    '{' => match self.peek_next() {
+                        '%' => self.lex_django_tag(BLOCK_TAG_END, |content, span| Token::Block {
+                            content,
+                            span,
+                        }),
+                        '{' => self.lex_django_tag(VARIABLE_TAG_END, |content, span| {
+                            Token::Variable { content, span }
+                        }),
+                        '#' => self.lex_django_tag(COMMENT_TAG_END, |content, span| {
+                            Token::Comment { content, span }
+                        }),
+                        _ => self.lex_text(),
+                    },
+                    c if c.is_whitespace() => self.lex_whitespace(c),
                     _ => self.lex_text(),
-                },
-                c if c.is_whitespace() => self.lex_whitespace(c),
-                _ => self.lex_text(),
-            };
+                };
 
             tokens.push(token);
         }
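
Note: the restructured loop keeps the same two-character dispatch: only "{%", "{{", and "{#" open a Django construct, and anything else, including a lone '{', lexes as whitespace or plain text. A standalone sketch of that decision table (TokenKind is hypothetical, for illustration only):

    #[derive(Debug, PartialEq)]
    enum TokenKind { Block, Variable, Comment, Whitespace, Text }

    // Mirrors the nested match above: the second character only matters
    // when the first one is '{'.
    fn classify(first: char, second: char) -> TokenKind {
        match (first, second) {
            ('{', '%') => TokenKind::Block,
            ('{', '{') => TokenKind::Variable,
            ('{', '#') => TokenKind::Comment,
            (c, _) if c.is_whitespace() => TokenKind::Whitespace,
            _ => TokenKind::Text,
        }
    }

    fn main() {
        assert_eq!(classify('{', '%'), TokenKind::Block);
        assert_eq!(classify('{', 'a'), TokenKind::Text); // a lone brace is text
    }
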
@@ -61,28 +62,30 @@ impl<'db> Lexer<'db> {
         tokens
     }
 
-    fn lex_django_construct(
+    fn lex_django_tag(
         &mut self,
         end: &str,
         token_fn: impl FnOnce(TokenContent<'db>, Span) -> Token<'db>,
     ) -> Token<'db> {
-        let opening_len = 2;
-        let content_start = self.start + opening_len;
-
-        self.consume_n(opening_len);
+        let content_start = self.start + DJANGO_TAG_LEN as usize;
+        self.consume_n(DJANGO_TAG_LEN as usize);
 
         match self.consume_until(end) {
             Ok(text) => {
+                let len = text.len();
                 let content = TokenContent::new(self.db, text);
-                let content_end = self.current;
-                let span = Span::from_bounds(content_start, content_end);
+                let span = Span::from_parts(content_start, len);
                 self.consume_n(end.len());
                 token_fn(content, span)
             }
             Err(err_text) => {
-                let content_end = self.current;
-                let span = Span::from_bounds(content_start, content_end);
+                let len = err_text.len();
                 let content = TokenContent::new(self.db, err_text);
+                let span = if len == 0 {
+                    Span::from_bounds(content_start, self.current)
+                } else {
+                    Span::from_parts(content_start, len)
+                };
                 Token::Error { content, span }
             }
         }
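
Note: the span construction also changes here: instead of measuring from the cursor after the scan with Span::from_bounds(content_start, self.current), the span is now built from the interned text's length, and from_bounds survives only as a fallback for error tokens with empty content. A sketch of why the two constructions agree on the success path (the Span layout and usize offsets below are assumptions for illustration; only the two constructor names come from the diff):

    #[derive(Debug, PartialEq)]
    struct Span { start: usize, len: usize }

    impl Span {
        fn from_parts(start: usize, len: usize) -> Self { Span { start, len } }
        fn from_bounds(start: usize, end: usize) -> Self { Span { start, len: end - start } }
    }

    fn main() {
        // After consume_until succeeds, the cursor sits on the closing
        // delimiter, so content_start + text.len() == self.current and
        // both constructors yield the same span.
        let (content_start, text) = (2, " user.name ");
        let current = content_start + text.len();
        assert_eq!(
            Span::from_parts(content_start, text.len()),
            Span::from_bounds(content_start, current),
        );
    }
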
@@ -134,6 +137,7 @@ impl<'db> Lexer<'db> {
         self.source[self.current..].chars().next().unwrap_or('\0')
     }
 
+    #[inline]
     fn peek_next(&self) -> char {
         let mut chars = self.source[self.current..].chars();
         chars.next(); // Skip current
@@ -183,9 +187,187 @@ impl<'db> Lexer<'db> {
             self.consume();
         }
 
-        let end = fallback.unwrap_or(self.current);
-        let text = self.source[offset..end].to_string();
-        self.current = end;
-        Err(text)
+        self.current = fallback.unwrap_or(self.current);
+        Err(self.source[offset..self.current].to_string())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use camino::Utf8Path;
+
+    use super::*;
+    use crate::tokens::TokenSnapshotVec;
+
+    #[salsa::db]
+    #[derive(Clone)]
+    struct TestDatabase {
+        storage: salsa::Storage<Self>,
+    }
+
+    impl TestDatabase {
+        fn new() -> Self {
+            Self {
+                storage: salsa::Storage::default(),
+            }
+        }
+    }
+
+    #[salsa::db]
+    impl salsa::Database for TestDatabase {}
+
+    #[salsa::db]
+    impl djls_source::Db for TestDatabase {
+        fn read_file_source(&self, path: &Utf8Path) -> Result<String, std::io::Error> {
+            std::fs::read_to_string(path)
+        }
+    }
+
+    #[salsa::db]
+    impl crate::db::Db for TestDatabase {
+        // Template parsing only - semantic analysis moved to djls-semantic
+    }
+
+    #[test]
+    fn test_tokenize_html() {
+        let db = TestDatabase::new();
+        let source = r#"<div class="container" id="main" disabled></div>"#;
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_django_variable() {
+        let db = TestDatabase::new();
+        let source = "{{ user.name|default:\"Anonymous\"|title }}";
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_django_block() {
+        let db = TestDatabase::new();
+        let source = "{% if user.is_staff %}Admin{% else %}User{% endif %}";
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_comments() {
+        let db = TestDatabase::new();
+        let source = r"<!-- HTML comment -->
+            {# Django comment #}
+            <script>
+                // JS single line comment
+                /* JS multi-line
+                   comment */
+            </script>
+            <style>
+                /* CSS comment */
+            </style>";
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_script() {
+        let db = TestDatabase::new();
+        let source = r#"<script type="text/javascript">
+            // Single line comment
+            const x = 1;
+            /* Multi-line
+               comment */
+            console.log(x);
+        </script>"#;
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_style() {
+        let db = TestDatabase::new();
+        let source = r#"<style type="text/css">
+            /* Header styles */
+            .header {
+                color: blue;
+            }
+        </style>"#;
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_nested_delimiters() {
+        let db = TestDatabase::new();
+        let source = r"{{ user.name }}
+            {% if true %}
+            {# comment #}
+            <!-- html comment -->
+            <div>text</div>";
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_everything() {
+        let db = TestDatabase::new();
+        let source = r#"<!DOCTYPE html>
+<html>
+<head>
+    <style type="text/css">
+        /* Style header */
+        .header { color: blue; }
+    </style>
+    <script type="text/javascript">
+        // Init app
+        const app = {
+            /* Config */
+            debug: true
+        };
+    </script>
+</head>
+<body>
+    <!-- Header section -->
+    <div class="header" id="main" data-value="123" disabled>
+        {% if user.is_authenticated %}
+            {# Welcome message #}
+            <h1>Welcome, {{ user.name|default:"Guest"|title }}!</h1>
+            {% if user.is_staff %}
+                <span>Admin</span>
+            {% else %}
+                <span>User</span>
+            {% endif %}
+        {% endif %}
+    </div>
+</body>
+</html>"#;
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_unclosed_style() {
+        let db = TestDatabase::new();
+        let source = "<style>body { color: blue; ";
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
     }
 }
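
Note: the rewritten Err path of consume_until (near the top of this diff) collapses the temporary `end` variable: the cursor moves straight to the recorded fallback position when one exists, and the text scanned so far becomes the Err payload that lex_django_tag wraps in Token::Error. A standalone model of the contract (a sketch that ignores the fallback bookkeeping; not the crate's implementation):

    // On success: the content before the delimiter, plus the cursor position
    // at the start of the delimiter (the caller then skips the delimiter).
    // On failure: everything scanned to end of input, as the Err payload.
    fn consume_until(source: &str, start: usize, end: &str) -> Result<(String, usize), String> {
        match source[start..].find(end) {
            Some(rel) => Ok((source[start..start + rel].to_string(), start + rel)),
            None => Err(source[start..].to_string()),
        }
    }

    fn main() {
        assert_eq!(
            consume_until("{{ name }}", 2, "}}"),
            Ok((" name ".to_string(), 8)),
        );
        assert_eq!(consume_until("{{ name", 2, "}}"), Err(" name".to_string()));
    }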