Commit e60e879 ("wip"), 1 parent 3a24bc3

File tree: 5 files changed (+360 / -185 lines)


crates/djls-source/src/position.rs

Lines changed: 21 additions & 2 deletions
@@ -32,13 +32,32 @@ impl Span {
         Self { start, length }
     }
 
+    #[must_use]
+    pub fn expand(self, opening: u32, closing: u32) -> Self {
+        let start_expand = self.start.saturating_sub(opening);
+        let length_expand = opening + self.length + closing;
+        Self::new(start_expand, length_expand)
+    }
+
+    #[must_use]
+    pub fn as_tuple(self) -> (u32, u32) {
+        (self.start, self.length)
+    }
+
     /// Construct a span from integer bounds expressed as byte offsets.
     #[must_use]
     pub fn from_bounds(start: usize, end: usize) -> Self {
         let start_u32 = u32::try_from(start).unwrap_or(u32::MAX);
         let end_u32 = u32::try_from(end).unwrap_or(u32::MAX);
-        let length = end_u32.saturating_sub(start_u32);
-        Self::new(start_u32, length)
+        let length_u32 = end_u32.saturating_sub(start_u32);
+        Self::new(start_u32, length_u32)
+    }
+
+    #[must_use]
+    pub fn from_parts(start: usize, len: usize) -> Self {
+        let start_u32 = u32::try_from(start).unwrap_or(u32::MAX);
+        let length_u32 = u32::try_from(len).unwrap_or(u32::MAX.saturating_sub(start_u32));
+        Span::new(start_u32, length_u32)
     }
 
     #[must_use]
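
Note: a minimal sketch (not part of this commit) of how the new Span helpers compose, assuming djls_source is available as a dependency and that Span is Copy, as its by-value methods suggest:

use djls_source::Span;

fn main() {
    // Content span for " if x " inside "{% if x %}": the content starts
    // after the 2-byte "{%" opener and runs for 6 bytes.
    let content = Span::from_parts(2, 6);
    assert_eq!(content.as_tuple(), (2, 6));

    // expand() re-applies the delimiter widths, recovering the full
    // "{% if x %}" construct: start 0, length 2 + 6 + 2 = 10.
    assert_eq!(content.expand(2, 2).as_tuple(), (0, 10));
}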

crates/djls-templates/src/lexer.rs

Lines changed: 218 additions & 36 deletions
@@ -3,13 +3,13 @@ use djls_source::Span;
 use crate::db::Db as TemplateDb;
 use crate::tokens::Token;
 use crate::tokens::TokenContent;
-
-const BLOCK_TAG_START: &str = "{%";
-const BLOCK_TAG_END: &str = "%}";
-const VARIABLE_TAG_START: &str = "{{";
-const VARIABLE_TAG_END: &str = "}}";
-const COMMENT_TAG_START: &str = "{#";
-const COMMENT_TAG_END: &str = "#}";
+use crate::tokens::BLOCK_TAG_END;
+use crate::tokens::BLOCK_TAG_START;
+use crate::tokens::COMMENT_TAG_END;
+use crate::tokens::COMMENT_TAG_START;
+use crate::tokens::DJANGO_TAG_LEN;
+use crate::tokens::VARIABLE_TAG_END;
+use crate::tokens::VARIABLE_TAG_START;
 
 pub struct Lexer<'db> {
     db: &'db dyn TemplateDb,
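
Note: crate::tokens itself is not among the files shown in this view, but judging from the constants removed above and the call sites below, its new exports are presumably along these lines (a sketch, not the actual module):

// Presumed additions to crates/djls-templates/src/tokens.rs: the delimiter
// strings previously private to the lexer, plus their shared byte length
// (u32 to match Span::expand's parameters; cast to usize at lexer call sites).
pub const BLOCK_TAG_START: &str = "{%";
pub const BLOCK_TAG_END: &str = "%}";
pub const VARIABLE_TAG_START: &str = "{{";
pub const VARIABLE_TAG_END: &str = "}}";
pub const COMMENT_TAG_START: &str = "{#";
pub const COMMENT_TAG_END: &str = "#}";
pub const DJANGO_TAG_LEN: u32 = 2;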
@@ -35,23 +35,24 @@ impl<'db> Lexer<'db> {
         while !self.is_at_end() {
             self.start = self.current;
 
-            let token = match self.peek() {
-                '{' => match self.peek_next() {
-                    '%' => self.lex_django_construct(BLOCK_TAG_END, |content, span| Token::Block {
-                        content,
-                        span,
-                    }),
-                    '{' => self.lex_django_construct(VARIABLE_TAG_END, |content, span| {
-                        Token::Variable { content, span }
-                    }),
-                    '#' => self.lex_django_construct(COMMENT_TAG_END, |content, span| {
-                        Token::Comment { content, span }
-                    }),
+            let token =
+                match self.peek() {
+                    '{' => match self.peek_next() {
+                        '%' => self.lex_django_tag(BLOCK_TAG_END, |content, span| Token::Block {
+                            content,
+                            span,
+                        }),
+                        '{' => self.lex_django_tag(VARIABLE_TAG_END, |content, span| {
+                            Token::Variable { content, span }
+                        }),
+                        '#' => self.lex_django_tag(COMMENT_TAG_END, |content, span| {
+                            Token::Comment { content, span }
+                        }),
+                        _ => self.lex_text(),
+                    },
+                    c if c.is_whitespace() => self.lex_whitespace(c),
                     _ => self.lex_text(),
-                },
-                c if c.is_whitespace() => self.lex_whitespace(c),
-                _ => self.lex_text(),
-            };
+                };
 
             tokens.push(token);
         }
@@ -61,28 +62,30 @@ impl<'db> Lexer<'db> {
         tokens
     }
 
-    fn lex_django_construct(
+    fn lex_django_tag(
        &mut self,
        end: &str,
        token_fn: impl FnOnce(TokenContent<'db>, Span) -> Token<'db>,
    ) -> Token<'db> {
-        let opening_len = 2;
-        let content_start = self.start + opening_len;
-
-        self.consume_n(opening_len);
+        let content_start = self.start + DJANGO_TAG_LEN as usize;
+        self.consume_n(DJANGO_TAG_LEN as usize);
 
         match self.consume_until(end) {
             Ok(text) => {
+                let len = text.len();
                 let content = TokenContent::new(self.db, text);
-                let content_end = self.current;
-                let span = Span::from_bounds(content_start, content_end);
+                let span = Span::from_parts(content_start, len);
                 self.consume_n(end.len());
                 token_fn(content, span)
             }
             Err(err_text) => {
-                let content_end = self.current;
-                let span = Span::from_bounds(content_start, content_end);
+                let len = err_text.len();
                 let content = TokenContent::new(self.db, err_text);
+                let span = if len == 0 {
+                    Span::from_bounds(content_start, self.current)
+                } else {
+                    Span::from_parts(content_start, len)
+                };
                 Token::Error { content, span }
             }
         }
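
Note: the substantive change in lex_django_tag is that the content span is now derived from the lexed text's own length (Span::from_parts) instead of the cursor position (Span::from_bounds), with the cursor kept only as a fallback when an unterminated tag yields empty error text. A worked example of the arithmetic, assuming DJANGO_TAG_LEN == 2:

fn main() {
    let source = "{% if x %}";
    let content_start = 2; // self.start + DJANGO_TAG_LEN
    // What consume_until("%}") would return: everything up to the closer.
    let text = &source[content_start..source.len() - 2]; // " if x "
    assert_eq!(text.len(), 6);
    // New: Span::from_parts(2, 6) covers bytes 2..8, the same range the old
    // Span::from_bounds(2, self.current) produced once the cursor reached "%}".
}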
@@ -134,6 +137,7 @@ impl<'db> Lexer<'db> {
         self.source[self.current..].chars().next().unwrap_or('\0')
     }
 
+    #[inline]
     fn peek_next(&self) -> char {
         let mut chars = self.source[self.current..].chars();
         chars.next(); // Skip current
@@ -183,9 +187,187 @@ impl<'db> Lexer<'db> {
             self.consume();
         }
 
-        let end = fallback.unwrap_or(self.current);
-        let text = self.source[offset..end].to_string();
-        self.current = end;
-        Err(text)
+        self.current = fallback.unwrap_or(self.current);
+        Err(self.source[offset..self.current].to_string())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use camino::Utf8Path;
+
+    use super::*;
+    use crate::tokens::TokenSnapshotVec;
+
+    #[salsa::db]
+    #[derive(Clone)]
+    struct TestDatabase {
+        storage: salsa::Storage<Self>,
+    }
+
+    impl TestDatabase {
+        fn new() -> Self {
+            Self {
+                storage: salsa::Storage::default(),
+            }
+        }
+    }
+
+    #[salsa::db]
+    impl salsa::Database for TestDatabase {}
+
+    #[salsa::db]
+    impl djls_source::Db for TestDatabase {
+        fn read_file_source(&self, path: &Utf8Path) -> Result<String, std::io::Error> {
+            std::fs::read_to_string(path)
+        }
+    }
+
+    #[salsa::db]
+    impl crate::db::Db for TestDatabase {
+        // Template parsing only - semantic analysis moved to djls-semantic
+    }
+
+    #[test]
+    fn test_tokenize_html() {
+        let db = TestDatabase::new();
+        let source = r#"<div class="container" id="main" disabled></div>"#;
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_django_variable() {
+        let db = TestDatabase::new();
+        let source = "{{ user.name|default:\"Anonymous\"|title }}";
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_django_block() {
+        let db = TestDatabase::new();
+        let source = "{% if user.is_staff %}Admin{% else %}User{% endif %}";
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_comments() {
+        let db = TestDatabase::new();
+        let source = r"<!-- HTML comment -->
+{# Django comment #}
+<script>
+    // JS single line comment
+    /* JS multi-line
+       comment */
+</script>
+<style>
+    /* CSS comment */
+</style>";
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_script() {
+        let db = TestDatabase::new();
+        let source = r#"<script type="text/javascript">
+    // Single line comment
+    const x = 1;
+    /* Multi-line
+       comment */
+    console.log(x);
+</script>"#;
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_style() {
+        let db = TestDatabase::new();
+        let source = r#"<style type="text/css">
+    /* Header styles */
+    .header {
+        color: blue;
+    }
+</style>"#;
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_nested_delimiters() {
+        let db = TestDatabase::new();
+        let source = r"{{ user.name }}
+{% if true %}
+{# comment #}
+<!-- html comment -->
+<div>text</div>";
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_everything() {
+        let db = TestDatabase::new();
+        let source = r#"<!DOCTYPE html>
+<html>
+<head>
+    <style type="text/css">
+        /* Style header */
+        .header { color: blue; }
+    </style>
+    <script type="text/javascript">
+        // Init app
+        const app = {
+            /* Config */
+            debug: true
+        };
+    </script>
+</head>
+<body>
+    <!-- Header section -->
+    <div class="header" id="main" data-value="123" disabled>
+        {% if user.is_authenticated %}
+            {# Welcome message #}
+            <h1>Welcome, {{ user.name|default:"Guest"|title }}!</h1>
+            {% if user.is_staff %}
+                <span>Admin</span>
+            {% else %}
+                <span>User</span>
+            {% endif %}
+        {% endif %}
+    </div>
+</body>
+</html>"#;
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
+    }
+
+    #[test]
+    fn test_tokenize_unclosed_style() {
+        let db = TestDatabase::new();
+        let source = "<style>body { color: blue; ";
+        let mut lexer = Lexer::new(&db, source);
+        let tokens = lexer.tokenize();
+        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
+        insta::assert_yaml_snapshot!(snapshot);
     }
 }
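
Note: none of the new tests exercises the changed Err branch of lex_django_tag (an unterminated Django tag, as opposed to the unterminated <style> above). A hypothetical addition in the same style; the test name and source string are illustrative, not part of this commit:

    #[test]
    fn test_tokenize_unclosed_django_tag() {
        let db = TestDatabase::new();
        let source = "{% if user.is_staff "; // no closing "%}"
        let mut lexer = Lexer::new(&db, source);
        let tokens = lexer.tokenize();
        // Expected: a Token::Error carrying the partial tag content.
        let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db);
        insta::assert_yaml_snapshot!(snapshot);
    }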

crates/djls-templates/src/nodelist.rs

Lines changed: 2 additions & 11 deletions
@@ -2,8 +2,7 @@ use djls_source::Span;
 
 use crate::db::Db as TemplateDb;
 use crate::parser::ParseError;
-
-const DJANGO_DELIM_LEN: u32 = 2;
+use crate::tokens::DJANGO_TAG_LEN;
 
 #[salsa::tracked(debug)]
 pub struct NodeList<'db> {
@@ -52,7 +51,7 @@
     pub fn full_span(&self) -> Span {
         match self {
             Node::Variable { span, .. } | Node::Comment { span, .. } | Node::Tag { span, .. } => {
-                expand_with_delimiters(*span, DJANGO_DELIM_LEN, DJANGO_DELIM_LEN)
+                span.expand(DJANGO_TAG_LEN, DJANGO_TAG_LEN)
             }
             Node::Text { span, .. } => *span,
             Node::Error { node } => node.full_span,
@@ -82,14 +81,6 @@
     }
 }
 
-fn expand_with_delimiters(span: Span, opening: u32, closing: u32) -> Span {
-    let start = span.start.saturating_sub(opening);
-    Span {
-        start,
-        length: opening + span.length + closing,
-    }
-}
-
 #[derive(Clone, Debug, PartialEq, Eq, salsa::Update)]
 pub struct ErrorNode {
     pub span: Span,
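
Note: with the helper now on Span itself, full_span() simply widens a node's content span back over its delimiters. A quick sketch of the effect (hypothetical values), assuming DJANGO_TAG_LEN == 2 and the Span API from position.rs above:

use djls_source::Span;

fn main() {
    // A Variable node's content span inside "{{ user.name }}": the 11-byte
    // " user.name " starting at byte 2.
    let content = Span::from_parts(2, 11);
    // full_span() would expand over "{{" and "}}": start 0, length 2 + 11 + 2.
    assert_eq!(content.expand(2, 2).as_tuple(), (0, 15));
}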
