Skip to content

Commit 5971c23

Browse files
refactor and cleanup parsing logic (#10)
just a little bit
1 parent 0c230c3 commit 5971c23

File tree

1 file changed

+97
-95
lines changed

1 file changed

+97
-95
lines changed

crates/djls-ast/src/parser.rs

Lines changed: 97 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -19,54 +19,65 @@ impl Parser {
1919

2020
pub fn parse(&mut self) -> Result<Ast, ParserError> {
2121
let mut ast = Ast::default();
22-
2322
while !self.is_at_end() {
24-
let node = self.next_node();
25-
match node {
23+
match self.next_node() {
2624
Ok(node) => {
2725
ast.add_node(node);
2826
}
29-
Err(ParserError::AtEndOfStream) => {
27+
Err(ParserError::StreamError(Stream::AtEnd)) => {
3028
if ast.nodes().is_empty() {
31-
return Err(ParserError::UnexpectedEof);
29+
return Err(ParserError::StreamError(Stream::UnexpectedEof));
3230
}
3331
break;
3432
}
3533
Err(_) => {
36-
self.synchronize(&[
37-
TokenType::DjangoBlock(String::new()),
38-
TokenType::HtmlTagOpen(String::new()),
39-
TokenType::HtmlTagVoid(String::new()),
40-
TokenType::ScriptTagOpen(String::new()),
41-
TokenType::StyleTagOpen(String::new()),
42-
TokenType::Newline,
43-
TokenType::Eof,
44-
])?;
34+
self.synchronize()?;
4535
continue;
4636
}
4737
}
4838
}
49-
50-
Ok(ast.finalize()?)
39+
ast.finalize()?;
40+
Ok(ast)
5141
}
5242

5343
fn next_node(&mut self) -> Result<Node, ParserError> {
54-
let token = self.peek()?;
44+
let token = self.consume()?;
5545
let node = match token.token_type() {
5646
TokenType::Comment(s, start, end) => self.parse_comment(s, start, end.as_deref()),
5747
TokenType::DjangoBlock(s) => self.parse_django_block(s),
5848
TokenType::DjangoVariable(s) => self.parse_django_variable(s),
59-
TokenType::Eof => self.parse_eof(),
60-
TokenType::HtmlTagClose(tag) => Err(ParserError::ClosingTagFound(tag.to_string())),
49+
TokenType::Eof => {
50+
if self.is_at_end() {
51+
self.next_node()
52+
} else {
53+
Err(ParserError::StreamError(Stream::UnexpectedEof))
54+
}
55+
}
56+
TokenType::HtmlTagClose(tag) => {
57+
self.backtrack(1)?;
58+
Err(ParserError::ErrorSignal(Signal::ClosingTagFound(
59+
tag.to_string(),
60+
)))
61+
}
6162
TokenType::HtmlTagOpen(s) => self.parse_html_tag_open(s),
6263
TokenType::HtmlTagVoid(s) => self.parse_html_tag_void(s),
63-
TokenType::Newline => self.parse_newline(),
64+
TokenType::Newline => self.next_node(),
65+
TokenType::ScriptTagClose(_) => {
66+
self.backtrack(1)?;
67+
Err(ParserError::ErrorSignal(Signal::ClosingTagFound(
68+
"script".to_string(),
69+
)))
70+
}
6471
TokenType::ScriptTagOpen(s) => self.parse_script_tag_open(s),
65-
TokenType::ScriptTagClose(_) => Err(ParserError::ClosingTagFound("script".to_string())),
72+
TokenType::StyleTagClose(_) => {
73+
self.backtrack(1)?;
74+
Err(ParserError::ErrorSignal(Signal::ClosingTagFound(
75+
"style".to_string(),
76+
)))
77+
}
6678
TokenType::StyleTagOpen(s) => self.parse_style_tag_open(s),
67-
TokenType::StyleTagClose(_) => Err(ParserError::ClosingTagFound("style".to_string())),
68-
TokenType::Text(s) => self.parse_text(s),
69-
TokenType::Whitespace(_) => self.parse_whitespace(),
79+
TokenType::Text(s) => Ok(Node::Text(s.to_string())),
80+
TokenType::Whitespace(_) => self.next_node(),
7081
}?;
7182
Ok(node)
7283
}
@@ -77,8 +88,6 @@ impl Parser {
7788
start: &str,
7889
end: Option<&str>,
7990
) -> Result<Node, ParserError> {
80-
self.consume()?;
81-
8291
match start {
8392
"{#" => Ok(Node::Django(DjangoNode::Comment(content.to_string()))),
8493
"<!--" => Ok(Node::Html(HtmlNode::Comment(content.to_string()))),
@@ -125,14 +134,13 @@ impl Parser {
125134
}
126135

127136
fn parse_django_block(&mut self, s: &str) -> Result<Node, ParserError> {
128-
self.consume()?;
129-
130137
let bits: Vec<String> = s.split_whitespace().map(String::from).collect();
131138
let kind = DjangoTagKind::from_str(&bits[0])?;
132139

133-
// If this is an end tag, signal it like we do with HTML closing tags
134140
if bits[0].starts_with("end") {
135-
return Err(ParserError::ClosingTagFound(bits[0].clone()));
141+
return Err(ParserError::ErrorSignal(Signal::ClosingTagFound(
142+
bits[0].clone(),
143+
)));
136144
}
137145

138146
let mut children = Vec::new();
@@ -141,10 +149,9 @@ impl Parser {
141149
while !self.is_at_end() {
142150
match self.next_node() {
143151
Ok(node) => {
144-
println!("found django child node: {:?}", node);
145152
children.push(node);
146153
}
147-
Err(ParserError::ClosingTagFound(tag)) => {
154+
Err(ParserError::ErrorSignal(Signal::ClosingTagFound(tag))) => {
148155
if tag == end_tag {
149156
self.consume()?;
150157
break;
@@ -163,8 +170,6 @@ impl Parser {
163170
}
164171

165172
fn parse_django_variable(&mut self, s: &str) -> Result<Node, ParserError> {
166-
self.consume()?;
167-
168173
let parts: Vec<&str> = s.split('|').collect();
169174

170175
let bits: Vec<String> = parts[0].trim().split('.').map(String::from).collect();
@@ -192,23 +197,12 @@ impl Parser {
192197
Ok(Node::Django(DjangoNode::Variable { bits, filters }))
193198
}
194199

195-
fn parse_eof(&mut self) -> Result<Node, ParserError> {
196-
if self.is_at_end() {
197-
self.consume()?;
198-
self.next_node()
199-
} else {
200-
Err(ParserError::UnexpectedEof)
201-
}
202-
}
203-
204200
fn parse_html_tag_open(&mut self, s: &str) -> Result<Node, ParserError> {
205-
self.consume()?;
206-
207201
let mut parts = s.split_whitespace();
208202

209203
let tag_name = parts
210204
.next()
211-
.ok_or(ParserError::InvalidTokenAccess)?
205+
.ok_or(ParserError::StreamError(Stream::InvalidAccess))?
212206
.to_string();
213207

214208
let mut attributes = BTreeMap::new();
@@ -233,7 +227,7 @@ impl Parser {
233227
Ok(node) => {
234228
children.push(node);
235229
}
236-
Err(ParserError::ClosingTagFound(tag)) => {
230+
Err(ParserError::ErrorSignal(Signal::ClosingTagFound(tag))) => {
237231
if tag == tag_name {
238232
self.consume()?;
239233
break;
@@ -251,12 +245,11 @@ impl Parser {
251245
}
252246

253247
fn parse_html_tag_void(&mut self, s: &str) -> Result<Node, ParserError> {
254-
self.consume()?;
255248
let mut parts = s.split_whitespace();
256249

257250
let tag_name = parts
258251
.next()
259-
.ok_or(ParserError::InvalidTokenAccess)?
252+
.ok_or(ParserError::StreamError(Stream::InvalidAccess))?
260253
.to_string();
261254

262255
let mut attributes = BTreeMap::new();
@@ -278,14 +271,7 @@ impl Parser {
278271
}))
279272
}
280273

281-
fn parse_newline(&mut self) -> Result<Node, ParserError> {
282-
self.consume()?;
283-
self.next_node()
284-
}
285-
286274
fn parse_script_tag_open(&mut self, s: &str) -> Result<Node, ParserError> {
287-
self.consume()?;
288-
289275
let parts = s.split_whitespace();
290276

291277
let mut attributes = BTreeMap::new();
@@ -308,7 +294,7 @@ impl Parser {
308294
Ok(node) => {
309295
children.push(node);
310296
}
311-
Err(ParserError::ClosingTagFound(tag)) => {
297+
Err(ParserError::ErrorSignal(Signal::ClosingTagFound(tag))) => {
312298
if tag == "script" {
313299
self.consume()?;
314300
break;
@@ -326,13 +312,11 @@ impl Parser {
326312
}
327313

328314
fn parse_style_tag_open(&mut self, s: &str) -> Result<Node, ParserError> {
329-
self.consume()?;
330-
331315
let mut parts = s.split_whitespace();
332316

333317
let _tag_name = parts
334318
.next()
335-
.ok_or(ParserError::InvalidTokenAccess)?
319+
.ok_or(ParserError::StreamError(Stream::InvalidAccess))?
336320
.to_string();
337321

338322
let mut attributes = BTreeMap::new();
@@ -355,7 +339,7 @@ impl Parser {
355339
Ok(node) => {
356340
children.push(node);
357341
}
358-
Err(ParserError::ClosingTagFound(tag)) => {
342+
Err(ParserError::ErrorSignal(Signal::ClosingTagFound(tag))) => {
359343
if tag == "style" {
360344
self.consume()?;
361345
break;
@@ -372,17 +356,6 @@ impl Parser {
372356
}))
373357
}
374358

375-
fn parse_text(&mut self, s: &str) -> Result<Node, ParserError> {
376-
self.consume()?;
377-
378-
Ok(Node::Text(s.to_string()))
379-
}
380-
381-
fn parse_whitespace(&mut self) -> Result<Node, ParserError> {
382-
self.consume()?;
383-
self.next_node()
384-
}
385-
386359
fn peek(&self) -> Result<Token, ParserError> {
387360
self.peek_at(0)
388361
}
@@ -413,13 +386,13 @@ impl Parser {
413386
Ok(token.clone())
414387
} else {
415388
let error = if self.tokens.is_empty() {
416-
ParserError::EmptyTokenStream
389+
ParserError::StreamError(Stream::Empty)
417390
} else if index < self.current {
418-
ParserError::AtBeginningOfStream
391+
ParserError::StreamError(Stream::AtBeginning)
419392
} else if index >= self.tokens.len() {
420-
ParserError::AtEndOfStream
393+
ParserError::StreamError(Stream::AtEnd)
421394
} else {
422-
ParserError::InvalidTokenAccess
395+
ParserError::StreamError(Stream::InvalidAccess)
423396
};
424397
Err(error)
425398
}
@@ -431,15 +404,15 @@ impl Parser {
431404

432405
fn consume(&mut self) -> Result<Token, ParserError> {
433406
if self.is_at_end() {
434-
return Err(ParserError::AtEndOfStream);
407+
return Err(ParserError::StreamError(Stream::AtEnd));
435408
}
436409
self.current += 1;
437410
self.peek_previous()
438411
}
439412

440413
fn backtrack(&mut self, steps: usize) -> Result<Token, ParserError> {
441414
if self.current < steps {
442-
return Err(ParserError::AtBeginningOfStream);
415+
return Err(ParserError::StreamError(Stream::AtBeginning));
443416
}
444417
self.current -= steps;
445418
self.peek_next()
@@ -475,9 +448,19 @@ impl Parser {
475448
Ok(consumed)
476449
}
477450

478-
fn synchronize(&mut self, sync_types: &[TokenType]) -> Result<(), ParserError> {
451+
fn synchronize(&mut self) -> Result<(), ParserError> {
452+
const SYNC_TYPES: &[TokenType] = &[
453+
TokenType::DjangoBlock(String::new()),
454+
TokenType::HtmlTagOpen(String::new()),
455+
TokenType::HtmlTagVoid(String::new()),
456+
TokenType::ScriptTagOpen(String::new()),
457+
TokenType::StyleTagOpen(String::new()),
458+
TokenType::Newline,
459+
TokenType::Eof,
460+
];
461+
479462
while !self.is_at_end() {
480-
if sync_types.contains(self.peek()?.token_type()) {
463+
if SYNC_TYPES.contains(self.peek()?.token_type()) {
481464
return Ok(());
482465
}
483466
self.consume()?;
@@ -488,28 +471,44 @@ impl Parser {
488471

489472
#[derive(Error, Debug)]
490473
pub enum ParserError {
491-
#[error("token stream is empty")]
492-
EmptyTokenStream,
493-
#[error("at beginning of token stream")]
494-
AtBeginningOfStream,
495-
#[error("at end of token stream")]
496-
AtEndOfStream,
497-
#[error("invalid token access")]
498-
InvalidTokenAccess,
474+
#[error("token stream {0}")]
475+
StreamError(Stream),
476+
#[error("parsing signal: {0:?}")]
477+
ErrorSignal(Signal),
499478
#[error("unexpected token '{0:?}', expected type '{1:?}'")]
500479
ExpectedTokenType(Token, TokenType),
501480
#[error("unexpected token '{0:?}'")]
502481
UnexpectedToken(Token),
503-
#[error("unexpected end tag: {0}")]
504-
UnexpectedEndTag(String),
505482
#[error("multi-line comment outside of script or style context")]
506483
InvalidMultLineComment,
507-
#[error("unexpected end of file")]
484+
#[error(transparent)]
485+
Ast(#[from] AstError),
486+
}
487+
488+
#[derive(Debug)]
489+
pub enum Stream {
490+
Empty,
491+
AtBeginning,
492+
AtEnd,
508493
UnexpectedEof,
509-
#[error("found closing tag: {0}")]
494+
InvalidAccess,
495+
}
496+
497+
#[derive(Debug)]
498+
pub enum Signal {
510499
ClosingTagFound(String),
511-
#[error(transparent)]
512-
Node(#[from] AstError),
500+
}
501+
502+
impl std::fmt::Display for Stream {
503+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
504+
match self {
505+
Self::Empty => write!(f, "is empty"),
506+
Self::AtBeginning => write!(f, "at beginning"),
507+
Self::AtEnd => write!(f, "at end"),
508+
Self::UnexpectedEof => write!(f, "unexpected end of file"),
509+
Self::InvalidAccess => write!(f, "invalid access"),
510+
}
511+
}
513512
}
514513

515514
#[cfg(test)]
@@ -633,6 +632,9 @@ mod tests {
633632
let tokens = Lexer::new(source).tokenize().unwrap();
634633
let mut parser = Parser::new(tokens);
635634
let ast = parser.parse();
636-
assert!(matches!(ast, Err(ParserError::UnexpectedEof)));
635+
assert!(matches!(
636+
ast,
637+
Err(ParserError::StreamError(Stream::UnexpectedEof))
638+
));
637639
}
638640
}

0 commit comments

Comments
 (0)