Commit c70543a

Add newline to grammar to explicitly address end-of-command
Fix skip_whitespace test and add respect_newlines test.
1 parent 5760850 commit c70543a

3 files changed: +59 lines, -15 lines

gcode/src/lexer.rs

Lines changed: 37 additions & 4 deletions
@@ -5,6 +5,7 @@ pub(crate) enum TokenType {
     Letter,
     Number,
     Comment,
+    Newline,
     Unknown,
 }

@@ -16,6 +17,8 @@ impl From<char> for TokenType {
         TokenType::Number
     } else if c == '(' || c == ';' || c == ')' {
         TokenType::Comment
+    } else if c == '\n' {
+        TokenType::Newline
     } else {
         TokenType::Unknown
     }

@@ -53,14 +56,14 @@ impl<'input> Lexer<'input> {
     {
         let start = self.current_position;
         let mut end = start;
-        let mut line_endings = 0;

         for letter in self.rest().chars() {
             if !predicate(letter) {
                 break;
             }
             if letter == '\n' {
-                line_endings += 1;
+                // Newline defines the command to be complete.
+                break;
             }
             end += letter.len_utf8();
         }

@@ -69,7 +72,6 @@ impl<'input> Lexer<'input> {
             None
         } else {
             self.current_position = end;
-            self.current_line += line_endings;
             Some(&self.src[start..end])
         }
     }

@@ -175,6 +177,23 @@ impl<'input> Lexer<'input> {
             },
         })
     }
+
+    fn tokenize_newline(&mut self) -> Option<Token<'input>> {
+        let start = self.current_position;
+        let line = self.current_line;
+        let value = "\n";
+        self.current_position += 1;
+        self.current_line += 1;
+        Some(Token {
+            kind: TokenType::Newline,
+            value,
+            span: Span {
+                start,
+                line,
+                end: start + 1,
+            },
+        })
+    }

     fn finished(&self) -> bool { self.current_position >= self.src.len() }

@@ -219,6 +238,9 @@ impl<'input> Iterator for Lexer<'input> {
                 TokenType::Number => {
                     return Some(self.tokenize_number().expect(MSG))
                 },
+                TokenType::Newline => {
+                    return Some(self.tokenize_newline().expect(MSG))
+                },
                 TokenType::Unknown => self.current_position += 1,
             }
         }

@@ -253,11 +275,22 @@ mod tests {

     #[test]
     fn skip_whitespace() {
-        let mut lexer = Lexer::new(" \n\r\t ");
+        let mut lexer = Lexer::new(" \r\t ");

         lexer.skip_whitespace();

         assert_eq!(lexer.current_position, lexer.src.len());
+        assert_eq!(lexer.current_line, 0);
+    }
+
+    #[test]
+    fn respect_newlines() {
+        let mut lexer = Lexer::new("\n\rM30garbage");
+
+        let token = lexer.tokenize_newline();
+        assert_eq!(token.expect("Failed.").kind, TokenType::Newline);
+
+        assert_eq!(lexer.current_position, 1);
         assert_eq!(lexer.current_line, 1);
     }
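
To make the lexer change concrete: '\n' now gets its own token kind instead of being swallowed as whitespace, so the downstream parser can see where a command ends. The sketch below illustrates that classification step only; `Kind` and `classify` are hypothetical stand-ins written for this page, not the crate's internal types.

```rust
/// Minimal illustration of giving '\n' a dedicated token kind.
/// These names are hypothetical and only mirror the idea of the diff.
#[derive(Debug, PartialEq)]
enum Kind {
    Letter,
    Number,
    Newline,
    Unknown,
}

fn classify(c: char) -> Kind {
    if c.is_ascii_alphabetic() {
        Kind::Letter
    } else if c.is_ascii_digit() || c == '.' || c == '-' {
        Kind::Number
    } else if c == '\n' {
        // A newline is a first-class token, not skipped whitespace.
        Kind::Newline
    } else {
        Kind::Unknown
    }
}

fn main() {
    for c in "G90\nG01".chars() {
        println!("{:?} -> {:?}", c, classify(c));
    }
}
```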

gcode/src/parser.rs

Lines changed: 19 additions & 11 deletions
@@ -219,11 +219,13 @@ where
         // constructing
         let mut temp_gcode = None;

-        while let Some(next_line) = self.next_line_number() {
-            if !line.is_empty() && next_line != line.span().line {
-                // we've started the next line
-                break;
-            }
+        if let None = self.atoms.peek() {
+            // There is nothing left in the file. :sad-face:
+            // This ends the parser's work.
+            return None;
+        }
+
+        while let Some(_next_line) = self.next_line_number() {

             match self.atoms.next().expect("unreachable") {
                 Atom::Unknown(token) => {

@@ -234,6 +236,13 @@ where
                         self.on_comment_push_error(e.0);
                     }
                 },
+                Atom::Newline(_) => {
+                    if !line.is_empty() {
+                        // Newline ends the current command
+                        // if there was something to parse.
+                        break;
+                    }
+                },
                 // line numbers are annoying, so handle them separately
                 Atom::Word(word) if word.letter.to_ascii_lowercase() == 'n' => {
                     self.handle_line_number(

@@ -255,11 +264,11 @@ where
             }
         }

-        if line.is_empty() {
-            None
-        } else {
-            Some(line)
-        }
+        // TODO: This should exit the parser under some conditions.
+        // IS M2 or M30: see 3.6.1.
+        // return None;
+
+        return Some(line);
     }
 }

@@ -422,7 +431,6 @@ mod tests {
     /// For some reason we were parsing the G90, then an empty G01 and the
     /// actual G01.
     #[test]
-    #[ignore]
    fn funny_bug_in_crate_example() {
        let src = "G90 \n G01 X50.0 Y-10";
        let expected = vec![
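
The parser side of the change boils down to two rules: return `None` once the atom stream is exhausted, and let a newline terminate the current command only if something has already been collected. Here is a rough, self-contained sketch of that control flow; the `Atom` enum and `next_command` function are hypothetical simplifications, not the crate's parser (which also handles comments, line numbers, and spans).

```rust
/// Hypothetical, simplified atoms; only the control flow mirrors the diff.
#[derive(Debug)]
enum Atom {
    Word(char, f32),
    Newline,
}

/// Collect words until a newline ends a non-empty command,
/// returning None when the input runs out.
fn next_command<I>(atoms: &mut std::iter::Peekable<I>) -> Option<Vec<(char, f32)>>
where
    I: Iterator<Item = Atom>,
{
    atoms.peek()?; // nothing left in the file: the parser is done

    let mut command = Vec::new();
    while let Some(atom) = atoms.next() {
        match atom {
            // A newline only closes the command if something was collected,
            // so leading blank lines do not yield empty commands.
            Atom::Newline if !command.is_empty() => break,
            Atom::Newline => {}
            Atom::Word(letter, value) => command.push((letter, value)),
        }
    }
    Some(command)
}

fn main() {
    // Roughly what "G90 \n G01 X50.0 Y-10" looks like after lexing.
    let mut atoms = vec![
        Atom::Word('G', 90.0),
        Atom::Newline,
        Atom::Word('G', 1.0),
        Atom::Word('X', 50.0),
        Atom::Word('Y', -10.0),
    ]
    .into_iter()
    .peekable();

    while let Some(command) = next_command(&mut atoms) {
        println!("{:?}", command);
    }
}
```

Run against the example input, this prints one command for the G90 and a second for the G01 with its X and Y words, which is the behavior the previously ignored `funny_bug_in_crate_example` test expects.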

gcode/src/words.rs

Lines changed: 3 additions & 0 deletions
@@ -42,6 +42,7 @@ impl Display for Word {
 pub(crate) enum Atom<'input> {
     Word(Word),
     Comment(Comment<'input>),
+    Newline(Token<'input>),
     /// Incomplete parts of a [`Word`].
     BrokenWord(Token<'input>),
     /// Garbage from the tokenizer (see [`TokenType::Unknown`]).

@@ -53,6 +54,7 @@ impl<'input> Atom<'input> {
         match self {
             Atom::Word(word) => word.span,
             Atom::Comment(comment) => comment.span,
+            Atom::Newline(newline) => newline.span,
             Atom::Unknown(token) | Atom::BrokenWord(token) => token.span,
         }
     }

@@ -90,6 +92,7 @@ where

         match kind {
             TokenType::Unknown => return Some(Atom::Unknown(token)),
+            TokenType::Newline => return Some(Atom::Newline(token)),
             TokenType::Comment => {
                 return Some(Atom::Comment(Comment { value, span }))
             },
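
As a brief illustration of why the new variant ripples through `words.rs`: every exhaustive `match` over the atom type must gain a `Newline` arm, or the crate stops compiling. The miniature types below only mirror that shape and are not the crate's actual `Atom`, `Token`, or `Span`.

```rust
/// Hypothetical miniature of the Atom/span relationship in the diff.
#[derive(Debug, Clone, Copy)]
struct Span {
    start: usize,
    end: usize,
}

#[allow(dead_code)]
enum Atom {
    Word(Span),
    Comment(Span),
    Newline(Span), // the new variant, mirroring the commit
}

impl Atom {
    fn span(&self) -> Span {
        // The match stays exhaustive: forgetting the Newline arm would be
        // a compile error, which is why the real diff touches every match
        // over Atom.
        match self {
            Atom::Word(span) | Atom::Comment(span) | Atom::Newline(span) => *span,
        }
    }
}

fn main() {
    let newline = Atom::Newline(Span { start: 3, end: 4 });
    println!("{:?}", newline.span());
}
```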
