Add parse_tree! macro

ictrobot · ictrobot · commit d32c7eb4a5f0 · 2024-11-03T20:52:02.000Z
diff --git a/crates/utils/src/parser/macros.rs b/crates/utils/src/parser/macros.rs
@@ -1,4 +1,4 @@
-/// Parse one or more string literals, mapping the results.
+/// Macro to define a parser for one or more string literals, mapping the results.
 ///
 /// This is a replacement for
 /// [`parser::one_of`](crate::parser::one_of())`(("a".map(|_| Enum::A), "b".map(|_| Enum::b)))`
@@ -55,3 +55,163 @@ macro_rules! parser_literal_map {
         $crate::parser::ParseError::ExpectedLiteral($first)
     };
 }
+
+/// Macro to define a custom parser using a `match` inspired parse tree syntax.
+///
+/// Each rule is made up of a list of chained parsers enclosed in parentheses on the left-hand side.
+/// Parsers can be prefixed with an identifier followed by `@` to store the result of that parser in
+/// the supplied variable, similar to normal match patterns.
+///
+/// After the list of parsers, there is an arrow determining the functionality of the rule when the
+/// parsers match:
+/// - **Expression (`=>`)**: The expression on the right-hand side is evaluated and returned.
+/// - **Fallible (`=?>`)**: Similar to Expression, but the right-hand side evaluates to a
+///         [`Result`]. If it is [`Ok`], the value contained inside is returned. Otherwise, the
+///         string contained inside the [`Err`] is handled as a custom
+///         [`ParseError`](super::ParseError), and parsing will continue with the following rule.
+/// - **Subtree (`=>>`)**: The right-hand side is a nested set of rules enclosed in braces.
+///
+/// If none of the rules match successfully, the error from the rule which parsed furthest into
+/// the input is returned.
+///
+/// # Examples
+/// ```
+/// # use utils::parser::{self, Parser};
+/// #
+/// #[derive(Debug, PartialEq)]
+/// enum Register {
+///     A, B, C
+/// }
+///
+/// #[derive(Debug, PartialEq)]
+/// enum Instruction {
+///     Add(Register, Register),
+///     AddConstant(Register, i32),
+///     Copy(Register, Register),
+///     Noop,
+/// }
+///
+/// let register = parser::literal_map!(
+///     "A" => Register::A, "B" => Register::B, "C" => Register::C,
+/// );
+///
+/// let instruction = parser::parse_tree!(
+///     ("add ", r @ register, ", ") =>> {
+///         (r2 @ register) => Instruction::Add(r, r2),
+///         (v @ parser::i32()) => Instruction::AddConstant(r, v),
+///     },
+///     ("copy ", r @ register, ", ", r2 @ register) =?> {
+///         if r == r2 {
+///             Err("cannot copy register to itself")
+///         } else {
+///             Ok(Instruction::Copy(r, r2))
+///         }
+///     },
+///     ("noop") => Instruction::Noop,
+/// );
+///
+/// assert_eq!(
+///     instruction.parse_complete("add A, B").unwrap(),
+///     Instruction::Add(Register::A, Register::B)
+/// );
+/// assert_eq!(
+///     instruction.parse_complete("add C, 100").unwrap(),
+///     Instruction::AddConstant(Register::C, 100)
+/// );
+/// assert_eq!(
+///     instruction.parse_complete("copy A, B").unwrap(),
+///     Instruction::Copy(Register::A, Register::B)
+/// );
+/// assert!(instruction
+///     .parse_complete("copy A, A")
+///     .is_err_and(|err| err.to_string().contains("cannot copy register to itself")));
+/// ```
+#[macro_export]
+macro_rules! parser_parse_tree { // `@rule` / `@toplevel` arms are internal; the last arm is the entry point
+    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident [$(,)?] @expr $rhs:expr) => {
+        return Ok(($rhs, $input)); // every parser in the rule matched
+    };
+    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident [$(,)?] @expr_res $rhs:expr) => {
+        match $rhs {
+            Ok(v) => return Ok((v, $input)),
+            Err(e) => {
+                if $input.len() < $furthest_remaining { // fewer bytes left = parsed further
+                    $furthest_err = $crate::parser::ParseError::Custom(e);
+                    $furthest_remaining = $input.len();
+                }
+            }
+        };
+    };
+    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident [$(,)?] @subtree $($rhs:tt)+) => {
+        $crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($rhs)+);
+    }; // nested rules expand in place and start from the current `$input`
+
+    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident
+        [$n:ident @ $lhs:expr $(,$($tail:tt)*)?] $($rhs:tt)+
+    ) => {
+        match $crate::parser::Parser::parse(&($lhs), $input) {
+            Ok(($n, $input)) => { // bind the result to `$n`; shadow `$input` with the remainder
+                $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining
+                    [$($($tail)*)?] $($rhs)+
+                );
+            },
+            Err((err, remaining)) => {
+                if remaining.len() < $furthest_remaining { // strict `<`: the earlier rule wins ties
+                    $furthest_err = err;
+                    $furthest_remaining = remaining.len();
+                }
+            }
+        };
+    };
+    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident
+        [$lhs:expr $(,$($tail:tt)*)?] $($rhs:tt)+
+    ) => {
+        match $crate::parser::Parser::parse(&($lhs), $input) {
+            Ok((_, $input)) => { // result discarded; `$input` shadowed with the remainder
+                $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining
+                    [$($($tail)*)?] $($rhs)+
+                );
+            },
+            Err((err, remaining)) => {
+                if remaining.len() < $furthest_remaining {
+                    $furthest_err = err;
+                    $furthest_remaining = remaining.len();
+                }
+            }
+        };
+    };
+
+    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident
+        ($($lhs:tt)+) => $rhs:expr $(, $($tail:tt)*)?
+    ) => {
+        $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining [$($lhs)+] @expr $rhs);
+        $($crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($tail)*);)?
+    }; // the tail rules are only reached at runtime if the rule above did not return
+    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident
+        ($($lhs:tt)+) =?> $rhs:expr $(, $($tail:tt)*)?
+    ) => {
+        $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining [$($lhs)+] @expr_res $rhs);
+        $($crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($tail)*);)?
+    };
+    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident
+        ($($lhs:tt)+) =>> {$($rhs:tt)+} $(, $($tail:tt)*)?
+    ) => {
+        $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining [$($lhs)+] @subtree $($rhs)+);
+        $($crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($tail)*);)?
+    };
+    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident $(,)?) => {};
+
+    // Ensures this branch only matches inputs starting with (, giving each rule set a unique prefix
+    (($($first:tt)+) $($tail:tt)+) => {{
+        fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }
+
+        coerce_to_parser(|input| { // identity call; pins the closure's signature to the `F` bound above
+            let mut furthest_err = $crate::parser::ParseError::Custom("unreachable");
+            let mut furthest_remaining = usize::MAX; // sentinel: any real remainder length is smaller
+
+            $crate::parser_parse_tree!(@toplevel input furthest_err furthest_remaining ($($first)+) $($tail)+);
+
+            Err((furthest_err, &input[input.len() - furthest_remaining..])) // tail of the original input
+        })
+    }};
+}
diff --git a/crates/utils/src/parser/mod.rs b/crates/utils/src/parser/mod.rs
@@ -18,3 +18,4 @@ pub use one_of::one_of;
 pub use simple::{byte, byte_range, constant, eof, eol, noop, take_while, take_while1};
 
 pub use crate::parser_literal_map as literal_map;
+pub use crate::parser_parse_tree as parse_tree;
diff --git a/crates/year2015/src/day23.rs b/crates/year2015/src/day23.rs
@@ -24,23 +24,20 @@ enum Instruction {
 
 impl Day23 {
     pub fn new(input: &str, _: InputType) -> Result<Self, InputError> {
-        let register = parser::literal_map!("a" => Register::A, "b" => Register::B);
+        let register = parser::literal_map!(
+            "a" => Register::A,
+            "b" => Register::B,
+        );
 
         Ok(Self {
-            instructions: parser::one_of((
-                register.with_prefix("hlf ").map(Instruction::Half),
-                register.with_prefix("tpl ").map(Instruction::Triple),
-                register.with_prefix("inc ").map(Instruction::Increment),
-                parser::i16().with_prefix("jmp ").map(Instruction::Jump),
-                register
-                    .with_prefix("jie ")
-                    .then(parser::i16().with_prefix(", "))
-                    .map(|(r, o)| Instruction::JumpIfEven(r, o)),
-                register
-                    .with_prefix("jio ")
-                    .then(parser::i16().with_prefix(", "))
-                    .map(|(r, o)| Instruction::JumpIfOne(r, o)),
-            ))
+            instructions: parser::parse_tree!(
+                ("hlf ", r @ register) => Instruction::Half(r),
+                ("tpl ", r @ register) => Instruction::Triple(r),
+                ("inc ", r @ register) => Instruction::Increment(r),
+                ("jmp ", v @ parser::i16()) => Instruction::Jump(v),
+                ("jie ", r @ register, ", ", o @ parser::i16()) => Instruction::JumpIfEven(r, o),
+                ("jio ", r @ register, ", ", o @ parser::i16()) => Instruction::JumpIfOne(r, o),
+            )
             .parse_lines(input)?,
         })
     }
diff --git a/crates/year2016/src/assembunny.rs b/crates/year2016/src/assembunny.rs
@@ -48,32 +48,26 @@ impl<const TGL: bool, const OUT: bool> Interpreter<TGL, OUT> {
             .or(parser::i32().map(Value::Number));
 
         Ok(Self {
-            instructions: parser::one_of((
-                register.with_prefix("inc ").map(Instruction::Increment),
-                register.with_prefix("dec ").map(Instruction::Decrement),
-                value
-                    .with_prefix("cpy ")
-                    .then(register.with_prefix(" "))
-                    .map(|(v, r)| Instruction::Copy(v, r)),
-                value
-                    .with_prefix("jnz ")
-                    .then(value.with_prefix(" "))
-                    .map(|(v, o)| Instruction::JumpIfNotZero(v, o)),
-                register.with_prefix("tgl ").map_res(|r| {
+            instructions: parser::parse_tree!(
+                ("inc ", r @ register) => Instruction::Increment(r),
+                ("dec ", r @ register) => Instruction::Decrement(r),
+                ("cpy ", v @ value, " ", r @ register) => Instruction::Copy(v, r),
+                ("jnz ", v @ value, " ", o @ value) => Instruction::JumpIfNotZero(v, o),
+                ("tgl ", r @ register) =?> {
                     if TGL {
                         Ok(Instruction::Toggle(r))
                     } else {
                         Err("tgl instruction not supported")
                     }
-                }),
-                register.with_prefix("out ").map_res(|r| {
+                },
+                ("out ", r @ register) =?> {
                     if OUT {
                         Ok(Instruction::Out(r))
                     } else {
                         Err("out instruction not supported")
                     }
-                }),
-            ))
+                },
+            )
             .parse_lines(input)?,
         })
     }

Original file line number	Diff line number	Diff line change
`@@ -18,3 +18,4 @@ pub use one_of::one_of;`
`18`	`18`	`pub use simple::{byte, byte_range, constant, eof, eol, noop, take_while, take_while1};`
`19`	`19`
`20`	`20`	`pub use crate::parser_literal_map as literal_map;`
	`21`	`+pub use crate::parser_parse_tree as parse_tree;`