diff --git a/Cargo.toml b/Cargo.toml index af34f5fa9..3f1044736 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,3 +47,4 @@ proc-macro2 = "1.0" prettyplease = "0.2.31" syn = "2.0" yaml-rust2 = "0.10.1" +ron = "0.10.1" \ No newline at end of file diff --git a/lrpar/src/lib/parser.rs b/lrpar/src/lib/parser.rs index 355f22055..6ac9f54e2 100644 --- a/lrpar/src/lib/parser.rs +++ b/lrpar/src/lib/parser.rs @@ -20,7 +20,9 @@ use num_traits::{AsPrimitive, PrimInt, Unsigned}; use proc_macro2::TokenStream; use quote::quote; #[cfg(feature = "serde")] -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Serialize, Serializer, ser::SerializeMap}; +#[cfg(feature = "serde")] +use std::collections::VecDeque; use crate::{LexError, Lexeme, LexerTypes, NonStreamingLexer, cpctplus}; @@ -41,6 +43,88 @@ pub enum Node, StorageT> { }, } +#[cfg(feature = "serde")] +pub struct SerializableNode<'b, 'a, LexemeT: Lexeme, StorageT> { + grm: &'b YaccGrammar, + src: &'a str, + node: Node, +} + +#[cfg(feature = "serde")] +impl<'b, 'a, StorageT, LexemeT: Lexeme> SerializableNode<'b, 'a, LexemeT, StorageT> { + pub fn new( + src: &'a str, + grm: &'b YaccGrammar, + node: Node, + ) -> SerializableNode<'b, 'a, LexemeT, StorageT> { + SerializableNode { grm, src, node } + } +} +#[cfg(feature = "serde")] +impl<'b, 'a, StorageT, LexemeT> Serialize for SerializableNode<'b, 'a, LexemeT, StorageT> +where + LexemeT: Lexeme, + usize: AsPrimitive, + StorageT: 'static + PrimInt + Unsigned, +{ + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + #[derive(Serialize)] + enum NamedNode<'a> { + Term(&'a str, &'a str), + Nonterm(&'a str, VecDeque>), + } + + fn insert_inner<'a>( + top: &mut VecDeque>, + node: NamedNode<'a>, + limit: usize, + acc: usize, + ) { + match top.back_mut() { + Some(NamedNode::Nonterm(_, entries)) => { + if acc < limit { + insert_inner(entries, node, limit, acc + 1) + } else { + top.push_back(node) + } + } + _ => top.push_back(node), + } + } + let mut st = vec![(0, &self.node)]; + let mut map = serializer.serialize_map(Some(2))?; + map.serialize_entry("src", self.src)?; + let mut out: VecDeque = VecDeque::new(); + while let Some((limit, e)) = st.pop() { + match e { + Node::Term { lexeme } => { + let tidx = TIdx(lexeme.tok_id()); + let tn = self.grm.token_name(tidx).unwrap(); + let lt = &self.src[lexeme.span().start()..lexeme.span().end()]; + insert_inner(&mut out, NamedNode::Term(tn, lt), limit, 0); + } + Node::Nonterm { ridx, nodes } => { + let rule_name = self.grm.rule_name_str(*ridx); + insert_inner( + &mut out, + NamedNode::Nonterm(rule_name, VecDeque::new()), + limit, + 0, + ); + for x in nodes.iter().rev() { + st.push((limit + 1, x)); + } + } + } + } + map.serialize_entry("ast", &out)?; + map.end() + } +} + impl, StorageT: 'static + PrimInt + Unsigned> Node where usize: AsPrimitive, diff --git a/nimbleparse/Cargo.toml b/nimbleparse/Cargo.toml index 5b55ee6a9..ab66f9ebc 100644 --- a/nimbleparse/Cargo.toml +++ b/nimbleparse/Cargo.toml @@ -15,9 +15,10 @@ name = "nimbleparse" [dependencies] cfgrammar = { path="../cfgrammar", version="0.13" } lrlex = { path="../lrlex", version="0.13" } -lrpar = { path="../lrpar", version="0.13" } +lrpar = { path="../lrpar", version="0.13", features = ["serde"] } lrtable = { path="../lrtable", version="0.13" } getopts.workspace = true num-traits.workspace = true glob.workspace = true +ron.workspace = true diff --git a/nimbleparse/src/main.rs b/nimbleparse/src/main.rs index 390928107..727ad16e2 100644 --- a/nimbleparse/src/main.rs +++ b/nimbleparse/src/main.rs @@ -9,7 +9,7 @@ use lrlex::{DefaultLexerTypes, LRLexError, LRNonStreamingLexerDef, LexerDef}; use lrpar::{ LexerTypes, diagnostics::{DiagnosticFormatter, SpannedDiagnosticFormatter}, - parser::{RTParserBuilder, RecoveryKind}, + parser::{RTParserBuilder, RecoveryKind, SerializableNode}, }; use lrtable::{Minimiser, StateTable, from_yacc}; use num_traits::AsPrimitive; @@ -37,7 +37,7 @@ fn usage(prog: &str, msg: &str) -> ! { eprintln!("{}", msg); } eprintln!( - "Usage: {} [-r ] [-y ] [-dq] ...", + "Usage: {} [-r ] [-y ] [-dqt] ...", leaf ); process::exit(1); @@ -105,6 +105,7 @@ fn main() { .optflag("h", "help", "") .optflag("q", "quiet", "Don't print warnings such as conflicts") .optflag("d", "dump-state-graph", "Print the parser state graph") + .optflag("t", "test-file-output", "Output a test file") .optopt( "r", "recoverer", @@ -380,6 +381,7 @@ fn main() { recoverykind, }; + let ron_output = matches.opt_present("t"); if matches.free.len() == 3 { let input_path = PathBuf::from(&matches.free[2]); // If there is only one input file we want to print the generic parse tree. @@ -391,11 +393,11 @@ fn main() { } else { read_file(&matches.free[2]) }; - if let Err(e) = parser_build_ctxt.parse_string(input_path, input) { + if let Err(e) = parser_build_ctxt.parse_string(input_path, input, ron_output) { eprintln!("{}", e); process::exit(1); } - } else if let Err(e) = parser_build_ctxt.parse_many(&matches.free[2..]) { + } else if let Err(e) = parser_build_ctxt.parse_many(&matches.free[2..], ron_output) { eprintln!("{}", e); process::exit(1); } @@ -476,12 +478,30 @@ where usize: AsPrimitive, LexerTypesT::StorageT: TryFrom, { - fn parse_string(self, input_path: PathBuf, input_src: String) -> Result<(), NimbleparseError> { + fn parse_string( + self, + input_path: PathBuf, + input_src: String, + ron_output: bool, + ) -> Result<(), NimbleparseError> { let lexer = self.lexerdef.lexer(&input_src); let pb = RTParserBuilder::new(&self.grm, &self.stable).recoverer(self.recoverykind); let (pt, errs) = pb.parse_generictree(&lexer); match pt { - Some(pt) => println!("{}", pt.pp(&self.grm, &input_src)), + Some(pt) => { + println!( + "{}", + if ron_output { + ron::ser::to_string_pretty( + &SerializableNode::new(&input_src, &self.grm, pt), + ron::ser::PrettyConfig::default(), + ) + .unwrap() + } else { + pt.pp(&self.grm, &input_src) + } + ); + } None => println!("Unable to repair input sufficiently to produce parse tree.\n"), } if !errs.is_empty() { @@ -496,7 +516,7 @@ where Ok(()) } - fn parse_many(self, input_paths: &[String]) -> Result<(), NimbleparseError> { + fn parse_many(self, input_paths: &[String], _ron_output: bool) -> Result<(), NimbleparseError> { let input_paths = if input_paths.is_empty() { // If given no input paths, try to find some with `test_files` in the header. if let Some(HeaderValue(_, val)) = self.header.get("test_files") {