diff --git a/Cargo.toml b/Cargo.toml index 219eb7126..072d15b47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ members=[ "lrpar/examples/clone_param", "lrtable", "nimbleparse", + "grammar_testing", ] resolver = "2" @@ -48,3 +49,4 @@ proc-macro2 = "1.0" prettyplease = "0.2.31" syn = "2.0" yaml-rust2 = "0.10.1" +ron = "0.10" diff --git a/doc/src/SUMMARY.md b/doc/src/SUMMARY.md index 8e73b3305..76f5f960c 100644 --- a/doc/src/SUMMARY.md +++ b/doc/src/SUMMARY.md @@ -14,6 +14,7 @@ - [grmtools parsing idioms](parsing_idioms.md) - [Error recovery](errorrecovery.md) - [An AST evaluator](ast_example.md) + - [Testing](testing.md) - [Rust Editions](editions.md) - [The individual libraries and tools](libsandtools.md) - [lrpar](lrpar.md) @@ -21,5 +22,6 @@ - [nimbleparse](nimbleparse.md) - [cfgrammar](cfgrammar.md) - [lrtable](lrtable.md) + - [grammar_testing](grammar_testing.md) - [third party](thirdparty.md) - [Other Rust parsing tools](othertools.md) diff --git a/doc/src/grammar_testing.md b/doc/src/grammar_testing.md new file mode 100644 index 000000000..ca40fb455 --- /dev/null +++ b/doc/src/grammar_testing.md @@ -0,0 +1,6 @@ +# `grammar_testing` + +`grammar_testing` ([crate](https://crates.io/crates/grammar_testing); +[source](https://github.com/softdevteam/grmtools/tree/master/grammar_testing)) is a library +that facilitates testing of grammars. Its primary purpose is to define and standardize serialization formats +so tests can be performed from tools like `nimbleparse` as well as user specified build scripts. \ No newline at end of file diff --git a/doc/src/testing.md b/doc/src/testing.md new file mode 100644 index 000000000..c13257e5e --- /dev/null +++ b/doc/src/testing.md @@ -0,0 +1,165 @@ +# Testing (experimental) + +grmtools testing facilities are currently experimental, under development, and subject to change. +Not all features described in this section are currently implemented (in fact most are incomplete). 
+Currently this text primarily serves as a design document for testing features and feedback is welcome. + +## Testing raw input + +### Example lexer +🚀 +```lex +%% +[a-z] "character" +\" '"' +[ \n\t] ; +``` + + +### Example grammar +🚀 +```yacc +%grmtools{ + yacckind: Original(NoAction), + test_files: "input*.txt" +} + +%% +start: "character"; +``` + + +### Input text +🚀 +Contents specified via `test_files`: +```text +a +``` + +## Specifying multiple globs +🚧 +It would be nice if you could specify multiple globs: +``` +test_files: ["*.txt", "*.grmtest"], +``` + +## Testing with serialized formats + +🚧 +When specifying `test_files` the `grmtest` extension is treated specially. + +### Example Grammar for serialized test data. +🚧 +By specifying the `grmtest` extension for the `test_files` value you enable serialized +test data including output expectations for abstract syntax trees and error text. + +```yacc + test_files: "*.grmtest" +``` + + +### Serialized test input +🚧 +The `grmtest` file deserializes to a struct with many optional fields. +It is a [`ron`](https://crates.io/crates/ron) file using the [Implicit Some](https://github.com/ron-rs/ron/blob/master/docs/extensions.md#implicit_some) extension. +This allows you to omit most of the values, but has the downside that typos may fall back to default values. + +#### Fields +🚧 +* `input: String` field is required, and specifies the input to the parser. +* `pass: bool` defaults to None, only Some if explicitly specified. +* `ast: Option<ASTRepr>` if present specifies the expected output; see [Serializing with nimbleparse](#serializing-with-nimbleparse) for generating ast output. +* `errors: Option<Vec<String>>` if present specifies the expected error text, without error recovery. + +#### Methods +* `should_pass(&self)` Returns the value of the `pass` field when it is `Some`; otherwise returns whether the `errors` field is `None` or its inner value is empty. 
+ +#### Example .grmtest file +🚧 +```grmtest +( + input: "a", + ast: ("start", [ + ("character", "a"), + ]), +) +``` + + +### Serializing with nimbleparse + +#### ast output +* 🚀 Using the `-s` option to nimbleparse to produce a serialized ast. +* 🚧 `-s ast` + +```console +$ echo "a" | nimbleparse -s testing.l testing.y - +("start", [ + ("character", "a"), +]) +``` + + +#### grmtest output +🚧 `-s grmtest` + +```console +$ echo "a" | nimbleparse -s grmtest testing.l testing.y - +( + input: "a\n", + ast: ("start", [ + ("character", "a"), + ]) +) +``` + + +## Testing multiple inputs +🚧 +Specify a plural extension `test_files: "*.grmtests"` to allow a vector of test inputs. + +```grmtests +[ + (input: "a"), + (input: "b"), + (input: "a", ast: ("start", [("character", "a")])), + (input: "abc", pass: false), +] +``` + + +## Escaping and raw strings + +Because grammars often contain quotation marks, it is convenient to support Rust's `raw string` format. +By default the serializer outputs raw strings. + +🚀 +```grmtest +$ echo "\"" | ./target/debug/nimbleparse -s testing.{l,y} - +("start", [ + (r#"""#, r#"""#), +]) +``` + +🚧 +``` +[ + // Raw strings and escaped characters are usable interchangeably. + ( input: r#"""#, ast:("start", [(r#"""#, "\"")])), + ( input: "\"", ast:("start", [("\"", r#"""#)])), +] +``` + + +## Running tests +🚀 + +If using build.rs/`CTParserBuilder`, just run `cargo build`. +If no input arguments are given (`nimbleparse testing.l testing.y`), then `test_files`, if specified, will be used by default. + +## Manual Lexer limitations +🚧 + +Currently grmtools testing facilities are limited to being used with the combination of `lrlex`, +and `lrpar`. There currently isn't any mechanism in place for testing when using a manual lexer. +Doing so requires using unstable/hidden/undocumented aspects of the API. 
\ No newline at end of file diff --git a/grammar_testing/Cargo.toml b/grammar_testing/Cargo.toml new file mode 100644 index 000000000..516f8dcc8 --- /dev/null +++ b/grammar_testing/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "grammar_testing" +version = "0.0.1" +edition = "2024" + +[dependencies] +ron.workspace = true +serde = { workspace = true, features = ["derive"] } \ No newline at end of file diff --git a/grammar_testing/src/lib.rs b/grammar_testing/src/lib.rs new file mode 100644 index 000000000..7e48f7cac --- /dev/null +++ b/grammar_testing/src/lib.rs @@ -0,0 +1,228 @@ +use ron::Options; +use ron::extensions::Extensions; +use ron::ser::PrettyConfig; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] +#[serde(untagged)] +pub enum ASTRepr { + Term(String, String), + Nonterm(String, Vec), +} + +impl ASTRepr { + pub fn to_ron_string(&self) -> Result { + let pretty_config = PrettyConfig::new() + .escape_strings(false) + .extensions(Extensions::IMPLICIT_SOME); + ron::ser::to_string_pretty(self, pretty_config) + } + + pub fn from_ron_str>(s: S) -> Result { + let opts = Options::default(); + Ok(opts.from_str(s.as_ref())?) + } +} + +#[derive(Deserialize, Serialize, PartialEq, Debug, Clone)] +#[serde(deny_unknown_fields)] +pub enum Test { + TestError { + input: String, + errors: Option>, + }, + TestSuccess { + input: String, + ast: Option, + }, +} + +impl Test { + pub fn to_ron_string(&self) -> Result { + let pretty_config = PrettyConfig::new() + .escape_strings(false) + .extensions(Extensions::IMPLICIT_SOME); + ron::ser::to_string_pretty(self, pretty_config) + } + + pub fn from_ron_str>(s: S) -> Result { + let opts = Options::default(); + Ok(opts.from_str(s.as_ref())?) + } + + /// Returns `true` if this is a `TestSuccess` (a test expected to pass) and + /// `false` if it is a `TestError`; no fields of either variant are inspected. 
+ pub fn should_pass(&self) -> bool { + matches!(self, Test::TestSuccess{..}) + } +} + +#[derive(Serialize, Deserialize, PartialEq, Debug)] +#[serde(transparent)] +pub struct Tests(Vec); + +impl Tests { + pub fn to_ron_string(&self) -> Result { + let pretty_config = PrettyConfig::new() + .escape_strings(false) + .extensions(Extensions::IMPLICIT_SOME); + ron::ser::to_string_pretty(self, pretty_config) + } + + pub fn from_ron_str>(s: S) -> Result { + let opts = Options::default(); + Ok(opts.from_str(s.as_ref())?) + } +} + +impl std::ops::Deref for Tests { + type Target = Vec; + + fn deref(&self) -> &Vec { + &self.0 + } +} + +impl IntoIterator for Tests { + type Item = Test; + type IntoIter = std::vec::IntoIter; + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +#[cfg(test)] +mod tests { + use super::{ASTRepr, Test, Tests}; + + #[test] + fn grmtest_input_only() { + let input = r#" + #![enable(implicit_some)] + TestSuccess(input: "a") + "#; + let x: Test = ron::from_str(&input).unwrap(); + assert_eq!( + x, + Test::TestSuccess { + input: "a".to_string(), + ast: None, + } + ); + } + + #[test] + fn grmtest_input_ast() { + let input = r#" + #![enable(implicit_some)] + TestSuccess(input: "a", ast: ("start", [("character", "a")])) + "#; + let x: Test = Test::from_ron_str(&input).unwrap(); + assert_eq!( + x, + Test::TestSuccess { + input: "a".to_string(), + ast: Some(ASTRepr::Nonterm( + "start".to_string(), + vec![ASTRepr::Term("character".to_string(), "a".to_string())] + )) + } + ); + } + + #[test] + fn grmtest_many() { + let input = r#" + #![enable(implicit_some)] + [ + // First 2 are equivalent + TestSuccess(input: "a"), + TestSuccess(input: "a", ast: None), + + TestSuccess(input: "b"), + // The last two are equivalent + TestSuccess(input: "a", ast: ("start", [("character", "a")])), + TestSuccess(input: "a", ast: Some(("start", [("character", "a")]))), + ] + "#; + let xs = Tests::from_ron_str(&input).unwrap(); + for x in &*xs { + eprintln!("{:?}", x); 
+ assert!(x.should_pass()); + } + let first = Test::TestSuccess { + input: "a".to_string(), + ast: None, + }; + let last = Test::TestSuccess { + input: "a".to_string(), + ast: Some(ASTRepr::Nonterm( + "start".to_string(), + vec![ASTRepr::Term("character".to_string(), "a".to_string())], + )), + }; + assert_eq!( + last.to_ron_string().unwrap(), + r#"#![enable(implicit_some)] +TestSuccess( + input: "a", + ast: ("start", [ + ("character", "a"), + ]), +)"# + ); + assert_eq!( + xs, + Tests(vec![ + first.clone(), + first, + Test::TestSuccess { + input: "b".to_string(), + ast: None, + }, + last.clone(), + last + ]) + ); + } + + #[test] + fn grmtest_many_fails() { + let input = r#" + #![enable(implicit_some)] + [ + TestError(input: "abc"), + TestError(input: "abc", errors: ["some error"]), + ] + "#; + let xs = Tests::from_ron_str(&input).unwrap(); + for x in &*xs { + assert!(!x.should_pass()) + } + assert_eq!( + xs, + Tests(vec![ + Test::TestError { + input: "abc".to_string(), + errors: None, + }, + Test::TestError { + input: "abc".to_string(), + errors: Some(vec!["some error".to_string()]), + } + ]) + ); + } + + #[test] + fn astrepr_esc_raw_string() { + let x = ASTRepr::from_ron_str(r##"("start", [("\"", r#"""#)])"##).unwrap(); + assert_eq!( + x, + ASTRepr::Nonterm( + "start".to_string(), + vec![ASTRepr::Term(r#"""#.to_string(), "\"".to_string())] + ) + ); + } +} diff --git a/lrpar/src/lib/mod.rs b/lrpar/src/lib/mod.rs index ad99982c3..c9e107641 100644 --- a/lrpar/src/lib/mod.rs +++ b/lrpar/src/lib/mod.rs @@ -202,6 +202,7 @@ mod dijkstra; pub mod lex_api; #[doc(hidden)] pub mod parser; +// Support module for the lrpar crate `#[test]` utilities. 
#[cfg(test)] pub mod test_utils; diff --git a/nimbleparse/Cargo.toml b/nimbleparse/Cargo.toml index 5b55ee6a9..8f49bd6aa 100644 --- a/nimbleparse/Cargo.toml +++ b/nimbleparse/Cargo.toml @@ -17,7 +17,9 @@ cfgrammar = { path="../cfgrammar", version="0.13" } lrlex = { path="../lrlex", version="0.13" } lrpar = { path="../lrpar", version="0.13" } lrtable = { path="../lrtable", version="0.13" } +grammar_testing = { path="../grammar_testing", version = "0.0.1" } getopts.workspace = true num-traits.workspace = true glob.workspace = true +ron.workspace = true diff --git a/nimbleparse/src/main.rs b/nimbleparse/src/main.rs index 2bb953fce..abc1047fb 100644 --- a/nimbleparse/src/main.rs +++ b/nimbleparse/src/main.rs @@ -5,6 +5,7 @@ use cfgrammar::{ yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind, ast::ASTWithValidityInfo}, }; use getopts::Options; +use grammar_testing::ASTRepr; use lrlex::{DefaultLexerTypes, LRLexError, LRNonStreamingLexerDef, LexerDef}; use lrpar::{ Lexeme, LexerTypes, @@ -14,6 +15,7 @@ use lrpar::{ use lrtable::{Minimiser, StateTable, from_yacc}; use num_traits::ToPrimitive as _; use num_traits::{AsPrimitive, PrimInt, Unsigned}; +use ron; use std::{ env, error::Error, @@ -81,7 +83,7 @@ fn usage(prog: &str, msg: &str) -> ! { eprintln!("{}", msg); } eprintln!( - "Usage: {} [-r ] [-y ] [-dq] ...", + "Usage: {} [-r ] [-y ] [-dqs] ...", leaf ); process::exit(1); @@ -149,6 +151,11 @@ fn main() { .optflag("h", "help", "") .optflag("q", "quiet", "Don't print warnings such as conflicts") .optflag("d", "dump-state-graph", "Print the parser state graph") + .optflag( + "s", + "serialized-test-output", + "Output in lrpartest serialized format", + ) .optopt( "r", "recoverer", @@ -424,6 +431,7 @@ fn main() { recoverykind, }; + let ron_output = matches.opt_present("s"); if matches.free.len() == 3 { let input_path = PathBuf::from(&matches.free[2]); // If there is only one input file we want to print the generic parse tree. 
@@ -435,7 +443,7 @@ fn main() { } else { read_file(&matches.free[2]) }; - if let Err(e) = parser_build_ctxt.parse_string(input_path, input) { + if let Err(e) = parser_build_ctxt.parse_string(input_path, input, ron_output) { eprintln!("{}", e); process::exit(1); } @@ -469,6 +477,7 @@ enum NimbleparseError { Glob(glob::GlobError), Pattern(glob::PatternError), Other(Box), + Serialization(ron::Error), } impl From for NimbleparseError { @@ -489,6 +498,11 @@ impl From for NimbleparseError { } } +impl From for NimbleparseError { + fn from(it: ron::Error) -> Self { + NimbleparseError::Serialization(it) + } +} impl Error for NimbleparseError {} impl fmt::Display for NimbleparseError { @@ -510,6 +524,9 @@ impl fmt::Display for NimbleparseError { Self::Other(e) => { write!(f, "{}", e) } + Self::Serialization(e) => { + write!(f, "{}", e) + } } } } @@ -520,16 +537,44 @@ where usize: AsPrimitive, LexerTypesT::StorageT: TryFrom, { - fn parse_string(self, input_path: PathBuf, input_src: String) -> Result<(), NimbleparseError> { + fn parse_string( + self, + input_path: PathBuf, + input_src: String, + ron_output: bool, + ) -> Result<(), NimbleparseError> { let lexer = self.lexerdef.lexer(&input_src); let pb = RTParserBuilder::new(&self.grm, &self.stable).recoverer(self.recoverykind); - let (pt, errs) = pb.parse_map(&lexer, &|lexeme| Node::Term { lexeme }, &|ridx, nodes| { - Node::Nonterm { ridx, nodes } - }); - match pt { - Some(pt) => println!("{}", pt.pp(&self.grm, &input_src)), - None => println!("Unable to repair input sufficiently to produce parse tree.\n"), - } + let errs = if ron_output { + let (parse_map, errs) = pb.parse_map( + &lexer, + &|lexeme: LexerTypesT::LexemeT| { + let tidx = TIdx(lexeme.tok_id()); + let tn = self.grm.token_name(tidx).unwrap().to_string(); + let lt = input_src[lexeme.span().start()..lexeme.span().end()].to_string(); + ASTRepr::Term(tn, lt) + }, + &|ridx, nodes| { + let rule_name = &self.grm.rule_name_str(ridx); + 
ASTRepr::Nonterm(rule_name.to_string(), nodes) + }, + ); + if let Some(parse_map) = parse_map { + let s = parse_map.to_ron_string()?; + println!("{s}"); + } + errs + } else { + let (pt, errs) = + pb.parse_map(&lexer, &|lexeme| Node::Term { lexeme }, &|ridx, nodes| { + Node::Nonterm { ridx, nodes } + }); + match pt { + Some(pt) => println!("{}", pt.pp(&self.grm, &input_src)), + None => println!("Unable to repair input sufficiently to produce parse tree.\n"), + } + errs + }; if !errs.is_empty() { return Err(NimbleparseError::Source { src_path: input_path,