From e1f967625b2d7d266012b86d92541ff66305e6a9 Mon Sep 17 00:00:00 2001 From: matt rice Date: Sun, 19 Oct 2025 01:06:12 -0700 Subject: [PATCH 1/2] Generalize lifetime of `CTLexerBuilder` lrpar_config callback --- lrlex/src/lib/ctbuilder.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lrlex/src/lib/ctbuilder.rs b/lrlex/src/lib/ctbuilder.rs index 8d85ef6fd..9b3060b33 100644 --- a/lrlex/src/lib/ctbuilder.rs +++ b/lrlex/src/lib/ctbuilder.rs @@ -226,7 +226,8 @@ where LexerTypesT::StorageT: Debug + Eq + Hash + ToTokens, usize: num_traits::AsPrimitive, { - lrpar_config: Option) -> CTParserBuilder>>, + lrpar_config: + Option) -> CTParserBuilder + 'a>>, lexer_path: Option, output_path: Option, lexerkind: Option, @@ -313,9 +314,9 @@ where /// .lexer_in_src_dir("calc.l")? /// .build()?; /// ``` - pub fn lrpar_config(mut self, config_func: F) -> Self + pub fn lrpar_config(mut self, config_func: F) -> Self where - F: 'static + Fn(CTParserBuilder) -> CTParserBuilder, + F: Fn(CTParserBuilder) -> CTParserBuilder, { self.lrpar_config = Some(Box::new(config_func)); self From 00281263c43132b32491230f6bf6b2dc749a531d Mon Sep 17 00:00:00 2001 From: matt rice Date: Sun, 19 Oct 2025 01:06:12 -0700 Subject: [PATCH 2/2] Add extra_files yaml key to cttests, test %grmtools{test_files} Because `%grmtools{test_files}` needs *both* the lexer and parser to work, we migrate this code from using the separate `CTLexerBuilder` and `CTParserBuilder` with `rule_ids_map` to the combined method using `lrpar_config`. That causes `%grmtools{test_files}` to be invoked. Also add an `extra_files` key to the `.test` file yaml; the files it lists are written to `$OUT_DIR`, which is the directory the `test_files` globs are relative to. 
--- lrpar/cttests/src/calc_input.test | 49 ++++++++ lrpar/cttests/src/cgen_helper.rs | 110 ++++++++++-------- lrpar/cttests/src/ctfails/calc_bad_input.test | 47 ++++++++ lrpar/cttests/src/lib.rs | 13 +++ 4 files changed, 168 insertions(+), 51 deletions(-) create mode 100644 lrpar/cttests/src/calc_input.test create mode 100644 lrpar/cttests/src/ctfails/calc_bad_input.test diff --git a/lrpar/cttests/src/calc_input.test b/lrpar/cttests/src/calc_input.test new file mode 100644 index 000000000..c731508d1 --- /dev/null +++ b/lrpar/cttests/src/calc_input.test @@ -0,0 +1,49 @@ +name: Test with calculator input from %grmtools{test_files} +grammar: | + %grmtools { + yacckind: Original(YaccOriginalActionKind::UserAction), + recoverer: RecoveryKind::None, + test_files: "*.calc_input" + } + %start Expr + %actiontype Result + %avoid_insert 'INT' + %% + Expr: Expr '+' Term { Ok($1? + $3?) } + | Term { $1 } + ; + + Term: Term '*' Factor { Ok($1? * $3?) } + | Factor { $1 } + ; + + Factor: '(' Expr ')' { $2 } + | 'INT' { + let l = $1.map_err(|_| ())?; + match $lexer.span_str(l.span()).parse::() { + Ok(v) => Ok(v), + Err(_) => { + let ((_, col), _) = $lexer.line_col(l.span()); + eprintln!("Error at column {}: '{}' cannot be represented as a u64", + col, + $lexer.span_str(l.span())); + Err(()) + } + } + } + ; + +lexer: | + %% + [0-9]+ "INT" + \+ "+" + \* "*" + \( "(" + \) ")" + [\t\n ]+ ; +extra_files: + input1.calc_input: | + 1 + 2 * 3 + input2.calc_input: | + (1 + 2) * 3 + diff --git a/lrpar/cttests/src/cgen_helper.rs b/lrpar/cttests/src/cgen_helper.rs index 8a6f8ca7d..c4e4e5140 100644 --- a/lrpar/cttests/src/cgen_helper.rs +++ b/lrpar/cttests/src/cgen_helper.rs @@ -1,6 +1,6 @@ use cfgrammar::yacc::{YaccKind, YaccOriginalActionKind}; -use lrlex::{CTLexerBuilder, DefaultLexerTypes}; -use lrpar::{CTParserBuilder, RecoveryKind}; +use lrlex::CTLexerBuilder; +use lrpar::RecoveryKind; use std::{ env, fs, path::{Path, PathBuf}, @@ -10,11 +10,10 @@ use yaml_rust2::YamlLoader; 
#[allow(dead_code)] pub(crate) fn run_test_path>(path: P) -> Result<(), Box> { let out_dir = env::var("OUT_DIR").unwrap(); - let path = path.as_ref(); - if path.is_file() { - println!("cargo::rerun-if-changed={}", path.display()); + if path.as_ref().is_file() { + println!("cargo::rerun-if-changed={}", path.as_ref().display()); // Parse test file - let s = fs::read_to_string(path).unwrap(); + let s = fs::read_to_string(path.as_ref()).unwrap(); let docs = YamlLoader::load_from_str(&s).unwrap(); let grm = &docs[0]["grammar"].as_str().unwrap(); let lex = &docs[0]["lexer"].as_str().unwrap(); @@ -37,26 +36,6 @@ pub(crate) fn run_test_path>(path: P) -> Result<(), Box Some(RecoveryKind::None), _ => None, }; - let (negative_yacc_flags, positive_yacc_flags) = &docs[0]["yacc_flags"] - .as_vec() - .map(|flags_vec| { - flags_vec - .iter() - .partition(|flag| flag.as_str().unwrap().starts_with('!')) - }) - .unwrap_or_else(|| (Vec::new(), Vec::new())); - let positive_yacc_flags = positive_yacc_flags - .iter() - .map(|flag| flag.as_str().unwrap()) - .collect::>(); - let negative_yacc_flags = negative_yacc_flags - .iter() - .map(|flag| { - let flag = flag.as_str().unwrap(); - flag.strip_prefix('!').unwrap() - }) - .collect::>(); - let yacc_flags = (&positive_yacc_flags, &negative_yacc_flags); let (negative_lex_flags, positive_lex_flags) = &docs[0]["lex_flags"] .as_vec() .map(|flags_vec| { @@ -82,7 +61,7 @@ pub(crate) fn run_test_path>(path: P) -> Result<(), Box>(path: P) -> Result<(), Box>::new(); - if let Some(yacckind) = yacckind { - cp_build = cp_build.yacckind(yacckind); - } - if let Some(recoverer) = recoverer { - cp_build = cp_build.recoverer(recoverer) - } - cp_build = cp_build - .grammar_path(pg.to_str().unwrap()) - .output_path(&outp); - if let Some(flag) = check_flag(yacc_flags, "error_on_conflicts") { - cp_build = cp_build.error_on_conflicts(flag) - } - if let Some(flag) = check_flag(yacc_flags, "warnings_are_errors") { - cp_build = cp_build.warnings_are_errors(flag) + 
if let Some(extra_files) = docs[0]["extra_files"].as_hash() { + for (filename, contents) in extra_files.iter() { + let mut out_file = PathBuf::from(&out_dir); + let filename = filename.as_str().unwrap(); + out_file.push(filename); + let contents = contents.as_str().unwrap(); + fs::write(&out_file, contents).unwrap(); + } } - if let Some(flag) = check_flag(yacc_flags, "show_warnings") { - cp_build = cp_build.show_warnings(flag) - }; - let cp = cp_build.build()?; + // Build parser and lexer let mut outl = PathBuf::from(&out_dir); outl.push(format!("{}.l.rs", base)); outl.set_extension("rs"); let mut cl_build = CTLexerBuilder::new() - .rule_ids_map(cp.token_map()) + .lrpar_config(|mut cp_build| { + let mut outp = PathBuf::from(&out_dir); + outp.push(format!("{}.y.rs", base)); + outp.set_extension("rs"); + let (negative_yacc_flags, positive_yacc_flags) = &docs[0]["yacc_flags"] + .as_vec() + .map(|flags_vec| { + flags_vec + .iter() + .partition(|flag| flag.as_str().unwrap().starts_with('!')) + }) + .unwrap_or_else(|| (Vec::new(), Vec::new())); + let positive_yacc_flags = positive_yacc_flags + .iter() + .map(|flag| flag.as_str().unwrap()) + .collect::>(); + let negative_yacc_flags = negative_yacc_flags + .iter() + .map(|flag| { + let flag = flag.as_str().unwrap(); + flag.strip_prefix('!').unwrap() + }) + .collect::>(); + let yacc_flags = (&positive_yacc_flags, &negative_yacc_flags); + if let Some(yacckind) = yacckind { + cp_build = cp_build.yacckind(yacckind); + } + if let Some(recoverer) = recoverer { + cp_build = cp_build.recoverer(recoverer) + } + cp_build = cp_build + .grammar_path(pg.to_str().unwrap()) + .output_path(&outp); + if let Some(flag) = check_flag(yacc_flags, "error_on_conflicts") { + cp_build = cp_build.error_on_conflicts(flag) + } + if let Some(flag) = check_flag(yacc_flags, "warnings_are_errors") { + cp_build = cp_build.warnings_are_errors(flag) + } + if let Some(flag) = check_flag(yacc_flags, "show_warnings") { + cp_build = cp_build.show_warnings(flag) 
+ }; + cp_build + }) .lexer_path(pl.to_str().unwrap()) .output_path(&outl); if let Some(flag) = check_flag(lex_flags, "allow_missing_terms_in_lexer") { diff --git a/lrpar/cttests/src/ctfails/calc_bad_input.test b/lrpar/cttests/src/ctfails/calc_bad_input.test new file mode 100644 index 000000000..31deceb22 --- /dev/null +++ b/lrpar/cttests/src/ctfails/calc_bad_input.test @@ -0,0 +1,47 @@ +name: Test calculator with malformed input from %grmtools{test_files} +grammar: | + %grmtools { + yacckind: Original(YaccOriginalActionKind::UserAction), + recoverer: RecoveryKind::None, + test_files: "*.bad_input" + } + %start Expr + %actiontype Result + %avoid_insert 'INT' + %% + Expr: Expr '+' Term { Ok($1? + $3?) } + | Term { $1 } + ; + + Term: Term '*' Factor { Ok($1? * $3?) } + | Factor { $1 } + ; + + Factor: '(' Expr ')' { $2 } + | 'INT' { + let l = $1.map_err(|_| ())?; + match $lexer.span_str(l.span()).parse::() { + Ok(v) => Ok(v), + Err(_) => { + let ((_, col), _) = $lexer.line_col(l.span()); + eprintln!("Error at column {}: '{}' cannot be represented as a u64", + col, + $lexer.span_str(l.span())); + Err(()) + } + } + } + ; + +lexer: | + %% + [0-9]+ "INT" + \+ "+" + \* "*" + \( "(" + \) ")" + [\t\n ]+ ; +extra_files: + input1.bad_input: | + (1 + 2 * 3 + diff --git a/lrpar/cttests/src/lib.rs b/lrpar/cttests/src/lib.rs index 4f98b8102..eb845b6c9 100644 --- a/lrpar/cttests/src/lib.rs +++ b/lrpar/cttests/src/lib.rs @@ -29,6 +29,9 @@ lrpar_mod!("calc_nodefault_yacckind.y"); lrlex_mod!("calc_unsafeaction.l"); lrpar_mod!("calc_unsafeaction.y"); +lrlex_mod!("calc_input.l"); +lrpar_mod!("calc_input.y"); + lrlex_mod!("expect.l"); lrpar_mod!("expect.y"); @@ -103,6 +106,16 @@ fn test_basic_actions() { } } +#[test] +fn test_calc_input() { + let lexerdef = calc_input_l::lexerdef(); + let lexer = lexerdef.lexer("2+3"); + match calc_input_y::parse(&lexer) { + (Some(Ok(5)), ref errs) if errs.is_empty() => (), + _ => unreachable!(), + } +} + #[test] fn test_nodefault_yacckind() { let 
lexerdef = calc_nodefault_yacckind_l::lexerdef();