From 7de0aaeea554abef6261076c15587af07ddf8c24 Mon Sep 17 00:00:00 2001 From: matt rice Date: Fri, 23 May 2025 08:54:15 -0700 Subject: [PATCH] Update the lrlex tool to read %grmtools section, emit diagnostics. --- lrlex/src/main.rs | 124 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 99 insertions(+), 25 deletions(-) diff --git a/lrlex/src/main.rs b/lrlex/src/main.rs index e2ce6647b..eac52a6c9 100644 --- a/lrlex/src/main.rs +++ b/lrlex/src/main.rs @@ -1,16 +1,38 @@ use getopts::Options; use std::{ env, + error::Error, + fmt, fs::File, io::{stderr, stdin, Read, Write}, path::Path, process, - str::FromStr, }; -use cfgrammar::{newlinecache::NewlineCache, Spanned}; -use lrlex::{DefaultLexerTypes, LRNonStreamingLexerDef, LexerDef}; -use lrpar::{Lexeme, Lexer}; +use cfgrammar::header::{GrmtoolsSectionParser, HeaderValue}; +use lrlex::{DefaultLexerTypes, LRNonStreamingLexerDef, LexFlags, LexerDef, LexerKind}; +use lrpar::{ + diagnostics::{DiagnosticFormatter, SpannedDiagnosticFormatter}, + Lexeme, Lexer, +}; + +const ERROR: &str = "[Error]"; + +/// A string which uses `Display` for its `Debug` impl. 
+struct ErrorString(String); +impl fmt::Display for ErrorString { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let ErrorString(s) = self; + write!(f, "{}", s) + } +} +impl fmt::Debug for ErrorString { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let ErrorString(s) = self; + write!(f, "{}", s) + } +} +impl Error for ErrorString {} fn usage(prog: &str, msg: &str) { let path = Path::new(prog); @@ -42,43 +64,80 @@ fn read_file(path: &str) -> String { s } -fn main() { +fn main() -> Result<(), Box> { let args: Vec = env::args().collect(); let prog = args[0].clone(); let matches = match Options::new().optflag("h", "help", "").parse(&args[1..]) { Ok(m) => m, Err(f) => { usage(&prog, f.to_string().as_str()); - return; + return Ok(()); } }; if matches.opt_present("h") || matches.free.len() != 2 { usage(&prog, ""); - return; + return Ok(()); } let lex_l_path = &matches.free[0]; let lex_src = read_file(lex_l_path); - let lexerdef = LRNonStreamingLexerDef::>::from_str(&lex_src) - .unwrap_or_else(|errs| { - let nlcache = NewlineCache::from_str(&lex_src).unwrap(); - for e in errs { - if let Some((line, column)) = nlcache - .byte_to_line_num_and_col_num(&lex_src, e.spans().first().unwrap().start()) - { - writeln!( - stderr(), - "{}: {} at line {line} column {column}", - &lex_l_path, - &e - ) - .ok(); - } else { - writeln!(stderr(), "{}: {}", &lex_l_path, &e).ok(); - } + let lex_diag = SpannedDiagnosticFormatter::new(&lex_src, Path::new(lex_l_path)); + let (mut header, _) = match GrmtoolsSectionParser::new(&lex_src, false).parse() { + Ok(x) => x, + Err(es) => { + eprintln!( + "\n{ERROR}{}", + lex_diag.file_location_msg(" parsing the `%grmtools` section", None) + ); + for e in es { + eprintln!( + "{}", + &indent(" ", &lex_diag.format_error(e).to_string()) + ); } process::exit(1); - }); + } + }; + header.mark_used(&"lexerkind".to_string()); + let lexerkind = if let Some(HeaderValue(_, lk_val)) = header.get("lexerkind") { + LexerKind::try_from(lk_val)? 
+ } else { + LexerKind::LRNonStreamingLexer + }; + + let lexerdef = match lexerkind { + LexerKind::LRNonStreamingLexer => { + let lex_flags = LexFlags::try_from(&mut header)?; + let lexerdef = match LRNonStreamingLexerDef::>::new_with_options( + &lex_src, lex_flags, + ) { + Ok(x) => x, + Err(errs) => { + eprintln!("\n{ERROR}{}", lex_diag.file_location_msg("", None)); + for e in errs { + eprintln!( + "{}", + &indent(" ", &lex_diag.format_error(e).to_string()) + ); + } + process::exit(1); + } + }; + lexerdef + } + _ => { + return Err(ErrorString("Unrecognized lexer kind".to_string()))?; + } + }; + { + let unused_header_values = header.unused(); + if !unused_header_values.is_empty() { + return Err(ErrorString(format!( + "Unused header values: {}", + unused_header_values.join(", ") + )))?; + } + } let input = &read_file(&matches.free[1]); for r in lexerdef.lexer(input).iter() { match r { @@ -93,4 +152,19 @@ fn main() { } } } + Ok(()) +} + +/// Indents a multi-line string and trims any trailing newline. +/// This currently assumes that indentation on blank lines does not matter. +/// +/// The algorithm used by this function is: +/// 1. Prefix `s` with the indentation, indenting the first line. +/// 2. Trim any trailing newlines. +/// 3. Replace all newlines with `\n{indent}` to indent all lines after the first. +/// +/// It is plausible that we should add a step 4, but currently do not: +/// 4. Replace all `\n{indent}\n` with `\n\n` +fn indent(indent: &str, s: &str) -> String { + format!("{indent}{}\n", s.trim_end_matches('\n')).replace('\n', &format!("\n{}", indent)) }