Skip to content

Commit fde9a6d

Browse files
committed
Add source snippets to CTLexerBuilder error types
1 parent 92f97e9 commit fde9a6d

File tree

1 file changed

+71
-37
lines changed

1 file changed

+71
-37
lines changed

lrlex/src/lib/ctbuilder.rs

Lines changed: 71 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,11 @@ use std::{
55
collections::{HashMap, HashSet},
66
env::{current_dir, var},
77
error::Error,
8-
fmt::{Debug, Display, Write as _},
8+
fmt::{self, Debug, Display, Write as _},
99
fs::{self, create_dir_all, read_to_string, File},
1010
hash::Hash,
1111
io::Write,
1212
path::{Path, PathBuf},
13-
str::FromStr,
1413
sync::Mutex,
1514
};
1615

@@ -21,13 +20,14 @@ use cfgrammar::{
2120
Setting, Value,
2221
},
2322
markmap::MergeBehavior,
24-
newlinecache::NewlineCache,
2523
span::Location,
26-
Spanned,
2724
};
2825
use glob::glob;
2926
use lazy_static::lazy_static;
30-
use lrpar::{CTParserBuilder, LexerTypes};
27+
use lrpar::{
28+
diagnostics::{DiagnosticFormatter, SpannedDiagnosticFormatter},
29+
CTParserBuilder, LexerTypes,
30+
};
3131
use num_traits::{AsPrimitive, PrimInt, Unsigned};
3232
use proc_macro2::TokenStream;
3333
use quote::{format_ident, quote, ToTokens, TokenStreamExt};
@@ -37,6 +37,8 @@ use crate::{DefaultLexerTypes, LRNonStreamingLexer, LRNonStreamingLexerDef, LexF
3737

3838
const RUST_FILE_EXT: &str = "rs";
3939

40+
const ERROR: &str = "[Error]";
41+
4042
lazy_static! {
4143
static ref RE_TOKEN_ID: Regex = Regex::new(r"^[a-zA-Z_][a-zA-Z_0-9]*$").unwrap();
4244
static ref GENERATED_PATHS: Mutex<HashSet<PathBuf>> = Mutex::new(HashSet::new());
@@ -199,6 +201,22 @@ impl ToTokens for QuoteToString<'_> {
199201
}
200202
}
201203

204+
/// A string which uses `Display` for its `Debug` impl.
///
/// Useful for error values returned from `build`-style entry points: when such
/// an error is printed via `Debug` (e.g. by `unwrap` or a `main` returning
/// `Result`), the message is shown verbatim rather than as an escaped string.
struct ErrorString(String);

impl fmt::Display for ErrorString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let ErrorString(s) = self;
        write!(f, "{}", s)
    }
}

impl fmt::Debug for ErrorString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Delegate to `Display` rather than duplicating its body: the whole
        // point of this type is that `Debug` output matches `Display` output.
        fmt::Display::fmt(self, f)
    }
}

impl Error for ErrorString {}
202220
/// A `CTLexerBuilder` allows one to specify the criteria for building a statically generated
203221
/// lexer.
204222
pub struct CTLexerBuilder<'a, LexerTypesT: LexerTypes = DefaultLexerTypes<u32>>
@@ -447,14 +465,21 @@ where
447465
}
448466
let lex_src = read_to_string(lexerp)
449467
.map_err(|e| format!("When reading '{}': {e}", lexerp.display()))?;
468+
let lex_diag = SpannedDiagnosticFormatter::new(&lex_src, lexerp);
450469
let mut header = self.header;
451470
let (parsed_header, _) = GrmtoolsSectionParser::new(&lex_src, false)
452471
.parse()
453472
.map_err(|es| {
454-
es.iter()
455-
.map(|e| e.to_string())
456-
.collect::<Vec<_>>()
457-
.join("\n")
473+
let mut out = String::new();
474+
out.push_str(&format!(
475+
"\n{ERROR}{}\n",
476+
lex_diag.file_location_msg(" parsing the `%grmtools` section", None)
477+
));
478+
for e in es {
479+
out.push_str(&indent(&lex_diag.format_error(e).to_string(), " "));
480+
out.push('\n');
481+
}
482+
ErrorString(out)
458483
})?;
459484
header.merge_from(parsed_header)?;
460485
header.mark_used(&"lexerkind".to_string());
@@ -468,38 +493,33 @@ where
468493
}
469494
}
470495
};
471-
let line_cache = NewlineCache::from_str(&lex_src).unwrap();
472496
#[cfg(test)]
473497
if let Some(inspect_lexerkind_cb) = self.inspect_lexerkind_cb {
474498
inspect_lexerkind_cb(lexerkind)?
475499
}
476-
let (lexerdef, lex_flags): (LRNonStreamingLexerDef<LexerTypesT>, LexFlags) = match lexerkind
477-
{
478-
LexerKind::LRNonStreamingLexer => {
479-
let lex_flags = LexFlags::try_from(&mut header)?;
480-
let lexerdef =
481-
LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(&lex_src, lex_flags)
482-
.map_err(|errs| {
483-
errs.iter()
484-
.map(|e| {
485-
if let Some((line, column)) = line_cache
486-
.byte_to_line_num_and_col_num(
487-
&lex_src,
488-
e.spans().first().unwrap().start(),
489-
)
490-
{
491-
format!("{} at line {line} column {column}", e)
492-
} else {
493-
format!("{}", e)
494-
}
495-
})
496-
.collect::<Vec<_>>()
497-
.join("\n")
498-
})?;
499-
let lex_flags = lexerdef.lex_flags().cloned();
500-
(lexerdef, lex_flags.unwrap())
501-
}
502-
};
500+
let (lexerdef, lex_flags): (LRNonStreamingLexerDef<LexerTypesT>, LexFlags) =
501+
match lexerkind {
502+
LexerKind::LRNonStreamingLexer => {
503+
let lex_flags = LexFlags::try_from(&mut header)?;
504+
let lexerdef = LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(
505+
&lex_src, lex_flags,
506+
)
507+
.map_err(|errs| {
508+
let mut out = String::new();
509+
out.push_str(&format!(
510+
"\n{ERROR}{}\n",
511+
lex_diag.file_location_msg("", None)
512+
));
513+
for e in errs {
514+
out.push_str(&indent(&lex_diag.format_error(e).to_string(), " "));
515+
out.push('\n');
516+
}
517+
ErrorString(out)
518+
})?;
519+
let lex_flags = lexerdef.lex_flags().cloned();
520+
(lexerdef, lex_flags.unwrap())
521+
}
522+
};
503523

504524
if let Some(ref lrcfg) = self.lrpar_config {
505525
let mut lexerdef = lexerdef.clone();
@@ -1138,6 +1158,20 @@ pub fn ct_token_map<StorageT: Display>(
11381158
Ok(())
11391159
}
11401160

1161+
/// Indents a multi-line string, returning a value that ends with a newline
/// followed by one trailing copy of the indentation.
/// This currently assumes that indentation on blank lines does not matter.
///
/// The algorithm used by this function is:
/// 1. Trim any trailing newlines from `s`.
/// 2. Prefix the result with the indentation (indenting the first line) and
///    append a single newline.
/// 3. Replace all newlines with `\n{indent}` to indent all lines after the
///    first — including the trailing empty line produced by step 2.
///
/// It is plausible that we should add a step 4, but currently do not:
/// 4. Replace all `\n{indent}\n` with `\n\n`
fn indent(s: &str, indent: &str) -> String {
    format!("{indent}{}\n", s.trim_end_matches('\n')).replace('\n', &format!("\n{}", indent))
}
1174+
11411175
#[cfg(test)]
11421176
mod test {
11431177
use std::fs::File;

0 commit comments

Comments
 (0)