108 changes: 71 additions & 37 deletions lrlex/src/lib/ctbuilder.rs
@@ -5,12 +5,11 @@ use std::{
collections::{HashMap, HashSet},
env::{current_dir, var},
error::Error,
fmt::{Debug, Display, Write as _},
fmt::{self, Debug, Display, Write as _},
fs::{self, create_dir_all, read_to_string, File},
hash::Hash,
io::Write,
path::{Path, PathBuf},
str::FromStr,
sync::Mutex,
};

@@ -21,13 +20,14 @@ use cfgrammar::{
Setting, Value,
},
markmap::MergeBehavior,
newlinecache::NewlineCache,
span::Location,
Spanned,
};
use glob::glob;
use lazy_static::lazy_static;
use lrpar::{CTParserBuilder, LexerTypes};
use lrpar::{
diagnostics::{DiagnosticFormatter, SpannedDiagnosticFormatter},
CTParserBuilder, LexerTypes,
};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use proc_macro2::TokenStream;
use quote::{format_ident, quote, ToTokens, TokenStreamExt};
@@ -37,6 +37,8 @@ use crate::{DefaultLexerTypes, LRNonStreamingLexer, LRNonStreamingLexerDef, LexF

const RUST_FILE_EXT: &str = "rs";

const ERROR: &str = "[Error]";

lazy_static! {
static ref RE_TOKEN_ID: Regex = Regex::new(r"^[a-zA-Z_][a-zA-Z_0-9]*$").unwrap();
static ref GENERATED_PATHS: Mutex<HashSet<PathBuf>> = Mutex::new(HashSet::new());
@@ -199,6 +201,22 @@ impl ToTokens for QuoteToString<'_> {
}
}

/// A string which uses `Display` for its `Debug` impl.
struct ErrorString(String);
impl fmt::Display for ErrorString {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let ErrorString(s) = self;
write!(f, "{}", s)
}
}
impl fmt::Debug for ErrorString {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let ErrorString(s) = self;
write!(f, "{}", s)
}
}
impl Error for ErrorString {}
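
The point of forwarding `Debug` to `Display` is easy to miss: `CTLexerBuilder` is typically driven from a `build.rs` whose `main` returns `Result<(), Box<dyn Error>>`, and errors escaping from `main` are printed with their `Debug` impl. A minimal sketch of the effect (the `build.rs` framing and the sample message are assumptions, not part of this diff):

```rust
use std::{error::Error, fmt};

/// Same shape as the `ErrorString` added above.
struct ErrorString(String);

impl fmt::Display for ErrorString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

// Forward `Debug` to `Display` so multi-line reports keep their layout.
impl fmt::Debug for ErrorString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl Error for ErrorString {}

fn main() -> Result<(), Box<dyn Error>> {
    // With a derived `Debug` this would print as one escaped literal,
    // e.g. `ErrorString("[Error] first line\n    second line")`; with
    // the manual impl the newlines and indentation survive verbatim.
    Err(Box::new(ErrorString(
        "[Error] first line\n    second line".into(),
    )))
}
```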

/// A `CTLexerBuilder` allows one to specify the criteria for building a statically generated
/// lexer.
pub struct CTLexerBuilder<'a, LexerTypesT: LexerTypes = DefaultLexerTypes<u32>>
@@ -447,14 +465,21 @@ where
}
let lex_src = read_to_string(lexerp)
.map_err(|e| format!("When reading '{}': {e}", lexerp.display()))?;
let lex_diag = SpannedDiagnosticFormatter::new(&lex_src, lexerp);
let mut header = self.header;
let (parsed_header, _) = GrmtoolsSectionParser::new(&lex_src, false)
.parse()
.map_err(|es| {
es.iter()
.map(|e| e.to_string())
.collect::<Vec<_>>()
.join("\n")
let mut out = String::new();
out.push_str(&format!(
"\n{ERROR}{}\n",
lex_diag.file_location_msg(" parsing the `%grmtools` section", None)
));
for e in es {
out.push_str(&indent(&lex_diag.format_error(e).to_string(), " "));
out.push('\n');
}
ErrorString(out)
})?;
header.merge_from(parsed_header)?;
header.mark_used(&"lexerkind".to_string());
@@ -468,38 +493,33 @@
}
}
};
let line_cache = NewlineCache::from_str(&lex_src).unwrap();
#[cfg(test)]
if let Some(inspect_lexerkind_cb) = self.inspect_lexerkind_cb {
inspect_lexerkind_cb(lexerkind)?
}
let (lexerdef, lex_flags): (LRNonStreamingLexerDef<LexerTypesT>, LexFlags) = match lexerkind
{
LexerKind::LRNonStreamingLexer => {
let lex_flags = LexFlags::try_from(&mut header)?;
let lexerdef =
LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(&lex_src, lex_flags)
.map_err(|errs| {
errs.iter()
.map(|e| {
if let Some((line, column)) = line_cache
.byte_to_line_num_and_col_num(
&lex_src,
e.spans().first().unwrap().start(),
)
{
format!("{} at line {line} column {column}", e)
} else {
format!("{}", e)
}
})
.collect::<Vec<_>>()
.join("\n")
})?;
let lex_flags = lexerdef.lex_flags().cloned();
(lexerdef, lex_flags.unwrap())
}
};
let (lexerdef, lex_flags): (LRNonStreamingLexerDef<LexerTypesT>, LexFlags) =
match lexerkind {
LexerKind::LRNonStreamingLexer => {
let lex_flags = LexFlags::try_from(&mut header)?;
let lexerdef = LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(
&lex_src, lex_flags,
)
.map_err(|errs| {
let mut out = String::new();
out.push_str(&format!(
"\n{ERROR}{}\n",
lex_diag.file_location_msg("", None)
));
for e in errs {
out.push_str(&indent(&lex_diag.format_error(e).to_string(), " "));
out.push('\n');
}
ErrorString(out)
})?;
let lex_flags = lexerdef.lex_flags().cloned();
(lexerdef, lex_flags.unwrap())
}
};
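
Both `map_err` closures above follow the same shape: a `{ERROR}` location line, then each formatted error indented one level under it. A hedged sketch of that shared pattern, factored as a standalone function (`aggregate_errors`, `location`, and `format_one` are names invented here, and the four-space indent width is an assumption; `ErrorString` and `indent` are the items from this diff):

```rust
// Hypothetical helper mirroring the two closures above; not part of the PR.
fn aggregate_errors<E>(
    location: &str,                    // e.g. lex_diag.file_location_msg("", None)
    errs: impl IntoIterator<Item = E>,
    format_one: impl Fn(&E) -> String, // e.g. |e| lex_diag.format_error(e).to_string()
) -> ErrorString {
    let mut out = String::new();
    // "[Error]" matches the ERROR const introduced at the top of the file.
    out.push_str(&format!("\n[Error]{location}\n"));
    for e in errs {
        // Indent the (possibly multi-line) report one level under the header.
        out.push_str(&indent(&format_one(&e), "    "));
        out.push('\n');
    }
    ErrorString(out)
}
```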

if let Some(ref lrcfg) = self.lrpar_config {
let mut lexerdef = lexerdef.clone();
@@ -1138,6 +1158,20 @@ pub fn ct_token_map<StorageT: Display>(
Ok(())
}

/// Indents a multi-line string and trims any trailing newline.
/// This currently assumes that indentation on blank lines does not matter.
///
/// The algorithm used by this function is:
/// 1. Prefix `s` with the indentation, indenting the first line.
/// 2. Trim any trailing newlines.
/// 3. Replace all newlines with `\n{indent}` to indent all lines after the first.
///
/// It is plausible that we should add a step 4, but currently do not:
/// 4. Replace all `\n{indent}\n` with `\n\n`
fn indent(s: &str, indent: &str) -> String {
format!("{indent}{}\n", s.trim_end_matches('\n')).replace('\n', &format!("\n{}", indent))
}
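
Tracing the helper on a concrete input makes the trailing behaviour visible: the `\n` re-added by the `format!` is itself rewritten to `\n{indent}`, so the result ends with an indented tail, and the call sites above then push their own `'\n'` after each call. A short walk-through (the literal values below are worked out by hand, not taken from the PR's tests):

```rust
// Step by step for indent("line1\nline2\n", "  "):
//   1. trim_end_matches('\n')      -> "line1\nline2"
//   2. format!("{indent}{}\n", ..) -> "  line1\nline2\n"
//   3. replace('\n', "\n  ")       -> "  line1\n  line2\n  "
assert_eq!(indent("line1\nline2\n", "  "), "  line1\n  line2\n  ");
```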

#[cfg(test)]
mod test {
use std::fs::File;
1 change: 1 addition & 0 deletions lrpar/Cargo.toml
@@ -40,6 +40,7 @@ serde = { workspace = true, features = ["derive"], optional = true }
vob.workspace = true
syn.workspace = true
prettyplease.workspace = true
unicode-width.workspace = true

[target.'cfg(target_arch = "wasm32")'.dependencies]
web-time = "1.1.0"