10 changes: 10 additions & 0 deletions cfgrammar/src/lib/yacc/grammar.rs
@@ -644,6 +644,16 @@ where
m
}

/// Return an iterator over the `(name, token index)` pairs of all named tokens in this
/// grammar.
pub fn tokens_map_iter(&self) -> impl Iterator<Item = (&str, TIdx<StorageT>)> {
self.iter_tidxs().filter_map(|tidx| {
if let Some((_, name)) = self.token_names[usize::from(tidx)].as_ref() {
Some((name.as_str(), tidx))
} else {
None
}
})
}

/// Return the index of the token named `n` or `None` if it doesn't exist.
pub fn token_idx(&self, n: &str) -> Option<TIdx<StorageT>> {
self.token_names
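The `tokens_map_iter` added above yields name/index pairs lazily, so callers that only need to iterate can avoid materialising the intermediate `HashMap` that `tokens_map` constructs. Below is a minimal sketch of how a caller could still collect an owned map when one is needed; the `owned_token_map` helper name is illustrative and `grm` is assumed to be a `YaccGrammar<u32>` built elsewhere.

```rust
use std::collections::HashMap;

use cfgrammar::{TIdx, yacc::YaccGrammar};

// Illustrative helper: collect the lazily-produced (name, index) pairs into an
// owned map, only paying for the allocation when one is actually required.
fn owned_token_map(grm: &YaccGrammar<u32>) -> HashMap<String, TIdx<u32>> {
    grm.tokens_map_iter()
        .map(|(name, tidx)| (name.to_owned(), tidx))
        .collect()
}
```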
208 changes: 85 additions & 123 deletions lrlex/src/lib/ctbuilder.rs
@@ -2,6 +2,7 @@

use std::{
any::type_name,
borrow::Borrow,
collections::{HashMap, HashSet},
env::{current_dir, var},
error::Error,
@@ -446,7 +447,7 @@ where
/// * or, if no module name was explicitly specified, then for the file `/a/b/c.l` the
/// module name is `c_l` (i.e. the file's leaf name, minus its extension, with a suffix of
/// `_l`).
pub fn build(mut self) -> Result<CTLexer, Box<dyn Error>> {
pub fn build(self) -> Result<CTLexer, Box<dyn Error>> {
let lexerp = self
.lexer_path
.as_ref()
@@ -521,15 +522,15 @@
}
};

if let Some(ref lrcfg) = self.lrpar_config {
let mut lexerdef = lexerdef.clone();
let ct_parser = if let Some(ref lrcfg) = self.lrpar_config {
let mut closure_lexerdef = lexerdef.clone();
let mut ctp = CTParserBuilder::<LexerTypesT>::new().inspect_rt(Box::new(
move |yacc_header, rtpb, rule_ids_map, grm_path| {
let owned_map = rule_ids_map
.iter()
.map(|(x, y)| (&**x, *y))
.collect::<HashMap<_, _>>();
lexerdef.set_rule_ids(&owned_map);
closure_lexerdef.set_rule_ids(&owned_map);
yacc_header.mark_used(&"test_files".to_string());
let test_glob = yacc_header.get("test_files");
match test_glob {
@@ -540,7 +541,8 @@
{
let path = path?;
let input = fs::read_to_string(&path)?;
let l: LRNonStreamingLexer<LexerTypesT> = lexerdef.lexer(&input);
let l: LRNonStreamingLexer<LexerTypesT> =
closure_lexerdef.lexer(&input);
for e in rtpb.parse_noaction(&l) {
Err(format!("parsing {}: {}", path.display(), e))?
}
@@ -553,9 +555,11 @@
},
));
ctp = lrcfg(ctp);
let map = ctp.build()?;
self.rule_ids_map = Some(map.token_map().to_owned());
}
let ct_parser = ctp.build()?;
Some(ct_parser)
} else {
None
};

let mut lexerdef = Box::new(lexerdef);
let unused_header_values = header.unused();
@@ -566,36 +570,84 @@
}

let (missing_from_lexer, missing_from_parser) = match self.rule_ids_map {
Some(ref rim) => {
// Convert from HashMap<String, _> to HashMap<&str, _>
let owned_map = rim
.iter()
.map(|(x, y)| (&**x, *y))
.collect::<HashMap<_, _>>();
let (x, y) = lexerdef.set_rule_ids(&owned_map);
(
x.map(|a| a.iter().map(|&b| b.to_string()).collect::<HashSet<_>>()),
y.map(|a| a.iter().map(|&b| b.to_string()).collect::<HashSet<_>>()),
)
}
None => (None, None),
Some(ref rim) => lexerdef
.set_rule_ids_spanned_iter(rim.iter().map(|(name, tidx)| (name.as_str(), *tidx))),
None => match &ct_parser {
Some(ct_parser) => lexerdef.set_rule_ids_spanned_iter(
ct_parser.yacc_grammar().iter_tidxs().filter_map(|tidx| {
ct_parser
.yacc_grammar()
.token_name(tidx)
.map(|n| (n, tidx.as_storaget()))
}),
),
None => (None, None),
},
};

let mut has_unallowed_missing = false;
let err_indent = " ".repeat(ERROR.len());
if !self.allow_missing_terms_in_lexer {
if let Some(ref mfl) = missing_from_lexer {
eprintln!("Error: the following tokens are used in the grammar but are not defined in the lexer:");
for n in mfl {
eprintln!(" {}", n);
if let Some(ct_parser) = &ct_parser {
let grm = ct_parser.yacc_grammar();
let token_spans = mfl
.iter()
.map(|name| {
ct_parser
.yacc_grammar()
.token_span(*grm.tokens_map().get(name).unwrap())
.expect("Given token should have a span")
})
.collect::<Vec<_>>();

let yacc_diag = SpannedDiagnosticFormatter::new(
ct_parser.grammar_src(),
ct_parser.grammar_path(),
);

eprintln!("{ERROR} these tokens are not referenced in the lexer but defined as follows");
eprintln!(
"{err_indent} {}",
yacc_diag.file_location_msg("in the grammar", None)
);
for span in token_spans {
eprintln!(
"{}",
yacc_diag.underline_span_with_text(
span,
"Missing from lexer".to_string(),
'^'
)
);
}
eprintln!();
} else {
eprintln!("{ERROR} the following tokens are used in the grammar but are not defined in the lexer:");
for n in mfl {
eprintln!(" {}", n);
}
}
has_unallowed_missing = true;
}
}
if !self.allow_missing_tokens_in_parser {
if let Some(ref mfp) = missing_from_parser {
eprintln!("Error: the following tokens are defined in the lexer but not used in the grammar:");
for n in mfp {
eprintln!(" {}", n);
eprintln!(
"{ERROR} these tokens are not referenced in the grammar but defined as follows"
);
eprintln!(
"{err_indent} {}",
lex_diag.file_location_msg("in the lexer", None)
);
for (_, span) in mfp {
eprintln!(
"{}",
lex_diag.underline_span_with_text(
*span,
"Missing from parser".to_string(),
'^'
)
);
}
has_unallowed_missing = true;
}
@@ -748,90 +800,12 @@ where
// binary etc).
if let Ok(curs) = read_to_string(outp) {
if curs == outs {
return Ok(CTLexer {
missing_from_lexer,
missing_from_parser,
});
return Ok(CTLexer);
}
}
let mut f = File::create(outp)?;
f.write_all(outs.as_bytes())?;
Ok(CTLexer {
missing_from_lexer,
missing_from_parser,
})
}

/// Given the filename `a/b.l` as input, statically compile the file `src/a/b.l` into a Rust
/// module which can then be imported using `lrlex_mod!("a/b.l")`. This is a convenience
/// function around [`process_file`](struct.CTLexerBuilder.html#method.process_file) which makes
/// it easier to compile `.l` files stored in a project's `src/` directory: please see
/// [`process_file`](#method.process_file) for additional constraints and information about the
/// generated files.
#[deprecated(
since = "0.11.0",
note = "Please use lexer_in_src_dir() and build() instead"
)]
#[allow(deprecated)]
pub fn process_file_in_src(
self,
srcp: &str,
) -> Result<(Option<HashSet<String>>, Option<HashSet<String>>), Box<dyn Error>> {
let mut inp = current_dir()?;
inp.push("src");
inp.push(srcp);
let mut outp = PathBuf::new();
outp.push(var("OUT_DIR").unwrap());
outp.push(Path::new(srcp).parent().unwrap().to_str().unwrap());
create_dir_all(&outp)?;
let mut leaf = Path::new(srcp)
.file_name()
.unwrap()
.to_str()
.unwrap()
.to_owned();
write!(leaf, ".{}", RUST_FILE_EXT).ok();
outp.push(leaf);
self.process_file(inp, outp)
}

/// Statically compile the `.l` file `inp` into Rust, placing the output into the file `outp`.
/// The latter defines a module as follows:
///
/// ```text
/// mod modname {
/// pub fn lexerdef() -> LexerDef<LexerTypesT::StorageT> { ... }
///
/// ...
/// }
/// ```
///
/// where:
/// * `modname` is either:
/// * the module name specified [`mod_name`](#method.mod_name)
/// * or, if no module name was explicitly specified, then for the file `/a/b/c.l` the
/// module name is `c_l` (i.e. the file's leaf name, minus its extension, with a prefix of
/// `_l`).
#[deprecated(
since = "0.11.0",
note = "Please use lexer_in_src_dir() and build() instead"
)]
pub fn process_file<P, Q>(
mut self,
inp: P,
outp: Q,
) -> Result<(Option<HashSet<String>>, Option<HashSet<String>>), Box<dyn Error>>
where
P: AsRef<Path>,
Q: AsRef<Path>,
{
self.lexer_path = Some(inp.as_ref().to_owned());
self.output_path = Some(outp.as_ref().to_owned());
let cl = self.build()?;
Ok((
cl.missing_from_lexer().map(|x| x.to_owned()),
cl.missing_from_parser().map(|x| x.to_owned()),
))
Ok(CTLexer)
}

/// If passed false, tokens used in the grammar but not defined in the lexer will cause a
@@ -1064,20 +1038,7 @@
}

/// An interface to the result of [CTLexerBuilder::build()].
pub struct CTLexer {
missing_from_lexer: Option<HashSet<String>>,
missing_from_parser: Option<HashSet<String>>,
}

impl CTLexer {
fn missing_from_lexer(&self) -> Option<&HashSet<String>> {
self.missing_from_lexer.as_ref()
}

fn missing_from_parser(&self) -> Option<&HashSet<String>> {
self.missing_from_parser.as_ref()
}
}
pub struct CTLexer;

/// Create a Rust module named `mod_name` that can be imported with
/// [`lrlex_mod!(mod_name)`](crate::lrlex_mod). The module contains one `const` `StorageT` per
@@ -1105,7 +1066,7 @@ impl CTLexer
/// ```
pub fn ct_token_map<StorageT: Display>(
mod_name: &str,
token_map: &HashMap<String, StorageT>,
token_map: impl Borrow<HashMap<String, StorageT>>,
rename_map: Option<&HashMap<&str, &str>>,
) -> Result<(), Box<dyn Error>> {
// Record the time that this version of lrlex was built. If the source code changes and rustc
Expand All @@ -1122,6 +1083,7 @@ pub fn ct_token_map<StorageT: Display>(
.ok();
outs.push_str(
&token_map
.borrow()
.iter()
.map(|(k, v)| {
let k = match rename_map {
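With `CTLexer` reduced to a unit struct, missing-token information is no longer returned from `build()`; it is surfaced through the spanned diagnostics printed above (and a build error) instead. A typical caller therefore only handles the `Result`. The following is a minimal `build.rs` sketch of the non-deprecated entry points, assuming `calc.l` and `calc.y` live in `src/` and that the grammar's `%grmtools` header carries any remaining configuration such as a `test_files` glob; the file names are illustrative.

```rust
// build.rs — sketch only
use lrlex::CTLexerBuilder;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    CTLexerBuilder::new()
        // Configure the companion lrpar build; grammar_in_src_dir resolves
        // "calc.y" relative to the crate's src/ directory.
        .lrpar_config(|ctp| ctp.grammar_in_src_dir("calc.y").unwrap())
        .lexer_in_src_dir("calc.l")?
        .build()?; // yields a unit CTLexer; failures carry the diagnostics
    Ok(())
}
```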
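The new `impl Borrow<HashMap<String, StorageT>>` bound loosens `ct_token_map` so it accepts either a borrowed or an owned token map. A sketch of both call shapes is shown below, assuming the function runs inside a build script (it writes into `OUT_DIR`); the `emit_token_modules` helper, the module names, and the `u32` storage type are illustrative.

```rust
use std::collections::HashMap;

use lrlex::ct_token_map;

// Both calls compile under the new bound: `&HashMap<_, _>` via `impl Borrow<T> for &T`,
// and an owned `HashMap<_, _>` via the blanket `impl Borrow<T> for T`.
// Must be called from a build script, since ct_token_map writes to OUT_DIR.
fn emit_token_modules(map: HashMap<String, u32>) -> Result<(), Box<dyn std::error::Error>> {
    ct_token_map("tokens_borrowed", &map, None)?; // borrowed, as before
    ct_token_map("tokens_owned", map, None) // owned, no `&` needed any more
}
```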