Merged
69 changes: 66 additions & 3 deletions cfgrammar/src/lib/yacc/ast.rs
@@ -1,5 +1,6 @@
use std::{
collections::{HashMap, HashSet},
error::Error,
fmt,
str::FromStr,
};
@@ -15,8 +16,25 @@ use crate::{
Span,
header::{GrmtoolsSectionParser, HeaderError, HeaderErrorKind, HeaderValue},
};

/// Any error produced while modifying an existing AST returns an instance of this struct.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct ASTModificationError {
kind: YaccGrammarErrorKind,
}

impl Error for ASTModificationError {}

impl fmt::Display for ASTModificationError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.kind)
}
}

/// Contains a `GrammarAST` structure produced from a grammar source file,
/// as well as any errors which occurred during the construction of the AST.
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
pub struct ASTWithValidityInfo {
yacc_kind: YaccKind,
ast: GrammarAST,
@@ -70,6 +88,23 @@ impl ASTWithValidityInfo {
pub fn errors(&self) -> &[YaccGrammarError] {
self.errs.as_slice()
}

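/// Returns a clone of this `ASTWithValidityInfo` whose `%start` rule is replaced by `rule`,
/// or an `ASTModificationError` if no rule with that name exists in the AST.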
pub fn clone_and_change_start_rule(&self, rule: Rule) -> Result<Self, ASTModificationError> {
if self.ast.get_rule(&rule.name.0).is_some() {
let mut ret = self.clone();
// The `Span` of the `start` field and the `name` field typically differ
// in that `start` is the parameter of a `%start` declaration, while
// `name` refers to the definition site of the rule itself.
//
// Lacking a better `Span`, we use the rule's definition site for the `%start` value here.
ret.ast.start = Some(rule.name);
Ok(ret)
} else {
Err(ASTModificationError {
kind: YaccGrammarErrorKind::InvalidStartRule(rule.name.0),
})
}
}
}

impl FromStr for ASTWithValidityInfo {
@@ -110,7 +145,8 @@ impl FromStr for ASTWithValidityInfo {
/// An AST representing a grammar. This is built up gradually: when it is finished, the
`complete_and_validate` method must be called exactly once in order to finish the set-up. At that
/// point, any further mutations made to the struct lead to undefined behaviour.
#[derive(Debug)]
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(PartialEq))]
#[non_exhaustive]
pub struct GrammarAST {
pub start: Option<(String, Span)>,
@@ -140,14 +176,15 @@ pub struct GrammarAST {
pub expect_unused: Vec<Symbol>,
}

#[derive(Debug)]
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(Eq, PartialEq))]
pub struct Rule {
pub name: (String, Span),
pub pidxs: Vec<usize>, // index into GrammarAST.prod
pub actiont: Option<String>,
}

#[derive(Debug)]
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(Eq, PartialEq))]
pub struct Production {
pub symbols: Vec<Symbol>,
@@ -772,4 +809,30 @@ mod test {
.contains(&ast_validity.ast().tokens.get_index_of("b").unwrap())
);
}

#[test]
fn clone_ast_changing_start_rule() {
use super::*;
use crate::yacc::*;
let y_src = r#"
%start AStart
%token A B C
%%
AStart: A ':' BStart ';';
BStart: B ',' C | C ',' B;
"#;

let astart_ast_validity =
ASTWithValidityInfo::new(YaccKind::Original(YaccOriginalActionKind::NoAction), &y_src);
let bstart_rule = astart_ast_validity.ast().get_rule("BStart").unwrap();
let bstart_ast_validity = astart_ast_validity
.clone_and_change_start_rule(bstart_rule.clone())
.unwrap();
assert!(astart_ast_validity.is_valid());
assert!(bstart_ast_validity.is_valid());
assert_eq!(
bstart_ast_validity.ast().start.as_ref(),
Some(&bstart_rule.name)
);
}
}
4 changes: 2 additions & 2 deletions cfgrammar/src/lib/yacc/mod.rs
@@ -17,7 +17,7 @@ use quote::quote;
use serde::{Deserialize, Serialize};

/// The particular Yacc variant this grammar makes use of.
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[non_exhaustive]
pub enum YaccKind {
@@ -43,7 +43,7 @@ impl quote::ToTokens for YaccKind {
}
}

#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum YaccOriginalActionKind {
/// Execute user-specified actions attached to each production; also requires a %actiontype
2 changes: 1 addition & 1 deletion lrpar/cttests/Cargo.toml
@@ -12,7 +12,7 @@ crate-type = ["cdylib"]
[build-dependencies]
cfgrammar = { path = "../../cfgrammar" }
lrlex = { path = "../../lrlex" }
lrpar = { path = "../" }
lrpar = { path = "../", features = ["_unstable_api"] }
glob.workspace = true
yaml-rust2.workspace = true
cfg_aliases = "0.2.1"
107 changes: 86 additions & 21 deletions lrpar/cttests/build.rs
@@ -1,3 +1,4 @@
use cfgrammar::yacc::ast::ASTWithValidityInfo;
use glob::glob;
#[path = "src/cgen_helper.rs"]
mod cgen_helper;
@@ -23,27 +24,91 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
wasm32_unknown: { all(target_arch = "wasm32", target_os="unknown", target_vendor="unknown") },
}

// Because we're modifying the `StorageT` this isn't something `run_test_path` can do,
// Since it modifies the type of the builder.
CTLexerBuilder::<DefaultLexerTypes<u8>>::new_with_lexemet()
.rust_edition(lrlex::RustEdition::Rust2021)
.output_path(format!(
"{}/storaget.l.rs",
std::env::var("OUT_DIR").unwrap()
))
.lrpar_config(|ctp| {
ctp.rust_edition(lrpar::RustEdition::Rust2021)
.output_path(format!(
"{}/storaget.y.rs",
std::env::var("OUT_DIR").unwrap()
))
.grammar_in_src_dir("storaget.y")
.unwrap()
})
.lexer_in_src_dir("storaget.l")
.unwrap()
.build()
.unwrap();
{
// Because we're modifying `StorageT`, this isn't something `run_test_path` can do,
// since it changes the type of the builder.
CTLexerBuilder::<DefaultLexerTypes<u8>>::new_with_lexemet()
.rust_edition(lrlex::RustEdition::Rust2021)
.output_path(format!(
"{}/storaget.l.rs",
std::env::var("OUT_DIR").unwrap()
))
.lrpar_config(|ctp| {
ctp.rust_edition(lrpar::RustEdition::Rust2021)
.output_path(format!(
"{}/storaget.y.rs",
std::env::var("OUT_DIR").unwrap()
))
.grammar_in_src_dir("storaget.y")
.unwrap()
})
.lexer_in_src_dir("storaget.l")
.unwrap()
.build()
.unwrap();
}

{
use lrpar::unstable_api::UnstableApi;
// In this case we'll be building multiple grammars:
//
// 1. Parse `multi_start.y` into an AST.
// 2. Clone the original AST and change its start rule.
// 3. Build a grammar for `multi_start.y` unchanged.
// 4. Build the modified grammar.
let grammar_path = &std::env::current_dir().unwrap().join("src/multi_start.y");
let grammar_src = std::fs::read_to_string(grammar_path).unwrap();
let grammar_src_clone = grammar_src.clone();
let valid_ast = ASTWithValidityInfo::new(cfgrammar::yacc::YaccKind::Grmtools, &grammar_src);
eprintln!("rules {:?}", valid_ast.ast().rules);
let bstart_rule = valid_ast.ast().get_rule("BStart").unwrap().clone();
let modified_ast = valid_ast.clone_and_change_start_rule(bstart_rule).unwrap();
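// Step 3: build the grammar exactly as written in `multi_start.y` (start rule `AStart`).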
CTLexerBuilder::new()
.lrpar_config(move |ctp| {
ctp.grammar_ast(valid_ast.clone(), UnstableApi)
.with_grammar_src(grammar_src.clone(), UnstableApi)
.grammar_in_src_dir("multi_start.y")
.unwrap()
.mod_name("ast_unmodified_y")
.output_path(format!(
"{}/ast_unmodified.y.rs",
std::env::var("OUT_DIR").unwrap()
))
})
.lexer_in_src_dir("multi_start.l")
.unwrap()
.output_path(format!(
"{}/ast_unmodified.l.rs",
std::env::var("OUT_DIR").unwrap()
))
.mod_name("ast_unmodified_l")
.build()
.unwrap();
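// Step 4: build the same grammar again, this time from the AST whose start rule was changed to `BStart`.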
CTLexerBuilder::new()
.lrpar_config(move |ctp| {
ctp.grammar_ast(modified_ast.clone(), UnstableApi)
.with_grammar_src(grammar_src_clone.clone(), UnstableApi)
.grammar_in_src_dir("multi_start.y")
.unwrap()
.mod_name("ast_modified_y")
.output_path(format!(
"{}/ast_modified.y.rs",
std::env::var("OUT_DIR").unwrap()
))
// We still need to disable warnings because they are checked after AST validation.
.warnings_are_errors(false)
.show_warnings(false)
})
.lexer_in_src_dir("multi_start.l")
.unwrap()
.mod_name("ast_modified_l")
.output_path(format!(
"{}/ast_modified.l.rs",
std::env::var("OUT_DIR").unwrap()
))
.build()
.unwrap();
}
println!("cargo::rerun-if-changed=src/storaget.l");
println!(
"cargo::rerun-if-changed={}/storaget.l.rs",
26 changes: 26 additions & 0 deletions lrpar/cttests/src/lib.rs
@@ -62,6 +62,12 @@ lrpar_mod!("storaget.y");
lrlex_mod!("grmtools_section.l");
lrpar_mod!("grmtools_section.y");

lrlex_mod!("ast_unmodified.l");
lrpar_mod!("ast_unmodified.y");

lrlex_mod!("ast_modified.l");
lrpar_mod!("ast_modified.y");

#[test]
fn multitypes() {
let lexerdef = multitypes_l::lexerdef();
@@ -423,6 +429,26 @@ fn test_lex_flags() {
}
}

#[test]
fn ast_unmodified() {
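// With the original `%start AStart`, the input must have the form `A ':' BStart ';'`.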
let lexerdef = ast_unmodified_l::lexerdef();
let lexer = lexerdef.lexer("A: BBBB, CCCCC;");
match &ast_unmodified_y::parse(&lexer) {
(_, errs) if errs.is_empty() => (),
(_, e) => panic!("{:?}", e),
}
}

#[test]
fn ast_modified() {
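// With the start rule changed to `BStart`, the input only needs to match a `BStart` alternative.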
let lexerdef = ast_modified_l::lexerdef();
let lexer = lexerdef.lexer("CCCCC, BBBB");
match &ast_modified_y::parse(&lexer) {
(_, errs) if errs.is_empty() => (),
(_, e) => panic!("{:?}", e),
}
}

// Codegen failure tests
#[cfg(test)]
generate_codegen_fail_tests!("src/ctfails/*.test");
8 changes: 8 additions & 0 deletions lrpar/cttests/src/multi_start.l
@@ -0,0 +1,8 @@
%%
A+ 'A'
B+ 'B'
C+ 'C'
; ';'
: ':'
, ','
[ \n\t] ;
13 changes: 13 additions & 0 deletions lrpar/cttests/src/multi_start.y
@@ -0,0 +1,13 @@
%grmtools{yacckind: Grmtools}
%start AStart
%token A B C
%%

AStart -> ()
: A ':' BStart ';' {()}
;

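/* `BStart` is also selected as an alternative start rule by the cttests build script. */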
BStart -> ()
: B ',' C {()}
| C ',' B {()}
;