-
Notifications
You must be signed in to change notification settings - Fork 1
feat(rust/cbork): Add a CDDL preprocessing step #80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 16 commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
f405f92
move parsers into the separate module
Mr-Leshiy e4acb4a
add validate_cddl pub function
Mr-Leshiy fe1d5e1
add processor module
Mr-Leshiy 5932610
rename `rule` to `expr` and some simple expression processing functions
Mr-Leshiy 6830ae3
wip
Mr-Leshiy c74d50f
wip
Mr-Leshiy 01c3e1d
refactor Ast
Mr-Leshiy e342c95
wip
Mr-Leshiy e108e9e
Merge branch 'main' into feat/cddl-linter
Mr-Leshiy f7dd70c
remove unused deps
Mr-Leshiy 4e4deec
move CDDLTestParser to the tests::common mod
Mr-Leshiy a61eb7e
refactor character_sets tests
Mr-Leshiy 8e18b4a
cleanup tests/identifiers.rs
Mr-Leshiy 2f995d7
refactor tests/rules.rs
Mr-Leshiy 9d79c66
refactor tests/type_declaration
Mr-Leshiy 6f8af96
Merge branch 'main' into feat/cddl-linter
Mr-Leshiy 902a356
rename rule_TEST to expr_TEST
Mr-Leshiy File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,160 +1,27 @@ | ||
| //! A parser for CDDL, utilized for parsing in accordance with RFC 8610. | ||
|
|
||
| #![allow(missing_docs)] // TODO(apskhem): Temporary, to be removed in a subsequent PR | ||
| mod parser; | ||
| mod preprocessor; | ||
|
|
||
| use derive_more::{Display, From}; | ||
| pub use pest::Parser; | ||
| use pest::{error::Error, iterators::Pairs}; | ||
|
|
||
| pub mod rfc_8610 { | ||
| pub use pest::Parser; | ||
|
|
||
| #[derive(pest_derive::Parser)] | ||
| #[grammar = "grammar/rfc_8610.pest"] | ||
| pub struct RFC8610Parser; | ||
| } | ||
|
|
||
| pub mod rfc_9165 { | ||
| pub use pest::Parser; | ||
|
|
||
| #[derive(pest_derive::Parser)] | ||
| #[grammar = "grammar/rfc_8610.pest"] | ||
| #[grammar = "grammar/rfc_9165.pest"] | ||
| pub struct RFC8610Parser; | ||
| } | ||
|
|
||
| pub mod cddl { | ||
| pub use pest::Parser; | ||
|
|
||
| #[derive(pest_derive::Parser)] | ||
| #[grammar = "grammar/rfc_8610.pest"] | ||
| #[grammar = "grammar/rfc_9165.pest"] | ||
| #[grammar = "grammar/cddl_modules.pest"] | ||
| pub struct RFC8610Parser; | ||
| } | ||
|
|
||
| pub mod cddl_test { | ||
| pub use pest::Parser; | ||
|
|
||
| // Parser with DEBUG rules. These rules are only used in tests. | ||
| #[derive(pest_derive::Parser)] | ||
| #[grammar = "grammar/rfc_8610.pest"] | ||
| #[grammar = "grammar/rfc_9165.pest"] | ||
| #[grammar = "grammar/cddl_modules.pest"] | ||
| #[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests. | ||
| pub struct CDDLTestParser; | ||
| } | ||
|
|
||
| /// Represents different parser extensions for handling CDDL specifications. | ||
| /// Represents different grammar extensions for handling CDDL specifications. | ||
| pub enum Extension { | ||
| /// RFC8610 ONLY limited parser. | ||
| RFC8610Parser, | ||
| /// RFC8610 and RFC9165 limited parser. | ||
| RFC9165Parser, | ||
| /// RFC8610, RFC9165, and CDDL modules. | ||
| CDDLParser, | ||
| } | ||
|
|
||
| // CDDL Standard Postlude - read from an external file | ||
| pub const POSTLUDE: &str = include_str!("grammar/postlude.cddl"); | ||
|
|
||
| /// Abstract Syntax Tree (AST) representing parsed CDDL syntax. | ||
| // TODO: this is temporary. need to add more pragmatic nodes | ||
| #[derive(Debug)] | ||
| pub enum AST<'a> { | ||
| /// Represents the AST for RFC 8610 CDDL rules. | ||
| RFC8610(Pairs<'a, rfc_8610::Rule>), | ||
| /// Represents the AST for RFC 9165 CDDL rules. | ||
| RFC9165(Pairs<'a, rfc_9165::Rule>), | ||
| /// Represents the AST for CDDL Modules rules. | ||
| CDDL(Pairs<'a, cddl::Rule>), | ||
| /// RFC8610 ONLY limited grammar. | ||
| RFC8610, | ||
| /// RFC8610 and RFC9165 limited grammar. | ||
| RFC9165, | ||
| /// RFC8610, RFC9165, and CDDL grammar. | ||
| CDDL, | ||
| } | ||
|
|
||
| /// Represents different types of errors related to different types of extension. | ||
| #[derive(Display, Debug)] | ||
| pub enum CDDLErrorType { | ||
| /// An error related to RFC 8610 extension. | ||
| RFC8610(Error<rfc_8610::Rule>), | ||
| /// An error related to RFC 9165 extension. | ||
| RFC9165(Error<rfc_9165::Rule>), | ||
| /// An error related to CDDL modules extension. | ||
| CDDL(Error<cddl::Rule>), | ||
| } | ||
|
|
||
| /// Represents an error that may occur during CDDL parsing. | ||
| #[derive(thiserror::Error, Debug, From)] | ||
| #[error("{0}")] | ||
| pub struct CDDLError(CDDLErrorType); | ||
|
|
||
| /// Parses and checks semantically a CDDL input string. | ||
| /// | ||
| /// # Arguments | ||
| /// | ||
| /// * `input` - A string containing the CDDL input to be parsed. | ||
| /// | ||
| /// # Returns | ||
| /// | ||
| /// Returns `Ok(())` if parsing is successful, otherwise returns an `Err` containing | ||
| /// a boxed `CDDLError` indicating the parsing error. | ||
| /// Verifies semantically a CDDL input string. | ||
| /// | ||
| /// # Errors | ||
| /// | ||
| /// This function may return an error in the following cases: | ||
| /// | ||
| /// - If there is an issue with parsing the CDDL input. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ```rs | ||
| /// use cbork_cddl_parser::{parse_cddl, Extension}; | ||
| /// use std::fs; | ||
| /// | ||
| /// let mut input = fs::read_to_string("path/to/your/file.cddl").unwrap(); | ||
| /// let result = parse_cddl(&mut input, &Extension::CDDLParser); | ||
| /// assert!(result.is_ok()); | ||
| /// ``` | ||
| pub fn parse_cddl<'a>( | ||
| input: &'a mut String, extension: &Extension, | ||
| ) -> Result<AST<'a>, Box<CDDLError>> { | ||
| input.push_str("\n\n"); | ||
| input.push_str(POSTLUDE); | ||
|
|
||
| let result = match extension { | ||
| Extension::RFC8610Parser => { | ||
| rfc_8610::RFC8610Parser::parse(rfc_8610::Rule::cddl, input) | ||
| .map(AST::RFC8610) | ||
| .map_err(CDDLErrorType::RFC8610) | ||
| }, | ||
| Extension::RFC9165Parser => { | ||
| rfc_9165::RFC8610Parser::parse(rfc_9165::Rule::cddl, input) | ||
| .map(AST::RFC9165) | ||
| .map_err(CDDLErrorType::RFC9165) | ||
| }, | ||
| Extension::CDDLParser => { | ||
| cddl::RFC8610Parser::parse(cddl::Rule::cddl, input) | ||
| .map(AST::CDDL) | ||
| .map_err(CDDLErrorType::CDDL) | ||
| }, | ||
| }; | ||
|
|
||
| result.map_err(|e| Box::new(CDDLError::from(e))) | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use crate::*; | ||
|
|
||
| #[test] | ||
| fn it_works() { | ||
| let mut input = String::new(); | ||
| let result = parse_cddl(&mut input, &Extension::CDDLParser); | ||
|
|
||
| match result { | ||
| Ok(c) => println!("{c:?}"), | ||
| Err(e) => { | ||
| println!("{e:?}"); | ||
| println!("{e}"); | ||
| }, | ||
| } | ||
| } | ||
| pub fn validate_cddl(input: &mut String, extension: &Extension) -> anyhow::Result<()> { | ||
| let ast = parser::parse_cddl(input, extension)?; | ||
| let _ast = preprocessor::process_ast(ast)?; | ||
| Ok(()) | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| //! A parser for CDDL using the [pest](https://github.com/pest-parser/pest). | ||
| //! Utilized for parsing in accordance with RFC-8610, RFC-9165. | ||
|
|
||
| use pest::{iterators::Pair, Parser}; | ||
|
|
||
| use crate::Extension; | ||
|
|
||
| /// Parser for the RFC-8610 grammar (`grammar/rfc_8610.pest`). | ||
| #[allow(missing_docs)] | ||
| pub(crate) mod rfc_8610 { | ||
| /// A Pest parser derived from the RFC-8610 grammar file only. | ||
| #[derive(pest_derive::Parser)] | ||
| #[grammar = "grammar/rfc_8610.pest"] | ||
| pub(crate) struct Parser; | ||
| } | ||
|
|
||
| /// Parser for the RFC-8610 grammar combined with the RFC-9165 grammar additions. | ||
| #[allow(missing_docs)] | ||
| pub(crate) mod rfc_9165 { | ||
| /// A Pest parser derived from the RFC-8610 and RFC-9165 grammar files. | ||
| #[derive(pest_derive::Parser)] | ||
| #[grammar = "grammar/rfc_8610.pest"] | ||
| #[grammar = "grammar/rfc_9165.pest"] | ||
| pub(crate) struct Parser; | ||
| } | ||
|
|
||
| /// Full CDDL syntax parser: RFC-8610, RFC-9165 and the CDDL modules grammar. | ||
| #[allow(missing_docs)] | ||
| pub(crate) mod cddl { | ||
| /// A Pest parser derived from all three grammar files listed below. | ||
| #[derive(pest_derive::Parser)] | ||
| #[grammar = "grammar/rfc_8610.pest"] | ||
| #[grammar = "grammar/rfc_9165.pest"] | ||
| #[grammar = "grammar/cddl_modules.pest"] | ||
| pub(crate) struct Parser; | ||
| } | ||
|
|
||
| /// CDDL standard postlude, embedded at compile time from `grammar/postlude.cddl`. | ||
| const POSTLUDE: &str = include_str!("grammar/postlude.cddl"); | ||
|
|
||
| /// Pest parse output for a CDDL input, tagged with the grammar that produced it. | ||
| #[derive(Debug)] | ||
| pub(crate) enum Ast<'a> { | ||
| /// Pairs whose rules come from the RFC-8610 parser. | ||
| Rfc8610(Vec<Pair<'a, rfc_8610::Rule>>), | ||
| /// Pairs whose rules come from the RFC-9165 parser. | ||
| Rfc9165(Vec<Pair<'a, rfc_9165::Rule>>), | ||
| /// Pairs whose rules come from the full CDDL parser (including CDDL modules). | ||
| Cddl(Vec<Pair<'a, cddl::Rule>>), | ||
| } | ||
|
|
||
| /// Parses a CDDL input string with the grammar selected by `extension`. | ||
| /// | ||
| /// The standard postlude is appended to `input` (separated by a blank line) before | ||
| /// parsing, so the caller's string is modified in place. | ||
| /// | ||
| /// # Arguments | ||
| /// | ||
| /// * `input` - A string containing the CDDL input to be parsed. | ||
| /// * `extension` - The grammar extension that selects which parser is used. | ||
| /// | ||
| /// # Returns | ||
| /// | ||
| /// Returns the [`Ast`] variant matching `extension`, holding the top-level pairs | ||
| /// produced by the parser. | ||
| /// | ||
| /// # Errors | ||
| /// | ||
| /// Returns an error if the input (with the postlude appended) cannot be parsed by | ||
| /// the selected grammar. | ||
| pub(crate) fn parse_cddl<'a>( | ||
| input: &'a mut String, extension: &Extension, | ||
| ) -> anyhow::Result<Ast<'a>> { | ||
| input.push_str("\n\n"); | ||
| input.push_str(POSTLUDE); | ||
|
|
||
| // Every arm starts at the grammar's `cddl` rule; `?` converts the pest error. | ||
| let ast = match extension { | ||
| Extension::RFC8610 => { | ||
| Ast::Rfc8610(rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input)?.collect()) | ||
| }, | ||
| Extension::RFC9165 => { | ||
| Ast::Rfc9165(rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input)?.collect()) | ||
| }, | ||
| Extension::CDDL => Ast::Cddl(cddl::Parser::parse(cddl::Rule::cddl, input)?.collect()), | ||
| }; | ||
| Ok(ast) | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| //! A CDDL AST preprocessor. | ||
| //! | ||
| //! - Validates the root rule of the AST to be a `cddl` rule. | ||
| //! - Filters out all rules that are not `expr` rules. | ||
| //! - (TODO) Resolve #include and #import directives, by just adding the imported rules | ||
| //! into the final expression list | ||
|
|
||
| use anyhow::{anyhow, ensure}; | ||
| use pest::{iterators::Pair, RuleType}; | ||
|
|
||
| use crate::parser::{cddl, rfc_8610, rfc_9165, Ast}; | ||
|
|
||
| /// Runs the preprocessing step over a parsed AST. | ||
| /// | ||
| /// Dispatches on the grammar that produced `ast`, hands its pairs to | ||
| /// `process_root_and_filter` with that grammar's `cddl` and `expr` rules, and wraps | ||
| /// the result back into the same variant. | ||
| /// | ||
| /// # Errors | ||
| /// | ||
| /// Propagates any error returned by `process_root_and_filter`. | ||
| pub(crate) fn process_ast(ast: Ast) -> anyhow::Result<Ast> { | ||
| let processed = match ast { | ||
| Ast::Rfc8610(pairs) => { | ||
| let exprs = process_root_and_filter(pairs, rfc_8610::Rule::cddl, rfc_8610::Rule::expr)?; | ||
| Ast::Rfc8610(exprs) | ||
| }, | ||
| Ast::Rfc9165(pairs) => { | ||
| let exprs = process_root_and_filter(pairs, rfc_9165::Rule::cddl, rfc_9165::Rule::expr)?; | ||
| Ast::Rfc9165(exprs) | ||
| }, | ||
| Ast::Cddl(pairs) => { | ||
| let exprs = process_root_and_filter(pairs, cddl::Rule::cddl, cddl::Rule::expr)?; | ||
| Ast::Cddl(exprs) | ||
| }, | ||
| }; | ||
| Ok(processed) | ||
| } | ||
|
|
||
| /// Validates the root of the AST and returns the root's children matching `expected_rule`. | ||
| /// | ||
| /// # Errors | ||
| /// | ||
| /// - If `ast` is empty. | ||
| /// - If `ast` holds more than one top-level pair, or its single pair is not a | ||
| /// `root_rule` rule. | ||
| fn process_root_and_filter<R: RuleType>( | ||
| ast: Vec<Pair<'_, R>>, root_rule: R, expected_rule: R, | ||
| ) -> anyhow::Result<Vec<Pair<'_, R>>> { | ||
| let mut ast_iter = ast.into_iter(); | ||
| // Build the error lazily: `ok_or` would construct it even when a root pair exists. | ||
| let ast_root = ast_iter.next().ok_or_else(|| anyhow!("Empty AST."))?; | ||
| ensure!( | ||
| ast_root.as_rule() == root_rule && ast_iter.next().is_none(), | ||
| "AST must have only one root rule, which must be a `{root_rule:?}` rule." | ||
| ); | ||
| // Only the direct children of the root are inspected; nested pairs stay inside them. | ||
| Ok(ast_root | ||
| .into_inner() | ||
| .filter(|pair| pair.as_rule() == expected_rule) | ||
| .collect()) | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.