Skip to content

Commit 3910b60

Browse files
authored
feat(rust/cbork): Add a CDDL preprocessing step (#80)
* move parsers into the separate module * add validate_cddl pub function * add processor module * rename `rule` to `expr` and some simple expression processing functions * wip * wip * refactor Ast * wip * remove unused deps * move CDDLTestParser to the tests::common mod * refactor character_sets tests * cleanup tests/identifiers.rs * refactor tests/rules.rs * refactor tests/type_declaration * rename rule_TEST to expr_TEST
1 parent d4dc27a commit 3910b60

File tree

17 files changed

+370
-395
lines changed

17 files changed

+370
-395
lines changed

rust/cbork-cddl-parser/Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ repository.workspace = true
1515
workspace = true
1616

1717
[dependencies]
18-
derive_more = {version = "1.0.0", features = ["from","display"] }
1918
pest = { version = "2.7.13", features = ["std", "pretty-print", "memchr", "const_prec_climber"] }
2019
pest_derive = { version = "2.7.13", features = ["grammar-extras"] }
21-
thiserror = "1.0.64"
20+
anyhow = "1.0.89"

rust/cbork-cddl-parser/src/grammar/cddl_test.pest

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
// cspell: words assigng genericparm genericarg rangeop ctlop
88
// cspell: words grpchoice grpent memberkey bareword optcom
99

10-
/// Test Expression for the `rule` Rule.
11-
rule_TEST = ${ SOI ~ rule ~ EOI }
10+
/// Test Expression for the `expr` Rule.
11+
expr_TEST = ${ SOI ~ expr ~ EOI }
1212

1313
/// Test Expression for the `typename` Rule.
1414
typename_TEST = ${ SOI ~ typename ~ EOI }

rust/cbork-cddl-parser/src/grammar/rfc_8610.pest

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@
77

88
cddl = ${
99
SOI
10-
~ S ~ (rule ~ S)+
10+
~ S ~ (expr ~ S)+
1111
~ EOI
1212
}
1313

1414
// -----------------------------------------------------------------------------
1515
// Rules
16-
rule = ${
16+
expr = ${
1717
(typename ~ genericparm? ~ S ~ assignt ~ S ~ type)
1818
| (groupname ~ genericparm? ~ S ~ assigng ~ S ~ grpent)
1919
}

rust/cbork-cddl-parser/src/lib.rs

Lines changed: 14 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -1,160 +1,27 @@
11
//! A parser for CDDL, utilized for parsing in accordance with RFC 8610.
22
3-
#![allow(missing_docs)] // TODO(apskhem): Temporary, to bo removed in a subsequent PR
3+
mod parser;
4+
mod preprocessor;
45

5-
use derive_more::{Display, From};
6-
pub use pest::Parser;
7-
use pest::{error::Error, iterators::Pairs};
8-
9-
pub mod rfc_8610 {
10-
pub use pest::Parser;
11-
12-
#[derive(pest_derive::Parser)]
13-
#[grammar = "grammar/rfc_8610.pest"]
14-
pub struct RFC8610Parser;
15-
}
16-
17-
pub mod rfc_9165 {
18-
pub use pest::Parser;
19-
20-
#[derive(pest_derive::Parser)]
21-
#[grammar = "grammar/rfc_8610.pest"]
22-
#[grammar = "grammar/rfc_9165.pest"]
23-
pub struct RFC8610Parser;
24-
}
25-
26-
pub mod cddl {
27-
pub use pest::Parser;
28-
29-
#[derive(pest_derive::Parser)]
30-
#[grammar = "grammar/rfc_8610.pest"]
31-
#[grammar = "grammar/rfc_9165.pest"]
32-
#[grammar = "grammar/cddl_modules.pest"]
33-
pub struct RFC8610Parser;
34-
}
35-
36-
pub mod cddl_test {
37-
pub use pest::Parser;
38-
39-
// Parser with DEBUG rules. These rules are only used in tests.
40-
#[derive(pest_derive::Parser)]
41-
#[grammar = "grammar/rfc_8610.pest"]
42-
#[grammar = "grammar/rfc_9165.pest"]
43-
#[grammar = "grammar/cddl_modules.pest"]
44-
#[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests.
45-
pub struct CDDLTestParser;
46-
}
47-
48-
/// Represents different parser extensions for handling CDDL specifications.
6+
/// Represents different grammar extensions for handling CDDL specifications.
497
pub enum Extension {
50-
/// RFC8610 ONLY limited parser.
51-
RFC8610Parser,
52-
/// RFC8610 and RFC9165 limited parser.
53-
RFC9165Parser,
54-
/// RFC8610, RFC9165, and CDDL modules.
55-
CDDLParser,
56-
}
57-
58-
// CDDL Standard Postlude - read from an external file
59-
pub const POSTLUDE: &str = include_str!("grammar/postlude.cddl");
60-
61-
/// Abstract Syntax Tree (AST) representing parsed CDDL syntax.
62-
// TODO: this is temporary. need to add more pragmatic nodes
63-
#[derive(Debug)]
64-
pub enum AST<'a> {
65-
/// Represents the AST for RFC 8610 CDDL rules.
66-
RFC8610(Pairs<'a, rfc_8610::Rule>),
67-
/// Represents the AST for RFC 9165 CDDL rules.
68-
RFC9165(Pairs<'a, rfc_9165::Rule>),
69-
/// Represents the AST for CDDL Modules rules.
70-
CDDL(Pairs<'a, cddl::Rule>),
8+
/// RFC8610 ONLY limited grammar.
9+
RFC8610,
10+
/// RFC8610 and RFC9165 limited grammar.
11+
RFC9165,
12+
/// RFC8610, RFC9165, and CDDL modules grammar.
13+
CDDL,
7114
}
7215

73-
/// Represents different types of errors related to different types of extension.
74-
#[derive(Display, Debug)]
75-
pub enum CDDLErrorType {
76-
/// An error related to RFC 8610 extension.
77-
RFC8610(Error<rfc_8610::Rule>),
78-
/// An error related to RFC 9165 extension.
79-
RFC9165(Error<rfc_9165::Rule>),
80-
/// An error related to CDDL modules extension.
81-
CDDL(Error<cddl::Rule>),
82-
}
83-
84-
/// Represents an error that may occur during CDDL parsing.
85-
#[derive(thiserror::Error, Debug, From)]
86-
#[error("{0}")]
87-
pub struct CDDLError(CDDLErrorType);
88-
89-
/// Parses and checks semantically a CDDL input string.
90-
///
91-
/// # Arguments
92-
///
93-
/// * `input` - A string containing the CDDL input to be parsed.
94-
///
95-
/// # Returns
96-
///
97-
/// Returns `Ok(())` if parsing is successful, otherwise returns an `Err` containing
98-
/// a boxed `CDDLError` indicating the parsing error.
16+
/// Semantically validates a CDDL input string.
9917
///
10018
/// # Errors
10119
///
10220
/// This function may return an error in the following cases:
10321
///
10422
/// - If there is an issue with parsing the CDDL input.
105-
///
106-
/// # Examples
107-
///
108-
/// ```rs
109-
/// use cbork_cddl_parser::{parse_cddl, Extension};
110-
/// use std:fs;
111-
///
112-
/// let mut input = fs::read_to_string("path/to/your/file.cddl").unwrap();
113-
/// let result = parse_cddl(&mut input, &Extension::CDDLParser);
114-
/// assert!(result.is_ok());
115-
/// ```
116-
pub fn parse_cddl<'a>(
117-
input: &'a mut String, extension: &Extension,
118-
) -> Result<AST<'a>, Box<CDDLError>> {
119-
input.push_str("\n\n");
120-
input.push_str(POSTLUDE);
121-
122-
let result = match extension {
123-
Extension::RFC8610Parser => {
124-
rfc_8610::RFC8610Parser::parse(rfc_8610::Rule::cddl, input)
125-
.map(AST::RFC8610)
126-
.map_err(CDDLErrorType::RFC8610)
127-
},
128-
Extension::RFC9165Parser => {
129-
rfc_9165::RFC8610Parser::parse(rfc_9165::Rule::cddl, input)
130-
.map(AST::RFC9165)
131-
.map_err(CDDLErrorType::RFC9165)
132-
},
133-
Extension::CDDLParser => {
134-
cddl::RFC8610Parser::parse(cddl::Rule::cddl, input)
135-
.map(AST::CDDL)
136-
.map_err(CDDLErrorType::CDDL)
137-
},
138-
};
139-
140-
result.map_err(|e| Box::new(CDDLError::from(e)))
141-
}
142-
143-
#[cfg(test)]
144-
mod tests {
145-
use crate::*;
146-
147-
#[test]
148-
fn it_works() {
149-
let mut input = String::new();
150-
let result = parse_cddl(&mut input, &Extension::CDDLParser);
151-
152-
match result {
153-
Ok(c) => println!("{c:?}"),
154-
Err(e) => {
155-
println!("{e:?}");
156-
println!("{e}");
157-
},
158-
}
159-
}
23+
pub fn validate_cddl(input: &mut String, extension: &Extension) -> anyhow::Result<()> {
24+
let ast = parser::parse_cddl(input, extension)?;
25+
let _ast = preprocessor::process_ast(ast)?;
26+
Ok(())
16027
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
//! A parser for CDDL using the [pest](https://github.com/pest-parser/pest).
2+
//! Utilized for parsing in accordance with RFC-8610, RFC-9165.
3+
4+
use pest::{iterators::Pair, Parser};
5+
6+
use crate::Extension;
7+
8+
/// RFC-8610 parser.
9+
#[allow(missing_docs)]
10+
pub(crate) mod rfc_8610 {
11+
/// A Pest parser for RFC-8610.
12+
#[derive(pest_derive::Parser)]
13+
#[grammar = "grammar/rfc_8610.pest"]
14+
pub(crate) struct Parser;
15+
}
16+
17+
/// RFC-9165 parser.
18+
#[allow(missing_docs)]
19+
pub(crate) mod rfc_9165 {
20+
/// A Pest parser for RFC-9165.
21+
#[derive(pest_derive::Parser)]
22+
#[grammar = "grammar/rfc_8610.pest"]
23+
#[grammar = "grammar/rfc_9165.pest"]
24+
pub(crate) struct Parser;
25+
}
26+
27+
/// Full CDDL syntax parser.
28+
#[allow(missing_docs)]
29+
pub(crate) mod cddl {
30+
/// A Pest parser for a full CDDL syntax.
31+
#[derive(pest_derive::Parser)]
32+
#[grammar = "grammar/rfc_8610.pest"]
33+
#[grammar = "grammar/rfc_9165.pest"]
34+
#[grammar = "grammar/cddl_modules.pest"]
35+
pub(crate) struct Parser;
36+
}
37+
38+
/// CDDL Standard Postlude - read from an external file
39+
const POSTLUDE: &str = include_str!("grammar/postlude.cddl");
40+
41+
/// PEST Abstract Syntax Tree (AST) representing parsed CDDL syntax.
42+
#[derive(Debug)]
43+
pub(crate) enum Ast<'a> {
44+
/// Represents the AST for RFC-8610 CDDL rules.
45+
Rfc8610(Vec<Pair<'a, rfc_8610::Rule>>),
46+
/// Represents the AST for RFC-9165 CDDL rules.
47+
Rfc9165(Vec<Pair<'a, rfc_9165::Rule>>),
48+
/// Represents the AST for CDDL Modules rules.
49+
Cddl(Vec<Pair<'a, cddl::Rule>>),
50+
}
51+
52+
/// Parses a CDDL input string, with the standard postlude appended, into a PEST AST.
53+
///
54+
/// # Arguments
55+
///
56+
/// * `input` - A string containing the CDDL input to be parsed.
57+
///
58+
/// # Returns
59+
///
60+
/// Returns the parsed `Ast` if parsing is successful, otherwise returns an
61+
/// `anyhow::Error` describing the parsing error.
62+
///
63+
/// # Errors
64+
///
65+
/// This function may return an error in the following cases:
66+
///
67+
/// - If there is an issue with parsing the CDDL input.
68+
pub(crate) fn parse_cddl<'a>(
69+
input: &'a mut String, extension: &Extension,
70+
) -> anyhow::Result<Ast<'a>> {
71+
input.push_str("\n\n");
72+
input.push_str(POSTLUDE);
73+
74+
let ast = match extension {
75+
Extension::RFC8610 => {
76+
rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input)
77+
.map(|p| Ast::Rfc8610(p.collect()))?
78+
},
79+
Extension::RFC9165 => {
80+
rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input)
81+
.map(|p| Ast::Rfc9165(p.collect()))?
82+
},
83+
Extension::CDDL => {
84+
cddl::Parser::parse(cddl::Rule::cddl, input).map(|p| Ast::Cddl(p.collect()))?
85+
},
86+
};
87+
Ok(ast)
88+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//! A CDDL AST preprocessor.
2+
//!
3+
//! - Validates the root rule of the AST to be a `cddl` rule.
4+
//! - Filters out all rules that are not `expr` rules.
5+
//! - (TODO) Resolve #include and #import directives, by just adding the imported rules
6+
//! into the final expression list
7+
8+
use anyhow::{anyhow, ensure};
9+
use pest::{iterators::Pair, RuleType};
10+
11+
use crate::parser::{cddl, rfc_8610, rfc_9165, Ast};
12+
13+
/// Processes the AST.
14+
pub(crate) fn process_ast(ast: Ast) -> anyhow::Result<Ast> {
15+
match ast {
16+
Ast::Rfc8610(ast) => {
17+
process_root_and_filter(ast, rfc_8610::Rule::cddl, rfc_8610::Rule::expr)
18+
.map(Ast::Rfc8610)
19+
},
20+
Ast::Rfc9165(ast) => {
21+
process_root_and_filter(ast, rfc_9165::Rule::cddl, rfc_9165::Rule::expr)
22+
.map(Ast::Rfc9165)
23+
},
24+
Ast::Cddl(ast) => {
25+
process_root_and_filter(ast, cddl::Rule::cddl, cddl::Rule::expr).map(Ast::Cddl)
26+
},
27+
}
28+
}
29+
30+
/// Process the root rule of the AST and filter out all non `expected_rule` rules.
31+
fn process_root_and_filter<R: RuleType>(
32+
ast: Vec<Pair<'_, R>>, root_rule: R, expected_rule: R,
33+
) -> anyhow::Result<Vec<Pair<'_, R>>> {
34+
let mut ast_iter = ast.into_iter();
35+
let ast_root = ast_iter.next().ok_or(anyhow!("Empty AST."))?;
36+
ensure!(
37+
ast_root.as_rule() == root_rule && ast_iter.next().is_none(),
38+
"AST must have only one root rule, which must be a `{root_rule:?}` rule."
39+
);
40+
Ok(ast_root
41+
.into_inner()
42+
.filter(|pair| pair.as_rule() == expected_rule)
43+
.collect())
44+
}

rust/cbork-cddl-parser/tests/byte_sequences.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
// cspell: words hexpair rstuvw abcdefghijklmnopqrstuvwyz rstuvw Xhhb Bhcm
22

3-
use cbork_cddl_parser::cddl_test::Rule;
4-
53
mod common;
6-
use common::byte_sequences::*;
4+
use common::{byte_sequences::*, Rule};
75

86
#[test]
97
/// Test if the `HEX_PAIR` rule passes properly.

rust/cbork-cddl-parser/tests/cddl.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::{ffi::OsStr, fs, io::Result};
22

3-
use cbork_cddl_parser::{parse_cddl, Extension};
3+
use cbork_cddl_parser::{validate_cddl, Extension};
44

55
#[test]
66
/// # Panics
@@ -32,7 +32,7 @@ fn parse_cddl_files() {
3232
for file_path in valid_file_paths {
3333
let mut content = fs::read_to_string(file_path).unwrap();
3434

35-
if let Err(e) = parse_cddl(&mut content, &Extension::CDDLParser) {
35+
if let Err(e) = validate_cddl(&mut content, &Extension::CDDL) {
3636
err_messages.push(format!("{}) {file_path:?} {e}", err_messages.len() + 1));
3737
}
3838
}
@@ -41,7 +41,7 @@ fn parse_cddl_files() {
4141
for file_path in invalid_file_paths {
4242
let mut content = fs::read_to_string(file_path).unwrap();
4343

44-
let result = parse_cddl(&mut content, &Extension::CDDLParser);
44+
let result = validate_cddl(&mut content, &Extension::CDDL);
4545

4646
assert!(result.is_err(), "{:?} is expected to fail", &file_path);
4747
}

0 commit comments

Comments
 (0)