Skip to content

Commit 891c6a6

Browse files
committed
DSL simplification in order to propose a compilation sketch
1 parent 4a7d2dc commit 891c6a6

File tree

9 files changed

+57
-73
lines changed

9 files changed

+57
-73
lines changed

README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ The `alter` is an annotation where:
3838
- `^` allows the capability to recognize negation,
3939
- `!` allows the capability to backtrack on failure and
4040
- `#` allows the capability to capture all chars.
41-
- `/` allows the capability to lookahead without consuming scanned elements.
4241

4342
The `#` alteration is important because it prevents massive list construction in memory.
4443

@@ -207,6 +206,18 @@ optimisations. To this end, the `AST` is translated directly into a parser
207206
using the `core` library.
208207
cf. [celma parser in direct style](https://github.com/d-plaindoux/celma/blob/master/lang/v0/parser/src/parser.rs).
209208

209+
### Benchmarks
210+
211+
Material:
212+
213+
```sh
214+
test json_apache ... bench: 1,652,349 ns/iter (+/- 119,721) = 75 MB/s
215+
test json_canada_nom ... bench: 135,309 ns/iter (+/- 4,034) = 68 MB/s
216+
test json_canada_pest ... bench: 61,084,354 ns/iter (+/- 2,746,834) = 36 MB/s
217+
test json_data ... bench: 133,788 ns/iter (+/- 10,607) = 69 MB/s
218+
```
219+
220+
210221
## V1
211222

212223
This version targets aggressive and efficient parser compilation. For this

lang/v0/ast/src/syntax.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ pub enum ASTParsec {
2929
PCheck(Box<ASTParsec>),
3030
POptional(Box<ASTParsec>),
3131
PRepeat(bool, Box<ASTParsec>),
32-
PLookahead(Box<ASTParsec>),
3332
}
3433

3534
impl ASTParsec {

lang/v0/parser/src/parser.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ use celma_v0_core::stream::specs::Stream;
2929
use std::ops::Range;
3030

3131
use celma_v0_ast::syntax::ASTParsec::{
32-
PAtom, PAtoms, PBind, PCheck, PChoice, PCode, PIdent, PLookahead, PMap, PNot, POptional,
33-
PRepeat, PSequence, PTry,
32+
PAtom, PAtoms, PBind, PCheck, PChoice, PCode, PIdent, PMap, PNot, POptional, PRepeat,
33+
PSequence, PTry,
3434
};
3535
use celma_v0_ast::syntax::{ASTParsec, ASTParsecRule};
3636

@@ -204,10 +204,6 @@ where
204204
.and_left(skip())
205205
.and_right(atom2())
206206
.map(|p| PCheck(p.wrap())))
207-
.or(a_char('/')
208-
.and_left(skip())
209-
.and_right(atom2())
210-
.map(|p| PLookahead(p.wrap())))
211207
.or(atom2())
212208
}
213209

lang/v0/parser/src/transpiler.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
extern crate proc_macro;
1818

1919
use celma_v0_ast::syntax::ASTParsec::{
20-
PAtom, PAtoms, PBind, PCheck, PChoice, PCode, PIdent, PLookahead, PMap, PNot, POptional,
21-
PRepeat, PSequence, PTry,
20+
PAtom, PAtoms, PBind, PCheck, PChoice, PCode, PIdent, PMap, PNot, POptional, PRepeat,
21+
PSequence, PTry,
2222
};
2323
use celma_v0_ast::syntax::{ASTParsec, ASTParsecRule};
2424
use proc_macro2::{Span, TokenStream};
@@ -178,10 +178,6 @@ impl TranspileBody<(Option<String>, TokenStream)> for ASTParsec {
178178
Ok((None, quote!(#pt.rep())))
179179
}
180180
}
181-
PLookahead(p) => {
182-
let (_, pt) = p.transpile_body()?;
183-
Ok((None, quote!(lookahead(#pt))))
184-
}
185181
}
186182
}
187183
}

lang/v0/parser/tests/transpiler/celma_transpiler_tests.rs

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -76,25 +76,6 @@ mod tests_and {
7676
};
7777
}
7878

79-
#[test]
80-
fn it_transpile_two_characters_with_lookahead_on_the_second_one() {
81-
let response = celma_parsec()
82-
.parse(CharStream::new("'a' /'b'"))
83-
.map(|ast| ast.transpile_body());
84-
85-
match response {
86-
Success(Ok((_, ast)), _, _) => assert_eq!(
87-
ast.to_string(),
88-
quote!(
89-
celma_v0_core::parser::char::a_char('a')
90-
.and_right(lookahead(celma_v0_core::parser::char::a_char('b')))
91-
)
92-
.to_string()
93-
),
94-
_ => assert_eq!(true, false),
95-
};
96-
}
97-
9879
#[test]
9980
fn it_transpile_two_characters_bind_left() {
10081
let response = celma_parsec()

lang/v1/ast/src/syntax.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ pub enum ASTType {
3333

3434
#[derive(Clone, Debug, Eq, PartialEq)]
3535
pub enum ASTParsec<I> {
36+
PEpsilon,
3637
PIdent(String),
3738
PAtom(I),
3839
PAtoms(Vec<I>),
@@ -46,7 +47,6 @@ pub enum ASTParsec<I> {
4647
PCheck(Box<ASTParsec<I>>),
4748
POptional(Box<ASTParsec<I>>),
4849
PRepeat(bool, Box<ASTParsec<I>>),
49-
PLookahead(Box<ASTParsec<I>>),
5050
}
5151

5252
impl<I> ASTParsec<I> {

lang/v1/normalizer/src/ast.rs

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,29 @@ pub enum ASTGrammar<A> {
2323
Rec(String, Box<AST<A>>),
2424
Var(String)
2525
}
26-
2726
/*
28-
-- Pre-normalization
29-
PN : ASTParsec -> ASTGrammar
27+
PAtom(char), // Single char
28+
PAtoms(Vec<char>), // Char sequence
29+
PBind(String, Box<ASTParsec>), // Variable
30+
PCode(String), // Production
31+
PMap(Box<ASTParsec>, String), // Remove?
32+
PNot(Box<ASTParsec>), // ?
33+
PCheck(Box<ASTParsec>), // No capture
34+
35+
-- Pre-normalization
36+
37+
PN : ASTParsec -> (string -> ASTParsec) -> string list -> ASTGrammar
3038
31-
PN[PSequence(T1,T2]] = Seq(PN[T1],PN[T2])
32-
PN[PChoice(T1,T2]] = Choice(PN[T1],PN[T2])
33-
PN[PRepeat(true, T)] = Rec(a,Choice(Epsilon, Seq(PN[T],String(a)) // When a is not in FV(T)
34-
PN[PRepeat(false, T)] = Choice(PN[T],PN[PRepeat(true, T)])
35-
PN[POptional(T)] = Choice(Epsilon, PN[T])
39+
/ Var(n) if n in l
40+
PN[PIdent(n)]gl = {
41+
\ mu(n,PN[g(n)]g(n::l)) otherwise
42+
43+
PN[PSequence(T1,T2)]gl = Seq(PN[T1]gl,PN[T2]gl)
44+
PN[PChoice(T1,T2)]gl = Choice(PN[T1]gl,PN[T2]gl)
45+
PN[PRepeat(false, T)]gl = Choice(PN[T]gl,PN[PRepeat(true, T)]gl)
46+
47+
PN[PRepeat(true, T)]gl = PN[PChoice(PRepeat(false, T),PEpsilon)]gl
48+
PN[POptional(T)]gl = PN[PChoice(T,PEpsilon)]gl
49+
PN[PTry(T)]gl = PN[T]gl
3650
*/
51+

lang/v1/parser/src/parser.rs

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@
1414
limitations under the License.
1515
*/
1616

17-
use celma_v0_core::parser::core::eos;
17+
use celma_v0_core::parser::core::{eos, returns, Returns};
1818
use celma_v0_core::parser::literal::{delimited_char, delimited_string};
1919

2020
use celma_v0_macro::parsec_rules;
2121
use celma_v1_ast::syntax::ASTParsec::{
22-
PAtom, PAtoms, PBind, PCheck, PChoice, PCode, PIdent, PLookahead, PMap, PNot, POptional,
23-
PRepeat, PSequence, PTry,
22+
PAtom, PAtoms, PBind, PCheck, PChoice, PCode, PIdent, PMap, PNot, POptional, PRepeat,
23+
PSequence, PTry,
2424
};
2525
use celma_v1_ast::syntax::{ASTParsec, ASTParsecRule};
2626

@@ -63,11 +63,11 @@ fn mk_ast_parsec(
6363
occ
6464
};
6565

66-
let add = if let Some(value) = add {
67-
if value.0 {
68-
PChoice(bind.wrap(), value.1.wrap())
66+
let add = if let Some((choice, parser)) = add {
67+
if choice {
68+
PChoice(bind.wrap(), parser.wrap())
6969
} else {
70-
PSequence(bind.wrap(), value.1.wrap())
70+
PSequence(bind.wrap(), parser.wrap())
7171
}
7272
} else {
7373
bind
@@ -85,22 +85,25 @@ fn mk_atom(operation: Option<char>, parsec: ASTParsec<char>) -> ASTParsec<char>
8585
Some('^') => PNot(parsec.wrap()),
8686
Some('!') => PTry(parsec.wrap()),
8787
Some('#') => PCheck(parsec.wrap()),
88-
Some('/') => PLookahead(parsec.wrap()),
8988
_ => parsec,
9089
}
9190
}
9291

92+
fn epsilon() -> Returns<()> {
93+
returns(())
94+
}
95+
9396
parsec_rules!(
9497
let skip = (' '|'\t'|'\n'|'\r')* -> {}
9598
let ident:{String} = (skip i=#(alpha (alpha|digit|'_')*) skip) -> { i.into_iter().collect() }
9699

97-
let rkind = (/'>' -> {})
98-
| (^('<'|'>')+ rkind -> {})
100+
let rkind = (^('<'|'>')+ rkind -> {})
99101
| ('<' rkind '>' rkind -> {})
102+
| epsilon
100103

101-
let rcode = (/'}' -> {})
102-
| (^('}'|'{')+ rcode -> {})
104+
let rcode = (^('}'|'{')+ rcode -> {})
103105
| ('{' rcode '}' rcode -> {})
106+
| epsilon
104107

105108
let kind:{String} = (skip '<' c=#rkind '>' skip) -> { c.into_iter().collect() }
106109
let code:{String} = (skip '{' c=#rcode '}' skip) -> { c.into_iter().collect() }

lang/v1/parser/tests/parser_tests.rs

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ mod parser_tests {
2323
atom_char, atom_code, atom_ident, atom_string, code, kind, parsec, rcode, rule,
2424
};
2525
use celma_v1_ast::syntax::ASTParsec::{
26-
PAtom, PAtoms, PBind, PCheck, PChoice, PCode, PIdent, PLookahead, PNot, POptional,
26+
PAtom, PAtoms, PBind, PCheck, PChoice, PCode, PIdent, PNot, POptional,
2727
PSequence, PTry,
2828
};
2929
use celma_v1_ast::syntax::ASTParsecRule;
@@ -92,7 +92,7 @@ mod parser_tests {
9292

9393
#[test]
9494
fn should_parse_ident_body_with_block_unbalanced_rcode() {
95-
let response = rcode().parse(CharStream::new("{ titi }"));
95+
let response = rcode().parse(CharStream::new("{ titi"));
9696

9797
assert_eq!(response.fold(|_, _, _| false, |_, _| true), true);
9898
}
@@ -245,23 +245,6 @@ mod parser_tests {
245245
);
246246
}
247247

248-
#[test]
249-
fn should_parse_bind_optional_ident_body_with_lookahead() {
250-
let response = parsec().parse(CharStream::new("a=/entry"));
251-
252-
assert_eq!(
253-
response.fold(
254-
|v, _, _| v
255-
== PBind(
256-
String::from("a"),
257-
PLookahead(PIdent(String::from("entry")).wrap()).wrap()
258-
),
259-
|_, _| false
260-
),
261-
true
262-
);
263-
}
264-
265248
#[test]
266249
fn should_parse_protected_simple_rule() {
267250
let response = rule().parse(CharStream::new("let x = entry"));

0 commit comments

Comments
 (0)