Skip to content

Commit 50922b4

Browse files
committed
feat: basic lexer support
1 parent 0198549 commit 50922b4

File tree

13 files changed

+389
-202
lines changed

13 files changed

+389
-202
lines changed

parse-it-codegen/src/lexer.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1+
// Code generation (expansion to token streams).
mod backend;
// Surface-syntax compilation into the middle representation.
mod frontend;
// Shared intermediate data structures.
mod middle;
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
use proc_macro2::{Span, TokenStream};
2+
use quote::{format_ident, quote};
3+
use syn::{punctuated::Punctuated, visit_mut::VisitMut};
4+
5+
use crate::lexer::middle::{Action, LexerImpl, Middle};
6+
7+
/// Shared state threaded through lexer expansion.
pub struct Context {
    // Path tokens of the runtime crate (e.g. `::parse_it`).
    crate_name: TokenStream,
    // Identifier used for the lexer-state argument in generated code.
    lexbuf: syn::Ident,
    // Debug flag copied from `Middle::debug`; usage not visible here.
    debug: bool,
}
12+
13+
impl Middle {
14+
pub fn expand(self) -> Result<TokenStream, TokenStream> {
15+
let mut result = TokenStream::new();
16+
let ctx = Context {
17+
crate_name: self.crate_name,
18+
lexbuf: format_ident!("r#__lexbuf", span = Span::call_site()),
19+
debug: self.debug,
20+
};
21+
22+
for lexer in self.lexers {
23+
result.extend(lexer.expand(&ctx)?);
24+
}
25+
26+
let mod_name = self.mod_name;
27+
let attrs = self.attrs;
28+
let items = self.items;
29+
Ok(quote! {
30+
#[allow(non_snake_case)]
31+
#(#attrs)*
32+
mod #mod_name {
33+
#(#items)*
34+
#result
35+
}
36+
})
37+
}
38+
}
39+
40+
impl LexerImpl {
    /// Expand this lexer into a generated unit struct and its `run` function.
    pub fn expand(self, ctx: &Context) -> Result<TokenStream, TokenStream> {
        let name = self.name;
        let vis = self.vis;
        let inputs = self.inputs;
        // The generated `run` yields `()` when no return type was declared.
        let ret_ty = if let Some(ref ret_ty) = self.ret_ty {
            quote! { #ret_ty }
        } else {
            quote! { () }
        };

        let mut regexes = vec![];
        let mut actions = vec![];
        for (i, rule) in self.rules.into_iter().enumerate() {
            regexes.push(rule.pattern);
            // Start from the innermost action (`rule.actions.0`) and wrap it
            // with each outer action from including lexers: the inner result
            // is bound to `__self` so the outer action can refer to it.
            let (action, _) = rule.actions.1.into_iter().try_fold(
                rule.actions.0.expand(ctx)?,
                |(inner, inner_ty), it| -> Result<_, TokenStream> {
                    let (action, ret_ty) = it.expand(ctx)?;
                    Ok((
                        quote! {{
                            let __self: #inner_ty = #inner;
                            #action
                        }},
                        ret_ty,
                    ))
                },
            )?;
            // One match arm per pattern index reported by the regex engine.
            actions.push(quote! {
                #i => #action
            });
        }

        let crate_name = &ctx.crate_name;
        let lexbuf = &ctx.lexbuf;
        Ok(quote! {
            #vis struct #name;

            impl #name {
                // One multi-pattern regex per thread; the index of the
                // matched pattern selects the action arm below.
                thread_local! {
                    static REGEX: #crate_name::lexer::Regex = #crate_name::lexer::Regex::new_many(
                        &[#(#regexes),*]
                    ).unwrap();
                }

                #[allow(
                    dead_code,
                    unreachable_code,
                    clippy::never_loop,
                    clippy::let_unit_value,
                    clippy::unit_arg,
                    clippy::useless_conversion
                )]
                pub fn run(
                    #lexbuf: &mut #crate_name::lexer::LexerState,
                    #(#inputs),*
                ) -> Result<Option<#ret_ty>, ()> {
                    Self::REGEX.with(|regex| {
                        // NOTE(review): `'lex` is labelled and the lints above
                        // are allowed, presumably so user actions can
                        // `continue 'lex` / `break 'lex` — confirm intent.
                        'lex: loop {
                            if let Some(pat) = #lexbuf.run(regex) {
                                let __self = #lexbuf.lexeme();
                                let value = match pat.as_u32() as usize {
                                    #(#actions,)*
                                    _ => unreachable!(),
                                };
                                return Ok(Some(value));
                            } else {
                                return Err(());
                            }
                        }
                        // Reached only if an action breaks out of `'lex`.
                        Ok(None)
                    })
                }
            }
        })
    }
}
117+
118+
/// AST visitor that rewrites `lex!(Lexer(args…))` invocations inside action
/// expressions into direct `Lexer::run(lexbuf, args…)?` calls.
struct ExpandLexMacroVisitor {
    // Path tokens of the runtime crate.
    crate_name: TokenStream,
    // Identifier of the lexer-state argument in generated code.
    lexbuf: syn::Ident,
    // Compile-error token streams collected while visiting.
    failure: Vec<TokenStream>,
}
123+
124+
impl ExpandLexMacroVisitor {
125+
pub fn new(crate_name: TokenStream, lexbuf: syn::Ident) -> Self {
126+
Self {
127+
crate_name,
128+
lexbuf,
129+
failure: vec![],
130+
}
131+
}
132+
133+
pub fn failure(self) -> Option<TokenStream> {
134+
self.failure.into_iter().reduce(|mut a, b| {
135+
a.extend(b);
136+
a
137+
})
138+
}
139+
}
140+
141+
impl VisitMut for ExpandLexMacroVisitor {
    fn visit_macro_mut(&mut self, i: &mut syn::Macro) {
        // Only `lex!(...)` invocations are rewritten; other macros are left
        // untouched.
        if i.path.is_ident("lex") {
            // Accepted argument syntax: `lex!(Lexer)` or
            // `lex!(Lexer(arg1, arg2, ...))`.
            struct LexMacro {
                pub lexer: syn::Ident,
                pub args: Vec<syn::Expr>,
            }
            impl syn::parse::Parse for LexMacro {
                fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
                    let lexer = input.parse()?;
                    let args = if input.peek(syn::token::Paren) {
                        let content;
                        syn::parenthesized!(content in input);

                        let args =
                            Punctuated::<syn::Expr, syn::Token![,]>::parse_terminated(&content)?;
                        args.into_iter().collect()
                    } else {
                        // No parenthesised argument list: lexer takes no args.
                        vec![]
                    };
                    Ok(Self { lexer, args })
                }
            }

            let crate_name = &self.crate_name;
            let lexbuf = &self.lexbuf;
            match syn::parse2::<LexMacro>(i.tokens.clone()) {
                Ok(lex_macro) => {
                    let LexMacro { lexer, args } = lex_macro;
                    // Rewrite in place: `lex!(L(a))` becomes
                    // `#crate_name::identity!(L::run(lexbuf, a)?)`, keeping
                    // the node a macro so no AST restructuring is needed.
                    i.path = syn::parse_quote!(#crate_name::identity);
                    i.tokens = quote! { #lexer::run(#lexbuf, #(#args),*)? };
                }
                // Record the parse error; surfaced later via `failure()`.
                Err(e) => self.failure.push(e.to_compile_error()),
            }
        }
    }
}
178+
179+
impl Action {
180+
pub fn expand(&self, ctx: &Context) -> Result<(TokenStream, TokenStream), TokenStream> {
181+
let mut action = self.action.clone();
182+
183+
let mut visitor = ExpandLexMacroVisitor::new(ctx.crate_name.clone(), ctx.lexbuf.clone());
184+
visitor.visit_expr_mut(&mut action);
185+
if let Some(failure) = visitor.failure() {
186+
return Err(failure);
187+
}
188+
189+
let ret_ty = self.ret_ty();
190+
Ok((
191+
quote! {
192+
#action
193+
},
194+
ret_ty,
195+
))
196+
}
197+
198+
pub fn ret_ty(&self) -> TokenStream {
199+
if let Some(ref ret_ty) = self.ret_ty {
200+
quote! { #ret_ty }
201+
} else {
202+
quote! { () }
203+
}
204+
}
205+
}

parse-it-codegen/src/lexer/frontend.rs

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,26 @@
1+
use std::rc::Rc;
2+
13
use proc_macro2::TokenStream;
24
use quote::{quote, quote_spanned};
5+
use syn::visit_mut::VisitMut;
36

47
use crate::{
58
hash::HashMap,
6-
lexer::middle::{LexerImpl, Middle, Rule},
7-
syntax::{Lexer, LexerMod, LexerPattern},
9+
lexer::middle::{Action, LexerImpl, Middle, Rule},
10+
syntax::{Lexer, LexerMod, LexerPattern, LexerRule},
11+
utils::RewriteSelfVisitor,
812
};
913

14+
#[derive(Default)]
15+
struct Context {
16+
pub parse_macros: Rc<Vec<syn::Path>>,
17+
}
18+
1019
impl LexerMod {
1120
pub fn compile(self) -> Result<Middle, TokenStream> {
21+
let ctx = Context {
22+
parse_macros: self.config.parse_macros.clone(),
23+
};
1224
let crate_name = match &self.config.crate_name {
1325
Some(crate_name) => quote! { #crate_name },
1426
None => quote! { ::parse_it },
@@ -22,7 +34,7 @@ impl LexerMod {
2234
let lexers = self
2335
.lexers
2436
.iter()
25-
.map(|lexer| lexer.compile(&lexers))
37+
.map(|lexer| lexer.compile(&lexers, &ctx))
2638
.collect::<Result<Vec<_>, _>>()?;
2739

2840
let middle = Middle {
@@ -38,10 +50,11 @@ impl LexerMod {
3850
}
3951

4052
impl Lexer {
41-
pub fn full_rules(
53+
fn full_rules(
4254
&self,
4355
lexers: &HashMap<syn::Ident, &Lexer>,
4456
stack: &mut Vec<syn::Ident>,
57+
ctx: &Context,
4558
) -> Result<Vec<Rule>, TokenStream> {
4659
stack.push(self.name.clone());
4760
let mut rules = vec![];
@@ -54,7 +67,7 @@ impl Lexer {
5467
}
5568
rules.push(Rule {
5669
pattern: lit_str.clone(),
57-
action: vec![(rule.action.clone(), self.ty.clone())],
70+
actions: (rule.compile(self.ty.clone(), ctx), vec![]),
5871
});
5972
}
6073
LexerPattern::Name(ident) => {
@@ -70,30 +83,49 @@ impl Lexer {
7083
let e = format!("Cannot include lexer `{ident}` in another lexer, it has inputs defined");
7184
return Err(quote_spanned! { ident.span() => compile_error!(#e) });
7285
}
73-
let action = rule.action.clone();
74-
rules.extend(
75-
lexer
76-
.full_rules(lexers, stack)?
77-
.into_iter()
78-
.map(|mut rule| {
79-
rule.action.push((action.clone(), self.ty.clone()));
80-
rule
81-
}),
82-
);
86+
let action = rule.compile(self.ty.clone(), ctx);
87+
rules.extend(lexer.full_rules(lexers, stack, ctx)?.into_iter().map(
88+
|mut rule| {
89+
rule.actions.1.push(action.clone());
90+
rule
91+
},
92+
));
8393
}
8494
}
8595
}
8696
stack.pop();
8797
Ok(rules)
8898
}
8999

90-
pub fn compile(&self, lexers: &HashMap<syn::Ident, &Lexer>) -> Result<LexerImpl, TokenStream> {
91-
let rules = self.full_rules(lexers, &mut vec![])?;
100+
fn compile(
101+
&self,
102+
lexers: &HashMap<syn::Ident, &Lexer>,
103+
ctx: &Context,
104+
) -> Result<LexerImpl, TokenStream> {
105+
let rules = self.full_rules(lexers, &mut vec![], ctx)?;
106+
let inputs = self.inputs.iter().cloned().collect();
92107
Ok(LexerImpl {
93108
name: self.name.clone(),
94109
rules,
95110
vis: self.vis.clone(),
111+
inputs,
96112
ret_ty: self.ty.clone(),
97113
})
98114
}
99115
}
116+
117+
impl LexerRule {
118+
fn compile(&self, ret_ty: Option<syn::Type>, ctx: &Context) -> Action {
119+
let mut action = self.action.clone();
120+
121+
let mut visitor = RewriteSelfVisitor::new(ctx.parse_macros.clone());
122+
visitor.visit_expr_mut(&mut action);
123+
let self_ident = visitor.self_ident;
124+
125+
Action {
126+
action,
127+
ret_ty,
128+
self_ident,
129+
}
130+
}
131+
}

parse-it-codegen/src/lexer/middle.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,29 @@
11
use proc_macro2::TokenStream;
22

3+
/// A compiled lexer-rule action.
#[derive(Debug, Clone)]
pub struct Action {
    /// The user-written action expression.
    pub action: syn::Expr,
    /// Declared result type of the action, if any; `()` when absent.
    pub ret_ty: Option<syn::Type>,
    /// replace `self` with this ident
    pub self_ident: syn::Ident,
}
10+
11+
/// A single pattern→action rule of a lexer.
#[derive(Debug, Clone)]
pub struct Rule {
    /// The literal pattern this rule matches.
    pub pattern: syn::LitStr,
    /// The rule's own action plus the wrapping actions contributed by
    /// lexers that include this one (each wrapper receives the inner
    /// result as `__self` during expansion).
    pub actions: (Action, Vec<Action>),
}
716

17+
/// A fully resolved lexer, ready for code generation.
#[derive(Debug, Clone)]
pub struct LexerImpl {
    /// Name of the generated lexer struct.
    pub name: syn::Ident,
    /// All rules, including those inherited from included lexers.
    pub rules: Vec<Rule>,
    /// Visibility of the generated struct.
    pub vis: syn::Visibility,
    /// Extra parameters of the generated `run` function.
    pub inputs: Vec<syn::PatType>,
    /// Return type of the lexer, if declared; `()` when absent.
    pub ret_ty: Option<syn::Type>,
}
1425

26+
#[derive(Debug, Clone)]
1527
pub struct Middle {
1628
pub attrs: Vec<syn::Attribute>,
1729
pub crate_name: TokenStream,

parse-it-codegen/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ mod hash;
22
pub mod lexer;
33
pub mod parser;
44
pub mod syntax;
5+
mod utils;

parse-it-codegen/src/parser/backend.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,7 @@ use proc_macro2::{Span, TokenStream};
22
use quote::{format_ident, quote, quote_spanned};
33
use syn::spanned::Spanned;
44

5-
use crate::{
6-
hash::HashMap,
7-
parser::middle::{Capture, MemoKind, Middle, ParseOp, ParserImpl, Parsing, Value},
8-
};
5+
use crate::parser::middle::{Capture, MemoKind, Middle, ParseOp, ParserImpl, Parsing, Value};
96

107
pub struct Context {
118
crate_name: TokenStream,
@@ -52,12 +49,8 @@ impl Middle {
5249
crate_name: self.crate_name,
5350
debug: self.debug,
5451
};
55-
let mut ret_ty = HashMap::default();
56-
let mut depends = HashMap::default();
5752

5853
for parser in self.parsers {
59-
ret_ty.insert(parser.name.clone(), parser.ret_ty.clone());
60-
depends.insert(parser.name.clone(), parser.depends.clone());
6154
result.extend(parser.expand(&ctx)?);
6255
}
6356

0 commit comments

Comments
 (0)