Skip to content

Commit df60780

Browse files
committed
feat: first lexer+parser example
1 parent 55b00f4 commit df60780

File tree

8 files changed

+79
-68
lines changed

8 files changed

+79
-68
lines changed

parse-it-codegen/src/lexer/backend.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,8 @@ impl LexerImpl {
108108
clippy::never_loop,
109109
clippy::let_unit_value,
110110
clippy::unit_arg,
111-
clippy::useless_conversion
111+
clippy::useless_conversion,
112+
clippy::diverging_sub_expression
112113
)]
113114
pub fn run<'lex>(
114115
#lexbuf: &mut #crate_name::lexer::LexerState<'lex>,

parse-it-codegen/src/parser/backend.rs

Lines changed: 20 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,7 @@ impl ParserImpl {
108108

109109
let state_token = StateToken::new();
110110
let state = state_token.to_ident();
111-
let parser = self.parser.expand(state_token)?;
111+
let parser = self.parser.expand(state_token, ctx)?;
112112
let parse_impl = quote! {
113113
fn parse_impl(
114114
&self,
@@ -200,11 +200,16 @@ impl ParserImpl {
200200
}
201201

202202
impl Parsing {
203-
pub fn expand(self, state_token: StateToken) -> Result<TokenStream, TokenStream> {
203+
pub fn expand(
204+
self,
205+
state_token: StateToken,
206+
ctx: &Context,
207+
) -> Result<TokenStream, TokenStream> {
204208
let mut result = TokenStream::new();
205209
let span = self.span;
206210
let state = state_token.to_ident();
207211
let value = self.result();
212+
let crate_name = &ctx.crate_name;
208213
for (value, op) in self.into_iter() {
209214
let value = value.to_ident();
210215
let op = match op {
@@ -213,10 +218,10 @@ impl Parsing {
213218
syn::Lit::Str(lit_str) => {
214219
quote_spanned! { span => #state.parse_str(#lit_str) }
215220
}
216-
syn::Lit::Char(lit_char) => {
217-
quote_spanned! { span => #state.parse_char(#lit_char) }
218-
}
219-
syn::Lit::Int(_) | syn::Lit::Float(_) | syn::Lit::Bool(_) => {
221+
syn::Lit::Char(_)
222+
| syn::Lit::Int(_)
223+
| syn::Lit::Float(_)
224+
| syn::Lit::Bool(_) => {
220225
quote_spanned! { span => #state.parse_literal(#c) }
221226
}
222227
_ => {
@@ -248,7 +253,7 @@ impl Parsing {
248253
}
249254
ParseOp::Then { prev, next } => {
250255
let prev = prev.to_ident();
251-
let next = next.expand(state_token)?;
256+
let next = next.expand(state_token, ctx)?;
252257
quote_spanned! { span =>
253258
let #value = match #prev {
254259
Ok(v1) => #next.map(|v2| (v1, v2)),
@@ -258,7 +263,7 @@ impl Parsing {
258263
}
259264
ParseOp::ThenIgnore { prev, next } => {
260265
let prev = prev.to_ident();
261-
let next = next.expand(state_token)?;
266+
let next = next.expand(state_token, ctx)?;
262267
quote_spanned! { span =>
263268
let #value = match #prev {
264269
Ok(v) => #next.map(|_| v),
@@ -268,7 +273,7 @@ impl Parsing {
268273
}
269274
ParseOp::IgnoreThen { prev, next } => {
270275
let prev = prev.to_ident();
271-
let next = next.expand(state_token)?;
276+
let next = next.expand(state_token, ctx)?;
272277
quote_spanned! { span =>
273278
let #value = match #prev {
274279
Ok(_) => #next,
@@ -279,7 +284,7 @@ impl Parsing {
279284
ParseOp::Repeat { parser, at_least } => {
280285
let fork_token = state_token.fork();
281286
let fork = fork_token.to_ident();
282-
let parser = parser.expand(fork_token)?;
287+
let parser = parser.expand(fork_token, ctx)?;
283288
let repeat = quote_spanned! { span =>
284289
let #fork = &mut #state.fork();
285290
let mut results = vec![];
@@ -291,7 +296,7 @@ impl Parsing {
291296
if at_least == 0 {
292297
quote_spanned! { span =>
293298
#repeat
294-
let #value = Ok(results);
299+
let #value: ::std::result::Result<_, #crate_name::Error> = Ok(results);
295300
}
296301
} else {
297302
quote_spanned! { span =>
@@ -305,13 +310,13 @@ impl Parsing {
305310
}
306311
}
307312
ParseOp::Optional { parser } => {
308-
let parser = parser.expand(state_token)?;
313+
let parser = parser.expand(state_token, ctx)?;
309314
quote_spanned! { span => let #value = #parser.ok(); }
310315
}
311316
ParseOp::LookAhead { parser } => {
312317
let fork_token = state_token.fork();
313318
let fork = fork_token.to_ident();
314-
let parser = parser.expand(fork_token)?;
319+
let parser = parser.expand(fork_token, ctx)?;
315320
quote_spanned! { span =>
316321
let #fork = &mut #state.fork();
317322
let #value = #parser.map(|_| ());
@@ -320,7 +325,7 @@ impl Parsing {
320325
ParseOp::LookAheadNot { parser } => {
321326
let fork_token = state_token.fork();
322327
let fork = fork_token.to_ident();
323-
let parser = parser.expand(fork_token)?;
328+
let parser = parser.expand(fork_token, ctx)?;
324329
quote_spanned! { span =>
325330
let #fork = &mut #state.fork();
326331
let #value = if let Ok(value) = #parser {
@@ -335,7 +340,7 @@ impl Parsing {
335340
let fork = fork_token.to_ident();
336341
let parsers = parsers
337342
.into_iter()
338-
.map(|p| p.expand(fork_token))
343+
.map(|p| p.expand(fork_token, ctx))
339344
.collect::<Result<Vec<_>, _>>()?;
340345
quote_spanned! { span =>
341346
let mut fork;

parse-it-codegen/src/parser/frontend.rs

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -135,10 +135,15 @@ impl Parser {
135135
.iter()
136136
.map(|(p, i)| (i.clone(), p.clone()))
137137
.collect();
138-
let mut parser = self.rules.0.compile(ctx)?;
139-
if !self.rules.1.is_empty() {
140-
parser = parser.choice_nocap(self.rules.1.into_iter().map(|rule| rule.compile(ctx)))?;
138+
if self.rules.is_empty() {
139+
return Err(
140+
quote_spanned! { self.name.span() => compile_error!("parser must have at least one rule"); },
141+
);
141142
}
143+
let parser = Parsing::choice_nocap(
144+
self.rules.into_iter().map(|rule| rule.compile(ctx)),
145+
self.name.span(),
146+
)?;
142147

143148
let memo = if ctx.left_recursion.contains(&self.name) {
144149
MemoKind::LeftRec
@@ -162,7 +167,7 @@ impl Parser {
162167
.entry(self.name.clone())
163168
.or_insert_with(move || {
164169
let mut set = HashSet::default();
165-
for rule in self.rules() {
170+
for rule in &self.rules {
166171
set.extend(rule.left_calls());
167172
}
168173
set
@@ -177,7 +182,7 @@ impl Parser {
177182
.entry(self.name.clone())
178183
.or_insert_with(move || {
179184
let mut depends = OrderedMap::default();
180-
for rule in self.rules() {
185+
for rule in &self.rules {
181186
rule.production
182187
.analyze_direct_depends(&mut depends, &self.name);
183188
}

parse-it-codegen/src/parser/middle.rs

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -213,11 +213,9 @@ impl Parsing {
213213
}
214214

215215
pub fn choice_nocap(
216-
self,
217-
rest: impl Iterator<Item = Result<Parsing, TokenStream>>,
216+
parsers: impl Iterator<Item = Result<Parsing, TokenStream>>,
217+
span: Span,
218218
) -> Result<Self, TokenStream> {
219-
let span = self.span;
220-
let parsers = std::iter::once(Ok(self)).chain(rest);
221219
let parsers = parsers.collect::<Result<Vec<_>, _>>()?;
222220
let op = ParseOp::Choice { parsers };
223221
Ok(Self::from_op(op, Capture::Loud, span))

parse-it-codegen/src/syntax.rs

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -171,20 +171,14 @@ impl ParserMod {
171171
}
172172

173173
/// ```text
174-
/// Parser ::= Vis Name '->' Type '{' Rule+ '}'
174+
/// Parser ::= Vis Name '->' Type '{' Rule* '}'
175175
/// ```
176176
#[derive(Debug)]
177177
pub struct Parser {
178178
pub vis: syn::Visibility,
179179
pub name: syn::Ident,
180180
pub ty: syn::Type,
181-
pub rules: (Rule, Vec<Rule>),
182-
}
183-
184-
impl Parser {
185-
pub fn rules(&self) -> impl Iterator<Item = &Rule> {
186-
std::iter::once(&self.rules.0).chain(self.rules.1.iter())
187-
}
181+
pub rules: Vec<Rule>,
188182
}
189183

190184
impl syn::parse::Parse for Parser {
@@ -197,13 +191,11 @@ impl syn::parse::Parse for Parser {
197191
let content;
198192
syn::braced!(content in input);
199193

200-
let first_rule = content.parse::<Rule>()?;
201194
let mut rules = vec![];
202195
while !content.is_empty() {
203196
let rule = content.parse::<Rule>()?;
204197
rules.push(rule);
205198
}
206-
let rules = (first_rule, rules);
207199

208200
Ok(Parser {
209201
vis,

parse-it/examples/json.rs

Lines changed: 33 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -60,29 +60,57 @@ parse_it::parse_it! {
6060
use parse_it::lexer::Token;
6161
use super::JsonValue;
6262

63-
type Lexer = super::lex::Initial;
63+
type Lexer = super::Debug;
6464

6565
Object -> JsonValue {
66-
'{' ( Key ':' Value )* '}' => {
67-
let map = self.into_iter().collect::<HashMap<_, _>>();
66+
'{' '}' => JsonValue::Object(HashMap::new()),
67+
'{' ps:( Key ':' Value ',' )* p:( Key ':' Value ) '}' => {
68+
let map = ps.into_iter().chain(std::iter::once(p)).collect::<HashMap<_, _>>();
6869
JsonValue::Object(map)
6970
}
7071
}
7172

73+
Array -> JsonValue {
74+
'[' ']' => JsonValue::Array(Vec::new()),
75+
'[' vs:(Value ',')* v:Value ']' => {
76+
let vec = vs.into_iter().chain(std::iter::once(v)).collect();
77+
JsonValue::Array(vec)
78+
}
79+
}
80+
7281
Key -> String {
7382
Token::Custom(buf) => buf.clone()
7483
}
7584

7685
pub Value -> JsonValue {
7786
i:<f64> => JsonValue::Number(i),
7887
Token::Custom(buf) => JsonValue::String(buf.clone()),
79-
"true" => JsonValue::Boolean(true),
80-
"false" => JsonValue::Boolean(false),
88+
true => JsonValue::Boolean(true),
89+
false => JsonValue::Boolean(false),
8190
"null" => JsonValue::Null,
91+
Object => self,
92+
Array => self,
8293
}
8394
}
8495
}
8596

97+
#[derive(Clone)]
98+
pub struct Debug;
99+
100+
impl parse_it::LexIt for Debug {
101+
type Token<'a> = parse_it::lexer::Token<'a, String>;
102+
103+
fn new() -> Self {
104+
Self
105+
}
106+
107+
fn next<'a>(&self, lexbuf: &mut parse_it::LexerState<'a>) -> Option<Self::Token<'a>> {
108+
let result = lex::Initial.next(lexbuf);
109+
eprintln!("Lexing: {:?} at {:?}", result, lexbuf.span());
110+
result
111+
}
112+
}
113+
86114
fn main() {
87115
let input = r#"{"name": "Alice", "age": 30, "is_student": false, "courses": ["Math", "Science"], "address": null}"#;
88116

parse-it/src/lexer.rs

Lines changed: 4 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -28,18 +28,6 @@ impl<T: Copy> TryConvert<T> for T {
2828
}
2929

3030
pub trait AsLiteral {
31-
fn as_literal<T>(&self) -> Option<T>
32-
where
33-
Self: TryConvert<T>,
34-
T: Copy,
35-
{
36-
self.try_convert()
37-
}
38-
39-
fn as_char(&self) -> Option<char> {
40-
None
41-
}
42-
4331
fn as_str<'a>(&self) -> Option<Cow<'a, str>>
4432
where
4533
Self: 'a,
@@ -282,9 +270,8 @@ impl TryConvert<bool> for LiteralToken<'_> {
282270
}
283271
}
284272

285-
impl AsLiteral for LiteralToken<'_> {
286-
/// Try converting the token to a `char` value.
287-
fn as_char(&self) -> Option<char> {
273+
impl TryConvert<char> for LiteralToken<'_> {
274+
fn try_convert(&self) -> Option<char> {
288275
match *self {
289276
LiteralToken::Char(c) => Some(c),
290277
LiteralToken::Str(s) => {
@@ -308,7 +295,9 @@ impl AsLiteral for LiteralToken<'_> {
308295
_ => None,
309296
}
310297
}
298+
}
311299

300+
impl AsLiteral for LiteralToken<'_> {
312301
/// Try converting the token to a `String` value.
313302
fn as_str<'a>(&self) -> Option<Cow<'a, str>>
314303
where

parse-it/src/parser.rs

Lines changed: 6 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -192,25 +192,18 @@ impl<'a, L: LexIt + Clone> ParserState<'a, L> {
192192
impl<'a, L: LexIt + Clone + 'a> ParserState<'a, L> {
193193
pub fn parse_literal<T>(&mut self, literal: T) -> Result<T, Error>
194194
where
195-
L::Token<'a>: AsLiteral + TryConvert<T>,
196-
T: PartialEq + Copy,
195+
L::Token<'a>: TryConvert<T>,
196+
T: PartialEq,
197197
{
198-
self.parse_with(|tt| tt.as_literal().and_then(|l| (l == literal).then_some(l)))
198+
self.parse_with(|tt| tt.try_convert().and_then(|l| (l == literal).then_some(l)))
199199
}
200200

201201
pub fn parse_literal_type<T>(&mut self) -> Result<T, Error>
202202
where
203-
L::Token<'a>: AsLiteral + TryConvert<T>,
204-
T: PartialEq + Copy,
203+
L::Token<'a>: TryConvert<T>,
204+
T: PartialEq,
205205
{
206-
self.parse_with(|tt| tt.as_literal().and_then(|l| l.try_convert()))
207-
}
208-
209-
pub fn parse_char(&mut self, literal: char) -> Result<char, Error>
210-
where
211-
L::Token<'a>: AsLiteral,
212-
{
213-
self.parse_with(|tt| tt.as_char().and_then(|c| (c == literal).then_some(c)))
206+
self.parse_with(|tt| tt.try_convert())
214207
}
215208

216209
pub fn parse_str(&mut self, literal: &'a str) -> Result<Cow<'a, str>, Error>

0 commit comments

Comments
 (0)