Skip to content

Commit f7fe8f9

Browse files
committed
track the current state when we recurse
Are we in Left position, Right position, etc.? (See docs.) This will be used to guide parsing of variants so that we can properly handle recursion.
1 parent ac30d3b commit f7fe8f9

File tree

2 files changed

+123
-25
lines changed

2 files changed

+123
-25
lines changed

crates/formality-core/src/parse/parser.rs

Lines changed: 61 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::str::FromStr;
33

44
use crate::{
55
language::{CoreParameter, HasKind, Language},
6+
parse::parser::left_recursion::{CurrentState, LeftRight},
67
set,
78
variable::CoreVariable,
89
Downcast, DowncastFrom, Set, Upcast,
@@ -56,8 +57,10 @@ pub struct ActiveVariant<'s, 't, L>
5657
where
5758
L: Language,
5859
{
60+
precedence: Precedence,
5961
scope: &'s Scope<L>,
60-
text: &'t str,
62+
start_text: &'t str,
63+
current_text: &'t str,
6164
reductions: Vec<&'static str>,
6265
is_cast_variant: bool,
6366
}
@@ -94,6 +97,8 @@ where
9497
nonterminal_name: &'static str,
9598
mut op: impl FnMut(&mut Self),
9699
) -> ParseResult<'t, T> {
100+
let text = skip_whitespace(text);
101+
97102
left_recursion::enter(scope, text, || {
98103
let tracing_span = tracing::span!(
99104
tracing::Level::TRACE,
@@ -154,12 +159,8 @@ where
154159

155160
let variant_precedence = Precedence(variant_precedence);
156161

157-
let mut active_variant = ActiveVariant {
158-
scope: self.scope,
159-
text: self.start_text,
160-
reductions: vec![],
161-
is_cast_variant: false,
162-
};
162+
let mut active_variant =
163+
ActiveVariant::new(variant_precedence, self.scope, self.start_text);
163164
let result = op(&mut active_variant);
164165

165166
// Drop the guard here so that the "success" or "error" results appear outside the variant span.
@@ -175,7 +176,7 @@ where
175176
}
176177

177178
self.successes.push(SuccessfulParse {
178-
text: active_variant.text,
179+
text: active_variant.current_text,
179180
reductions: active_variant.reductions,
180181
precedence: variant_precedence,
181182
value,
@@ -271,19 +272,50 @@ impl<'s, 't, L> ActiveVariant<'s, 't, L>
271272
where
272273
L: Language,
273274
{
275+
fn new(precedence: Precedence, scope: &'s Scope<L>, start_text: &'t str) -> Self {
276+
let start_text = skip_whitespace(start_text);
277+
Self {
278+
precedence,
279+
scope,
280+
start_text,
281+
current_text: start_text,
282+
reductions: vec![],
283+
is_cast_variant: false,
284+
}
285+
}
286+
fn current_state(&self) -> CurrentState {
287+
// Determine whether we are in Left or Right position -- Left means
288+
// that we have not yet consumed any tokens. Right means that we have.
289+
// See `LeftRight` type for more details.
290+
//
291+
// Subtle-ish: this comparison assumes `start_text` has no leading whitespace,
292+
// but we establish that invariant in `Self::new`.
293+
debug_assert_eq!(self.start_text, skip_whitespace(self.start_text));
294+
let left_right = if self.start_text == self.current_text {
295+
LeftRight::Left
296+
} else {
297+
LeftRight::Right
298+
};
299+
300+
CurrentState {
301+
left_right,
302+
precedence: self.precedence,
303+
}
304+
}
305+
274306
/// The current text remaining to be consumed.
275307
pub fn text(&self) -> &'t str {
276-
self.text
308+
self.current_text
277309
}
278310

279311
/// Skips whitespace in the input, producing no reduction.
280312
pub fn skip_whitespace(&mut self) {
281-
self.text = skip_whitespace(self.text);
313+
self.current_text = skip_whitespace(self.current_text);
282314
}
283315

284316
/// Skips a comma in the input, producing no reduction.
285317
pub fn skip_trailing_comma(&mut self) {
286-
self.text = skip_trailing_comma(self.text);
318+
self.current_text = skip_trailing_comma(self.current_text);
287319
}
288320

289321
/// Marks this variant as a cast variant,
@@ -366,7 +398,7 @@ where
366398
/// Consume next identifier-like string, requiring that it be equal to `expected`.
367399
#[tracing::instrument(level = "trace", ret)]
368400
pub fn expect_keyword(&mut self, expected: &str) -> Result<(), Set<ParseError<'t>>> {
369-
let text0 = self.text;
401+
let text0 = self.current_text;
370402
match self.identifier_like_string() {
371403
Ok(ident) if &*ident == expected => Ok(()),
372404
_ => Err(ParseError::at(
@@ -379,7 +411,7 @@ where
379411
/// Accepts any of the given keywords.
380412
#[tracing::instrument(level = "trace", ret)]
381413
pub fn expect_keyword_in(&mut self, expected: &[&str]) -> Result<String, Set<ParseError<'t>>> {
382-
let text0 = self.text;
414+
let text0 = self.current_text;
383415
match self.identifier_like_string() {
384416
Ok(ident) if expected.iter().any(|&kw| ident == kw) => Ok(ident),
385417
_ => Err(ParseError::at(
@@ -398,7 +430,9 @@ where
398430
err: impl FnOnce(T) -> Set<ParseError<'t>>,
399431
) -> Result<(), Set<ParseError<'t>>> {
400432
let mut this = ActiveVariant {
401-
text: self.text,
433+
precedence: self.precedence,
434+
start_text: self.start_text,
435+
current_text: self.current_text,
402436
reductions: vec![],
403437
scope: self.scope,
404438
is_cast_variant: false,
@@ -420,7 +454,7 @@ where
420454
|p| p.expect_keyword_in(keywords),
421455
|ident| {
422456
ParseError::at(
423-
self.text,
457+
self.current_text,
424458
format!("expected identified, found keyword `{ident:?}`"),
425459
)
426460
},
@@ -456,13 +490,15 @@ where
456490
op: impl FnOnce(&mut ActiveVariant<'_, 't, L>) -> R,
457491
) -> R {
458492
let mut av = ActiveVariant {
493+
precedence: self.precedence,
459494
scope: &scope,
460-
text: self.text,
495+
start_text: self.start_text,
496+
current_text: self.current_text,
461497
reductions: vec![],
462498
is_cast_variant: false,
463499
};
464500
let result = op(&mut av);
465-
self.text = av.text;
501+
self.current_text = av.current_text;
466502
self.reductions.extend(av.reductions);
467503
result
468504
}
@@ -475,7 +511,7 @@ where
475511
|p| p.variable(),
476512
|var| {
477513
ParseError::at(
478-
self.text,
514+
self.current_text,
479515
format!("found unexpected in-scope variable {:?}", var),
480516
)
481517
},
@@ -510,7 +546,7 @@ where
510546
{
511547
self.skip_whitespace();
512548
let type_name = std::any::type_name::<R>();
513-
let text0 = self.text;
549+
let text0 = self.current_text;
514550
let id = self.identifier()?;
515551
match self.scope.lookup(&id) {
516552
Some(parameter) => match parameter.downcast() {
@@ -535,7 +571,7 @@ where
535571
T: FromStr + std::fmt::Debug,
536572
{
537573
let description = std::any::type_name::<T>();
538-
let text0 = self.text;
574+
let text0 = self.current_text;
539575
let s = self.string(char::is_numeric, char::is_numeric, description)?;
540576
match T::from_str(&s) {
541577
Ok(t) => Ok(t),
@@ -562,7 +598,7 @@ where
562598
) -> Result<T, Set<ParseError<'t>>> {
563599
self.skip_whitespace();
564600
let value;
565-
(value, self.text) = op(self.text)?;
601+
(value, self.current_text) = op(self.current_text)?;
566602
Ok(value)
567603
}
568604

@@ -590,15 +626,15 @@ where
590626
where
591627
T: CoreParse<L>,
592628
{
593-
let text0 = self.text;
629+
let text0 = self.current_text;
594630
match self.nonterminal() {
595631
Ok(v) => Ok(Some(v)),
596632
Err(mut errs) => {
597633
errs.retain(|e| e.consumed_any_since(text0));
598634
if errs.is_empty() {
599635
// If no errors consumed anything, then self.current_text
600636
// must not have advanced.
601-
assert_eq!(skip_whitespace(text0), self.text);
637+
assert_eq!(skip_whitespace(text0), self.current_text);
602638
Ok(None)
603639
} else {
604640
Err(errs)
@@ -682,10 +718,10 @@ where
682718
reductions,
683719
precedence: _,
684720
value,
685-
} = op(self.scope, self.text)?;
721+
} = left_recursion::recurse(self.current_state(), || op(self.scope, self.current_text))?;
686722

687723
// Adjust our point in the input text
688-
self.text = text;
724+
self.current_text = text;
689725

690726
// Some value was produced, so there must have been a reduction
691727
assert!(!reductions.is_empty());

crates/formality-core/src/parse/parser/left_recursion.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ use crate::{
1414
parse::{ParseError, ParseResult, Scope, SuccessfulParse},
1515
};
1616

17+
use super::Precedence;
18+
1719
thread_local! {
1820
static STACK: RefCell<Vec<StackEntry>> = Default::default()
1921
}
@@ -26,6 +28,8 @@ struct StackEntry {
2628
/// The starting text: we use `*const` instead of `&'t str`
2729
start_text: *const str,
2830

31+
current_state: Option<CurrentState>,
32+
2933
/// The TypeId of the type `T`.
3034
type_id: TypeId,
3135

@@ -37,6 +41,37 @@ struct StackEntry {
3741
observed: bool,
3842
}
3943

44+
#[allow(dead_code)]
45+
pub(super) struct CurrentState {
46+
pub left_right: LeftRight,
47+
pub precedence: Precedence,
48+
}
49+
50+
/// Determines the kind of recursion the current variant
51+
/// would have if it recursed. For example, given a grammar
52+
/// with a variant
53+
///
54+
/// ```text
55+
/// E = E + E
56+
/// ```
57+
///
58+
/// when `E` recurses, the first `E` is considered `Left`
59+
/// because it occurs before any tokens have been consumed.
60+
/// The second `E` is considered `Right`.
61+
///
62+
/// This terminology is a bit weird if you have three recursions,
63+
/// e.g. `E = E + E + E`. Really we should consider any further
64+
/// recursions as `Other`, I suppose, but I'm too lazy to deal with that
65+
/// right now.
66+
#[allow(dead_code)]
67+
pub(super) enum LeftRight {
68+
/// Have not yet consumed any tokens.
69+
Left,
70+
71+
/// Consumed some tokens.
72+
Right,
73+
}
74+
4075
impl StackEntry {
4176
pub fn new<L, T>(scope: &Scope<L>, start_text: &str) -> Self
4277
where
@@ -45,6 +80,7 @@ impl StackEntry {
4580
{
4681
Self {
4782
scope: erase_type(scope),
83+
current_state: None,
4884
start_text,
4985
type_id: TypeId::of::<T>(),
5086
value: None,
@@ -70,6 +106,10 @@ impl StackEntry {
70106
{
71107
assert_eq!(self.start_text, start_text as *const str);
72108
assert_eq!(self.type_id, TypeId::of::<T>());
109+
assert!(
110+
self.current_state.is_some(),
111+
"observed a stack frame with no current state (forgot to call `recuse`?)"
112+
);
73113

74114
self.observed = true;
75115

@@ -236,6 +276,28 @@ where
236276
}
237277
}
238278

279+
pub fn recurse<'s, 't, R>(current_state: CurrentState, op: impl FnOnce() -> R) -> R {
280+
STACK.with_borrow_mut(|stack| {
281+
let top = stack.last_mut().unwrap();
282+
assert!(
283+
top.current_state.is_none(),
284+
"top of stack already has a current state"
285+
);
286+
top.current_state = Some(current_state);
287+
});
288+
289+
final_fn::final_fn!(STACK.with_borrow_mut(|stack| {
290+
let top = stack.last_mut().unwrap();
291+
assert!(
292+
top.current_state.is_some(),
293+
"top of stack no longer has a current state"
294+
);
295+
top.current_state = None;
296+
}));
297+
298+
op()
299+
}
300+
239301
fn erase_type<T>(s: &T) -> *const () {
240302
s as *const T as *const ()
241303
}

0 commit comments

Comments
 (0)