Skip to content

Commit 7430462

Browse files
nikomatsakisNiko Matsakis
authored andcommitted
track reduction-kind and give appr preference
resolves the parsing challenges :)
1 parent aefad9f commit 7430462

File tree

4 files changed

+138
-73
lines changed

4 files changed

+138
-73
lines changed

crates/formality-core/src/lib.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,14 @@ macro_rules! id {
214214
}
215215
}
216216

217+
impl From<String> for $n {
218+
fn from(s: String) -> Self {
219+
$n {
220+
data: std::sync::Arc::new(s),
221+
}
222+
}
223+
}
224+
217225
impl std::ops::Deref for $n {
218226
type Target = String;
219227

@@ -248,10 +256,7 @@ macro_rules! id {
248256
scope: &parse::Scope<crate::FormalityLang>,
249257
text: &'t str,
250258
) -> parse::ParseResult<'t, Self> {
251-
$crate::parse::Parser::single_variant(scope, text, stringify!($n), |p| {
252-
let string = p.identifier()?;
253-
Ok($n::new(&string))
254-
})
259+
$crate::parse::Parser::identifier(scope, text, stringify!($n))
255260
}
256261
}
257262

crates/formality-core/src/parse.rs

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,21 +75,68 @@ pub struct SuccessfulParse<'t, T> {
7575
/// given `enum Expr { #[cast] Base(BaseExpr), ... }`, only the reductions
7676
/// from `BaseExpr` would be recorded, there would be no `Expr::Base`
7777
/// reduction.
78-
reductions: Vec<&'static str>,
78+
reductions: Vec<(&'static str, ReductionKind)>,
7979

8080
/// The precedence of this parse, which is derived from the value given
8181
/// to `parse_variant`.
8282
precedence: Precedence,
8383

84-
/// Did this result from parsing an in-scope variable? If so,
85-
/// we prefer this parse to other parses that consumed the same set of
86-
/// input tokens.
87-
is_in_scope_var: bool,
88-
8984
/// The value produced.
9085
value: T,
9186
}
9287

88+
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Default)]
89+
pub enum ReductionKind {
90+
#[default]
91+
Normal,
92+
93+
/// A "cast" reduction is created by invoking `Parser::parse_variant_cast`
94+
/// or from the `#[cast]` variants in an enum. This kind of reduction
95+
/// represents an "is-a" relationship and does not add semantic meaning.
96+
///
97+
/// Cast variants interact differently with ambiguity detection.
98+
/// Consider this grammar:
99+
///
100+
/// ```text
101+
/// X = Y | Z // X has two variants
102+
/// Y = A // Y has 1 variant
103+
/// Z = A B // Z has 1 variant
104+
/// A = "a" // A has 1 variant
105+
/// B = "b" // B has 1 variant
106+
/// ```
107+
///
108+
/// If you mark the two `X` variants (`X = Y` and `X = Z`)
109+
/// as cast variants, then the input `"a b"` is considered
110+
/// unambiguous and is parsed as `X = (Z = (A = "a') (B = "b))`
111+
/// with no remainder.
112+
///
113+
/// If you don't mark those variants as cast variants,
114+
/// then we consider this *ambiguous*, because
115+
/// it could be that you want `X = (Y = (A = "a"))` with
116+
/// a remainder of `"b"`. This is appropriate
117+
/// if choosing Y vs Z has different semantic meaning.
118+
Cast,
119+
120+
/// A "variable" reduction results from invoking
121+
/// `Parser::parse_variant_variable` to parse an in-scope variable
122+
/// or from a `#[variable]` variant in an enum.
123+
///
124+
/// In general we prefer to interpret
125+
/// an identifier as an in-scope variable if there is one
126+
/// (see the test `parser-var-id-ambiguity` for examples).
127+
Variable,
128+
129+
/// A "identifier" reduction results from invoking
130+
/// `Parser::parse_variant_identifier` to parse an identifier
131+
/// or from parsing a type declared with the `formality_core::id!`
132+
/// macro.
133+
///
134+
/// In general we prefer to interpret
135+
/// an identifier as an in-scope variable if there is one
136+
/// (see the test `parser-var-id-ambiguity` for examples).
137+
Identifier,
138+
}
139+
93140
impl<'t, T> SuccessfulParse<'t, T> {
94141
/// Extract the value parsed and the remaining text,
95142
/// ignoring the reductions.
@@ -104,7 +151,6 @@ impl<'t, T> SuccessfulParse<'t, T> {
104151
text: self.text,
105152
reductions: self.reductions,
106153
precedence: self.precedence,
107-
is_in_scope_var: self.is_in_scope_var,
108154
value: op(self.value),
109155
}
110156
}
@@ -120,7 +166,6 @@ where
120166
text: term.text,
121167
reductions: term.reductions,
122168
precedence: term.precedence,
123-
is_in_scope_var: term.is_in_scope_var,
124169
value: term.value.upcast(),
125170
}
126171
}

crates/formality-core/src/parse/parser.rs

Lines changed: 71 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ use crate::{
99
Set, Upcast,
1010
};
1111

12-
use super::{CoreParse, ParseError, ParseResult, Scope, SuccessfulParse, TokenResult};
12+
use super::{
13+
CoreParse, ParseError, ParseResult, ReductionKind, Scope, SuccessfulParse, TokenResult,
14+
};
1315

1416
mod left_recursion;
1517

@@ -151,9 +153,8 @@ where
151153
scope: &'s Scope<L>,
152154
start_text: &'t str,
153155
current_text: &'t str,
154-
reductions: Vec<&'static str>,
155-
is_cast_variant: bool,
156-
is_in_scope_var: bool,
156+
reductions: Vec<(&'static str, ReductionKind)>,
157+
variant_kind: ReductionKind,
157158

158159
/// A variant is 'committed' when we have seen enough success
159160
is_committed: bool,
@@ -218,6 +219,20 @@ where
218219
})
219220
}
220221

222+
pub fn identifier(
223+
scope: &'s Scope<L>,
224+
text: &'t str,
225+
nonterminal_name: &'static str,
226+
) -> ParseResult<'t, T>
227+
where
228+
String: Into<T>,
229+
{
230+
Self::single_variant(scope, text, nonterminal_name, |p| {
231+
p.mark_as_identifier();
232+
Ok(p.identifier()?.into())
233+
})
234+
}
235+
221236
/// Shorthand for `parse_variant` where the parsing operation is to
222237
/// parse the type `V` and then upcast it to the desired result type.
223238
/// Also marks the variant as a cast variant.
@@ -289,15 +304,14 @@ where
289304
Ok(value) => {
290305
// Subtle: for cast variants, don't record the variant name in the reduction list,
291306
// as it doesn't carry semantic weight. See `mark_as_cast_variant` for more details.
292-
if !active_variant.is_cast_variant {
293-
active_variant.reductions.push(variant_name);
294-
}
307+
active_variant
308+
.reductions
309+
.push((variant_name, active_variant.variant_kind));
295310

296311
self.successes.push(SuccessfulParse {
297312
text: active_variant.current_text,
298313
reductions: active_variant.reductions,
299314
precedence: variant_precedence,
300-
is_in_scope_var: active_variant.is_in_scope_var,
301315
value: value.upcast(),
302316
});
303317
tracing::trace!("success: {:?}", self.successes.last().unwrap());
@@ -380,14 +394,42 @@ where
380394
}
381395

382396
fn is_preferable(s_i: &SuccessfulParse<T>, s_j: &SuccessfulParse<T>) -> bool {
383-
fn has_prefix<T: Eq>(l1: &[T], l2: &[T]) -> bool {
384-
l1.len() > l2.len() && (0..l2.len()).all(|i| l1[i] == l2[i])
397+
let mut reductions_i = s_i.reductions.iter().peekable();
398+
let mut reductions_j = s_j.reductions.iter().peekable();
399+
400+
loop {
401+
match (reductions_i.peek(), reductions_j.peek()) {
402+
// Drop casts from left or right if we see them, as they
403+
// are not significant. (See `ReductionKind::Cast`)
404+
(Some((_, ReductionKind::Cast)), _) => {
405+
reductions_i.next();
406+
}
407+
(_, Some((_, ReductionKind::Cast))) => {
408+
reductions_j.next();
409+
}
410+
(Some((_, ReductionKind::Variable)), Some((_, ReductionKind::Identifier))) => {
411+
// at some point, s_i parsed an in-scope variable and s_j parsed an identifier;
412+
// we tilt in favor of in-scope variables in this kind of case
413+
return true;
414+
}
415+
(Some(_), None) => {
416+
// s_j is a prefix of s_i -- prefer s_i
417+
return true;
418+
}
419+
(None, Some(_)) | (None, None) => {
420+
// s_i is a prefix of s_j or they are equal -- do NOT prefer s_i
421+
return false;
422+
}
423+
(Some(reduction_i), Some(reduction_j)) => {
424+
if reduction_i == reduction_j {
425+
reductions_i.next();
426+
reductions_j.next();
427+
} else {
428+
return false;
429+
}
430+
}
431+
}
385432
}
386-
387-
has_prefix(&s_i.reductions, &s_j.reductions)
388-
|| (s_i.is_in_scope_var
389-
&& !s_j.is_in_scope_var
390-
&& skip_whitespace(s_i.text) == skip_whitespace(s_j.text))
391433
}
392434
}
393435

@@ -403,8 +445,7 @@ where
403445
start_text,
404446
current_text: start_text,
405447
reductions: vec![],
406-
is_cast_variant: false,
407-
is_in_scope_var: false,
448+
variant_kind: ReductionKind::default(),
408449
is_committed: true,
409450
}
410451
}
@@ -470,45 +511,22 @@ where
470511
self.current_text = skip_trailing_comma(self.current_text);
471512
}
472513

473-
/// Marks this variant as an cast variant,
474-
/// which means there is no semantic difference
475-
/// between the thing you parsed and the reduced form.
476-
/// We do this automatically for enum variants marked
477-
/// as `#[cast]` or calls to `parse_variant_cast`.
478-
///
479-
/// Cast variants interact differently with ambiguity detection.
480-
/// Consider this grammar:
481-
///
482-
/// ```text
483-
/// X = Y | Z // X has two variants
484-
/// Y = A // Y has 1 variant
485-
/// Z = A B // Z has 1 variant
486-
/// A = "a" // A has 1 variant
487-
/// B = "b" // B has 1 variant
488-
/// ```
489-
///
490-
/// If you mark the two `X` variants (`X = Y` and `X = Z`)
491-
/// as cast variants, then the input `"a b"` is considered
492-
/// unambiguous and is parsed as `X = (Z = (A = "a') (B = "b))`
493-
/// with no remainder.
494-
///
495-
/// If you don't mark those variants as cast variants,
496-
/// then we consider this *ambiguous*, because
497-
/// it could be that you want `X = (Y = (A = "a"))` with
498-
/// a remainder of `"b"`. This is appropriate
499-
/// if choosing Y vs Z has different semantic meaning.
514+
/// Marks this variant as an cast variant.
515+
/// See [`ReductionKind::Cast`].
500516
pub fn mark_as_cast_variant(&mut self) {
501-
self.is_cast_variant = true;
517+
self.variant_kind = ReductionKind::Cast;
502518
}
503519

504520
/// Indicates that this variant is parsing *only* an in-scope variable.
505-
/// We have special treatment for disambiguation such that if a variant marked
506-
/// as parsing an "in-scope variable" and some other variant both consume the
507-
/// same part of the input string, we prefer the variable. This method is automatically
508-
/// invoked on `#[variable]` variants by the generated parser code and you generally
509-
/// wouldn't want to call it yourself.
521+
/// See [`ReducgionKind::Variable`].
510522
pub fn mark_as_in_scope_var(&mut self) {
511-
self.is_in_scope_var = true;
523+
self.variant_kind = ReductionKind::Variable;
524+
}
525+
526+
/// Indicates that this variant is parsing *only* an identifier.
527+
/// See [`ReducgionKind::Identifier`].
528+
pub fn mark_as_identifier(&mut self) {
529+
self.variant_kind = ReductionKind::Identifier;
512530
}
513531

514532
/// Expect *exactly* the given text (after skipping whitespace)
@@ -598,8 +616,7 @@ where
598616
current_text: self.current_text,
599617
reductions: vec![],
600618
scope: self.scope,
601-
is_cast_variant: false,
602-
is_in_scope_var: false,
619+
variant_kind: ReductionKind::default(),
603620
is_committed: true,
604621
};
605622

@@ -659,8 +676,7 @@ where
659676
start_text: self.start_text,
660677
current_text: self.current_text,
661678
reductions: vec![],
662-
is_cast_variant: false,
663-
is_in_scope_var: false,
679+
variant_kind: ReductionKind::default(),
664680
is_committed: true,
665681
};
666682
let result = op(&mut av);
@@ -890,7 +906,6 @@ where
890906
text,
891907
reductions,
892908
precedence: _,
893-
is_in_scope_var: _,
894909
value,
895910
} = left_recursion::recurse(self.current_state(), || op(self.scope, self.current_text))?;
896911

tests/parser_var_id_ambiguity.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -147,19 +147,20 @@ fn parse_assoc() {
147147
/// and then integrate that into the type, but we could also parse
148148
/// P as a type itself.
149149
#[test]
150-
#[should_panic] // FIXME
151150
fn parse_apply() {
152151
let value: Binder<Ty> = ptt::term("<perm P> P i32");
153152
expect_test::expect![[r#"
154153
Binder {
155154
kinds: [
156-
Ty,
155+
Perm,
157156
],
158-
term: Assoc(
157+
term: Apply(
158+
Variable(
159+
^perm0_0,
160+
),
159161
Id(
160162
i32,
161163
),
162-
T,
163164
),
164165
}
165166
"#]]
@@ -171,7 +172,6 @@ fn parse_apply() {
171172
/// and then integrate that into the type, but we could also parse
172173
/// P as a type itself.
173174
#[test]
174-
#[should_panic] // FIXME
175175
fn parse_parameter() {
176176
let value: Binder<Parameter> = ptt::term("<perm P> P");
177177
expect_test::expect![[r#"

0 commit comments

Comments
 (0)