Skip to content

Commit 4a73ed8

Browse files
hiratara and ljedrz authored
handle free variables correctly (#58)
* feat: add `Context` for handling free variables * Introduce `Context` to define free variables for parsing and display * Add `parse_with_context` to to resolve free variables based on a `Context` * Add `Term::with_context` for displaying terms using a given `Context` * test: add unit tests for Context and related features * refactor: Remove unnecessary use of VecDeque revert 190c29f * refactor: use slice for function parameter Accepting a slice is more flexible and idiomatic than a vector reference (@ljedrz) Co-authored-by: ljedrz <[email protected]> * fix: remove redundant tests causing clippy warnings * fix: address clippy::doc_lazy_continuation warning in doc comments * fix: make lifetime syntax consistent per Clippy --------- Co-authored-by: ljedrz <[email protected]>
1 parent c961a86 commit 4a73ed8

File tree

6 files changed

+378
-42
lines changed

6 files changed

+378
-42
lines changed

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ pub mod combinators;
99
pub mod parser;
1010
pub mod reduction;
1111

12-
pub use self::parser::parse;
12+
pub use self::parser::{parse, parse_with_context};
1313
pub use self::reduction::beta;
1414
pub use self::reduction::Order::*;
1515
pub use self::term::Notation::*;

src/parser.rs

Lines changed: 80 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ use self::CToken::*;
44
use self::Expression::*;
55
use self::ParseError::*;
66
use self::Token::*;
7+
use crate::term::Context;
78
pub use crate::term::Notation::*;
89
use crate::term::Term::*;
910
use crate::term::{abs, app, Notation, Term};
10-
use std::collections::VecDeque;
1111
use std::error::Error;
1212
use std::fmt;
1313

@@ -16,6 +16,8 @@ use std::fmt;
1616
pub enum ParseError {
1717
/// lexical error; contains the invalid character and its index
1818
InvalidCharacter((usize, char)),
19+
/// lexical error; an undefined free variable was found (classic notation only)
20+
UndefinedFreeVariable,
1921
/// syntax error; the expression is invalid
2022
InvalidExpression,
2123
/// syntax error; the expression is empty
@@ -28,6 +30,9 @@ impl fmt::Display for ParseError {
2830
ParseError::InvalidCharacter((idx, char)) => {
2931
write!(f, "lexical error; invalid character '{}' at {}", char, idx)
3032
}
33+
ParseError::UndefinedFreeVariable => {
34+
write!(f, "lexical error; an undefined free variable was used")
35+
}
3136
ParseError::InvalidExpression => write!(f, "syntax error; the expression is invalid"),
3237
ParseError::EmptyExpression => write!(f, "syntax error; the expression is empty"),
3338
}
@@ -142,50 +147,50 @@ pub fn tokenize_cla(input: &str) -> Result<Vec<CToken>, ParseError> {
142147
}
143148

144149
#[doc(hidden)]
145-
pub fn convert_classic_tokens(tokens: &[CToken]) -> Vec<Token> {
146-
_convert_classic_tokens(tokens, &mut VecDeque::with_capacity(tokens.len()), &mut 0)
150+
pub fn convert_classic_tokens(ctx: &Context, tokens: &[CToken]) -> Result<Vec<Token>, ParseError> {
151+
let mut stack = Vec::with_capacity(tokens.len());
152+
stack.extend(ctx.iter().rev());
153+
_convert_classic_tokens(tokens, &mut stack, &mut 0)
147154
}
148155

149156
fn _convert_classic_tokens<'t>(
150157
tokens: &'t [CToken],
151-
stack: &mut VecDeque<&'t str>,
158+
stack: &mut Vec<&'t str>,
152159
pos: &mut usize,
153-
) -> Vec<Token> {
160+
) -> Result<Vec<Token>, ParseError> {
154161
let mut output = Vec::with_capacity(tokens.len() - *pos);
155162
let mut inner_stack_count = 0;
156163

157164
while let Some(token) = tokens.get(*pos) {
158165
match *token {
159166
CLambda(ref name) => {
160167
output.push(Lambda);
161-
stack.push_back(name);
168+
stack.push(name);
162169
inner_stack_count += 1;
163170
}
164171
CLparen => {
165172
output.push(Lparen);
166173
*pos += 1;
167-
output.append(&mut _convert_classic_tokens(tokens, stack, pos));
174+
output.append(&mut _convert_classic_tokens(tokens, stack, pos)?);
168175
}
169176
CRparen => {
170177
output.push(Rparen);
171178
stack.truncate(stack.len() - inner_stack_count);
172-
return output;
179+
return Ok(output);
173180
}
174181
CName(ref name) => {
175182
if let Some(index) = stack.iter().rev().position(|t| t == name) {
176183
output.push(Number(index + 1))
177184
} else {
178-
// a new free variable
179-
stack.push_front(name);
180-
// index of the last element + 1
181-
output.push(Number(stack.len()))
185+
// a free variable not defined in the `Context`
186+
return Err(UndefinedFreeVariable);
182187
}
183188
}
184189
}
185190
*pos += 1;
186191
}
187192

188-
output
193+
Ok(output)
189194
}
190195

191196
#[derive(Debug, PartialEq)]
@@ -231,7 +236,7 @@ fn _get_ast(tokens: &[Token], pos: &mut usize) -> Result<Expression, ParseError>
231236
/// Attempts to parse the input `&str` as a lambda `Term` encoded in the given `Notation`.
232237
///
233238
/// - lambdas can be represented either with the greek letter (λ) or a backslash (\\ -
234-
/// less aesthetic, but only one byte in size)
239+
/// less aesthetic, but only one byte in size)
235240
/// - the identifiers in `Classic` notation are `String`s of alphabetic Unicode characters
236241
/// - `Classic` notation ignores whitespaces where unambiguous
237242
/// - the indices in the `DeBruijn` notation start with 1 and are hexadecimal digits
@@ -253,10 +258,42 @@ fn _get_ast(tokens: &[Token], pos: &mut usize) -> Result<Expression, ParseError>
253258
///
254259
/// Returns a `ParseError` when a lexing or syntax error is encountered.
255260
pub fn parse(input: &str, notation: Notation) -> Result<Term, ParseError> {
261+
parse_with_context(&Context::empty(), input, notation)
262+
}
263+
264+
/// Attempts to parse the input `&str` using a provided context of free variables.
265+
///
266+
/// This function is identical to `parse()`, but it allows defining a set of named
267+
/// free variables that are considered valid during parsing in `Classic` notation.
268+
///
269+
/// # Examples
270+
/// ```
271+
/// use lambda_calculus::{*, term::Context};
272+
///
273+
/// let ctx = Context::new(&["x", "y"]);
274+
///
275+
/// // `z` is not in the context, so it will be an error.
276+
/// assert!(parse_with_context(&ctx, "z", Classic).is_err());
277+
///
278+
/// // `y` is in the context, so it's parsed as the outermost free variable (Var(2)).
279+
/// assert_eq!(parse_with_context(&ctx, "y", Classic), Ok(Var(2)));
280+
///
281+
/// // In `λa.y`, `y` is still the outermost free variable, but its index is now 3.
282+
/// assert_eq!(parse_with_context(&ctx, "λa.y", Classic), Ok(abs(Var(3))));
283+
/// ```
284+
///
285+
/// # Errors
286+
///
287+
/// Returns a `ParseError` when a lexing or syntax error is encountered.
288+
pub fn parse_with_context(
289+
ctx: &Context,
290+
input: &str,
291+
notation: Notation,
292+
) -> Result<Term, ParseError> {
256293
let tokens = if notation == DeBruijn {
257294
tokenize_dbr(input)?
258295
} else {
259-
convert_classic_tokens(&tokenize_cla(input)?)
296+
convert_classic_tokens(ctx, &tokenize_cla(input)?)?
260297
};
261298
let ast = get_ast(&tokens)?;
262299

@@ -362,15 +399,19 @@ mod tests {
362399
assert!(tokens_dbr.is_ok());
363400

364401
assert_eq!(
365-
convert_classic_tokens(&tokens_cla.unwrap()),
402+
convert_classic_tokens(&Context::empty(), &tokens_cla.unwrap()).unwrap(),
366403
tokens_dbr.unwrap()
367404
);
368405
}
369406

370407
#[test]
371408
fn tokenization_success_classic_with_free_variables() {
409+
let ctx = Context::new(&["a", "b"]);
372410
let blc_dbr = "12";
373-
let blc_cla = parse(blc_dbr, DeBruijn).unwrap().to_string();
411+
let blc_cla = parse(blc_dbr, DeBruijn)
412+
.unwrap()
413+
.with_context(&ctx)
414+
.to_string();
374415

375416
let tokens_cla = tokenize_cla(&blc_cla);
376417
let tokens_dbr = tokenize_dbr(blc_dbr);
@@ -379,9 +420,29 @@ mod tests {
379420
assert!(tokens_dbr.is_ok());
380421

381422
assert_eq!(
382-
convert_classic_tokens(&tokens_cla.unwrap()),
383-
tokens_dbr.unwrap()
423+
tokens_cla.and_then(|tokens| convert_classic_tokens(&ctx, &tokens)),
424+
tokens_dbr
425+
);
426+
}
427+
428+
#[test]
429+
fn parse_classic_with_undefined_variable_error() {
430+
let ctx_with_y = Context::new(&["y"]);
431+
432+
// "x" is not defined in the empty context
433+
assert_eq!(
434+
parse_with_context(&Context::empty(), "x", Classic),
435+
Err(UndefinedFreeVariable)
384436
);
437+
438+
// "x" is not defined in this context either
439+
assert_eq!(
440+
parse_with_context(&ctx_with_y, "λz.x", Classic),
441+
Err(UndefinedFreeVariable)
442+
);
443+
444+
// "y" is defined, so this should be OK
445+
assert!(parse_with_context(&ctx_with_y, "y", Classic).is_ok());
385446
}
386447

387448
#[test]

src/reduction.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use std::{cmp, fmt, mem};
1010
///
1111
/// - the `NOR`, `HNO`, `APP` and `HAP` orders reduce expressions to their normal form
1212
/// - the `APP` order will fail to fully reduce expressions containing terms without a normal form,
13-
/// e.g. the `Y` combinator (they will expand forever)
13+
/// e.g. the `Y` combinator (they will expand forever)
1414
/// - the `CBN` order reduces to weak head normal form
1515
/// - the `CBV` order reduces to weak normal form
1616
/// - the `HSP` order reduces to head normal form

0 commit comments

Comments (0)