@@ -4,10 +4,10 @@ use self::CToken::*;
44use self :: Expression :: * ;
55use self :: ParseError :: * ;
66use self :: Token :: * ;
7+ use crate :: term:: Context ;
78pub use crate :: term:: Notation :: * ;
89use crate :: term:: Term :: * ;
910use crate :: term:: { abs, app, Notation , Term } ;
10- use std:: collections:: VecDeque ;
1111use std:: error:: Error ;
1212use std:: fmt;
1313
@@ -16,6 +16,8 @@ use std::fmt;
1616pub enum ParseError {
1717 /// lexical error; contains the invalid character and its index
1818 InvalidCharacter ( ( usize , char ) ) ,
19+ /// lexical error; an undefined free variable was found (classic notation only)
20+ UndefinedFreeVariable ,
1921 /// syntax error; the expression is invalid
2022 InvalidExpression ,
2123 /// syntax error; the expression is empty
@@ -28,6 +30,9 @@ impl fmt::Display for ParseError {
2830 ParseError :: InvalidCharacter ( ( idx, char) ) => {
2931 write ! ( f, "lexical error; invalid character '{}' at {}" , char , idx)
3032 }
33+ ParseError :: UndefinedFreeVariable => {
34+ write ! ( f, "lexical error; an undefined free variable was used" )
35+ }
3136 ParseError :: InvalidExpression => write ! ( f, "syntax error; the expression is invalid" ) ,
3237 ParseError :: EmptyExpression => write ! ( f, "syntax error; the expression is empty" ) ,
3338 }
@@ -142,50 +147,50 @@ pub fn tokenize_cla(input: &str) -> Result<Vec<CToken>, ParseError> {
142147}
143148
144149#[ doc( hidden) ]
145- pub fn convert_classic_tokens ( tokens : & [ CToken ] ) -> Vec < Token > {
146- _convert_classic_tokens ( tokens, & mut VecDeque :: with_capacity ( tokens. len ( ) ) , & mut 0 )
150+ pub fn convert_classic_tokens ( ctx : & Context , tokens : & [ CToken ] ) -> Result < Vec < Token > , ParseError > {
151+ let mut stack = Vec :: with_capacity ( tokens. len ( ) ) ;
152+ stack. extend ( ctx. iter ( ) . rev ( ) ) ;
153+ _convert_classic_tokens ( tokens, & mut stack, & mut 0 )
147154}
148155
149156fn _convert_classic_tokens < ' t > (
150157 tokens : & ' t [ CToken ] ,
151- stack : & mut VecDeque < & ' t str > ,
158+ stack : & mut Vec < & ' t str > ,
152159 pos : & mut usize ,
153- ) -> Vec < Token > {
160+ ) -> Result < Vec < Token > , ParseError > {
154161 let mut output = Vec :: with_capacity ( tokens. len ( ) - * pos) ;
155162 let mut inner_stack_count = 0 ;
156163
157164 while let Some ( token) = tokens. get ( * pos) {
158165 match * token {
159166 CLambda ( ref name) => {
160167 output. push ( Lambda ) ;
161- stack. push_back ( name) ;
168+ stack. push ( name) ;
162169 inner_stack_count += 1 ;
163170 }
164171 CLparen => {
165172 output. push ( Lparen ) ;
166173 * pos += 1 ;
167- output. append ( & mut _convert_classic_tokens ( tokens, stack, pos) ) ;
174+ output. append ( & mut _convert_classic_tokens ( tokens, stack, pos) ? ) ;
168175 }
169176 CRparen => {
170177 output. push ( Rparen ) ;
171178 stack. truncate ( stack. len ( ) - inner_stack_count) ;
172- return output;
179+ return Ok ( output) ;
173180 }
174181 CName ( ref name) => {
175182 if let Some ( index) = stack. iter ( ) . rev ( ) . position ( |t| t == name) {
176183 output. push ( Number ( index + 1 ) )
177184 } else {
178- // a new free variable
179- stack. push_front ( name) ;
180- // index of the last element + 1
181- output. push ( Number ( stack. len ( ) ) )
185+ // a free variable not defined in the `Context`
186+ return Err ( UndefinedFreeVariable ) ;
182187 }
183188 }
184189 }
185190 * pos += 1 ;
186191 }
187192
188- output
193+ Ok ( output)
189194}
190195
191196#[ derive( Debug , PartialEq ) ]
@@ -231,7 +236,7 @@ fn _get_ast(tokens: &[Token], pos: &mut usize) -> Result<Expression, ParseError>
231236/// Attempts to parse the input `&str` as a lambda `Term` encoded in the given `Notation`.
232237///
233238/// - lambdas can be represented either with the greek letter (λ) or a backslash (\\ -
234- /// less aesthetic, but only one byte in size)
239+ /// less aesthetic, but only one byte in size)
235240/// - the identifiers in `Classic` notation are `String`s of alphabetic Unicode characters
236241/// - `Classic` notation ignores whitespaces where unambiguous
237242/// - the indices in the `DeBruijn` notation start with 1 and are hexadecimal digits
@@ -253,10 +258,42 @@ fn _get_ast(tokens: &[Token], pos: &mut usize) -> Result<Expression, ParseError>
253258///
254259/// Returns a `ParseError` when a lexing or syntax error is encountered.
255260pub fn parse ( input : & str , notation : Notation ) -> Result < Term , ParseError > {
261+ parse_with_context ( & Context :: empty ( ) , input, notation)
262+ }
263+
264+ /// Attempts to parse the input `&str` using a provided context of free variables.
265+ ///
266+ /// This function is identical to `parse()`, but it allows defining a set of named
267+ /// free variables that are considered valid during parsing in `Classic` notation.
268+ ///
269+ /// # Examples
270+ /// ```
271+ /// use lambda_calculus::{*, term::Context};
272+ ///
273+ /// let ctx = Context::new(&["x", "y"]);
274+ ///
275+ /// // `z` is not in the context, so it will be an error.
276+ /// assert!(parse_with_context(&ctx, "z", Classic).is_err());
277+ ///
278+ /// // `y` is in the context, so it's parsed as the outermost free variable (Var(2)).
279+ /// assert_eq!(parse_with_context(&ctx, "y", Classic), Ok(Var(2)));
280+ ///
281+ /// // In `λa.y`, `y` is still the outermost free variable, but its index is now 3.
282+ /// assert_eq!(parse_with_context(&ctx, "λa.y", Classic), Ok(abs(Var(3))));
283+ /// ```
284+ ///
285+ /// # Errors
286+ ///
287+ /// Returns a `ParseError` when a lexing or syntax error is encountered.
288+ pub fn parse_with_context (
289+ ctx : & Context ,
290+ input : & str ,
291+ notation : Notation ,
292+ ) -> Result < Term , ParseError > {
256293 let tokens = if notation == DeBruijn {
257294 tokenize_dbr ( input) ?
258295 } else {
259- convert_classic_tokens ( & tokenize_cla ( input) ?)
296+ convert_classic_tokens ( ctx , & tokenize_cla ( input) ?) ?
260297 } ;
261298 let ast = get_ast ( & tokens) ?;
262299
@@ -362,15 +399,19 @@ mod tests {
362399 assert ! ( tokens_dbr. is_ok( ) ) ;
363400
364401 assert_eq ! (
365- convert_classic_tokens( & tokens_cla. unwrap( ) ) ,
402+ convert_classic_tokens( & Context :: empty ( ) , & tokens_cla. unwrap( ) ) . unwrap ( ) ,
366403 tokens_dbr. unwrap( )
367404 ) ;
368405 }
369406
370407 #[ test]
371408 fn tokenization_success_classic_with_free_variables ( ) {
409+ let ctx = Context :: new ( & [ "a" , "b" ] ) ;
372410 let blc_dbr = "12" ;
373- let blc_cla = parse ( blc_dbr, DeBruijn ) . unwrap ( ) . to_string ( ) ;
411+ let blc_cla = parse ( blc_dbr, DeBruijn )
412+ . unwrap ( )
413+ . with_context ( & ctx)
414+ . to_string ( ) ;
374415
375416 let tokens_cla = tokenize_cla ( & blc_cla) ;
376417 let tokens_dbr = tokenize_dbr ( blc_dbr) ;
@@ -379,9 +420,29 @@ mod tests {
379420 assert ! ( tokens_dbr. is_ok( ) ) ;
380421
381422 assert_eq ! (
382- convert_classic_tokens( & tokens_cla. unwrap( ) ) ,
383- tokens_dbr. unwrap( )
423+ tokens_cla. and_then( |tokens| convert_classic_tokens( & ctx, & tokens) ) ,
424+ tokens_dbr
425+ ) ;
426+ }
427+
#[test]
fn parse_classic_with_undefined_variable_error() {
    // Shorthand for parsing in `Classic` notation under a given context.
    let parse_cla = |ctx: &Context, input| parse_with_context(ctx, input, Classic);
    let ctx = Context::new(&["y"]);

    // An empty context defines no free variables, so "x" cannot resolve.
    assert_eq!(
        parse_cla(&Context::empty(), "x"),
        Err(UndefinedFreeVariable)
    );

    // "x" is neither bound by the lambda nor supplied by this context.
    assert_eq!(parse_cla(&ctx, "λz.x"), Err(UndefinedFreeVariable));

    // "y" is supplied by the context, so parsing succeeds.
    assert!(parse_cla(&ctx, "y").is_ok());
}
386447
387448 #[ test]
0 commit comments