@@ -280,13 +280,15 @@ A struct that is used to manage state when lexing. It stores:
280280 error message to the user on a parse error
281281 * `peek_char`: the next `Char` in the `io`
282282 * `peek_tokens`: the list of upcoming tokens that we have already peeked
283+ * `current_token`: the most recent token that we have `read`
283284"""
284285mutable struct _LexerState{O<: IO }
285286 io:: O
286287 line:: Int
287288 peek_char:: Union{Nothing,Char}
288289 peek_tokens:: Vector{_Token}
289- _LexerState (io:: IO ) = new {typeof(io)} (io, 1 , nothing , _Token[])
290+ current_token:: Union{Nothing,_Token}
291+ _LexerState (io:: IO ) = new {typeof(io)} (io, 1 , nothing , _Token[], nothing )
290292end
291293
292294"""
@@ -351,6 +353,7 @@ function Base.read(state::_LexerState, ::Type{_Token})
351353 )
352354 end
353355 popfirst! (state. peek_tokens)
356+ state. current_token = token
354357 return token
355358end
356359
371374
372375_is_number (c:: Char ) = isdigit (c) || c in (' .' , ' e' , ' E' , ' +' , ' -' )
373376
377+ _nothing_or_newline (:: Nothing ) = true
378+ _nothing_or_newline (t:: _Token ) = t. kind == _TOKEN_NEWLINE
379+
380+ function _prior_token (state:: _LexerState )
381+ if length (state. peek_tokens) <= 1
382+ return state. current_token
383+ end
384+ return state. peek_tokens[end - 1 ]
385+ end
386+
374387function Base. peek (state:: _LexerState , :: Type{_Token} , n:: Int = 1 )
375388 @assert n >= 1
376389 while length (state. peek_tokens) < n
@@ -379,22 +392,47 @@ function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
379392 return nothing
380393 end
381394 push! (state. peek_tokens, token)
382- if _compare_case_insenstive (token, " subject" )
395+ if token. kind != _TOKEN_IDENTIFIER
396+ continue
397+ end
398+ # Here we have a _TOKEN_IDENTIFIER. If it is preceded by a
399+ # _TOKEN_NEWLINE, it may be a _TOKEN_KEYWORD.
400+ if ! _nothing_or_newline (_prior_token (state))
401+ continue # It can't be a keyword
402+ end
403+ # It might be a _TOKEN_KEYWORD.
404+ kw = _case_insenstive_identifier_to_keyword (token. value)
405+ if kw != = nothing
406+ # The token matches a single word keyword. All keywords are followed
407+ # by a new line, or an EOF.
383408 t = _peek_inner (state)
384- if _compare_case_insenstive (t, " to" )
385- state. peek_tokens[end ] =
386- _Token (_TOKEN_KEYWORD, " CONSTRAINTS" , token. pos)
387- else
388- push! (state. peek_tokens, t)
409+ if _nothing_or_newline (t)
410+ state. peek_tokens[end ] = _Token (_TOKEN_KEYWORD, kw, token. pos)
389411 end
390- elseif _compare_case_insenstive (token, " such" )
391- t = _peek_inner (state)
392- if _compare_case_insenstive (t, " that" )
393- state. peek_tokens[end ] =
394- _Token (_TOKEN_KEYWORD, " CONSTRAINTS" , token. pos)
395- else
412+ if t != = nothing
396413 push! (state. peek_tokens, t)
397414 end
415+ continue
416+ end
417+ for (a, b) in [" subject" => " to" , " such" => " that" ]
418+ if _compare_case_insenstive (token, a)
419+ # This _might_ be `subject to`, or it might just be a variable
420+ # named `subject`, like `obj:\n subject\n`.
421+ t = _peek_inner (state)
422+ if t != = nothing
423+ t2 = _peek_inner (state)
424+ if _compare_case_insenstive (t, b) && _nothing_or_newline (t2)
425+ state. peek_tokens[end ] =
426+ _Token (_TOKEN_KEYWORD, " CONSTRAINTS" , token. pos)
427+ else
428+ push! (state. peek_tokens, t)
429+ end
430+ if t2 != = nothing
431+ push! (state. peek_tokens, t2)
432+ end
433+ end
434+ continue
435+ end
398436 end
399437 end
400438 return state. peek_tokens[n]
@@ -426,11 +464,7 @@ function _peek_inner(state::_LexerState)
426464 write (buf, c)
427465 _ = read (state, Char)
428466 end
429- val = String (take! (buf))
430- if (kw = _case_insenstive_identifier_to_keyword (val)) != = nothing
431- return _Token (_TOKEN_KEYWORD, kw, pos)
432- end
433- return _Token (_TOKEN_IDENTIFIER, val, pos)
467+ return _Token (_TOKEN_IDENTIFIER, String (take! (buf)), pos)
434468 elseif (op = get (_OPERATORS, c, nothing )) != = nothing
435469 _ = read (state, Char) # Skip c
436470 if c == ' -' && peek (state, Char) == ' >'
@@ -473,6 +507,19 @@ function _skip_newlines(state::_LexerState)
473507 return
474508end
475509
510+ function _next_non_newline (state:: _LexerState )
511+ n = 1
512+ while true
513+ t = peek (state, _Token, n)
514+ if t === nothing
515+ return nothing
516+ elseif t. kind != _TOKEN_NEWLINE
517+ return t
518+ end
519+ n += 1
520+ end
521+ end
522+
476523# IDENTIFIER := "string"
477524#
478525# There _are_ rules to what an identifier can be. We handle these when lexing.
@@ -605,14 +652,10 @@ function _parse_quad_expression(
605652 )
606653 end
607654 end
608- while _next_token_is (state, _TOKEN_NEWLINE)
609- if _next_token_is (state, _TOKEN_KEYWORD, 2 )
610- break
611- end
612- _ = read (state, _Token, _TOKEN_NEWLINE)
613- end
614- if _next_token_is (state, _TOKEN_DIVISION)
615- _ = read (state, _Token) # /
655+ t = _next_non_newline (state)
656+ if t. kind == _TOKEN_DIVISION
657+ _skip_newlines (state)
658+ _ = read (state, _Token, _TOKEN_DIVISION) # /
616659 # Must be /2
617660 n = read (state, _Token, _TOKEN_NUMBER)
618661 if n. value != " 2"
@@ -634,10 +677,11 @@ function _parse_quad_expression(
634677end
635678
636679# TERM :=
637- # "+" TERM
680+ # [\n*] TERM
681+ # | "+" TERM
638682# | "-" TERM
639- # | NUMBER
640683# | IDENTIFIER
684+ # | NUMBER
641685# | NUMBER IDENTIFIER
642686# | NUMBER "*" IDENTIFIER
643687# | QUADRATIC_EXPRESSION
@@ -670,12 +714,25 @@ function _parse_term(
670714 _ = read (state, _Token, _TOKEN_MULTIPLICATION)
671715 x = _parse_variable (state, cache)
672716 return MOI. ScalarAffineTerm (coef, x)
673- elseif _next_token_is (state, _TOKEN_NEWLINE) ||
674- _next_token_is (state, _TOKEN_ADDITION) ||
675- _next_token_is (state, _TOKEN_SUBTRACTION)
676- # NUMBER
677- return coef
717+ elseif _next_token_is (state, _TOKEN_NEWLINE)
718+ # This could either be NUMBER \nEND-OF-TERM, or it could be a term
719+ # split by a new line, like `2\nx`.
720+ t = _next_non_newline (state)
721+ if t. kind == _TOKEN_MULTIPLICATION
722+ # NUMBER \n * [\n] IDENTIFIER
723+ _skip_newlines (state)
724+ _ = read (state, _Token, _TOKEN_MULTIPLICATION)
725+ _skip_newlines (state)
726+ x = _parse_variable (state, cache)
727+ return MOI. ScalarAffineTerm (coef, x)
728+ elseif t. kind == _TOKEN_IDENTIFIER
729+ # NUMBER \n IDENTIFIER
730+ x = _parse_variable (state, cache)
731+ return MOI. ScalarAffineTerm (coef, x)
732+ end
678733 end
734+ # NUMBER
735+ return coef
679736 elseif _next_token_is (state, _TOKEN_OPEN_BRACKET)
680737 # QUADRATIC_EXPRESSION
681738 return _parse_quad_expression (state, cache, prefix)
0 commit comments