@@ -280,13 +280,15 @@ A struct that is used to manage state when lexing. It stores:
280280 error message to the user on a parse error
281281 * `peek_char`: the next `Char` in the `io`
282282 * `peek_tokens`: the list of upcoming tokens that we have already peeked
283+ * `current_token`: the most recent token that we have `read`
283284"""
284285mutable struct _LexerState{O<: IO }
285286 io:: O
286287 line:: Int
287288 peek_char:: Union{Nothing,Char}
288289 peek_tokens:: Vector{_Token}
289- _LexerState (io:: IO ) = new {typeof(io)} (io, 1 , nothing , _Token[])
290+ current_token:: Union{Nothing,_Token}
291+ _LexerState (io:: IO ) = new {typeof(io)} (io, 1 , nothing , _Token[], nothing )
290292end
291293
292294"""
@@ -351,6 +353,7 @@ function Base.read(state::_LexerState, ::Type{_Token})
351353 )
352354 end
353355 popfirst! (state. peek_tokens)
356+ state. current_token = token
354357 return token
355358end
356359
371374
372375_is_number (c:: Char ) = isdigit (c) || c in (' .' , ' e' , ' E' , ' +' , ' -' )
373376
377+ _nothing_or_newline (:: Nothing ) = true
378+ _nothing_or_newline (t:: _Token ) = t. kind == _TOKEN_NEWLINE
379+
380+ function _prior_token (state:: _LexerState )
381+ if length (state. peek_tokens) <= 1
382+ return state. current_token
383+ end
384+ return state. peek_tokens[end - 1 ]
385+ end
386+
374387function Base. peek (state:: _LexerState , :: Type{_Token} , n:: Int = 1 )
375388 @assert n >= 1
376389 while length (state. peek_tokens) < n
@@ -379,22 +392,58 @@ function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
379392 return nothing
380393 end
381394 push! (state. peek_tokens, token)
382- if _compare_case_insenstive (token, " subject" )
395+ if token. kind != _TOKEN_IDENTIFIER
396+ continue
397+ end
398+ # Here we have a _TOKEN_IDENTIFIER. But if it is not preceded by a
399+ # _TOKEN_NEWLINE, it cannot be a _TOKEN_KEYWORD.
400+ if ! _nothing_or_newline (_prior_token (state))
401+ continue
402+ end
403+ # It might be a _TOKEN_KEYWORD.
404+ kw = _case_insenstive_identifier_to_keyword (token. value)
405+ if kw !== nothing
406+ # The token matches a single word keyword. All keywords are followed
407+ # by a new line, or an EOF.
383408 t = _peek_inner (state)
384- if _compare_case_insenstive (t, " to " )
385- state. peek_tokens[end ] =
386- _Token (_TOKEN_KEYWORD, " CONSTRAINTS " , token . pos)
387- else
409+ if _nothing_or_newline (t )
410+ state. peek_tokens[end ] = _Token (_TOKEN_KEYWORD, kw, token . pos)
411+ end
412+ if t !== nothing
388413 push! (state. peek_tokens, t)
389414 end
390- elseif _compare_case_insenstive (token, " such" )
391- t = _peek_inner (state)
392- if _compare_case_insenstive (t, " that" )
415+ continue
416+ end
417+ # There are two keywords that contain whitespace: `subject to` and
418+ # `such that`
419+ for (a, b) in (" subject" => " to" , " such" => " that" )
420+ if ! _compare_case_insenstive (token, a)
421+ continue
422+ end
423+ # This _might_ be `subject to`, or it might just be a variable
424+ # named `subject`, like `obj:\n subject\n`.
425+ token_b = _peek_inner (state)
426+ if token_b === nothing
427+ # The next token is EOF. Nothing to do here.
428+ break
429+ elseif ! _compare_case_insenstive (token_b, b)
430+ # The second token doesn't match. Store `token_b` and break
431+ push! (state. peek_tokens, token_b)
432+ break
433+ end
434+ # We have something that matches (a, b), but a TOKEN_KEYWORD needs
435+ # to be followed by a new line.
436+ token_nl = _peek_inner (state)
437+ if _nothing_or_newline (token_nl)
393438 state. peek_tokens[end ] =
394439 _Token (_TOKEN_KEYWORD, " CONSTRAINTS" , token. pos)
395440 else
396- push! (state. peek_tokens, t)
441+ push! (state. peek_tokens, token_b)
442+ end
443+ if token_nl !== nothing
444+ push! (state. peek_tokens, token_nl)
397445 end
446+ break
398447 end
399448 end
400449 return state. peek_tokens[n]
@@ -426,11 +475,7 @@ function _peek_inner(state::_LexerState)
426475 write (buf, c)
427476 _ = read (state, Char)
428477 end
429- val = String (take! (buf))
430- if (kw = _case_insenstive_identifier_to_keyword (val)) !== nothing
431- return _Token (_TOKEN_KEYWORD, kw, pos)
432- end
433- return _Token (_TOKEN_IDENTIFIER, val, pos)
478+ return _Token (_TOKEN_IDENTIFIER, String (take! (buf)), pos)
434479 elseif (op = get (_OPERATORS, c, nothing )) !== nothing
435480 _ = read (state, Char) # Skip c
436481 if c == ' -' && peek (state, Char) == ' >'
@@ -473,6 +518,19 @@ function _skip_newlines(state::_LexerState)
473518 return
474519end
475520
521+ function _next_non_newline (state:: _LexerState )
522+ n = 1
523+ while true
524+ t = peek (state, _Token, n)
525+ if t === nothing
526+ return nothing
527+ elseif t. kind != _TOKEN_NEWLINE
528+ return t
529+ end
530+ n += 1
531+ end
532+ end
533+
476534# IDENTIFIER := "string"
477535#
478536# There _are_ rules to what an identifier can be. We handle these when lexing.
@@ -605,14 +663,10 @@ function _parse_quad_expression(
605663 )
606664 end
607665 end
608- while _next_token_is (state, _TOKEN_NEWLINE)
609- if _next_token_is (state, _TOKEN_KEYWORD, 2 )
610- break
611- end
612- _ = read (state, _Token, _TOKEN_NEWLINE)
613- end
614- if _next_token_is (state, _TOKEN_DIVISION)
615- _ = read (state, _Token) # /
666+ t = _next_non_newline (state)
667 if t !== nothing && t. kind == _TOKEN_DIVISION
668+ _skip_newlines (state)
669+ _ = read (state, _Token, _TOKEN_DIVISION) # /
616670 # Must be /2
617671 n = read (state, _Token, _TOKEN_NUMBER)
618672 if n. value != " 2"
@@ -634,10 +688,11 @@ function _parse_quad_expression(
634688end
635689
636690# TERM :=
637- # "+" TERM
691+ # [\n*] TERM
692+ # | "+" TERM
638693# | "-" TERM
639- # | NUMBER
640694# | IDENTIFIER
695+ # | NUMBER
641696# | NUMBER IDENTIFIER
642697# | NUMBER "*" IDENTIFIER
643698# | QUADRATIC_EXPRESSION
@@ -670,12 +725,28 @@ function _parse_term(
670725 _ = read (state, _Token, _TOKEN_MULTIPLICATION)
671726 x = _parse_variable (state, cache)
672727 return MOI. ScalarAffineTerm (coef, x)
673- elseif _next_token_is (state, _TOKEN_NEWLINE) ||
674- _next_token_is (state, _TOKEN_ADDITION) ||
675- _next_token_is (state, _TOKEN_SUBTRACTION)
676- # NUMBER
677- return coef
728+ elseif _next_token_is (state, _TOKEN_NEWLINE)
729+ # This could either be NUMBER \nEND-OF-TERM, or it could be a term
730+ # split by a new line, like `2\nx`.
731+ t = _next_non_newline (state)
732+ if t === nothing
733+ # NUMBER
734+ return coef
735+ elseif t. kind == _TOKEN_MULTIPLICATION
736+ # NUMBER \n * [\n] IDENTIFIER
737+ _skip_newlines (state)
738+ _ = read (state, _Token, _TOKEN_MULTIPLICATION)
739+ _skip_newlines (state)
740+ x = _parse_variable (state, cache)
741+ return MOI. ScalarAffineTerm (coef, x)
742+ elseif t. kind == _TOKEN_IDENTIFIER
743+ # NUMBER \n IDENTIFIER
744+ x = _parse_variable (state, cache)
745+ return MOI. ScalarAffineTerm (coef, x)
746+ end
678747 end
748+ # NUMBER
749+ return coef
679750 elseif _next_token_is (state, _TOKEN_OPEN_BRACKET)
680751 # QUADRATIC_EXPRESSION
681752 return _parse_quad_expression (state, cache, prefix)
0 commit comments