
Commit 3d89c03

[FileFormats.LP] allow newline in term and improve keyword identification

1 parent b845d44
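In short: the LP reader now tolerates a newline between the coefficient and the variable of a term, and identifiers are only promoted to keywords when their position allows it. A hedged sketch of the first fix, using the public MathOptInterface API the way the new tests below do (the objective query at the end is my own illustration):

```julia
import MathOptInterface as MOI
const LP = MOI.FileFormats.LP

# `2` and `x` on separate lines should now parse as the single term `2 x`.
io = IOBuffer("maximize\nobj: 2\nx\nsubject to\nbounds\nx free\nend\n")
model = LP.Model()
MOI.read!(io, model)
x = MOI.get(model, MOI.VariableIndex, "x")
f = 2.0 * x
@assert isapprox(f, MOI.get(model, MOI.ObjectiveFunction{typeof(f)}()))
```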

2 files changed: +149, -33 lines

src/FileFormats/LP/read.jl

Lines changed: 90 additions & 33 deletions
@@ -280,13 +280,15 @@ A struct that is used to manage state when lexing. It stores:
   error message to the user on a parse error
 * `peek_char`: the next `Char` in the `io`
 * `peek_tokens`: the list of upcoming tokens that we have already peeked
+* `current_token`: the most recent token that we have `read`
 """
 mutable struct _LexerState{O<:IO}
     io::O
     line::Int
     peek_char::Union{Nothing,Char}
     peek_tokens::Vector{_Token}
-    _LexerState(io::IO) = new{typeof(io)}(io, 1, nothing, _Token[])
+    current_token::Union{Nothing,_Token}
+    _LexerState(io::IO) = new{typeof(io)}(io, 1, nothing, _Token[], nothing)
 end
 
 """
@@ -351,6 +353,7 @@ function Base.read(state::_LexerState, ::Type{_Token})
         )
     end
     popfirst!(state.peek_tokens)
+    state.current_token = token
     return token
 end
 
@@ -371,6 +374,16 @@ end
 
 _is_number(c::Char) = isdigit(c) || c in ('.', 'e', 'E', '+', '-')
 
+_nothing_or_newline(::Nothing) = true
+_nothing_or_newline(t::_Token) = t.kind == _TOKEN_NEWLINE
+
+function _prior_token(state::_LexerState)
+    if length(state.peek_tokens) <= 1
+        return state.current_token
+    end
+    return state.peek_tokens[end-1]
+end
+
 function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
     @assert n >= 1
     while length(state.peek_tokens) < n
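A toy restatement of the bookkeeping these hunks add (hypothetical names, not the package's API): deciding whether a peeked identifier can start a keyword requires the token immediately before it, which is either still in the peek buffer or has already been `read` and is therefore remembered in `current_token`:

```julia
struct Tok
    kind::Symbol
    value::String
end

mutable struct ToyLexer
    peeked::Vector{Tok}          # tokens peeked but not yet read
    current::Union{Nothing,Tok}  # the most recently read token, if any
end

# The token just before the newest peeked one: the second-newest entry of
# the peek buffer, or the last token actually read if the buffer holds at
# most one token.
prior(lx::ToyLexer) = length(lx.peeked) <= 1 ? lx.current : lx.peeked[end-1]

lx = ToyLexer([Tok(:newline, "\n"), Tok(:identifier, "subject")], nothing)
@assert prior(lx).kind == :newline  # `subject` sits at the start of a line
```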
@@ -379,22 +392,47 @@ function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
             return nothing
         end
         push!(state.peek_tokens, token)
-        if _compare_case_insenstive(token, "subject")
+        if token.kind != _TOKEN_IDENTIFIER
+            continue
+        end
+        # Here we have a _TOKEN_IDENTIFIER. If it is preceded by a
+        # _TOKEN_NEWLINE, it may be a _TOKEN_KEYWORD.
+        if !_nothing_or_newline(_prior_token(state))
+            continue # It can't be a keyword
+        end
+        # It might be a _TOKEN_KEYWORD.
+        kw = _case_insenstive_identifier_to_keyword(token.value)
+        if kw !== nothing
+            # The token matches a single word keyword. All keywords are followed
+            # by a new line, or an EOF.
             t = _peek_inner(state)
-            if _compare_case_insenstive(t, "to")
-                state.peek_tokens[end] =
-                    _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos)
-            else
-                push!(state.peek_tokens, t)
+            if _nothing_or_newline(t)
+                state.peek_tokens[end] = _Token(_TOKEN_KEYWORD, kw, token.pos)
             end
-        elseif _compare_case_insenstive(token, "such")
-            t = _peek_inner(state)
-            if _compare_case_insenstive(t, "that")
-                state.peek_tokens[end] =
-                    _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos)
-            else
+            if t !== nothing
                 push!(state.peek_tokens, t)
             end
+            continue
+        end
+        for (a, b) in ["subject" => "to", "such" => "that"]
+            if _compare_case_insenstive(token, a)
+                # This _might_ be `subject to`, or it might just be a variable
+                # named `subject`, like `obj:\n subject\n`.
+                t = _peek_inner(state)
+                if t !== nothing
+                    t2 = _peek_inner(state)
+                    if _compare_case_insenstive(t, b) && _nothing_or_newline(t2)
+                        state.peek_tokens[end] =
+                            _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos)
+                    else
+                        push!(state.peek_tokens, t)
+                    end
+                    if t2 !== nothing
+                        push!(state.peek_tokens, t2)
+                    end
+                end
+                continue
+            end
+        end
     end
     return state.peek_tokens[n]
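The user-visible effect, mirroring `test_parse_keyword_edge_cases_identifier_is_keyword` below: an identifier that merely spells a keyword stays an identifier unless it starts a line and is followed by a newline (or completes `subject to`/`such that`):

```julia
import MathOptInterface as MOI
const LP = MOI.FileFormats.LP

# A variable named `such` is no longer mistaken for the start of the
# `such that` keyword.
io = IOBuffer("""
maximize
obj: such
subject to
such <= 1
bounds
such free
end
""")
model = LP.Model()
MOI.read!(io, model)
x = only(MOI.get(model, MOI.ListOfVariableIndices()))
@assert MOI.get(model, MOI.VariableName(), x) == "such"
```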
@@ -426,11 +464,7 @@ function _peek_inner(state::_LexerState)
             write(buf, c)
             _ = read(state, Char)
         end
-        val = String(take!(buf))
-        if (kw = _case_insenstive_identifier_to_keyword(val)) !== nothing
-            return _Token(_TOKEN_KEYWORD, kw, pos)
-        end
-        return _Token(_TOKEN_IDENTIFIER, val, pos)
+        return _Token(_TOKEN_IDENTIFIER, String(take!(buf)), pos)
     elseif (op = get(_OPERATORS, c, nothing)) !== nothing
         _ = read(state, Char) # Skip c
         if c == '-' && peek(state, Char) == '>'
@@ -473,6 +507,19 @@ function _skip_newlines(state::_LexerState)
     return
 end
 
+function _next_non_newline(state::_LexerState)
+    n = 1
+    while true
+        t = peek(state, _Token, n)
+        if t === nothing
+            return nothing
+        elseif t.kind != _TOKEN_NEWLINE
+            return t
+        end
+        n += 1
+    end
+end
+
 # IDENTIFIER := "string"
 #
 # There _are_ rules to what an identifier can be. We handle these when lexing.
605652
)
606653
end
607654
end
608-
while _next_token_is(state, _TOKEN_NEWLINE)
609-
if _next_token_is(state, _TOKEN_KEYWORD, 2)
610-
break
611-
end
612-
_ = read(state, _Token, _TOKEN_NEWLINE)
613-
end
614-
if _next_token_is(state, _TOKEN_DIVISION)
615-
_ = read(state, _Token) # /
655+
t = _next_non_newline(state)
656+
if t.kind == _TOKEN_DIVISION
657+
_skip_newlines(state)
658+
_ = read(state, _Token, _TOKEN_DIVISION) # /
616659
# Must be /2
617660
n = read(state, _Token, _TOKEN_NUMBER)
618661
if n.value != "2"
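If I read this hunk correctly, the `/ 2` that closes a quadratic bracket may now sit on its own line. A hedged sketch of an objective that should now parse:

```julia
import MathOptInterface as MOI
const LP = MOI.FileFormats.LP

io = IOBuffer("""
minimize
obj: [ x ^ 2 ]
/ 2
subject to
bounds
x free
end
""")
model = LP.Model()
MOI.read!(io, model)  # the objective is 0.5 * x * x
```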
@@ -634,10 +677,11 @@ function _parse_quad_expression(
 end
 
 # TERM :=
-#     "+" TERM
+#     [\n*] TERM
+#   | "+" TERM
 #   | "-" TERM
-#   | NUMBER
 #   | IDENTIFIER
+#   | NUMBER
 #   | NUMBER IDENTIFIER
 #   | NUMBER "*" IDENTIFIER
 #   | QUADRATIC_EXPRESSION
@@ -670,12 +714,25 @@ function _parse_term(
             _ = read(state, _Token, _TOKEN_MULTIPLICATION)
             x = _parse_variable(state, cache)
             return MOI.ScalarAffineTerm(coef, x)
-        elseif _next_token_is(state, _TOKEN_NEWLINE) ||
-               _next_token_is(state, _TOKEN_ADDITION) ||
-               _next_token_is(state, _TOKEN_SUBTRACTION)
-            # NUMBER
-            return coef
+        elseif _next_token_is(state, _TOKEN_NEWLINE)
+            # This could either be NUMBER \nEND-OF-TERM, or it could be a term
+            # split by a new line, like `2\nx`.
+            t = _next_non_newline(state)
+            if t.kind == _TOKEN_MULTIPLICATION
+                # NUMBER \n * [\n] IDENTIFIER
+                _skip_newlines(state)
+                _ = read(state, _Token, _TOKEN_MULTIPLICATION)
+                _skip_newlines(state)
+                x = _parse_variable(state, cache)
+                return MOI.ScalarAffineTerm(coef, x)
+            elseif t.kind == _TOKEN_IDENTIFIER
+                # NUMBER \n IDENTIFIER
+                x = _parse_variable(state, cache)
+                return MOI.ScalarAffineTerm(coef, x)
+            end
         end
+        # NUMBER
+        return coef
     elseif _next_token_is(state, _TOKEN_OPEN_BRACKET)
         # QUADRATIC_EXPRESSION
         return _parse_quad_expression(state, cache, prefix)
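Mirroring the new term tests below, even the `*` of a multiplication may now be surrounded by newlines:

```julia
import MathOptInterface as MOI
const LP = MOI.FileFormats.LP

io = IOBuffer("maximize\nobj: 2\n*\nx\nsubject to\nbounds\nx free\nend\n")
model = LP.Model()
MOI.read!(io, model)
x = MOI.get(model, MOI.VariableIndex, "x")
f = 2.0 * x
@assert isapprox(f, MOI.get(model, MOI.ObjectiveFunction{typeof(f)}()))
```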

test/FileFormats/LP/LP.jl

Lines changed: 59 additions & 0 deletions
@@ -1554,6 +1554,65 @@ function test_new_line_edge_case_fails()
     return
 end
 
+function test_parse_keyword_edge_cases_identifier_is_keyword()
+    for name in ["max", "min", "st", "such", "bounds", "obj", "free"]
+        io = IOBuffer("""
+        maximize
+        obj: $name
+        subject to
+        $name <= 1
+        bounds
+        $name free
+        end
+        """)
+        seekstart(io)
+        model = LP.Model()
+        MOI.read!(io, model)
+        x = only(MOI.get(model, MOI.ListOfVariableIndices()))
+        @test MOI.get(model, MOI.VariableName(), x) == name
+    end
+    return
+end
+
+function test_parse_keyword_subject_to_errors()
+    for line in ["subject", "subject too", "subject to a:"]
+        io = IOBuffer("""
+        maximize
+        obj: x
+        $line
+        x <= 1
+        bounds
+        x free
+        end
+        """)
+        seekstart(io)
+        model = LP.Model()
+        @test_throws LP.ParseError MOI.read!(io, model)
+    end
+    return
+end
+
+function test_parse_term_split_across_lines()
+    for obj in ["2 x", "\n2 x", "2\nx", "2*\nx", "2\n*x", "2\n\n*\n\n\nx\n"]
+        io = IOBuffer("""
+        maximize
+        obj: $obj
+        subject to
+        bounds
+        x free
+        end
+        """)
+        seekstart(io)
+        model = LP.Model()
+        MOI.read!(io, model)
+        x = MOI.get(model, MOI.VariableIndex, "x")
+        f = 2.0 * x
+        g = MOI.get(model, MOI.ObjectiveFunction{typeof(f)}())
+        @test isapprox(f, g)
+    end
+    return
+end
+
 end # module
 
 TestLP.runtests()
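One note on the test module's design (a sketch from memory, not the verbatim harness): `TestLP.runtests()` conventionally discovers every function whose name starts with `test_` by reflection and calls it, which is why no two test functions may share a name; a later definition would silently replace an earlier one.

```julia
using Test

# Roughly how a runtests() of this style finds its cases:
function runtests()
    for name in names(@__MODULE__; all = true)
        if startswith(string(name), "test_")
            @testset "$name" begin
                getfield(@__MODULE__, name)()
            end
        end
    end
    return
end
```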
