Skip to content

Commit 844f3d4

Browse files
committed
Add a better error handler
1 parent 2bf6592 commit 844f3d4

File tree

2 files changed

+173
-50
lines changed

2 files changed

+173
-50
lines changed

src/FileFormats/LP/read.jl

Lines changed: 163 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,23 @@ function Base.read!(io::IO, model::Model{T}) where {T}
6464
_parse_bound(state, cache)
6565
elseif keyword == :SOS
6666
_parse_constraint(state, cache)
67+
elseif keyword == :END
68+
_throw_unexpected_token(
69+
state,
70+
token,
71+
"No file contents are allowed after `end`.",
72+
)
6773
else
68-
throw(UnexpectedToken(token))
74+
_throw_unexpected_token(
75+
state,
76+
token,
77+
"Parsing this section is not supported by the current reader.",
78+
)
6979
end
7080
end
81+
# if keyword != :END
82+
# TODO(odow): decide if we should throw an error here.
83+
# end
7184
for x in cache.variable_with_default_bound
7285
MOI.add_constraint(model, x, MOI.GreaterThan(0.0))
7386
end
@@ -141,6 +154,27 @@ const _KEYWORDS = Dict(
141154
_TOKEN_NEWLINE,
142155
_TOKEN_UNKNOWN,
143156
)
157+
158+
const _KIND_TO_MSG = Dict{_TokenKind,String}(
159+
_TOKEN_KEYWORD => "a keyword",
160+
_TOKEN_IDENTIFIER => "a variable name",
161+
_TOKEN_NUMBER => "a number",
162+
_TOKEN_ADDITION => "the symbol `+`",
163+
_TOKEN_SUBTRACTION => "the symbol `-`",
164+
_TOKEN_MULTIPLICATION => "the symbol `*`",
165+
_TOKEN_DIVISION => "the symbol `/`",
166+
_TOKEN_EXPONENT => "the symbol `^`",
167+
_TOKEN_OPEN_BRACKET => "the symbol `[`",
168+
_TOKEN_CLOSE_BRACKET => "the symbol `]`",
169+
_TOKEN_GREATER_THAN => "the symbol `>=`",
170+
_TOKEN_LESS_THAN => "the symbol `<=`",
171+
_TOKEN_EQUAL_TO => "the symbol `==`",
172+
_TOKEN_COLON => "the symbol `:`",
173+
_TOKEN_IMPLIES => "the symbol `->`",
174+
_TOKEN_NEWLINE => "a new line",
175+
_TOKEN_UNKNOWN => "some unknown symbol",
176+
)
177+
144178
"""
145179
const _OPERATORS::Dict{Char,_TokenKind}
146180
@@ -175,28 +209,7 @@ unprocessed value.
175209
struct Token
176210
kind::_TokenKind
177211
value::Union{Nothing,String}
178-
end
179-
180-
"""
181-
struct UnexpectedToken <: Exception
182-
token::Token
183-
end
184-
185-
This error is thrown when we encounter an unexpected token when parsing the LP
186-
file. No other information is available.
187-
188-
TODO: we could improve this by storing line information or other context to help
189-
the user diagnose the problem.
190-
"""
191-
struct UnexpectedToken <: Exception
192-
token::Token
193-
end
194-
195-
function _expect(token::Token, kind::_TokenKind)
196-
if token.kind != kind
197-
throw(UnexpectedToken(token))
198-
end
199-
return token
212+
pos::Int
200213
end
201214

202215
"""
@@ -216,9 +229,53 @@ It stores:
216229
"""
217230
mutable struct LexerState
218231
io::IO
232+
line::Int
219233
peek_char::Union{Nothing,Char}
220234
peek_tokens::Vector{Token}
221-
LexerState(io::IO) = new(io, nothing, Token[])
235+
LexerState(io::IO) = new(io, 1, nothing, Token[])
236+
end
237+
238+
"""
239+
struct UnexpectedToken <: Exception
240+
token::Token
241+
end
242+
243+
This error is thrown when we encounter an unexpected token when parsing the LP
244+
file. No other information is available.
245+
"""
246+
struct UnexpectedToken <: Exception
247+
token::Token
248+
line::Int
249+
msg::String
250+
end
251+
252+
function _throw_unexpected_token(state::LexerState, token::Token, msg::String)
253+
offset = min(40, token.pos)
254+
seek(state.io, token.pos - offset)
255+
line = String(read(state.io, 2 * offset))
256+
i = something(findprev('\n', line, offset-1), 0)
257+
j = something(findnext('\n', line, offset), length(line) + 1)
258+
help = string(line[i+1:j-1], "\n", " "^(offset - i + - 1), "^\n", msg)
259+
return throw(UnexpectedToken(token, state.line, help))
260+
end
261+
262+
function Base.showerror(io::IO, err::UnexpectedToken)
263+
return print(
264+
io,
265+
"Error parsing LP file. Got an unexpected token on line $(err.line):\n",
266+
err.msg,
267+
)
268+
end
269+
270+
function _expect(state::LexerState, token::Token, kind::_TokenKind)
271+
if token.kind != kind
272+
_throw_unexpected_token(
273+
state,
274+
token,
275+
string("We expected this token to be ", _KIND_TO_MSG[kind]),
276+
)
277+
end
278+
return token
222279
end
223280

224281
function Base.peek(state::LexerState, ::Type{Char})
@@ -236,16 +293,20 @@ end
236293

237294
function Base.read(state::LexerState, ::Type{Token})
238295
token = peek(state, Token, 1)
239-
if isempty(state.peek_tokens)
240-
throw(UnexpectedToken(Token(_TOKEN_UNKNOWN, "EOF")))
296+
if isempty(state.peek_tokens)
297+
_throw_unexpected_token(
298+
state,
299+
Token(_TOKEN_UNKNOWN, "EOF", position(state.io)),
300+
"Unexpected end to the file. We weren't finished yet.",
301+
)
241302
end
242303
popfirst!(state.peek_tokens)
243304
return token
244305
end
245306

246307
function Base.read(state::LexerState, ::Type{Token}, kind::_TokenKind)
247308
token = read(state, Token)
248-
return _expect(token, kind)
309+
return _expect(state, token, kind)
249310
end
250311

251312
# We're a bit more relaxed than typical, allowing any letter or digit, not just
@@ -274,9 +335,11 @@ end
274335

275336
function _peek_inner(state::LexerState)
276337
while (c = peek(state, Char)) !== nothing
338+
pos = position(state.io)
277339
if c == '\n'
340+
state.line += 1
278341
_ = read(state, Char)
279-
return Token(_TOKEN_NEWLINE, nothing)
342+
return Token(_TOKEN_NEWLINE, nothing, pos)
280343
elseif isspace(c) # Whitespace
281344
_ = read(state, Char)
282345
elseif c == '\\' # Comment: backslash until newline
@@ -288,7 +351,7 @@ function _peek_inner(state::LexerState)
288351
write(buf, c)
289352
_ = read(state, Char)
290353
end
291-
return Token(_TOKEN_NUMBER, String(take!(buf)))
354+
return Token(_TOKEN_NUMBER, String(take!(buf)), pos)
292355
elseif _is_starting_identifier(c) # Identifier / keyword
293356
buf = IOBuffer()
294357
while (c = peek(state, Char)) !== nothing && _is_identifier(c)
@@ -301,33 +364,37 @@ function _peek_inner(state::LexerState)
301364
t = peek(state, Token)
302365
if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "to"
303366
_ = read(state, Token) # Skip "to"
304-
return Token(_TOKEN_KEYWORD, "CONSTRAINTS")
367+
return Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos)
305368
end
306369
elseif l_val == "such"
307370
t = peek(state, Token)
308371
if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "that"
309372
_ = read(state, Token) # Skip "such"
310-
return Token(_TOKEN_KEYWORD, "CONSTRAINTS")
373+
return Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos)
311374
end
312375
end
313376
if (kw = get(_KEYWORDS, l_val, nothing)) !== nothing
314-
return Token(_TOKEN_KEYWORD, string(kw))
377+
return Token(_TOKEN_KEYWORD, string(kw), pos)
315378
end
316-
return Token(_TOKEN_IDENTIFIER, val)
379+
return Token(_TOKEN_IDENTIFIER, val, pos)
317380
elseif (op = get(_OPERATORS, c, nothing)) !== nothing
318381
_ = read(state, Char) # Skip c
319382
if c == '-' && peek(state, Char) == '>'
320383
_ = read(state, Char)
321-
return Token(_TOKEN_IMPLIES, nothing)
384+
return Token(_TOKEN_IMPLIES, nothing, pos)
322385
elseif c == '=' && peek(state, Char) in ('<', '>')
323386
c = read(state, Char) # Allow =< and => as <= and >=
324-
return Token(_OPERATORS[c], nothing)
387+
return Token(_OPERATORS[c], nothing, pos)
325388
elseif c in ('<', '>', '=') && peek(state, Char) == '='
326389
_ = read(state, Char) # Allow <=, >=, and ==
327390
end
328-
return Token(op, nothing)
391+
return Token(op, nothing, pos)
329392
else
330-
throw(UnexpectedToken(Token(_TOKEN_UNKNOWN, "$c")))
393+
_throw_unexpected_token(
394+
state,
395+
Token(_TOKEN_UNKNOWN, "$c", pos),
396+
"This character is not supported in an LP file.",
397+
)
331398
end
332399
end
333400
return
@@ -391,13 +458,21 @@ function _parse_number(state::LexerState, cache::Cache{T})::T where {T}
391458
if v == "inf" || v == "infinity"
392459
return typemax(T)
393460
else
394-
throw(UnexpectedToken(token))
461+
_throw_unexpected_token(
462+
state,
463+
token,
464+
"We expected this to be a number.",
465+
)
395466
end
396467
end
397-
_expect(token, _TOKEN_NUMBER)
468+
_expect(state, token, _TOKEN_NUMBER)
398469
ret = tryparse(T, token.value)
399470
if ret === nothing
400-
throw(UnexpectedToken(token))
471+
_throw_unexpected_token(
472+
state,
473+
token,
474+
"We expected this to be a number.",
475+
)
401476
end
402477
return ret
403478
end
@@ -435,7 +510,7 @@ function _parse_quad_term(
435510
_skip_newlines(state)
436511
n = read(state, Token, _TOKEN_NUMBER)
437512
if n.value != "2"
438-
throw(UnexpectedToken(n))
513+
_throw_unexpected_token(state, n, "Only `^ 2` is supported.")
439514
end
440515
return MOI.ScalarQuadraticTerm(T(2) * coef, x1, x1)
441516
end
@@ -471,7 +546,11 @@ function _parse_quad_expression(
471546
_ = read(state, Token)
472547
break
473548
else
474-
return throw(UnexpectedToken(p))
549+
_throw_unexpected_token(
550+
state,
551+
p,
552+
"We expected this to be a ] to end the quadratic expression.",
553+
)
475554
end
476555
end
477556
_skip_newlines(state)
@@ -480,7 +559,11 @@ function _parse_quad_expression(
480559
# Must be /2
481560
n = read(state, Token, _TOKEN_NUMBER)
482561
if n.value != "2"
483-
throw(UnexpectedToken(n))
562+
_throw_unexpected_token(
563+
state,
564+
n,
565+
"The only supported value here is `] / 2`.",
566+
)
484567
end
485568
for (i, term) in enumerate(f.quadratic_terms)
486569
f.quadratic_terms[i] = MOI.ScalarQuadraticTerm(
@@ -530,15 +613,22 @@ function _parse_term(
530613
_ = read(state, Token, _TOKEN_MULTIPLICATION)
531614
x = _parse_variable(state, cache)
532615
return MOI.ScalarAffineTerm(coef, x)
533-
else
616+
elseif _next_token_is(state, _TOKEN_NEWLINE) ||
617+
_next_token_is(state, _TOKEN_ADDITION) ||
618+
_next_token_is(state, _TOKEN_SUBTRACTION)
534619
# NUMBER
535620
return coef
536621
end
537622
elseif _next_token_is(state, _TOKEN_OPEN_BRACKET)
538623
# QUADRATIC_EXPRESSION
539624
return _parse_quad_expression(state, cache, prefix)
540625
end
541-
return throw(UnexpectedToken(peek(state, Token)))
626+
token = peek(state, Token)
627+
return _throw_unexpected_token(
628+
state,
629+
token,
630+
"Got $(_KIND_TO_MSG[token.kind]), but we expected this to be a new term in the expression.",
631+
)
542632
end
543633

544634
function _add_to_expression!(f::MOI.ScalarQuadraticFunction{T}, x::T) where {T}
@@ -611,7 +701,11 @@ function _parse_set_suffix(state, cache)
611701
rhs = _parse_number(state, cache)
612702
return MOI.EqualTo(rhs)
613703
else
614-
throw(UnexpectedToken(p))
704+
_throw_unexpected_token(
705+
state,
706+
p,
707+
"We expected this to be an inequality like `>=`, `<=`, or `==`.",
708+
)
615709
end
616710
end
617711

@@ -633,7 +727,11 @@ function _parse_set_prefix(state, cache)
633727
elseif p.kind == _TOKEN_EQUAL_TO
634728
return MOI.EqualTo(lhs)
635729
else
636-
throw(UnexpectedToken(p))
730+
_throw_unexpected_token(
731+
state,
732+
p,
733+
"We expected this to be an inequality like `>=`, `<=`, or `==`.",
734+
)
637735
end
638736
end
639737

@@ -731,12 +829,24 @@ end
731829
function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T}
732830
t = read(state, Token, _TOKEN_IDENTIFIER) # Si
733831
if !(t.value == "S1" || t.value == "S2")
734-
throw(UnexpectedToken(t))
832+
_throw_unexpected_token(
833+
state,
834+
t,
835+
"This must be either `S1` for SOS-I or `S2` for SOS-II.",
836+
)
735837
end
736838
_ = read(state, Token, _TOKEN_COLON)
737839
_ = read(state, Token, _TOKEN_COLON)
738840
f, w = MOI.VectorOfVariables(MOI.VariableIndex[]), T[]
739841
while true
842+
if _next_token_is(state, _TOKEN_NEWLINE)
843+
t = peek(state, Token)
844+
_throw_unexpected_token(
845+
state,
846+
t,
847+
"SOS constraints cannot be spread across lines.",
848+
)
849+
end
740850
push!(f.variables, _parse_variable(state, cache))
741851
_ = read(state, Token, _TOKEN_COLON)
742852
push!(w, _parse_number(state, cache))
@@ -773,7 +883,11 @@ function _parse_indicator_constraint(
773883
elseif t.value == "1"
774884
MOI.ACTIVATE_ON_ONE
775885
else
776-
throw(UnexpectedToken(t))
886+
_throw_unexpected_token(
887+
state,
888+
t,
889+
"This must be either `= 0` or `= 1`.",
890+
)
777891
end
778892
_ = read(state, Token, _TOKEN_IMPLIES)
779893
f = _parse_expression(state, cache)

test/FileFormats/LP/LP.jl

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1091,7 +1091,16 @@ function test_invalid_token_in_sos()
10911091
""",
10921092
)
10931093
seekstart(io)
1094-
@test_throws LP.UnexpectedToken read!(io, model)
1094+
contents = try
1095+
read!(io, model)
1096+
catch err
1097+
sprint(showerror, err)
1098+
end
1099+
@test contents == """
1100+
Error parsing LP file. Got an unexpected token on line 5:
1101+
c11: S1:: x 1.0 y 2.0
1102+
^
1103+
We expected this token to be the symbol `:`"""
10951104
return
10961105
end
10971106

0 commit comments

Comments
 (0)