Skip to content

Commit 4d9f987

Browse files
committed
[FileFormats.LP] remove lowercase usage to reduce allocations
1 parent 79ff5d0 commit 4d9f987

File tree

1 file changed

+63
-57
lines changed

1 file changed

+63
-57
lines changed

src/FileFormats/LP/read.jl

Lines changed: 63 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -106,52 +106,62 @@ function Base.read!(io::IO, model::Model{T}) where {T}
106106
return
107107
end
108108

109+
110+
# Return `true` if `test` equals `target` once every character of `test` has
# been lowercased. `target` is compared as-is, so callers pass it in lowercase.
# This is run for every identifier, so it compares character by character
# instead of building a lowercased copy of `test`.
function _compare_case_insenstive(test::String, target::String)
    length(test) == length(target) || return false
    for (test_char, target_char) in zip(test, target)
        if lowercase(test_char) != target_char
            return false
        end
    end
    return true
end
118+
119+
# Return `true` if `input` is a case-insensitive match for any of the lowercase
# candidates in `args`. `c` is compared against the lowercased first character
# of `input` before any candidate is tried; the callers in this file pass the
# first character shared by every candidate, so most non-matching identifiers
# are rejected with a single comparison.
function _compare_case_insenstive(input::String, c::Char, args)
    # `first` throws an `ArgumentError` on an empty string, and an empty string
    # cannot match a candidate that starts with `c`, so reject it up front.
    if isempty(input) || lowercase(first(input)) != c
        return false
    end
    return any(_compare_case_insenstive(input, arg) for arg in args)
end
125+
126+
# Lowercase spellings of the MAXIMIZE and MINIMIZE keywords. They are stored as
# tuples of string literals so they can be passed straight to
# `_compare_case_insenstive`.
const _MAXIMIZE_KEYWORDS = (
    "max",
    "maximize",
    "maximise",
    "maximum",
)
const _MINIMIZE_KEYWORDS = (
    "min",
    "minimize",
    "minimise",
    "minimum",
)
128+
109129
"""
110-
const _KEYWORDS::Dict{String,Symbol}
130+
_case_insenstive_identifier_to_keyword(input::String)
111131
112-
The LP file format is very permissive in what it allows users to call the
113-
various sections. Here is a dictionary that maps possible user words
114-
(normalized to lowercase, even though users can use mixed case) to the section.
132+
We need to check if identifiers are case insensitive keywords.
115133
116-
If you find new spellings for the section names, add them here.
134+
An obvious way to do this is something like `dict[lowercase(identifier)]`, but
135+
this involves a moderately expensive `lowercase` operation and a dict lookup for
136+
every identifier.
117137
118-
Special handling is needed in the lexer for the keywords that contain spaces.
138+
This function tries to be a little cleverer and doesn't allocate.
119139
"""
120-
const _KEYWORDS = Dict(
121-
# MAXIMIZE
122-
"max" => :MAXIMIZE,
123-
"maximize" => :MAXIMIZE,
124-
"maximise" => :MAXIMIZE,
125-
"maximum" => :MAXIMIZE,
126-
# MINIMIZE
127-
"min" => :MINIMIZE,
128-
"minimize" => :MINIMIZE,
129-
"minimise" => :MINIMIZE,
130-
"minimum" => :MINIMIZE,
131-
# CONSTRAINTS
132-
"subject to" => :CONSTRAINTS,
133-
"such that" => :CONSTRAINTS,
134-
"st" => :CONSTRAINTS,
135-
"s.t." => :CONSTRAINTS,
136-
"st." => :CONSTRAINTS,
137-
# BOUNDS
138-
"bounds" => :BOUNDS,
139-
"bound" => :BOUNDS,
140-
# INTEGER
141-
"gen" => :INTEGER,
142-
"general" => :INTEGER,
143-
"generals" => :INTEGER,
144-
"integer" => :INTEGER,
145-
"integers" => :INTEGER,
146-
# BINARY
147-
"bin" => :BINARY,
148-
"binary" => :BINARY,
149-
"binaries" => :BINARY,
150-
# SOS
151-
"sos" => :SOS,
152-
# END
153-
"end" => :END,
154-
)
140+
function _case_insenstive_identifier_to_keyword(input::String)
    # Returns the upper-case keyword name (a `String`) that `input` spells when
    # case is ignored, or `nothing` if `input` is not a recognized keyword.
    #
    # The shortest keyword handled here is "st" (2 characters) and the longest
    # ("maximize", "minimise", "generals", "integers", "binaries", ...) have 8
    # characters, so anything outside that range cannot match and is rejected
    # before any character comparison is done.
    if !(2 <= length(input) <= 8)
        return nothing  # identifiers outside these lengths are not recognized
    elseif _compare_case_insenstive(input, 'm', _MAXIMIZE_KEYWORDS)
        return "MAXIMIZE"
    elseif _compare_case_insenstive(input, 'm', _MINIMIZE_KEYWORDS)
        return "MINIMIZE"
    elseif _compare_case_insenstive(input, 's', ("st", "s.t.", "st."))
        # `subject to` and `such that` handled in `peek`
        return "CONSTRAINTS"
    elseif _compare_case_insenstive(input, "sos")
        return "SOS"
    elseif _compare_case_insenstive(input, 'b', ("bound", "bounds"))
        return "BOUNDS"
    elseif _compare_case_insenstive(input, 'g', ("gen", "general", "generals"))
        return "INTEGER"
    elseif _compare_case_insenstive(input, 'i', ("integer", "integers"))
        return "INTEGER"
    elseif _compare_case_insenstive(input, 'b', ("bin", "binary", "binaries"))
        return "BINARY"
    elseif _compare_case_insenstive(input, "end")
        return "END"
    end
    return nothing
end
155165

156166
"""
157167
_TokenKind
@@ -247,6 +257,13 @@ struct _Token
247257
pos::Int
248258
end
249259

260+
# `_Token` overloads of `_compare_case_insenstive`. Only identifier tokens can
# match; every other token kind returns `false`. The comparison itself is
# delegated to the `String` methods using `test.value`.
function _compare_case_insenstive(test::_Token, target::String)
    test.kind == _TOKEN_IDENTIFIER || return false
    return _compare_case_insenstive(test.value, target)
end

# `_parse_number` calls the three-argument form with a `_Token`, so that form
# needs a `_Token` method too; without it the call is a `MethodError`.
# NOTE(review): assumes identifier tokens always carry a non-empty `String`
# value -- confirm against the lexer.
function _compare_case_insenstive(test::_Token, c::Char, args)
    test.kind == _TOKEN_IDENTIFIER || return false
    return _compare_case_insenstive(test.value, c, args)
end
266+
250267
"""
251268
mutable struct _LexerState{O<:IO}
252269
io::O
@@ -353,15 +370,6 @@ end
353370

354371
# Return `true` if `c` is a decimal digit or one of `.`, `e`, `E`, `+`, `-`
# (presumably the characters the lexer allows inside a number token).
function _is_number(c::Char)
    if isdigit(c)
        return true
    end
    return c == '.' || c == 'e' || c == 'E' || c == '+' || c == '-'
end
355372

356-
# We want an efficient way to check if `test.value` is a case-insensitive
357-
# version of `target`. Thsi is run for every identifier, so it needs to be fast.
358-
function _compare_case_insenstive(test::_Token, target::String)
359-
if test.kind != _TOKEN_IDENTIFIER || length(test.value) != length(target)
360-
return false
361-
end
362-
return all(lowercase(a) == b for (a, b) in zip(test.value, target))
363-
end
364-
365373
function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
366374
@assert n >= 1
367375
while length(state.peek_tokens) < n
@@ -418,8 +426,8 @@ function _peek_inner(state::_LexerState)
418426
_ = read(state, Char)
419427
end
420428
val = String(take!(buf))
421-
if (kw = get(_KEYWORDS, lowercase(val), nothing)) !== nothing
422-
return _Token(_TOKEN_KEYWORD, string(kw), pos)
429+
if (kw = _case_insenstive_identifier_to_keyword(val)) !== nothing
430+
return _Token(_TOKEN_KEYWORD, kw, pos)
423431
end
424432
return _Token(_TOKEN_IDENTIFIER, val, pos)
425433
elseif (op = get(_OPERATORS, c, nothing)) !== nothing
@@ -507,12 +515,10 @@ function _parse_number(state::_LexerState, cache::_ReadCache{T})::T where {T}
507515
elseif token.kind == _TOKEN_SUBTRACTION
508516
return -_parse_number(state, cache)
509517
elseif token.kind == _TOKEN_IDENTIFIER
510-
v = lowercase(token.value)
511-
if v == "inf" || v == "infinity"
518+
if _compare_case_insenstive(token, 'i', ("inf", "infinity"))
512519
return typemax(T)
513-
else
514-
_throw_parse_error(state, token, "We expected this to be a number.")
515520
end
521+
_throw_parse_error(state, token, "We expected this to be a number.")
516522
end
517523
_expect(state, token, _TOKEN_NUMBER)
518524
ret = tryparse(T, token.value)
@@ -740,7 +746,7 @@ end
740746
function _parse_set_suffix(state, cache)
741747
_skip_newlines(state)
742748
p = read(state, _Token)
743-
if p.kind == _TOKEN_IDENTIFIER && lowercase(p.value) == "free"
749+
if _compare_case_insenstive(p, "free")
744750
return nothing
745751
end
746752
_skip_newlines(state)

0 commit comments

Comments
 (0)