Skip to content

Commit b845d44

Browse files
authored
[FileFormats.LP] remove lowercase usage to reduce allocations (#2846)
1 parent 84d559f commit b845d44

File tree

1 file changed

+64
-57
lines changed

1 file changed

+64
-57
lines changed

src/FileFormats/LP/read.jl

Lines changed: 64 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -106,52 +106,63 @@ function Base.read!(io::IO, model::Model{T}) where {T}
106106
return
107107
end
108108

109+
# We want an efficient way to check if `test` is a case-insensitive version of
110+
# `target`. We don't want `lowercase(test) == target` because this involves
111+
# allocating a new string, and we check many identifiers to see if they are
112+
# keywords.
113+
# Return `true` if `test` equals `target` once each character of `test` has been
# lowercased. `target` is compared as-is, so it must already be lowercase.
# Characters are compared pairwise, so no lowercased copy of `test` is built.
function _compare_case_insenstive(test::String, target::String)
    # Strings with a different number of characters can never match.
    length(test) == length(target) || return false
    for (test_char, target_char) in zip(test, target)
        if lowercase(test_char) != target_char
            return false
        end
    end
    return true
end
119+
120+
# Return `true` if `input` is a case-insensitive match for any string in `args`.
#
# `c` is a cheap pre-filter: if the lowercased first character of `input` is not
# `c`, the function returns `false` without comparing against any element of
# `args`. Callers therefore pass the (lowercase) first character shared by every
# candidate in `args`.
#
# An empty `input` returns `false` instead of throwing from `first(input)`.
function _compare_case_insenstive(input::String, c::Char, args)
    # `first` throws on an empty string, so check for it before peeking.
    if isempty(input) || lowercase(first(input)) != c
        return false
    end
    return any(arg -> _compare_case_insenstive(input, arg), args)
end
126+
127+
# Lowercase spellings that `_case_insenstive_identifier_to_keyword` maps to "MAXIMIZE".
const _MAXIMIZE_KEYWORDS = ("max", "maximize", "maximise", "maximum")
128+
# Lowercase spellings that `_case_insenstive_identifier_to_keyword` maps to "MINIMIZE".
const _MINIMIZE_KEYWORDS = ("min", "minimize", "minimise", "minimum")
129+
109130
"""
110-
const _KEYWORDS::Dict{String,Symbol}
131+
_case_insenstive_identifier_to_keyword(input::String)
111132
112-
The LP file format is very permissive in what it allows users to call the
113-
various sections. Here is a dictionary that maps possible user words
114-
(normalized to lowercase, even though users can use mixed case) to the section.
133+
We need to check whether an identifier is a case-insensitive keyword.
115134
116-
If you find new spellings for the section names, add them here.
135+
An obvious way to do this is something like `dict[lowercase(identifier)]`, but
136+
this involves a moderately expensive `lowercase` operation and a dict lookup for
137+
every identifier.
117138
118-
Special handling is needed in the lexer for the keywords that contain spaces.
139+
This function tries to be a little cleverer and doesn't allocate.
119140
"""
120-
# Maps each accepted section spelling (in lowercase) to the section it starts.
# Spellings are grouped by section; add new spellings to the matching group.
const _KEYWORDS = Dict{String,Symbol}(
    spelling => section for (section, spellings) in (
        :MAXIMIZE => ("max", "maximize", "maximise", "maximum"),
        :MINIMIZE => ("min", "minimize", "minimise", "minimum"),
        # The first two spellings contain a space.
        :CONSTRAINTS => ("subject to", "such that", "st", "s.t.", "st."),
        :BOUNDS => ("bounds", "bound"),
        :INTEGER => ("gen", "general", "generals", "integer", "integers"),
        :BINARY => ("bin", "binary", "binaries"),
        :SOS => ("sos",),
        :END => ("end",),
    ) for spelling in spellings
)
141+
function _case_insenstive_identifier_to_keyword(input::String)
    # Every spelling checked below has between 2 and 8 characters, so anything
    # outside that range cannot be a keyword.
    n_chars = length(input)
    if n_chars < 2 || n_chars > 8
        return nothing
    end
    # The checks run in a fixed order; each returns the keyword name on a match.
    if _compare_case_insenstive(input, 'm', _MAXIMIZE_KEYWORDS)
        return "MAXIMIZE"
    end
    if _compare_case_insenstive(input, 'm', _MINIMIZE_KEYWORDS)
        return "MINIMIZE"
    end
    # `subject to` and `such that` handled in `peek`
    if _compare_case_insenstive(input, 's', ("st", "s.t.", "st."))
        return "CONSTRAINTS"
    end
    if _compare_case_insenstive(input, "sos")
        return "SOS"
    end
    if _compare_case_insenstive(input, 'b', ("bound", "bounds"))
        return "BOUNDS"
    end
    # Two families of spellings both map to "INTEGER".
    if _compare_case_insenstive(input, 'g', ("gen", "general", "generals"))
        return "INTEGER"
    end
    if _compare_case_insenstive(input, 'i', ("integer", "integers"))
        return "INTEGER"
    end
    if _compare_case_insenstive(input, 'b', ("bin", "binary", "binaries"))
        return "BINARY"
    end
    if _compare_case_insenstive(input, "end")
        return "END"
    end
    return nothing
end
155166

156167
"""
157168
_TokenKind
@@ -247,6 +258,13 @@ struct _Token
247258
pos::Int
248259
end
249260

261+
# Return `true` only if `test` is an identifier token whose value is a
# case-insensitive match for `target`. Any other token kind returns `false`.
function _compare_case_insenstive(test::_Token, target::String)
    return test.kind == _TOKEN_IDENTIFIER &&
           _compare_case_insenstive(test.value, target)
end
267+
250268
"""
251269
mutable struct _LexerState{O<:IO}
252270
io::O
@@ -353,15 +371,6 @@ end
353371

354372
# Return `true` if `c` is a digit or one of the characters `.`, `e`, `E`, `+`, `-`.
function _is_number(c::Char)
    return isdigit(c) || c == '.' || c == 'e' || c == 'E' || c == '+' || c == '-'
end
355373

356-
# We want an efficient way to check if `test.value` is a case-insensitive
357-
# version of `target`. This is run for every identifier, so it needs to be fast.
358-
# Return `true` only if `test` is an identifier token whose value matches
# `target` once each character of the value has been lowercased.
function _compare_case_insenstive(test::_Token, target::String)
    test.kind == _TOKEN_IDENTIFIER || return false
    length(test.value) == length(target) || return false
    return all(zip(test.value, target)) do (value_char, target_char)
        return lowercase(value_char) == target_char
    end
end
364-
365374
function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
366375
@assert n >= 1
367376
while length(state.peek_tokens) < n
@@ -418,8 +427,8 @@ function _peek_inner(state::_LexerState)
418427
_ = read(state, Char)
419428
end
420429
val = String(take!(buf))
421-
if (kw = get(_KEYWORDS, lowercase(val), nothing)) !== nothing
422-
return _Token(_TOKEN_KEYWORD, string(kw), pos)
430+
if (kw = _case_insenstive_identifier_to_keyword(val)) !== nothing
431+
return _Token(_TOKEN_KEYWORD, kw, pos)
423432
end
424433
return _Token(_TOKEN_IDENTIFIER, val, pos)
425434
elseif (op = get(_OPERATORS, c, nothing)) !== nothing
@@ -507,12 +516,10 @@ function _parse_number(state::_LexerState, cache::_ReadCache{T})::T where {T}
507516
elseif token.kind == _TOKEN_SUBTRACTION
508517
return -_parse_number(state, cache)
509518
elseif token.kind == _TOKEN_IDENTIFIER
510-
v = lowercase(token.value)
511-
if v == "inf" || v == "infinity"
519+
if _compare_case_insenstive(token.value, 'i', ("inf", "infinity"))
512520
return typemax(T)
513-
else
514-
_throw_parse_error(state, token, "We expected this to be a number.")
515521
end
522+
_throw_parse_error(state, token, "We expected this to be a number.")
516523
end
517524
_expect(state, token, _TOKEN_NUMBER)
518525
ret = tryparse(T, token.value)
@@ -740,7 +747,7 @@ end
740747
function _parse_set_suffix(state, cache)
741748
_skip_newlines(state)
742749
p = read(state, _Token)
743-
if p.kind == _TOKEN_IDENTIFIER && lowercase(p.value) == "free"
750+
if _compare_case_insenstive(p, "free")
744751
return nothing
745752
end
746753
_skip_newlines(state)

0 commit comments

Comments
 (0)