Skip to content

Commit b947074

Browse files
committed
cleanup
1 parent abf802b commit b947074

File tree

1 file changed

+38
-23
lines changed

1 file changed

+38
-23
lines changed

src/XML.jl

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ unescape(x::AbstractString) = replace(x, reverse.(escape_chars)...)
4545
@enum(RawDataType, RAW_DOCUMENT, RAW_TEXT, RAW_COMMENT, RAW_CDATA, RAW_PROCESSING_INSTRUCTION,
4646
RAW_DECLARATION, RAW_DTD, RAW_ELEMENT_OPEN, RAW_ELEMENT_CLOSE, RAW_ELEMENT_SELF_CLOSED)
4747

48-
nodetype(x::RawDataType) =
48+
@inline nodetype(x::RawDataType) =
4949
x === RAW_ELEMENT_OPEN ? ELEMENT :
5050
x === RAW_ELEMENT_CLOSE ? ELEMENT :
5151
x === RAW_ELEMENT_SELF_CLOSED ? ELEMENT :
@@ -126,38 +126,53 @@ is_node(o::RawData) = o.type !== RAW_ELEMENT_CLOSE
126126
# Lazy iterator over `o` that keeps only the items for which `is_node` returns true.
function nodes(o::RawData)
    return Iterators.Filter(is_node, o)
end
127127

128128
#-----------------------------------------------------------------------------# get_name
129+
# # find the start/stop of a name given a starting position `i`
130+
# _name_start(data, i) = findnext(x -> isletter(Char(x)) || Char(x) === '_', data, i)
131+
# is_name_char(x) = (c = Char(x); isletter(c) || isdigit(c) || c ∈ "._-:")
132+
# function _name_stop(data, i)
133+
# i = findnext(!is_name_char, data, i)
134+
# isnothing(i) ? length(data) : i
135+
# end
136+
137+
# # starting at position i, return name and position after name
138+
# function get_name(data, i)
139+
# i = _name_start(data, i)
140+
# j = _name_stop(data, i)
141+
# @views name = String(data[i:j-1])
142+
# return name, j
143+
# end
144+
145+
# True when byte `x` is an ASCII letter (`A`-`Z`, `a`-`z`) or an underscore.
function is_name_start_char(x::UInt8)
    x == UInt8('_') && return true
    return UInt8('A') <= x <= UInt8('Z') || UInt8('a') <= x <= UInt8('z')
end
146+
147+
# A name character is a name-start character (ASCII letter or underscore), an ASCII digit,
# a hyphen, a period, or a colon.  The colon must be accepted so that namespace-prefixed
# names such as `xlink:href` or `xmlns:xsi` are read as a single name.
is_name_char(x::UInt8) =
    is_name_start_char(x) || x in UInt8('0'):UInt8('9') ||
    x == UInt8('-') || x == UInt8('.') || x == UInt8(':')
149+
129150
# find the start/stop of a name given a starting position `i`
130-
_name_start(data, i) = findnext(x -> isletter(Char(x)) || Char(x) === '_', data, i)
131-
is_name_char(x) = (c = Char(x); isletter(c) || isdigit(c) || c ∈ "._-:")
132-
function _name_stop(data, i)
133-
i = findnext(!is_name_char, data, i)
134-
isnothing(i) ? length(data) : i
135-
end
151+
# Index of the first byte at or after position `i` in `data` that satisfies
# `is_name_start_char`, or `nothing` when no such byte exists.
function name_start(data, i)
    return findnext(is_name_start_char, data, i)
end
152+
# Index of the last byte of the name that begins at position `i`.  When every byte from `i`
# to the end of `data` is a name character, the name runs to the last index of `data`.
function name_stop(data, i)
    stop = findnext(!is_name_char, data, i)
    # `findnext` returns `nothing` when no terminator exists; `nothing - 1` would throw.
    return isnothing(stop) ? lastindex(data) : stop - 1
end
136153

137-
# starting at position i, return name and position after name
138154
function get_name(data, i)
139-
i = _name_start(data, i)
140-
j = _name_stop(data, i)
141-
@views name = String(data[i:j-1])
142-
return name, j
155+
i = name_start(data, i)
156+
j = name_stop(data, i)
157+
@views String(data[i:j]), j + 1
143158
end
144159

145160
#-----------------------------------------------------------------------------# get_attributes
146161
# starting at position i, return attributes up until the next '>' or '?' (DTD)
147162
function get_attributes(data, i)
148163
j = findnext(x -> x == UInt8('>') || x == UInt8('?'), data, i)
149-
i = _name_start(data, i)
164+
i = name_start(data, i)
150165
i > j && return nothing
151166
out = OrderedDict{String, String}()
152167
while !isnothing(i) && i < j
153168
key, i = get_name(data, i)
154169
# get quotechar the value is wrapped in (either ' or ")
155-
i = findnext(x -> Char(x) === '"' || Char(x) === ''', data, i)
170+
i = findnext(x -> Char(x) === '"' || Char(x) === ''', data, i + 1)
156171
quotechar = data[i]
157172
i2 = findnext(==(quotechar), data, i + 1)
158173
@views value = String(data[i+1:i2-1])
159174
out[key] = value
160-
i = _name_start(data, i2)
175+
i = name_start(data, i2)
161176
end
162177
return out
163178
end
@@ -189,9 +204,9 @@ Return the attributes of `ELEMENT`, `DECLARATION`, or `PROCESSING_INSTRUCTION` n
189204
function attributes(o::RawData)
190205
if o.type === RAW_ELEMENT_OPEN || o.type === RAW_ELEMENT_SELF_CLOSED || o.type === RAW_PROCESSING_INSTRUCTION
191206
i = o.pos
192-
i = _name_start(o.data, i)
193-
i = _name_stop(o.data, i)
194-
get_attributes(o.data, i)
207+
i = name_start(o.data, i)
208+
i = name_stop(o.data, i)
209+
get_attributes(o.data, i + 1)
195210
elseif o.type === RAW_DECLARATION
196211
get_attributes(o.data, o.pos + 6)
197212
else
@@ -257,7 +272,7 @@ end
257272
# Accessor for the `depth` field of a `RawData`.
function depth(o::RawData)
    return o.depth
end
258273

259274
#-----------------------------------------------------------------------------# next RawData
260-
notspace(x::UInt8) = !isspace(Char(x))
275+
# Byte-level whitespace test: convert `x` to a `Char` and defer to `Base.isspace`.
function isspace(x::UInt8)
    return Base.isspace(Char(x))
end
261276

262277
"""
263278
next(node) --> typeof(node) or Nothing
@@ -268,7 +283,7 @@ would visit nodes by reading top-down through an XML file. Not defined for `XML
268283
function next(o::RawData)
269284
i = o.pos + o.len + 1
270285
(; depth, data, type) = o
271-
i = findnext(notspace, data, i) # skip insignificant whitespace
286+
i = findnext(!isspace, data, i) # skip insignificant whitespace
272287
isnothing(i) && return nothing
273288
if type === RAW_ELEMENT_OPEN || type === RAW_DOCUMENT
274289
depth += 1
@@ -278,7 +293,7 @@ function next(o::RawData)
278293
if c !== '<'
279294
type = RAW_TEXT
280295
j = findnext(==(UInt8('<')), data, i) - 1
281-
j = findprev(notspace, data, j) # "rstrip"
296+
j = findprev(!isspace, data, j) # "rstrip"
282297
elseif c === '<'
283298
c2 = Char(o.data[i + 1])
284299
if c2 === '!'
@@ -326,15 +341,15 @@ function prev(o::RawData)
326341
(; depth, data, type) = o
327342
type === RAW_DOCUMENT && return nothing
328343
j = o.pos - 1
329-
j = findprev(notspace, data, j) # skip insignificant whitespace
344+
j = findprev(!isspace, data, j) # skip insignificant whitespace
330345
isnothing(j) && return RawData(data) # RAW_DOCUMENT
331346
c = Char(o.data[j])
332347
i = j - 1
333348
next_type = type
334349
if c !== '>' # text
335350
type = RAW_TEXT
336351
i = findprev(==(UInt8('>')), data, j) + 1
337-
i = findnext(notspace, data, i) # "lstrip"
352+
i = findnext(!isspace, data, i) # "lstrip"
338353
elseif c === '>'
339354
c2 = Char(o.data[j - 1])
340355
if c2 === '-'

0 commit comments

Comments
 (0)