@@ -45,7 +45,7 @@ unescape(x::AbstractString) = replace(x, reverse.(escape_chars)...)
# Token kinds stored in the `type` field of a `RawData`.
# The order of the members fixes their integer values (starting at 0), so keep it stable.
@enum RawDataType begin
    RAW_DOCUMENT
    RAW_TEXT
    RAW_COMMENT
    RAW_CDATA
    RAW_PROCESSING_INSTRUCTION
    RAW_DECLARATION
    RAW_DTD
    RAW_ELEMENT_OPEN
    RAW_ELEMENT_CLOSE
    RAW_ELEMENT_SELF_CLOSED
end
4747
48- nodetype (x:: RawDataType ) =
48+ @inline nodetype (x:: RawDataType ) =
4949 x === RAW_ELEMENT_OPEN ? ELEMENT :
5050 x === RAW_ELEMENT_CLOSE ? ELEMENT :
5151 x === RAW_ELEMENT_SELF_CLOSED ? ELEMENT :
@@ -126,38 +126,53 @@ is_node(o::RawData) = o.type !== RAW_ELEMENT_CLOSE
function nodes(o::RawData)
    # Lazily iterate `o`, keeping only the entries for which `is_node` returns true.
    return Iterators.Filter(is_node, o)
end
127127
128128# -----------------------------------------------------------------------------# get_name
129+ # # find the start/stop of a name given a starting position `i`
130+ # _name_start(data, i) = findnext(x -> isletter(Char(x)) || Char(x) === '_', data, i)
131+ # is_name_char(x) = (c = Char(x); isletter(c) || isdigit(c) || c ∈ "._-:")
132+ # function _name_stop(data, i)
133+ # i = findnext(!is_name_char, data, i)
134+ # isnothing(i) ? length(data) : i
135+ # end
136+
137+ # # starting at position i, return name and position after name
138+ # function get_name(data, i)
139+ # i = _name_start(data, i)
140+ # j = _name_stop(data, i)
141+ # @views name = String(data[i:j-1])
142+ # return name, j
143+ # end
144+
# Byte can begin a name: ASCII letter or underscore.
# NOTE: only ASCII is recognized here; bytes >= 0x80 (non-ASCII names) are not matched.
is_name_start_char(x::UInt8) =
    x in UInt8('A'):UInt8('Z') || x in UInt8('a'):UInt8('z') || x == UInt8('_')

# Byte can continue a name: letter, underscore, digit, hyphen, period, or colon.
# The colon must be accepted so that prefixed names such as `xml:lang` or `xs:element`
# are read as a single name instead of being cut at the prefix.
is_name_char(x::UInt8) =
    is_name_start_char(x) || x in UInt8('0'):UInt8('9') ||
    x == UInt8('-') || x == UInt8('.') || x == UInt8(':')
149+
129150# find the start/stop of a name given a starting position `i`
130- _name_start (data, i) = findnext (x -> isletter (Char (x)) || Char (x) === ' _' , data, i)
131- is_name_char (x) = (c = Char (x); isletter (c) || isdigit (c) || c ∈ " ._-:" )
132- function _name_stop (data, i)
133- i = findnext (! is_name_char, data, i)
134- isnothing (i) ? length (data) : i
135- end
# Index of the first byte at or after `i` that can begin a name, or `nothing` if none exists.
name_start(data, i) = findnext(is_name_start_char, data, i)

# Index of the last byte of the name beginning at `i` (inclusive).
# When every remaining byte is a name character, `findnext` returns `nothing`; the name then
# extends to the end of `data`, so return `lastindex(data)` instead of failing on `nothing - 1`.
function name_stop(data, i)
    j = findnext(!is_name_char, data, i)
    return isnothing(j) ? lastindex(data) : j - 1
end
136153
137- # starting at position i, return name and position after name
# Starting the search at position `i`, return `(name, k)` where `name` is the next name found
# in `data` and `k` is the index of the byte immediately after it.
function get_name(data, i)
    start = name_start(data, i)
    stop = name_stop(data, start)
    name = @views String(data[start:stop])
    return name, stop + 1
end
144159
145160# -----------------------------------------------------------------------------# get_attributes
# Starting at position `i`, parse `key="value"` / `key='value'` pairs up to the byte that ends
# the tag: '>' or, for `<? ... ?>` constructs, '?'.
# Returns an `OrderedDict{String,String}` in document order, or `nothing` when no attribute
# name precedes the end of the tag (or when no name / terminator can be found at all).
function get_attributes(data, i)
    is_tag_end(x) = x == UInt8('>') || x == UInt8('?')
    is_quote(x) = x == UInt8('"') || x == UInt8('\'')

    j = findnext(is_tag_end, data, i)
    i = name_start(data, i)
    # Both searches yield `nothing` when they find no match; comparing `nothing` with `>`
    # would throw a MethodError, so treat that case as "no attributes".
    (isnothing(i) || isnothing(j) || i > j) && return nothing
    out = OrderedDict{String, String}()
    while !isnothing(i) && !isnothing(j) && i < j
        key, i = get_name(data, i)
        # Opening quote of the value (either ' or "); `i + 1` skips the byte after the name.
        i = findnext(is_quote, data, i + 1)
        quotechar = data[i]
        i2 = findnext(==(quotechar), data, i + 1)
        @views value = String(data[i+1:i2-1])
        out[key] = value
        # A '>' or '?' inside the quoted value does not end the tag, so search for the
        # terminator again from the closing quote before deciding whether to continue.
        j = findnext(is_tag_end, data, i2)
        i = name_start(data, i2)
    end
    return out
end
@@ -189,9 +204,9 @@ Return the attributes of `ELEMENT`, `DECLARATION`, or `PROCESSING_INSTRUCTION` n
189204function attributes (o:: RawData )
190205 if o. type === RAW_ELEMENT_OPEN || o. type === RAW_ELEMENT_SELF_CLOSED || o. type === RAW_PROCESSING_INSTRUCTION
191206 i = o. pos
192- i = _name_start (o. data, i)
193- i = _name_stop (o. data, i)
194- get_attributes (o. data, i)
207+ i = name_start (o. data, i)
208+ i = name_stop (o. data, i)
209+ get_attributes (o. data, i + 1 )
195210 elseif o. type === RAW_DECLARATION
196211 get_attributes (o. data, o. pos + 6 )
197212 else
function depth(o::RawData)
    # Plain accessor for the `depth` field of the `RawData`.
    return o.depth
end
258273
259274# -----------------------------------------------------------------------------# next RawData
260- notspace (x:: UInt8 ) = ! isspace (Char (x))
# Whether byte `x` is ASCII whitespace: space (0x20) or 0x09-0x0d (\t \n \v \f \r).
# This intentionally does not call `Base.isspace(Char(x))`: that reads the byte as a Latin-1
# code point and also accepts 0x85 and 0xa0, which in UTF-8 data are continuation bytes of
# other characters (e.g. 'à' is 0xc3 0xa0). Trimming on them would split a character.
isspace(x::UInt8) = x == 0x20 || 0x09 <= x <= 0x0d
261276
262277"""
263278 next(node) --> typeof(node) or Nothing
@@ -268,7 +283,7 @@ would visit nodes by reading top-down through an XML file. Not defined for `XML
268283function next (o:: RawData )
269284 i = o. pos + o. len + 1
270285 (; depth, data, type) = o
271- i = findnext (notspace , data, i) # skip insignificant whitespace
286+ i = findnext (! isspace , data, i) # skip insignificant whitespace
272287 isnothing (i) && return nothing
273288 if type === RAW_ELEMENT_OPEN || type === RAW_DOCUMENT
274289 depth += 1
@@ -278,7 +293,7 @@ function next(o::RawData)
278293 if c != = ' <'
279294 type = RAW_TEXT
280295 j = findnext (== (UInt8 (' <' )), data, i) - 1
281- j = findprev (notspace , data, j) # "rstrip"
296+ j = findprev (! isspace , data, j) # "rstrip"
282297 elseif c === ' <'
283298 c2 = Char (o. data[i + 1 ])
284299 if c2 === ' !'
@@ -326,15 +341,15 @@ function prev(o::RawData)
326341 (; depth, data, type) = o
327342 type === RAW_DOCUMENT && return nothing
328343 j = o. pos - 1
329- j = findprev (notspace , data, j) # skip insignificant whitespace
344+ j = findprev (! isspace , data, j) # skip insignificant whitespace
330345 isnothing (j) && return RawData (data) # RAW_DOCUMENT
331346 c = Char (o. data[j])
332347 i = j - 1
333348 next_type = type
334349 if c != = ' >' # text
335350 type = RAW_TEXT
336351 i = findprev (== (UInt8 (' >' )), data, j) + 1
337- i = findnext (notspace , data, i) # "lstrip"
352+ i = findnext (! isspace , data, i) # "lstrip"
338353 elseif c === ' >'
339354 c2 = Char (o. data[j - 1 ])
340355 if c2 === ' -'
0 commit comments