@@ -45,7 +45,7 @@ unescape(x::AbstractString) = replace(x, reverse.(escape_chars)...)
45
45
@enum (RawDataType, RAW_DOCUMENT, RAW_TEXT, RAW_COMMENT, RAW_CDATA, RAW_PROCESSING_INSTRUCTION,
46
46
RAW_DECLARATION, RAW_DTD, RAW_ELEMENT_OPEN, RAW_ELEMENT_CLOSE, RAW_ELEMENT_SELF_CLOSED)
47
47
48
- nodetype (x:: RawDataType ) =
48
+ @inline nodetype (x:: RawDataType ) =
49
49
x === RAW_ELEMENT_OPEN ? ELEMENT :
50
50
x === RAW_ELEMENT_CLOSE ? ELEMENT :
51
51
x === RAW_ELEMENT_SELF_CLOSED ? ELEMENT :
@@ -126,38 +126,53 @@ is_node(o::RawData) = o.type !== RAW_ELEMENT_CLOSE
126
126
"""
    nodes(o::RawData)

Return a lazy iterator over `o` that keeps only the items for which `is_node` is true,
i.e. it skips entries whose type is `RAW_ELEMENT_CLOSE`.
"""
function nodes(o::RawData)
    return Iterators.filter(is_node, o)
end
127
127
128
128
# -----------------------------------------------------------------------------# get_name
129
+ # # find the start/stop of a name given a starting position `i`
130
+ # _name_start(data, i) = findnext(x -> isletter(Char(x)) || Char(x) === '_', data, i)
131
+ # is_name_char(x) = (c = Char(x); isletter(c) || isdigit(c) || c ∈ "._-:")
132
+ # function _name_stop(data, i)
133
+ # i = findnext(!is_name_char, data, i)
134
+ # isnothing(i) ? length(data) : i
135
+ # end
136
+
137
+ # # starting at position i, return name and position after name
138
+ # function get_name(data, i)
139
+ # i = _name_start(data, i)
140
+ # j = _name_stop(data, i)
141
+ # @views name = String(data[i:j-1])
142
+ # return name, j
143
+ # end
144
+
145
"""
    is_name_start_char(x::UInt8)

Return `true` if byte `x` may begin a name: an ASCII letter, an underscore, or any byte
`>= 0x80`.

Bytes `>= 0x80` only occur as part of a multi-byte UTF-8 sequence; accepting them keeps
non-ASCII names (e.g. `café`) in one piece instead of cutting them at the first
non-ASCII byte.
"""
is_name_start_char(x::UInt8) =
    UInt8('A') <= x <= UInt8('Z') || UInt8('a') <= x <= UInt8('z') ||
    x == UInt8('_') || x >= 0x80

"""
    is_name_char(x::UInt8)

Return `true` if byte `x` may appear inside a name: anything accepted by
`is_name_start_char`, an ASCII digit, a hyphen, a period, or a colon.

The colon is required for namespaced names such as `xml:lang` or `svg:rect`.
"""
is_name_char(x::UInt8) =
    is_name_start_char(x) || UInt8('0') <= x <= UInt8('9') ||
    x == UInt8('-') || x == UInt8('.') || x == UInt8(':')
149
+
129
150
# find the start/stop of a name given a starting position `i`
130
- _name_start (data, i) = findnext (x -> isletter (Char (x)) || Char (x) === ' _' , data, i)
131
- is_name_char (x) = (c = Char (x); isletter (c) || isdigit (c) || c ∈ " ._-:" )
132
- function _name_stop (data, i)
133
- i = findnext (! is_name_char, data, i)
134
- isnothing (i) ? length (data) : i
135
- end
151
"""
    name_start(data, i)

Return the index of the first element of `data`, at or after index `i`, for which
`is_name_start_char` is true, or `nothing` if there is none.
"""
function name_start(data, i)
    return findnext(is_name_start_char, data, i)
end
152
"""
    name_stop(data, i)

Scan `data` forward from index `i` and return the index just before the first element
for which `is_name_char` is false, i.e. the last index of the run of name characters.

If every element from `i` onward is a name character, the run extends to the end of
`data` and `lastindex(data)` is returned.
"""
function name_stop(data, i)
    j = findnext(!is_name_char, data, i)
    # `findnext` yields `nothing` when the name runs to the end of `data`;
    # subtracting 1 from `nothing` would throw a MethodError.
    return isnothing(j) ? lastindex(data) : j - 1
end
136
153
137
- # starting at position i, return name and position after name
138
154
"""
    get_name(data, i)

Locate the next name in `data`, searching from index `i`.

Return a tuple `(name, next)`, where `name` is a `String` copy of the bytes between
`name_start(data, i)` and `name_stop` (inclusive) and `next` is the index immediately
after the name.
"""
function get_name(data, i)
    first_idx = name_start(data, i)
    last_idx = name_stop(data, first_idx)
    name = String(@view data[first_idx:last_idx])
    return name, last_idx + 1
end
144
159
145
160
# -----------------------------------------------------------------------------# get_attributes
146
161
"""
    get_attributes(data, i)

Parse `key="value"` / `key='value'` pairs from the bytes in `data`, searching from index
`i` up to the `>` or `?` that ends the tag.

Return an `OrderedDict{String,String}` holding the attributes in the order they appear,
or `nothing` when no attribute name is found before the end of the tag.

# Throws
- `ArgumentError` if an attribute name is not followed by a quoted value, or if the
  closing quote of a value is missing.
"""
function get_attributes(data, i)
    is_tag_end = in((UInt8('>'), UInt8('?')))
    is_quote = in((UInt8('"'), UInt8('\'')))

    j = findnext(is_tag_end, data, i)
    i = name_start(data, i)
    # Either search may come up empty; comparing `nothing` with `>` would throw.
    (isnothing(i) || isnothing(j) || i > j) && return nothing

    out = OrderedDict{String, String}()
    while !isnothing(i) && !isnothing(j) && i < j
        key, i = get_name(data, i)
        # opening quote of the value (either ' or ")
        i = findnext(is_quote, data, i + 1)
        isnothing(i) &&
            throw(ArgumentError("attribute \"$key\" has no quoted value"))
        quotechar = data[i]
        i2 = findnext(==(quotechar), data, i + 1)
        isnothing(i2) &&
            throw(ArgumentError("attribute \"$key\" has an unterminated value"))
        out[key] = String(@view data[i+1:i2-1])
        # Re-locate the end of the tag from just past this value: a '>' or '?' that
        # sits inside a quoted value (e.g. href="a?b=1") must not cut parsing short.
        j = findnext(is_tag_end, data, i2 + 1)
        i = name_start(data, i2 + 1)
    end
    return out
end
@@ -189,9 +204,9 @@ Return the attributes of `ELEMENT`, `DECLARATION`, or `PROCESSING_INSTRUCTION` n
189
204
function attributes (o:: RawData )
190
205
if o. type === RAW_ELEMENT_OPEN || o. type === RAW_ELEMENT_SELF_CLOSED || o. type === RAW_PROCESSING_INSTRUCTION
191
206
i = o. pos
192
- i = _name_start (o. data, i)
193
- i = _name_stop (o. data, i)
194
- get_attributes (o. data, i)
207
+ i = name_start (o. data, i)
208
+ i = name_stop (o. data, i)
209
+ get_attributes (o. data, i + 1 )
195
210
elseif o. type === RAW_DECLARATION
196
211
get_attributes (o. data, o. pos + 6 )
197
212
else
257
272
"""
    depth(o::RawData)

Return the `depth` field stored in `o`.
"""
function depth(o::RawData)
    return o.depth
end
258
273
259
274
# -----------------------------------------------------------------------------# next RawData
260
- notspace (x:: UInt8 ) = ! isspace (Char (x))
275
"""
    isspace(x::UInt8)

Byte-level whitespace test: convert `x` to a `Char` and defer to `Base.isspace`.
"""
function isspace(x::UInt8)
    c = Char(x)
    return Base.isspace(c)
end
261
276
262
277
"""
263
278
next(node) --> typeof(node) or Nothing
@@ -268,7 +283,7 @@ would visit nodes by reading top-down through an XML file. Not defined for `XML
268
283
function next (o:: RawData )
269
284
i = o. pos + o. len + 1
270
285
(; depth, data, type) = o
271
- i = findnext (notspace , data, i) # skip insignificant whitespace
286
+ i = findnext (! isspace , data, i) # skip insignificant whitespace
272
287
isnothing (i) && return nothing
273
288
if type === RAW_ELEMENT_OPEN || type === RAW_DOCUMENT
274
289
depth += 1
@@ -278,7 +293,7 @@ function next(o::RawData)
278
293
if c != = ' <'
279
294
type = RAW_TEXT
280
295
j = findnext (== (UInt8 (' <' )), data, i) - 1
281
- j = findprev (notspace , data, j) # "rstrip"
296
+ j = findprev (! isspace , data, j) # "rstrip"
282
297
elseif c === ' <'
283
298
c2 = Char (o. data[i + 1 ])
284
299
if c2 === ' !'
@@ -326,15 +341,15 @@ function prev(o::RawData)
326
341
(; depth, data, type) = o
327
342
type === RAW_DOCUMENT && return nothing
328
343
j = o. pos - 1
329
- j = findprev (notspace , data, j) # skip insignificant whitespace
344
+ j = findprev (! isspace , data, j) # skip insignificant whitespace
330
345
isnothing (j) && return RawData (data) # RAW_DOCUMENT
331
346
c = Char (o. data[j])
332
347
i = j - 1
333
348
next_type = type
334
349
if c != = ' >' # text
335
350
type = RAW_TEXT
336
351
i = findprev (== (UInt8 (' >' )), data, j) + 1
337
- i = findnext (notspace , data, i) # "lstrip"
352
+ i = findnext (! isspace , data, i) # "lstrip"
338
353
elseif c === ' >'
339
354
c2 = Char (o. data[j - 1 ])
340
355
if c2 === ' -'
0 commit comments