|
70 | 70 | function Raw(data::Vector{UInt8})#, ctx::Vector{Bool}=Bool[false]) |
71 | 71 | needle = Vector{UInt8}("xml:space") |
72 | 72 | has_xml_space = findfirst(needle, data) !== nothing |
73 | | - data=normalize_newlines(data) |
74 | 73 | return Raw(RawDocument, 0, 0, 0, data, [false], has_xml_space) |
75 | 74 | end |
76 | 75 | function Raw(data::Vector{UInt8}, has_xml_space::Bool, ctx::Vector{Bool}=Bool[false]) |
@@ -101,40 +100,6 @@ Base.read(io::IO, ::Type{Raw}) = Raw(read(io)) |
101 | 100 |
|
102 | 101 | Base.parse(x::AbstractString, ::Type{Raw}) = Raw(Vector{UInt8}(x)) |
103 | 102 |
|
104 | | - |
105 | | -""" |
106 | | - normalize_newlines(bytes::Vector{UInt8}) -> Vector{UInt8} |
107 | | -
|
108 | | -Implements XML 1.1 §2.11 line-end normalization: |
109 | | -- CR (0x0D) alone → LF (0x0A) |
110 | | -- CR LF pair → LF |
111 | | -- NEL (U+0085) → LF |
112 | | -- LS (U+2028) → LF |
113 | | -""" |
114 | | -function normalize_newlines(bytes::Vector{UInt8}) |
115 | | - n = length(bytes) |
116 | | - out = Vector{UInt8}(undef, n) |
117 | | - outlen = 0 |
118 | | - i = 1 |
119 | | - while i <= n |
120 | | - @inbounds b = bytes[i] |
121 | | - if b == 0x0D |
122 | | - outlen += 1; out[outlen] = 0x0A |
123 | | - i += (i < n && (bytes[i+1] == 0x0A || bytes[i+1] == 0x85)) ? 2 : 1 |
124 | | - elseif b == 0xC2 && i < n && bytes[i+1] == 0x85 |
125 | | - outlen += 1; out[outlen] = 0x0A |
126 | | - i += 2 |
127 | | - elseif b == 0xE2 && i+2 <= n && bytes[i+1] == 0x80 && bytes[i+2] == 0xA8 |
128 | | - outlen += 1; out[outlen] = 0x0A |
129 | | - i += 3 |
130 | | - else |
131 | | - outlen += 1; out[outlen] = b |
132 | | - i += 1 |
133 | | - end |
134 | | - end |
135 | | - return resize!(out, outlen) |
136 | | -end |
137 | | - |
138 | 103 | # Mostly for debugging |
139 | 104 | Base.peek(o::Raw, n::Int) = String(view(o.data[o.pos+o.len+1:min(end, o.pos + o.len + n + 1)])) |
140 | 105 |
|
|
0 commit comments