@@ -125,7 +125,7 @@ function convert(::Type{UTF32String}, dat::AbstractVector{UInt32})
125
125
end
126
126
127
127
# Convert a vector of `Int32` code units to a `UTF32String`.
# The input is first converted to a concrete `Vector{Int32}` so that it can be
# reinterpreted as `UInt32`; the result is then passed to the
# `AbstractVector{UInt32}` method of `convert`.
# Note: the element type must be spelled out as `Int32` here, since this
# signature binds no type parameter `T`.
convert(::Type{UTF32String}, data::AbstractVector{Int32}) =
    convert(UTF32String, reinterpret(UInt32, convert(Vector{Int32}, data)))
129
129
130
130
# Build a `UTF32String` from a vector of `Char`: every character is mapped to
# its `UInt32` value, and the resulting vector is passed on to `convert` again.
function convert(::Type{UTF32String}, data::AbstractVector{Char})
    units = map(UInt32, data)
    return convert(UTF32String, units)
end
@@ -151,20 +151,35 @@ unsafe_convert{T<:Union{UInt32,Int32,Char}}(::Type{Ptr{T}}, s::UTF32String) =
151
151
152
152
function convert (T:: Type{UTF32String} , bytes:: AbstractArray{UInt8} )
153
153
isempty (bytes) && return empty_utf32
154
- length (bytes) & 3 != 0 && throw (UnicodeError (UTF_ERR_ODD_BYTES_32,0 ,0 ))
155
- data = reinterpret (UInt32, bytes)
156
- # check for byte-order mark (BOM):
157
- if data[1 ] == 0x0000feff # native byte order
158
- d = Vector {UInt32} (length (data))
159
- copy! (d,1 , data, 2 , length (data)- 1 )
160
- elseif data[1 ] == 0xfffe0000 # byte-swapped
161
- d = Vector {UInt32} (length (data))
162
- for i = 2 : length (data)
163
- @inbounds d[i- 1 ] = bswap (data[i])
154
+ nb = length (bytes)
155
+ nb & 3 != 0 && throw (UnicodeError (UTF_ERR_ODD_BYTES_32,0 ,0 ))
156
+ b1 = bytes[1 ]
157
+ b2 = bytes[2 ]
158
+ b3 = bytes[3 ]
159
+ b4 = bytes[4 ]
160
+ if b1 == 0 && b2 == 0 && b3 == 0xfe && b4 == 0xff
161
+ offset = 1
162
+ swap = false
163
+ elseif b1 == 0xff && b2 == 0xfe && b3 == 0 && b4 == 0
164
+ offset = 1
165
+ swap = true
166
+ else
167
+ offset = 0
168
+ swap = false
169
+ end
170
+ len = nb ÷ 4 - offset
171
+ d = Vector {UInt32} (len + 1 )
172
+ if swap
173
+ @inbounds for i in 1 : len
174
+ ib = i + offset
175
+ b1 = UInt32 (bytes[ib * 2 - 1 ])
176
+ b2 = UInt32 (bytes[ib * 2 ])
177
+ b3 = UInt32 (bytes[ib * 2 + 1 ])
178
+ b4 = UInt32 (bytes[ib * 2 + 2 ])
179
+ d[i] = (b1 << 24 ) | (b2 << 16 ) | (b3 << 8 ) | b4
164
180
end
165
181
else
166
- d = Vector {UInt32} (length (data) + 1 )
167
- copy! (d, 1 , data, 1 , length (data)) # assume native byte order
182
+ unsafe_copy! (Ptr {UInt8} (pointer (d)), pointer (bytes, offset * 4 + 1 ), len * 4 )
168
183
end
169
184
d[end ] = 0 # NULL terminate
170
185
UTF32String (d)
0 commit comments