Skip to content

Commit 89572a7

Browse files
committed
Add support for loading mxOPAQUE_CLASS types
* MAT_subsys.jl: New file MAT_subsys with methods to set, parse and retrieve subsystem data * MAT_v5.jl: New method "read_opaque" to handle mxOPAQUE_CLASS * MAT_v5.jl: New method "read_subsystem" to handle subsystem data * MAT.jl: Update to clear subsystem cache after load
1 parent 2df47dd commit 89572a7

File tree

3 files changed

+277
-11
lines changed

3 files changed

+277
-11
lines changed

src/MAT.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,12 @@ module MAT
2626

2727
using HDF5, SparseArrays
2828

29+
include("MAT_subsys.jl")
2930
include("MAT_HDF5.jl")
3031
include("MAT_v5.jl")
3132
include("MAT_v4.jl")
3233

33-
using .MAT_HDF5, .MAT_v5, .MAT_v4
34+
using .MAT_HDF5, .MAT_v5, .MAT_v4, .MAT_subsys
3435

3536
export matopen, matread, matwrite, @read, @write
3637

@@ -133,6 +134,7 @@ function matread(filename::AbstractString)
133134
try
134135
vars = read(file)
135136
finally
137+
MAT_subsys.clear_subsys!()
136138
close(file)
137139
end
138140
vars
@@ -165,7 +167,7 @@ function matwrite(filename::AbstractString, dict::AbstractDict{S, T}; compress::
165167
end
166168

167169
else
168-
170+
169171
file = matopen(filename, "w"; compress = compress)
170172
try
171173
for (k, v) in dict

src/MAT_subsys.jl

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
module MAT_subsys

# Highest FileWrapper__ metadata version that `load_subsys!` accepts.
const FWRAP_VERSION = 4

"""
    Subsys

Parsed contents of the MAT-file subsystem, filled in by [`load_subsys!`](@ref).

`mcos_names` holds the class/property name table. The `*_metadata` vectors are the
`UInt32` regions of the FileWrapper metadata blob, in the order they are assigned in
`load_subsys!`. `prop_vals_saved` and `prop_vals_defaults` hold the cells that object
property values are looked up in. `_u6_metadata`, `_u7_metadata`, `_c2` and `_c3` are
stored but not interpreted by this module.
"""
mutable struct Subsys
    num_names::UInt32
    mcos_names::Vector{String}
    class_id_metadata::Vector{UInt32}
    object_id_metadata::Vector{UInt32}
    saveobj_prop_metadata::Vector{UInt32}
    obj_prop_metadata::Vector{UInt32}
    dynprop_metadata::Vector{UInt32}
    _u6_metadata::Vector{UInt32}
    _u7_metadata::Vector{UInt32}
    prop_vals_saved::Vector{Any}
    _c3::Any
    _c2::Any
    prop_vals_defaults::Any
    handle_data::Any
    java_data::Any

    Subsys() = new(
        UInt32(0),
        String[],
        UInt32[],
        UInt32[],
        UInt32[],
        UInt32[],
        UInt32[],
        UInt32[],
        UInt32[],
        Any[],
        nothing,
        nothing,
        nothing,
        nothing,
        nothing
    )
end

# Module-wide cache of the subsystem of the file currently being read.
const subsys_cache = Ref{Union{Nothing,Subsys}}(nothing)

"""
    clear_subsys!()

Reset the subsystem cache to `nothing`.
"""
function clear_subsys!()
    subsys_cache[] = nothing
    return nothing
end

# Decode raw bytes as UInt32 words. When `swap_bytes` is set each word is byte-swapped
# individually, so the order of the words is preserved (reversing the whole byte run
# would also reverse the word order).
function read_uint32s(bytes::AbstractVector{UInt8}, swap_bytes::Bool)
    words = collect(reinterpret(UInt32, bytes))
    return swap_bytes ? bswap.(words) : words
end

"""
    load_subsys!(subsystem_data::Dict{String,Any}, swap_bytes::Bool)

Parse `subsystem_data` into a fresh [`Subsys`](@ref) and store it in `subsys_cache`.

The `"handle"` and `"java"` entries are stored as-is. If there is no `"MCOS"` entry the
function stops there. Otherwise `subsystem_data["MCOS"][2]` is treated as a cell column
whose first cell is the FileWrapper metadata byte blob, and the remaining cells are
property values.

Throws an error if the FileWrapper version is greater than `FWRAP_VERSION` or if the
name table ends before `num_names` names were read. Returns `nothing`.
"""
function load_subsys!(subsystem_data::Dict{String,Any}, swap_bytes::Bool)
    subsys = Subsys()
    subsys_cache[] = subsys
    subsys.handle_data = get(subsystem_data, "handle", nothing)
    subsys.java_data = get(subsystem_data, "java", nothing)
    mcos_data = get(subsystem_data, "MCOS", nothing)
    if mcos_data === nothing
        return nothing
    end

    cells = mcos_data[2]
    fwrap_metadata = vec(cells[1, 1])

    # The blob is stored as uint8; its integer fields are 32-bit words.
    version = read_uint32s(fwrap_metadata[1:4], swap_bytes)[1]
    if version > FWRAP_VERSION
        error("Unsupported FileWrapper version: $version")
    end

    subsys.num_names = read_uint32s(fwrap_metadata[5:8], swap_bytes)[1]
    region_offsets = read_uint32s(fwrap_metadata[9:40], swap_bytes)

    # Class and property names are stored as a list of null-terminated strings
    start = 41
    for _ in 1:subsys.num_names
        stop = findnext(iszero, fwrap_metadata, start)
        if stop === nothing
            error("FileWrapper name table ended before all names were read")
        end
        push!(subsys.mcos_names, String(fwrap_metadata[start:stop-1]))
        start = stop + 1
    end

    # Region k spans the bytes between consecutive (0-based) offsets k and k+1.
    region(k) = read_uint32s(
        fwrap_metadata[region_offsets[k]+1:region_offsets[k+1]], swap_bytes
    )
    subsys.class_id_metadata = region(1)
    subsys.saveobj_prop_metadata = region(2)
    subsys.object_id_metadata = region(3)
    subsys.obj_prop_metadata = region(4)
    subsys.dynprop_metadata = region(5)
    subsys._u6_metadata = region(6)
    subsys._u7_metadata = region(7)

    # From version 4 on there is one extra trailing cell (`_c3`) before `_c2`.
    if version < 4
        subsys.prop_vals_saved = cells[3:end-2, 1]
    else
        subsys.prop_vals_saved = cells[3:end-3, 1]
        subsys._c3 = cells[end-2, 1]
    end

    subsys._c2 = cells[end-1, 1]
    subsys.prop_vals_defaults = cells[end, 1]
    return nothing
end

"""
    get_classname(class_id::UInt32)

Return the class name for `class_id`, prefixed with `"<namespace>."` when the class has
a non-zero namespace index.

Name indices index `mcos_names` directly (1-based), the same way property names are
looked up in `get_saved_properties`; a namespace index of 0 means "no namespace".
"""
function get_classname(class_id::UInt32)
    subsys = subsys_cache[]
    namespace_idx = subsys.class_id_metadata[class_id*4+1]
    classname_idx = subsys.class_id_metadata[class_id*4+2]

    namespace = namespace_idx == 0 ? "" : subsys.mcos_names[namespace_idx] * "."
    return namespace * subsys.mcos_names[classname_idx]
end

"""
    get_object_metadata(object_id::UInt32)

Return the six `UInt32` metadata words stored for `object_id`.
"""
function get_object_metadata(object_id::UInt32)
    return subsys_cache[].object_id_metadata[object_id*6+1:object_id*6+6]
end

"""
    get_default_properties(class_id::UInt32)

Return a shallow copy of the default property values stored for `class_id`.
"""
function get_default_properties(class_id::UInt32)
    # FIXME Should we use deepcopy here
    return copy(subsys_cache[].prop_vals_defaults[class_id+1, 1])
end

"""
    get_property_idxs(obj_type_id::UInt32, saveobj_ret_type::Bool)

Return the flat `(name, type, value)` index triples of block number `obj_type_id` in
the saveobj property metadata (`saveobj_ret_type == true`) or the regular object
property metadata.
"""
function get_property_idxs(obj_type_id::UInt32, saveobj_ret_type::Bool)
    subsys = subsys_cache[]
    prop_field_idxs =
        saveobj_ret_type ? subsys.saveobj_prop_metadata : subsys.obj_prop_metadata
    nfields = 3
    offset = 1
    # Skip the blocks that precede the requested one. Each block is a count word
    # followed by `nfields` words per property.
    for _ in 1:obj_type_id
        nprops = prop_field_idxs[offset]
        offset += 1 + (nfields * nprops)
        offset += (offset + 1) % 2 # Padding
    end
    nprops = prop_field_idxs[offset]
    offset += 1
    return prop_field_idxs[offset:offset+nprops*nfields-1]
end

"""
    get_saved_properties(obj_type_id::UInt32, saveobj_ret_type::Bool)

Build a `Dict` mapping property names to the values saved for the given property block.

Property type 0 resolves the value through the name table, type 1 through
`prop_vals_saved`, and type 2 uses the stored word itself. Any other type throws.
"""
function get_saved_properties(obj_type_id::UInt32, saveobj_ret_type::Bool)
    subsys = subsys_cache[]
    save_prop_map = Dict{String,Any}()
    prop_field_idxs = get_property_idxs(obj_type_id, saveobj_ret_type)
    nprops = length(prop_field_idxs) ÷ 3
    for i in 0:nprops-1
        prop_name = subsys.mcos_names[prop_field_idxs[i*3+1]]
        prop_type = prop_field_idxs[i*3+2]
        value_idx = prop_field_idxs[i*3+3]
        if prop_type == 0
            prop_value = subsys.mcos_names[value_idx]
        elseif prop_type == 1
            # FIXME: Search for nested objects
            prop_value = subsys.prop_vals_saved[value_idx+1]
        elseif prop_type == 2
            prop_value = value_idx
        else
            error("Unknown property type ID: $prop_type encountered during deserialization")
        end
        save_prop_map[prop_name] = prop_value
    end
    return save_prop_map
end

"""
    get_properties(object_id::UInt32)

Return the property map of `object_id`: the class defaults overridden by the saved
values. An `object_id` of 0 yields an empty `Dict`.
"""
function get_properties(object_id::UInt32)
    if object_id == 0
        return Dict{String,Any}()
    end

    class_id, _, _, saveobj_id, normobj_id, _ = get_object_metadata(object_id)
    # A non-zero saveobj id takes precedence over the regular object id.
    saveobj_ret_type = saveobj_id != 0
    obj_type_id = saveobj_ret_type ? saveobj_id : normobj_id

    prop_map = get_default_properties(class_id)
    merge!(prop_map, get_saved_properties(obj_type_id, saveobj_ret_type))
    # TODO: Add dynamic properties
    return prop_map
end

"""
    load_mcos_object(metadata::Any, type_name::String)

Turn the metadata of an opaque `"MCOS"` object into an array of property dictionaries,
one per object, each with an extra `"__class__"` entry holding the class name.

Whenever the input cannot be decoded (a type other than `"MCOS"`, `Dict` metadata,
metadata that is not an `Array{UInt32}`, an unexpected first word, or no subsystem
loaded) a warning is logged and `metadata` is returned unchanged.
"""
function load_mcos_object(metadata::Any, type_name::String)
    # TODO: Add support for handle class objects

    if type_name != "MCOS"
        @warn "Loading Type:$type_name is not implemented. Returning metadata."
        return metadata
    end

    if isa(metadata, Dict)
        # TODO: Load Enumeration Instances
        @warn "Loading enumeration instances are not supported. Returning Metadata"
        return metadata
    end

    if !(metadata isa Array{UInt32})
        @warn "Expected MCOS metadata to be an Array{UInt32}, got $(typeof(metadata)). Returning metadata."
        return metadata
    end

    if metadata[1, 1] != 0xDD000000
        @warn "MCOS object metadata is corrupted. Returning raw data."
        return metadata
    end

    # Without a loaded subsystem there is nothing to resolve ids against.
    if subsys_cache[] === nothing
        @warn "No subsystem data is loaded. Returning metadata."
        return metadata
    end

    # Layout: marker, number of dims, the dims, one id per object, ..., class id (last).
    n_dims = metadata[2, 1]
    dims = metadata[3:2+n_dims, 1]
    nobjects = prod(dims)
    object_ids = metadata[3+n_dims:2+n_dims+nobjects, 1]

    class_id = metadata[end, 1]
    classname = get_classname(class_id)

    object_arr = Array{Dict{String,Any}}(undef, convert(Vector{Int}, dims)...)
    for i in eachindex(object_arr)
        prop_dict = get_properties(object_ids[i])
        prop_dict["__class__"] = classname
        object_arr[i] = prop_dict
    end

    return object_arr
end

end

src/MAT_v5.jl

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ module MAT_v5
2929
using CodecZlib, BufferedStreams, HDF5, SparseArrays
3030
import Base: read, write, close
3131

32+
using ..MAT_subsys
33+
3234
round_uint8(data) = round.(UInt8, data)
3335
complex_array(a, b) = complex.(a, b)
3436

@@ -246,7 +248,7 @@ function read_sparse(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}, flags::
246248
end
247249
if length(ir) > length(pr)
248250
# Fix for Issue #169, xref https://github.com/JuliaLang/julia/pull/40523
249-
#=
251+
#=
250252
# The following expression must be obeyed according to
251253
# https://github.com/JuliaLang/julia/blob/b3e4341d43da32f4ab6087230d98d00b89c8c004/stdlib/SparseArrays/src/sparsematrix.jl#L86-L90
252254
@debug "SparseMatrixCSC" m n jc ir pr
@@ -311,6 +313,18 @@ function read_string(f::IO, swap_bytes::Bool, dimensions::Vector{Int32})
311313
data
312314
end
313315

316+
# Read the body of an mxOPAQUE_CLASS element: two UInt8 string elements (type name and
# class name) followed by a nested matrix. For class "FileWrapper__" the nested matrix
# result is returned unchanged; otherwise its data is handed to
# MAT_subsys.load_mcos_object together with the type name.
function read_opaque(f::IO, swap_bytes::Bool)
    type_name = String(read_element(f, swap_bytes, UInt8))
    is_filewrapper = String(read_element(f, swap_bytes, UInt8)) == "FileWrapper__"

    # Both paths read exactly one nested matrix next.
    payload = read_matrix(f, swap_bytes)
    is_filewrapper && return payload

    return MAT_subsys.load_mcos_object(payload[2], type_name)
end
327+
314328
# Read matrix data
315329
function read_matrix(f::IO, swap_bytes::Bool)
316330
(dtype, nbytes) = read_header(f, swap_bytes)
@@ -332,15 +346,10 @@ function read_matrix(f::IO, swap_bytes::Bool)
332346
flags = read_element(f, swap_bytes, UInt32)
333347
class = flags[1] & 0xFF
334348

335-
if class == mxOPAQUE_CLASS
336-
s0 = read_data(f, swap_bytes)
337-
s1 = read_data(f, swap_bytes)
338-
s2 = read_data(f, swap_bytes)
339-
arr = read_matrix(f, swap_bytes)
340-
return ("__opaque__", Dict("s0"=>s0, "s1"=>s1, "s2"=>s2, "arr"=>arr))
349+
if class != mxOPAQUE_CLASS
350+
dimensions = read_element(f, swap_bytes, Int32)
341351
end
342352

343-
dimensions = read_element(f, swap_bytes, Int32)
344353
name = String(read_element(f, swap_bytes, UInt8))
345354

346355
local data
@@ -354,6 +363,8 @@ function read_matrix(f::IO, swap_bytes::Bool)
354363
data = read_string(f, swap_bytes, dimensions)
355364
elseif class == mxFUNCTION_CLASS
356365
data = read_matrix(f, swap_bytes)
366+
elseif class == mxOPAQUE_CLASS
367+
data = read_opaque(f, swap_bytes)
357368
else
358369
if (flags[1] & (1 << 9)) != 0 # logical
359370
data = read_data(f, swap_bytes, Bool, dimensions)
@@ -375,14 +386,41 @@ matopen(ios::IOStream, endian_indicator::UInt16) =
375386

376387
# Read whole MAT file
377388
function read(matfile::Matlabv5File)
    io = matfile.ios
    swap = matfile.swap_bytes
    vars = Dict{String, Any}()

    # The 8 bytes at offset 116 hold the subsystem offset. A field consisting only of
    # 0x20 bytes is treated the same as 0, i.e. no subsystem.
    seek(io, 116)
    subsys_offset = read_bswap(io, swap, UInt64)
    if subsys_offset == 0x2020202020202020
        subsys_offset = 0
    end
    subsys_offset == 0 || read_subsystem(io, swap, subsys_offset)

    # Variables start at offset 128.
    seek(io, 128)
    while !eof(io)
        if position(io) == subsys_offset
            # Skip reading subsystem again
            (_, nbytes) = read_header(io, swap)
            skip(io, nbytes)
        else
            (name, data) = read_matrix(io, swap)
            vars[name] = data
        end
    end
    return vars
end
414+
415+
# Read the subsystem element located at `offset` in `ios`, decode the matrix embedded
# in its data (after an 8-byte header) and pass the result to MAT_subsys.load_subsys!.
function read_subsystem(ios::IOStream, swap_bytes::Bool, offset::UInt64)
    seek(ios, offset)
    outer = read_matrix(ios, swap_bytes)

    # The element's data is itself a byte stream containing another matrix.
    inner_io = IOBuffer(vec(outer[2]))
    seek(inner_io, 8) # Skip subsystem header
    inner = read_matrix(inner_io, swap_bytes)

    return MAT_subsys.load_subsys!(inner[2], swap_bytes)
end
423+
386424
# Read only variable names from an HDF5 file
387425
function getvarnames(matfile::Matlabv5File)
388426
if !isdefined(matfile, :varnames)

0 commit comments

Comments
 (0)