From c90a6d59ec129db927200b4ae087f03c980bb6a2 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 17 Dec 2013 14:53:48 -0500 Subject: [PATCH 01/10] Initial work on reading undoc opaque objects Matlab's opaque classes (handle classes) are stored in an undocumented manner. This defines the class id for mxOPAQUE_CLASS and enables reading of the object data. --- src/MAT_v5.jl | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/MAT_v5.jl b/src/MAT_v5.jl index b6eea21..3dc1871 100644 --- a/src/MAT_v5.jl +++ b/src/MAT_v5.jl @@ -68,10 +68,12 @@ const mxINT32_CLASS = 12 const mxUINT32_CLASS = 13 const mxINT64_CLASS = 14 const mxUINT64_CLASS = 15 +const mxFUNCTION_CLASS = 16 # undocumented +const mxOPAQUE_CLASS = 17 # undocumented const READ_TYPES = Type[Int8, Uint8, Int16, Uint16, Int32, Uint32, Float32, None, Float64, - None, None, Int64, Uint64] + None, None, Int64, Uint64, Uint8, Uint8] const CONVERT_TYPES = Type[None, None, None, None, None, Float64, Float32, Int8, Uint8, - Int16, Uint16, Int32, Uint32, Int64, Uint64] + Int16, Uint16, Int32, Uint32, Int64, Uint64, None, None] read_bswap{T}(f::IO, swap_bytes::Bool, ::Type{T}) = swap_bytes ? bswap(read(f, T)) : read(f, T) @@ -288,10 +290,11 @@ function read_matrix(f::IO, swap_bytes::Bool) end flags = read_element(f, swap_bytes, Uint32) - dimensions = read_element(f, swap_bytes, Int32) + class = flags[1] & 0xFF + # Opaque objects are dimensionless + dimensions = (class == mxOPAQUE_CLASS) ?
Int32[] : read_element(f, swap_bytes, Int32) name = ascii(read_element(f, swap_bytes, Uint8)) - class = flags[1] & 0xFF local data if class == mxCELL_CLASS data = read_cell(f, swap_bytes, dimensions) @@ -301,6 +304,11 @@ function read_matrix(f::IO, swap_bytes::Bool) data = read_sparse(f, swap_bytes, dimensions, flags) elseif class == mxCHAR_CLASS && length(dimensions) <= 2 data = read_string(f, swap_bytes, dimensions) + elseif class == mxOPAQUE_CLASS + # Two strings followed by an unnamed matrix + data = {ascii(read_element(f, swap_bytes, Uint8)), + ascii(read_element(f, swap_bytes, Uint8)), + read_matrix(f, swap_bytes)[2]} else convert_type = CONVERT_TYPES[class] data = read_data(f, swap_bytes, convert_type, dimensions) From ea56ad3116e37da98904dbb4f25beabc512ba81a Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Wed, 18 Dec 2013 17:21:17 -0500 Subject: [PATCH 02/10] Add very basic subsystem extraction If subsys_offset is nonzero (or not spaces, yay backwards compatibility!), then there's an extra unnamed miMATRIX at the end of the file. This matrix contains the data of a complete matfile (except without the header), containing all the data for class objects in an undocumented layout. Crazily, this subsystem can additionally include another matrix element that contains yet another matfile. 
--- src/MAT_v5.jl | 62 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/src/MAT_v5.jl b/src/MAT_v5.jl index 3dc1871..65f2190 100644 --- a/src/MAT_v5.jl +++ b/src/MAT_v5.jl @@ -31,11 +31,12 @@ import Base: read, write, close import HDF5: names, exists type Matlabv5File <: HDF5.DataFile - ios::IOStream + ios::IO swap_bytes::Bool + subsys_offset::Uint64 varnames::Dict{ASCIIString, FileOffset} - Matlabv5File(ios, swap_bytes) = new(ios, swap_bytes) + Matlabv5File(ios, swap_bytes, subsys_offset) = new(ios, swap_bytes, subsys_offset) end const miINT8 = 1 @@ -75,6 +76,8 @@ const READ_TYPES = Type[Int8, Uint8, Int16, Uint16, Int32, Uint32, Float32, None const CONVERT_TYPES = Type[None, None, None, None, None, Float64, Float32, Int8, Uint8, Int16, Uint16, Int32, Uint32, Int64, Uint64, None, None] +const SUBSYS_HEADER_PADDING = 120 + read_bswap{T}(f::IO, swap_bytes::Bool, ::Type{T}) = swap_bytes ? bswap(read(f, T)) : read(f, T) read_bswap{T}(f::IO, swap_bytes::Bool, ::Type{T}, dim::Union(Int, (Int...))) = @@ -330,7 +333,10 @@ function matopen(filename::String, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::B ios = open(filename, "r") header = read(ios, Uint8, 116) - skip(ios, 8) + subsys_offset = read(ios, Uint64) + if subsys_offset == 0x2020202020202020 + subsys_offset = zero(subsys_offset) + end version = read(ios, Uint16) endian_indicator = read(ios, Uint16) @@ -350,17 +356,61 @@ function matopen(filename::String, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::B error("Unsupported MATLAB file version") end - return Matlabv5File(ios, swap_bytes) + return Matlabv5File(ios, swap_bytes, subsys_offset) +end + +is_subsystem_matfile(x::Any) = false; +function is_subsystem_matfile{N}(x::Array{Uint8,N}) + N > 2 && return false + length(x) < 12 && return false + + # Check first 4 characters in either endianness + (x[1:4] == [0x00,0x01,0x49,0x4d] || x[1:4] == [0x01,0x00,0x4d,0x49]) +end + +# Read the subsystem data from a 
Uint8 array +function add_subsystem_vars!{N}(data::Array{Uint8,N},vars) + # Construct subsystem file: a Matfile is stored within this matrix + buf = Array(Uint8,length(data)+SUBSYS_HEADER_PADDING) + buf[SUBSYS_HEADER_PADDING+1:end] = data[:] + s = IOBuffer(buf) + # Check endianness and versioning + seek(s,SUBSYS_HEADER_PADDING+2) + swap_bytes = read(s,Uint16) == 0x494D + seek(s,SUBSYS_HEADER_PADDING) + @assert read_bswap(s,swap_bytes,Uint16) == 0x0100 "Unsupported MATLAB subsystem file version" + # Read everything contained in it with a phony Matlabv5File + svars = read(Matlabv5File(s,swap_bytes,0)) + for (k,v) in svars + # There are sometimes nested subsystems, but without a subsys offset!? + if is_subsystem_matfile(v) + add_subsystem_vars!(v,svars) + delete!(svars,k) + end + end + for (k,v) in svars + vars[string("_sub_",k)] = v + end + vars end # Read whole MAT file function read(matfile::Matlabv5File) seek(matfile.ios, 128) vars = Dict{ASCIIString, Any}() - while !eof(matfile.ios) + i = 0; + while !eof(matfile.ios) && (matfile.subsys_offset==0 || position(matfile.ios) < matfile.subsys_offset) (name, data) = read_matrix(matfile.ios, matfile.swap_bytes) + name = isempty(name) ? 
"_i$(i+=1)" : name; vars[name] = data end + + if !eof(matfile.ios) && matfile.subsys_offset > 0 + seek(matfile.ios,matfile.subsys_offset) + _, data = read_matrix(matfile.ios,matfile.swap_bytes) + add_subsystem_vars!(data,vars) + end + vars end @@ -369,7 +419,7 @@ function getvarnames(matfile::Matlabv5File) if !isdefined(matfile, :varnames) seek(matfile.ios, 128) matfile.varnames = varnames = Dict{ASCIIString, FileOffset}() - while !eof(matfile.ios) + while !eof(matfile.ios) && (matfile.subsys_offset==0 || position(matfile.ios) < matfile.subsys_offset) offset = position(matfile.ios) (dtype, nbytes, hbytes) = read_header(matfile.ios, matfile.swap_bytes) if dtype == miCOMPRESSED From 0fb38d35d653979611da35b35b5bf2352109a434 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Thu, 26 Dec 2013 15:30:53 -0500 Subject: [PATCH 03/10] Refactor to read the subsystem on file open Class objects need access to the MCOS subsystem data when read, so read it first if it is there. --- src/MAT_v5.jl | 86 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 36 deletions(-) diff --git a/src/MAT_v5.jl b/src/MAT_v5.jl index 65f2190..494a8af 100644 --- a/src/MAT_v5.jl +++ b/src/MAT_v5.jl @@ -35,6 +35,7 @@ type Matlabv5File <: HDF5.DataFile swap_bytes::Bool subsys_offset::Uint64 varnames::Dict{ASCIIString, FileOffset} + subsystem::Dict{ASCIIString, Any} Matlabv5File(ios, swap_bytes, subsys_offset) = new(ios, swap_bytes, subsys_offset) end @@ -69,8 +70,8 @@ const mxINT32_CLASS = 12 const mxUINT32_CLASS = 13 const mxINT64_CLASS = 14 const mxUINT64_CLASS = 15 -const mxFUNCTION_CLASS = 16 # undocumented -const mxOPAQUE_CLASS = 17 # undocumented +const mxFUNCTION_CLASS = 16 # undocumented (function handles) +const mxOPAQUE_CLASS = 17 # undocumented (classdef objects) const READ_TYPES = Type[Int8, Uint8, Int16, Uint16, Int32, Uint32, Float32, None, Float64, None, None, Int64, Uint64, Uint8, Uint8] const CONVERT_TYPES = Type[None, None, None, None, None, Float64, 
Float32, Int8, Uint8, @@ -307,11 +308,12 @@ function read_matrix(f::IO, swap_bytes::Bool) data = read_sparse(f, swap_bytes, dimensions, flags) elseif class == mxCHAR_CLASS && length(dimensions) <= 2 data = read_string(f, swap_bytes, dimensions) + elseif class == mxFUNCTION_CLASS + data = read_matrix(f,swap_bytes)[2] elseif class == mxOPAQUE_CLASS - # Two strings followed by an unnamed matrix - data = {ascii(read_element(f, swap_bytes, Uint8)), - ascii(read_element(f, swap_bytes, Uint8)), - read_matrix(f, swap_bytes)[2]} + data = {ascii(read_element(f, swap_bytes, Uint8)), # "MCOS" + ascii(read_element(f, swap_bytes, Uint8)), # Classname + read_matrix(f, swap_bytes)[2]} # Unnamed matrix w/ data else convert_type = CONVERT_TYPES[class] data = read_data(f, swap_bytes, convert_type, dimensions) @@ -334,7 +336,7 @@ function matopen(filename::String, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::B ios = open(filename, "r") header = read(ios, Uint8, 116) subsys_offset = read(ios, Uint64) - if subsys_offset == 0x2020202020202020 + if subsys_offset == 0x2020_2020_2020_2020 subsys_offset = zero(subsys_offset) end version = read(ios, Uint16) @@ -356,7 +358,14 @@ function matopen(filename::String, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::B error("Unsupported MATLAB file version") end - return Matlabv5File(ios, swap_bytes, subsys_offset) + matfile = Matlabv5File(ios, swap_bytes, subsys_offset) + + if subsys_offset > 0 + seek(ios, subsys_offset) + matfile.subsystem = read_subsystem(ios, swap_bytes) + end + + matfile end is_subsystem_matfile(x::Any) = false; @@ -368,30 +377,41 @@ function is_subsystem_matfile{N}(x::Array{Uint8,N}) (x[1:4] == [0x00,0x01,0x49,0x4d] || x[1:4] == [0x01,0x00,0x4d,0x49]) end -# Read the subsystem data from a Uint8 array -function add_subsystem_vars!{N}(data::Array{Uint8,N},vars) - # Construct subsystem file: a Matfile is stored within this matrix - buf = Array(Uint8,length(data)+SUBSYS_HEADER_PADDING) - buf[SUBSYS_HEADER_PADDING+1:end] = data[:] - s = 
IOBuffer(buf) - # Check endianness and versioning - seek(s,SUBSYS_HEADER_PADDING+2) - swap_bytes = read(s,Uint16) == 0x494D - seek(s,SUBSYS_HEADER_PADDING) - @assert read_bswap(s,swap_bytes,Uint16) == 0x0100 "Unsupported MATLAB subsystem file version" - # Read everything contained in it with a phony Matlabv5File - svars = read(Matlabv5File(s,swap_bytes,0)) - for (k,v) in svars - # There are sometimes nested subsystems, but without a subsys offset!? - if is_subsystem_matfile(v) - add_subsystem_vars!(v,svars) - delete!(svars,k) +function read_subsystem(f::IO, swap_bytes::Bool) + name, data = read_matrix(f,swap_bytes) + @assert isempty(name) && is_subsystem_matfile(data) "invalid subsystem" + @assert eof(f) "unread data at end of subsystem" + + read_subsystem_matfile(data); +end + +function read_subsystem_matfile{N}(data::Array{Uint8,N}) + # A Matfile is stored within this matrix's data + f = IOBuffer(data[:]) + # Check endianness and versioning. + seek(f,2) + endian_flag = read(f,Uint16) + swap_bytes = endian_flag == 0x494D + @assert swap_bytes || endian_flag == 0x4D49 "unknown endian flags in subsystem" + seek(f,0) + @assert read_bswap(f,swap_bytes,Uint16) == 0x0100 "unsupported MATLAB file version in subsystem" + seek(f,8) + + svars = Dict{ASCIIString,Any}() + i=0; + while !eof(f) + name,data = read_matrix(f,swap_bytes) + + if isempty(name) && is_subsystem_matfile(data) + # There are sometimes nested subsystems? + data = read_subsystem_matfile(data) end + name = isempty(name) ? 
"_i$(i+=1)" : name; + svars[name] = data end - for (k,v) in svars - vars[string("_sub_",k)] = v - end - vars + close(f) + + svars end # Read whole MAT file @@ -405,12 +425,6 @@ function read(matfile::Matlabv5File) vars[name] = data end - if !eof(matfile.ios) && matfile.subsys_offset > 0 - seek(matfile.ios,matfile.subsys_offset) - _, data = read_matrix(matfile.ios,matfile.swap_bytes) - add_subsystem_vars!(data,vars) - end - vars end From 0fc9e1e6820410e63d9a59039245c7b6faa56306 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Fri, 17 Jan 2014 11:23:08 -0500 Subject: [PATCH 04/10] Refactor Mat_v5 to pass Matfiles around Instead of passing the IO stream and swap_bytes boolean around separately, pass the matfile to the higher level functions instead. This will allow them to have access to the subsystem. --- src/MAT_v5.jl | 77 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 30 deletions(-) diff --git a/src/MAT_v5.jl b/src/MAT_v5.jl index 494a8af..8ee89e0 100644 --- a/src/MAT_v5.jl +++ b/src/MAT_v5.jl @@ -33,11 +33,22 @@ import HDF5: names, exists type Matlabv5File <: HDF5.DataFile ios::IO swap_bytes::Bool - subsys_offset::Uint64 + subsys_offset::FileOffset varnames::Dict{ASCIIString, FileOffset} subsystem::Dict{ASCIIString, Any} Matlabv5File(ios, swap_bytes, subsys_offset) = new(ios, swap_bytes, subsys_offset) + # TODO: is there a better way to copy and replace the IO? Needed for uncompressed buffers + function Matlabv5File(matfile::Matlabv5File, f::IO) + v = new(f, matfile.swap_bytes, 0) + if isdefined(matfile, :varnames) + v.varnames = matfile.varnames + end + if isdefined(matfile, :subsystem) + v.subsystem = matfile.subsystem + end + v + end end const miINT8 = 1 @@ -79,9 +90,9 @@ const CONVERT_TYPES = Type[None, None, None, None, None, Float64, Float32, Int8, const SUBSYS_HEADER_PADDING = 120 -read_bswap{T}(f::IO, swap_bytes::Bool, ::Type{T}) = +read_bswap{T}(f::IO, swap_bytes::Bool, ::Type{T}) = swap_bytes ? 
bswap(read(f, T)) : read(f, T) -read_bswap{T}(f::IO, swap_bytes::Bool, ::Type{T}, dim::Union(Int, (Int...))) = +read_bswap{T}(f::IO, swap_bytes::Bool, ::Type{T}, dim::Union(Int, (Int...))) = swap_bytes ? [bswap(x) for x in read(f, T, dim)] : read(f, T, dim) skip_padding(f::IO, nbytes::Int64, hbytes::Int) = if nbytes % hbytes != 0 @@ -139,15 +150,16 @@ function read_data{T}(f::IO, swap_bytes::Bool, ::Type{T}, dimensions::Vector{Int read_array ? convert(Array{T}, data) : convert(T, data) end -function read_cell(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}) +function read_cell(matfile::Matlabv5File, dimensions::Vector{Int32}) data = cell(int(dimensions)...) for i = 1:length(data) - (ignored_name, data[i]) = read_matrix(f, swap_bytes) + (ignored_name, data[i]) = read_matrix(matfile) end data end -function read_struct(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}, is_object::Bool) +function read_struct(matfile::Matlabv5File, dimensions::Vector{Int32}, is_object::Bool) + f, swap_bytes = matfile.ios, matfile.swap_bytes field_length = read_element(f, swap_bytes, Int32)[1] field_names = read_element(f, swap_bytes, Uint8) n_fields = div(length(field_names), field_length) @@ -174,7 +186,7 @@ function read_struct(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}, is_obje if n_el == 1 # Read a single struct into a dict for field_name in field_name_strings - data[field_name] = read_matrix(f, swap_bytes)[2] + data[field_name] = read_matrix(matfile)[2] end else # Read multiple structs into a dict of arrays @@ -183,7 +195,7 @@ function read_struct(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}, is_obje end for i = 1:n_el for field_name in field_name_strings - data[field_name][i] = read_matrix(f, swap_bytes)[2] + data[field_name][i] = read_matrix(matfile)[2] end end end @@ -198,7 +210,8 @@ function plusone!(A) A end -function read_sparse(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}, flags::Vector{Uint32}) +function read_sparse(matfile::Matlabv5File, 
dimensions::Vector{Int32}, flags::Vector{Uint32}) + f, swap_bytes = matfile.ios, matfile.swap_bytes local m::Int, n::Int if length(dimensions) == 2 (m, n) = dimensions @@ -229,7 +242,8 @@ function read_sparse(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}, flags:: SparseMatrixCSC(m, n, jc, ir, pr) end -function read_string(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}) +function read_string(matfile::Matlabv5File, dimensions::Vector{Int32}) + f, swap_bytes = matfile.ios, matfile.swap_bytes (dtype, nbytes, hbytes) = read_header(f, swap_bytes) if dtype <= 2 || dtype == 16 # If dtype <= 2, this may give an error on non-ASCII characters, since the string @@ -272,19 +286,20 @@ function read_string(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}) end # Read matrix data -function read_matrix(f::IO, swap_bytes::Bool) +function read_matrix(matfile::Matlabv5File) + f, swap_bytes = matfile.ios, matfile.swap_bytes (dtype, nbytes) = read_header(f, swap_bytes) if dtype == miCOMPRESSED bytes = decompress(read(f, Uint8, nbytes)) - mi = IOBuffer(bytes) - output = read_matrix(mi, swap_bytes) + mi = Matlabv5File(matfile,IOBuffer(bytes)) + output = read_matrix(mi) close(mi) return output elseif dtype != miMATRIX error("Unexpected data type") elseif nbytes == 0 # If one creates a cell array using - # y = cell(m, n) + # y = cell(m, n) # then MATLAB will save the empty cells as zero-byte matrices.
If one creates a # empty cells using # a = {[], [], []} @@ -301,19 +316,20 @@ function read_matrix(f::IO, swap_bytes::Bool) local data if class == mxCELL_CLASS - data = read_cell(f, swap_bytes, dimensions) + data = read_cell(matfile, dimensions) elseif class == mxSTRUCT_CLASS || class == mxOBJECT_CLASS - data = read_struct(f, swap_bytes, dimensions, class == mxOBJECT_CLASS) + data = read_struct(matfile, dimensions, class == mxOBJECT_CLASS) elseif class == mxSPARSE_CLASS - data = read_sparse(f, swap_bytes, dimensions, flags) + data = read_sparse(matfile, dimensions, flags) elseif class == mxCHAR_CLASS && length(dimensions) <= 2 - data = read_string(f, swap_bytes, dimensions) + data = read_string(matfile, dimensions) elseif class == mxFUNCTION_CLASS - data = read_matrix(f,swap_bytes)[2] + # Unnamed submatrix with a struct of function implementation details + data = read_matrix(matfile)[2] elseif class == mxOPAQUE_CLASS data = {ascii(read_element(f, swap_bytes, Uint8)), # "MCOS" ascii(read_element(f, swap_bytes, Uint8)), # Classname - read_matrix(f, swap_bytes)[2]} # Unnamed matrix w/ data + read_matrix(matfile)[2]} # Unnamed matrix w/ data else convert_type = CONVERT_TYPES[class] data = read_data(f, swap_bytes, convert_type, dimensions) @@ -362,7 +378,7 @@ function matopen(filename::String, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::B if subsys_offset > 0 seek(ios, subsys_offset) - matfile.subsystem = read_subsystem(ios, swap_bytes) + matfile.subsystem = read_subsystem(matfile) end matfile @@ -377,17 +393,17 @@ function is_subsystem_matfile{N}(x::Array{Uint8,N}) (x[1:4] == [0x00,0x01,0x49,0x4d] || x[1:4] == [0x01,0x00,0x4d,0x49]) end -function read_subsystem(f::IO, swap_bytes::Bool) - name, data = read_matrix(f,swap_bytes) +function read_subsystem(matfile::Matlabv5File) + name, data = read_matrix(matfile) @assert isempty(name) && is_subsystem_matfile(data) "invalid subsystem" - @assert eof(f) "unread data at end of subsystem" + @assert eof(matfile.ios) "unread data 
at end of subsystem" read_subsystem_matfile(data); end function read_subsystem_matfile{N}(data::Array{Uint8,N}) # A Matfile is stored within this matrix's data - f = IOBuffer(data[:]) + f = IOBuffer(vec(data)) # Check endianness and versioning. seek(f,2) endian_flag = read(f,Uint16) @@ -397,13 +413,14 @@ function read_subsystem_matfile{N}(data::Array{Uint8,N}) @assert read_bswap(f,swap_bytes,Uint16) == 0x0100 "unsupported MATLAB file version in subsystem" seek(f,8) + matfile = Matlabv5File(f,swap_bytes,0); svars = Dict{ASCIIString,Any}() i=0; - while !eof(f) - name,data = read_matrix(f,swap_bytes) + while !eof(matfile.ios) + name,data = read_matrix(matfile) if isempty(name) && is_subsystem_matfile(data) - # There are sometimes nested subsystems? + # There are sometimes nested subsystems? Are they ever non-empty? data = read_subsystem_matfile(data) end name = isempty(name) ? "_i$(i+=1)" : name; @@ -420,7 +437,7 @@ function read(matfile::Matlabv5File) vars = Dict{ASCIIString, Any}() i = 0; while !eof(matfile.ios) && (matfile.subsys_offset==0 || position(matfile.ios) < matfile.subsys_offset) - (name, data) = read_matrix(matfile.ios, matfile.swap_bytes) + (name, data) = read_matrix(matfile) name = isempty(name) ? 
"_i$(i+=1)" : name; vars[name] = data end @@ -493,7 +510,7 @@ function read(matfile::Matlabv5File, varname::ASCIIString) error("no variable $varname in file") end seek(matfile.ios, varnames[varname]) - (name, data) = read_matrix(matfile.ios, matfile.swap_bytes) + (name, data) = read_matrix(matfile) data end From e7ce2cbbdb2a56974c32a010897e45a20504d7a5 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Thu, 20 Feb 2014 17:39:09 -0500 Subject: [PATCH 05/10] Add an outline for the read_opaque function --- src/MAT_v5.jl | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/src/MAT_v5.jl b/src/MAT_v5.jl index 8ee89e0..b67df42 100644 --- a/src/MAT_v5.jl +++ b/src/MAT_v5.jl @@ -285,6 +285,29 @@ function read_string(matfile::Matlabv5File, dimensions::Vector{Int32}) data end +function read_opaque(matfile::Matlabv5File) + f, swap_bytes = matfile.ios, matfile.swap_bytes + # This is undocumented. There are two strings followed by an array + # The first string is always (?) 
MCOS: "Matlab Common/Class Object System" + class_system = ascii(read_element(f, swap_bytes, Uint8)) + class_system == "MCOS" || error("unknown opaque class system: ", class_system) + # The second string is the object's class name + class_name = ascii(read_element(f, swap_bytes, Uint8)) + + if class_name == "FileWrapper__" + # This is the subsystem implementation + data = read_matrix(matfile)[2] + else + # This is an unnamed array of uint8s: indexes into the subsystem + idxs = vec(read_matrix(matfile)[2]) + idxs[1] == 0xdd00_0000 || error("unknown opaque sentinal value: ", idxs[1]) + + data = idxs + end + + return (class_name, data) +end + # Read matrix data function read_matrix(matfile::Matlabv5File) f, swap_bytes = matfile.ios, matfile.swap_bytes @@ -327,9 +350,7 @@ function read_matrix(matfile::Matlabv5File) # Unnamed submatrix with a struct of function implementation details data = read_matrix(matfile)[2] elseif class == mxOPAQUE_CLASS - data = {ascii(read_element(f, swap_bytes, Uint8)), # "MCOS" - ascii(read_element(f, swap_bytes, Uint8)), # Classname - read_matrix(matfile)[2]} # Unnamed matrix w/ data + data = read_opaque(matfile) else convert_type = CONVERT_TYPES[class] data = read_data(f, swap_bytes, convert_type, dimensions) @@ -375,12 +396,12 @@ function matopen(filename::String, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::B end matfile = Matlabv5File(ios, swap_bytes, subsys_offset) - + if subsys_offset > 0 seek(ios, subsys_offset) matfile.subsystem = read_subsystem(matfile) end - + matfile end @@ -394,10 +415,11 @@ function is_subsystem_matfile{N}(x::Array{Uint8,N}) end function read_subsystem(matfile::Matlabv5File) + # Read the final, unnamed matrix hidden at the end of the file name, data = read_matrix(matfile) @assert isempty(name) && is_subsystem_matfile(data) "invalid subsystem" @assert eof(matfile.ios) "unread data at end of subsystem" - + read_subsystem_matfile(data); end @@ -412,13 +434,13 @@ function 
read_subsystem_matfile{N}(data::Array{Uint8,N}) seek(f,0) @assert read_bswap(f,swap_bytes,Uint16) == 0x0100 "unsupported MATLAB file version in subsystem" seek(f,8) - + matfile = Matlabv5File(f,swap_bytes,0); svars = Dict{ASCIIString,Any}() i=0; while !eof(matfile.ios) name,data = read_matrix(matfile) - + if isempty(name) && is_subsystem_matfile(data) # There are sometimes nested subsystems? Are they ever non-empty? data = read_subsystem_matfile(data) @@ -427,7 +449,7 @@ function read_subsystem_matfile{N}(data::Array{Uint8,N}) svars[name] = data end close(f) - + svars end From e72f3fb362d82faf7a037f6fa15f21ac271ab514 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Thu, 20 Feb 2014 17:41:19 -0500 Subject: [PATCH 06/10] Use a concrete-typed IO in the Matfile (Via parameterization). This improves performance now that I pass around the matfile more --- src/MAT_v5.jl | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/MAT_v5.jl b/src/MAT_v5.jl index b67df42..0784699 100644 --- a/src/MAT_v5.jl +++ b/src/MAT_v5.jl @@ -30,25 +30,26 @@ using Zlib, HDF5 import Base: read, write, close import HDF5: names, exists -type Matlabv5File <: HDF5.DataFile - ios::IO +type Matlabv5File{I<:IO} <: HDF5.DataFile + ios::I swap_bytes::Bool subsys_offset::FileOffset varnames::Dict{ASCIIString, FileOffset} subsystem::Dict{ASCIIString, Any} - Matlabv5File(ios, swap_bytes, subsys_offset) = new(ios, swap_bytes, subsys_offset) - # TODO: is there a better way to copy and replace the IO? 
Needed for uncompressed buffers - function Matlabv5File(matfile::Matlabv5File, f::IO) - v = new(f, matfile.swap_bytes, 0) - if isdefined(matfile, :varnames) - v.varnames = matfile.varnames - end - if isdefined(matfile, :subsystem) - v.subsystem = matfile.subsystem - end - v + Matlabv5File(ios::I, swap_bytes::Bool, subsys_offset) = new(ios, swap_bytes, subsys_offset) +end +Matlabv5File{I<:IO}(ios::I, swap_bytes::Bool, subsys_offset) = Matlabv5File{I}(ios, swap_bytes, subsys_offset) +# TODO: is there a better way to copy and replace the IO? Needed for uncompressed buffers +function Matlabv5File(matfile::Matlabv5File, f::IO) + v = Matlabv5File(f, matfile.swap_bytes, 0) + if isdefined(matfile, :varnames) + v.varnames = matfile.varnames + end + if isdefined(matfile, :subsystem) + v.subsystem = matfile.subsystem end + v end const miINT8 = 1 From d12391695fee092e6b6cd9c81ab1c3b008f80cf6 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sat, 15 Mar 2014 21:04:36 -0400 Subject: [PATCH 07/10] First pass at parsing the subsystem Still need to add tests and implement parsing of nested objects --- src/MAT_v5.jl | 166 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 154 insertions(+), 12 deletions(-) diff --git a/src/MAT_v5.jl b/src/MAT_v5.jl index 0784699..8304572 100644 --- a/src/MAT_v5.jl +++ b/src/MAT_v5.jl @@ -40,7 +40,7 @@ type Matlabv5File{I<:IO} <: HDF5.DataFile Matlabv5File(ios::I, swap_bytes::Bool, subsys_offset) = new(ios, swap_bytes, subsys_offset) end Matlabv5File{I<:IO}(ios::I, swap_bytes::Bool, subsys_offset) = Matlabv5File{I}(ios, swap_bytes, subsys_offset) -# TODO: is there a better way to copy and replace the IO? 
Needed for uncompressed buffers +# TODO: Remove if/when github.com/JuliaLang/julia/issues/5333 lands for mutables function Matlabv5File(matfile::Matlabv5File, f::IO) v = Matlabv5File(f, matfile.swap_bytes, 0) if isdefined(matfile, :varnames) @@ -295,18 +295,20 @@ function read_opaque(matfile::Matlabv5File) # The second string is the object's class name class_name = ascii(read_element(f, swap_bytes, Uint8)) - if class_name == "FileWrapper__" - # This is the subsystem implementation - data = read_matrix(matfile)[2] - else - # This is an unnamed array of uint8s: indexes into the subsystem - idxs = vec(read_matrix(matfile)[2]) - idxs[1] == 0xdd00_0000 || error("unknown opaque sentinal value: ", idxs[1]) + # The subsystem contains a FileWrapper__; an opaque object unlike all others + class_name == "FileWrapper__" && return read_matrix(matfile)[2] - data = idxs - end + # This is an unnamed array of uint8s: indexes into the subsystem + ids = vec(read_matrix(matfile)[2]) + ids[1] == 0xdd00_0000 || error("unknown opaque sentinal value: ", idxs[1]) + ids[2] == 2 && ids[3] == ids[4] == 1 || error("unexpected object id values") + object_id = ids[5] + class_id = ids[6] - return (class_name, data) + obj = matfile.subsystem["_objects"][object_id] + obj["_class"] = matfile.subsystem["_classes"][class_id] + + return obj end # Read matrix data @@ -406,6 +408,7 @@ function matopen(filename::String, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::B matfile end +# Undocumented methods for reading the subsystem is_subsystem_matfile(x::Any) = false; function is_subsystem_matfile{N}(x::Array{Uint8,N}) N > 2 && return false @@ -421,7 +424,13 @@ function read_subsystem(matfile::Matlabv5File) @assert isempty(name) && is_subsystem_matfile(data) "invalid subsystem" @assert eof(matfile.ios) "unread data at end of subsystem" - read_subsystem_matfile(data); + sys = read_subsystem_matfile(data); + + # Parse the FileWrapper__ object - might there be more than one? 
+ mcos = sys["_i1"]["MCOS"] + sys["_classes"], sys["_objects"] = parse_filewrapper(mcos) + + sys end function read_subsystem_matfile{N}(data::Array{Uint8,N}) @@ -454,6 +463,139 @@ function read_subsystem_matfile{N}(data::Array{Uint8,N}) svars end +# Returns a tuple of parsed (classes, objects) +function parse_filewrapper(mcos::Array{Any}) + # Parse the first element as an IOBuffer data stream + f = IOBuffer(vec(mcos[1])) + offsets,strs = parse_filewrapper_header(f) + classes = parse_filewrapper_classes(f,strs,offsets) + objects = parse_filewrapper_objects(f,strs,mcos,offsets) + + (classes, objects) +end + +function parse_filewrapper_header(f) + id = read(f,Uint32) # First element is a version number? Always 2? + id == 2 || error("unknown first field (version/id?): ", id) + + n_strs = read(f,Uint32) # Second element is the number of strings + offsets = read(f,Uint32,6) # Followed by up to 6 section offsets + + # And two unknown/reserved fields + all(read(f,Uint32,2) .== 0) || error("reserved header fields nonzero") + + # The string data section: a stream of null-delimited strings + @assert position(f) == 0x28 + strs = Array(ASCIIString,n_strs) + for i = 1:n_strs + strs[i] = readuntil(f, '\0')[1:end-1] # drop the trailing null byte + end + + (offsets,strs) +end + +function parse_filewrapper_classes(f,strs,offsets) + # Class information is a set of four Int32s per class, but the first four + # values always seem to be 0. Are they reserved? Or simply never used? + seek(f,offsets[1]) + all(read(f,Int32,4) .== 0) || error("unknown header to class information") + + n = div(offsets[2]-offsets[1],4*4) - 1 + classes = Array((ASCIIString,ASCIIString),n) + for i=1:n + package_idx = read(f,Int32) + package = package_idx > 0 ? strs[package_idx] : "" + name_idx = read(f,Int32) + name = name_idx > 0 ? 
strs[name_idx] : "" + all(read(f,Uint32,2) .== 0) || error("discovered a nonzero class property for ",name) + classes[i] = (package, name) + end + @assert position(f) == offsets[2] + + classes +end + +function parse_filewrapper_objects(f,names,heap,offsets) + seek(f,offsets[2]) + seg2_props = parse_filewrapper_props(f,names,heap,offsets[3]) + + seek(f,offsets[3]) + # Again, first set of elements are all zero + all(read(f,Int32,6) .== 0) || error("unknown header to object information") + + n = div(offsets[4]-position(f),6*4) + # 6 values per obj: class_idx, unknown, unknown, seg2_idx, seg4_idx, obj_id + object_info = Array(Int32,6,n) + read(f,object_info) + object_info = object_info.' # Transpose to column-major + @assert all(object_info[:,2:3] .== 0) "discovered a nonzero unknown object property" + + @assert position(f) == offsets[4] + seg4_props = parse_filewrapper_props(f,names,heap,offsets[5]) + + objects = Array(Dict{ASCIIString,Any},n) + for i=1:n + class_idx = object_info[i,1] + seg2_idx, seg4_idx = object_info[i,4:5] + obj_id = object_info[i,6] + + if seg2_idx > 0 && seg4_idx == 0 + props = seg2_props[seg2_idx] + elseif seg4_idx > 0 && seg2_idx == 0 + props = seg4_props[seg4_idx] + else + error("unable to find property for object with id ", obj_id) + end + + # Merge it with the matfile defaults for this class + @assert obj_id <= n "obj_ids are assumed to be continuous" + objects[obj_id] = merge(heap[end][class_idx+1],props) + end + objects +end + +function parse_filewrapper_props(f,names,heap,section_end) + props = Array(Dict{ASCIIString,Any},0) + position(f) >= section_end && return props + + # Another first element that's always zero; reserved? or ignored? 
+ all(read(f,Int32,2) .== 0) || error("unknown header to properties segment") + + # We have to guess on the array size; 8 int32s would be 2 props per object + sizehint(props,iceil((section_end-position(f))/(8*4))) + + while position(f) < section_end + # For each object, there is first an Int32 for the number of properties + nprops = read(f,Int32) + d = Dict{ASCIIString,Any}() + sizehint(d,nprops) + for i=1:nprops + # And then three Int32s for each property + name = names[read(f,Int32)] + flag = read(f,Int32) # A flag describing how the value is used + value = read(f,Int32) + + if flag == 0 # The prop is stored in the names array + d[name] = names[value] + elseif flag == 1 # The prop is stored in the MCOS FileWrapper__ heap + d[name] = heap[value+3] # But… it's off by 3!? Crazy. + elseif flag == 2 # The prop is a boolean, and it's the value itself + @assert 0 <= value <= 1 "boolean flag has a value other than 0 or 1" + d[name] = bool(value) + else + error("unknown flag ",flag," for property ",name," with value ",value) + end + end + push!(props,d) + + # Jump to the next 8-byte aligned offset + if position(f) % 8 != 0 + seek(f,iceil(position(f)/8)*8) + end + end + props +end + # Read whole MAT file function read(matfile::Matlabv5File) seek(matfile.ios, 128) From 138a50a8a372765e06794078b11dfcb5638f2ff9 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sun, 16 Mar 2014 18:09:31 -0400 Subject: [PATCH 08/10] Add simple test for basic custom classes --- test/read.jl | 15 ++++++++++++++- test/v6/simpleclass.mat | Bin 0 -> 1640 bytes test/v7.3/simpleclass.mat | Bin 0 -> 13824 bytes test/v7/simpleclass.mat | Bin 0 -> 573 bytes 4 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 test/v6/simpleclass.mat create mode 100644 test/v7.3/simpleclass.mat create mode 100644 test/v7/simpleclass.mat diff --git a/test/read.jl b/test/read.jl index ed4c065..5e9bcd2 100644 --- a/test/read.jl +++ b/test/read.jl @@ -140,5 +140,18 @@ for format in ["v6", "v7", "v7.3"] var2 = 
read(matfile, "var2") @assert var2[27, 90] == 10 close(matfile) - + + if format != "v7.3" + # Class tests, not implemented for v7.3 yet + result = { + "s" => (ASCIIString=>Any)[ + "char_field_1" => "char_field", + "array_field_2" => [0.0 1.0 2.0 3.0 4.0 5.0], + "cell_field_3" => {1.0 "one" 2.0 "two"}, + "_class" => ("", "SimpleClass") + ] + } + check("simpleclass.mat", result) + end + end diff --git a/test/v6/simpleclass.mat b/test/v6/simpleclass.mat new file mode 100644 index 0000000000000000000000000000000000000000..482e4fd99a145dcc01c7ff82317fbe03d1409116 GIT binary patch literal 1640 zcmb7EJ#Q015M6_V38IX%`|fSq#=X-w<4Klo+mp;K#xsA> zg{^W-WpV0`~05c!$#Hu9qmtfGZ2l&V* z$$TdOE8k;Af|QSO7K-vQ2X%v9@XgP&Y_9P1lR~^|74~NCqm41_m3E%vAjq?t3%#tr znhSgWfW>|BZazQm?E?!x}7^|V{X3op5WX;<-lPgJiCAI1~UnzX$F-?@G@_m8pR z`%*WLCuKf}{4Q)k4$Zl7#+$^9%>&EM?3!)er^v-?WmBg oQf`)VE8pW4oT^jzj(vq+bvQTb;X4-gz0O>@Gv=cV-!Z7>4^b{-h5!Hn literal 0 HcmV?d00001 diff --git a/test/v7.3/simpleclass.mat b/test/v7.3/simpleclass.mat new file mode 100644 index 0000000000000000000000000000000000000000..0bba86c494fa10a46291ebb24e806187a5fb8a53 GIT binary patch literal 13824 zcmeHMy>nAX5Z`C>A;1trU;-qQT*jG<3j#72@VG&afruFlp4c;SC!VF}Sd46WBzXcI zWzwajOqni}lqvZW++-#tC1pCYd%M4<=Vv_|%bq~sokrTbz3<=c?(LptrsnTXee14Y zyy9l2=D%60H=+w}wh?xgnyr;d=1t$ZHh#fPx1z8U)m(FR(#@@}x|y)$#;&=s>yuZm zPhK5!my2WLu6*_xPq=7p#$maCzAn*$x+VP|3kNVjQ1v)^( zx{v`SuPxEFR9TDkz-VCfXQRJc|A6E}bk_JMZTxh*`R{MtB5sNKK&=~<4`{9cMovE{ z1j@IjW@W<*w(C)&FHFv?NNvlEwAUBgkK3JSWyEtvTG3LQvyjswI_y9LDyaNg$G{{x zh~--cIAr4tvV2QZ(IORD=W7nyI0M0eAE`jfAJp;wlHsHx`IU~RxSu59|8ef?xgnac z@^P*y5F2f_-<4`3Z2PT$%(cg?y)>MwudFqqX;jk?m4waXo#A}kzq$A@s&*>1sM@SW zQa{YoCf6_XOBz(dzne92?%w^I0oOXKpS5cAdt~LOYH46)=+}eV0qve!dE>D1F^lU? 
zY%{!{B+Fi=aoNdUzB;iPzxNZZd{hzE475dFg-=C8_^skJHKB?FM84LIQ zuxa|R7$r^^U*&fo^S*yHr0ty&8vRr={t_@=PTpSxAivLSi@5h6omWQk9!%V0(aKuq zv9u6*SDDru1o924qip?!f119phG^6`4(PDIcgt62r{-Ch<-Qe_!nl{r(SYADY!!j& zL11OLElGbtS6cG(Z1opZqojSWw4I;O4?anJv;2&Ycp=z#HC%13)~jK|;Rs*>daaiI z67Me@h`opfvk$#n>v>Auliu5pyJ6PlQ!%S&#u@ji9N_;r1$5M(fs5r#5lH)|7Cy0c zR)6*r-m}QtL6#@!Dt@VyaSjp(3O~u?B9}*ncux;!fj+=|0*Aouwv)>&xjN4MAXm5f zLE?v2xV9FxDi!AAKAA8%C^Ju!LuOm5awpoU=}(y~PrF87zv2kk$&DT4cf(egw3pgN zPjaih{M+{!qzw{VEAszI8~R!mQ{>V*lLB3 zt#nzs^-QquJu8g~8Alr_CwV9*d1wo|Rlkb9DxmE6P(mz>xn5uGOmKOK&~NMgb|lf> z?zc;eeir$0ac;XzKu7$X&L&RB(XdZg*M8b5*!X9sb%ig_*h<9+TIvT!Su@rvvLi$7 z7RRd1pN^-Y7ljPGFfXXQBS`+FyJ5DiMB~S3{I$i+TlGT?sNk&W)eWKj;2>`wSVyh7 zzS!Uwgx%s$(?#%a;_`@j-3)LUwF%R5gl7V*CuuKKCw1T1cpRc8trBm+r+)C*Bo}Nt zWT39SJjMC=-G#L;@#wGE{<6<_=W|EU@=NT;bv%M?iR0RmCp2vIw5R>}Ye|3QP2$Y$@af(_ zy1Mi){s&0khu!nYeqTwhvtPf;j*(+UwQvEQ@Nb{?f4_gFYV)GS6&4Rmp0VXD)BdIZ z8|^EukUIYQ=T0k&Z~oo%{kvWUWDrh}2mAYt()JVnJf2O{_Ebk<|49BE$Wx9$jzEq; djzEq;jzEq;jzEq;jzEq;jzEq;j=(#Cz`x8J>8=0( literal 0 HcmV?d00001 diff --git a/test/v7/simpleclass.mat b/test/v7/simpleclass.mat new file mode 100644 index 0000000000000000000000000000000000000000..39bca54c12c0563075530c4067205162b74ec4d3 GIT binary patch literal 573 zcmeZu4DoSvQZUssQ1EpO(M`+DN!3vZ$Vn_o%P-2c0*X0%nwjV*I2WZRmZYXAxrB)niqGE;cQ zkmSh5k-%~&$>8jv2e04ApW#R;W-cs9%X9QwRqU5kXc!?YFHo)U@_Fm4^wv)|52QI5 zFi+8EOuU!sG^sg};aLnvD9G42Mv$>Wqcb&m%p!L zxbogTeEz>Dbw^KppL+Gy*~|G+vulFe*G9?mY=3>L_T0kXHcl(%9~YQ)qvxZtp`Mey g!~W=bw~b5ohfnvD>$+3B-tS@kJ?&n84J8LI0F@f^+5i9m literal 0 HcmV?d00001 From 575a1caff86938bdcd158ec43e69b60e6c5971c3 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sun, 16 Mar 2014 18:10:14 -0400 Subject: [PATCH 09/10] Fix reading varnames for class objects - they have no dimension fields --- src/MAT_v5.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/MAT_v5.jl b/src/MAT_v5.jl index 8304572..56162f1 100644 --- a/src/MAT_v5.jl +++ b/src/MAT_v5.jl @@ -647,8 +647,8 @@ function 
getvarnames(matfile::Matlabv5File) error("Unexpected data type") end - read_element(f, matfile.swap_bytes, Uint32) - read_element(f, matfile.swap_bytes, Int32) + class = read_element(f, matfile.swap_bytes, Uint32)[1] & 0xFF + (class != mxOPAQUE_CLASS) && read_element(f, matfile.swap_bytes, Int32) varnames[ascii(read_element(f, matfile.swap_bytes, Uint8))] = offset if dtype == miCOMPRESSED From 0fe0154fa20e5d65c5febf4186bbc7eb8585b808 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sun, 16 Mar 2014 22:39:21 -0400 Subject: [PATCH 10/10] Support empty and multidimensional object arrays Extend the test to contain empty, 2d- and 3d- array test cases --- src/MAT_v5.jl | 35 +++++++++++++++++++++++++++-------- test/read.jl | 22 ++++++++++++++++++++++ test/v6/simpleclass.mat | Bin 1640 -> 4288 bytes test/v7.3/simpleclass.mat | Bin 13824 -> 25512 bytes test/v7/simpleclass.mat | Bin 573 -> 1059 bytes 5 files changed, 49 insertions(+), 8 deletions(-) diff --git a/src/MAT_v5.jl b/src/MAT_v5.jl index 56162f1..7b90ee4 100644 --- a/src/MAT_v5.jl +++ b/src/MAT_v5.jl @@ -298,16 +298,35 @@ function read_opaque(matfile::Matlabv5File) # The subsystem contains a FileWrapper__; an opaque object unlike all others class_name == "FileWrapper__" && return read_matrix(matfile)[2] - # This is an unnamed array of uint8s: indexes into the subsystem - ids = vec(read_matrix(matfile)[2]) + # This is an unnamed array of uint32s: indexes into the subsystem + ids = read_matrix(matfile)[2] ids[1] == 0xdd00_0000 || error("unknown opaque sentinal value: ", idxs[1]) - ids[2] == 2 && ids[3] == ids[4] == 1 || error("unexpected object id values") - object_id = ids[5] - class_id = ids[6] - - obj = matfile.subsystem["_objects"][object_id] + ndims = ids[2] + dims = ids[3:2+ndims] + n_el = isempty(dims) ? 
0 : prod(dims) + object_ids = ids[3+ndims:2+ndims+n_el] + class_id = ids[3+ndims+n_el] + @assert 3+ndims+n_el == length(ids) "unexpected data at end of opaque id" + + # Return like a struct or struct array + obj = Dict{ASCIIString,Any}() obj["_class"] = matfile.subsystem["_classes"][class_id] - + if n_el == 1 + obj = merge!(obj,matfile.subsystem["_objects"][object_ids[1]]) + elseif n_el > 1 + # Multiple objects as a dict of arrays, use the first as a template? + fields = keys(matfile.subsystem["_objects"][object_ids[1]]) + for fld in fields + obj[fld] = cell(dims...) + end + for i = 1:n_el + sys_obj = matfile.subsystem["_objects"][object_ids[i]] + for fld in fields + obj[fld][i] = sys_obj[fld] + end + end + end + return obj end diff --git a/test/read.jl b/test/read.jl index 5e9bcd2..4337aa5 100644 --- a/test/read.jl +++ b/test/read.jl @@ -149,8 +149,30 @@ for format in ["v6", "v7", "v7.3"] "array_field_2" => [0.0 1.0 2.0 3.0 4.0 5.0], "cell_field_3" => {1.0 "one" 2.0 "two"}, "_class" => ("", "SimpleClass") + ], + "empty" => (ASCIIString=>Any)[ + "_class" => ("", "SimpleClass") + ], + "array" => (ASCIIString=>Any)[ + "char_field_1" => {"one" "two" "three"}, + "array_field_2" => {[1.0 0.0] [2.0 2.0] [3.0 3.0 3.0]}, + "cell_field_3" => {{1.0 0.0} {2.0 2.0} {3.0 3.0 3.0}}, + "_class" => ("", "SimpleClass") + ], + "threed" => (ASCIIString=>Any)[ + "char_field_1" => cell(1,1,2), + "array_field_2" => cell(1,1,2), + "cell_field_3" => cell(1,1,2), + "_class" => ("", "SimpleClass") ] } + result["threed"]["char_field_1"][1] = "one one one" + result["threed"]["array_field_2"][1] = [1.0 1.0 1.0] + result["threed"]["cell_field_3"][1] = {1.0 1.0 1.0} + result["threed"]["char_field_1"][2] = "one one two" + result["threed"]["array_field_2"][2] = [1.0 1.0 2.0] + result["threed"]["cell_field_3"][2] = {1.0 1.0 2.0} + check("simpleclass.mat", result) end diff --git a/test/v6/simpleclass.mat b/test/v6/simpleclass.mat index 
482e4fd99a145dcc01c7ff82317fbe03d1409116..2232554325edf0b000b75230ce1c3fdfb8b5c29c 100644 GIT binary patch literal 4288 zcmchaPfrt35WpV_TK|HP7!Mxy=m8BaNK7vjn;1xJB!NV4Wka{2$pXz%quw}j^ytx} zAHk2|7x2!JkKp`vJMMJ5i=}NCGQ9WZ|C={6Z5tcA?>63euU;;Djg8$G?M@ghdE24i zYad64RiV|l)>fCidKCD*pyeGORlS{0M_$8^yz-h?sZ>|is^t~0vRq#EMuqbmE`gb? z#zWbi3EmPc2|f}8X5{ZAUT0mS{(fgh_V)yLI)~jbsE7UuT<6f=6jc2O5C!H0u?HPp z5a9eY-nl=7p*&r&_f1gkrST*-gTrp`Ypgd}T*Rk7WgGijWaE;!l5H9{aeWsoip!58 zf7H0Pjh$@1$XPZnPiO-F@^w=VH|J}ZDeXD2nvnfp1T4N}y&#yn`qP{Yu%^Xq;ZhpjL8Hb1z3 zD-HP}^1Iz2YBuGJ{uiUK@oF58V;ku`knc*cMfa`=&&JKYpt16{dxEEZd@h0|VZc|| z55ln7?gU}0xiZk4`v-p1XOvxBo&8fqRfkq%c8I40y{|pn_?3<=Fe^`uXq~7mD|ZF6 z^5hlF(#a>NpfRYN^#e_9YfR8oM&q$Ds88hZqq3ZD3#vWauhg`ayKCf_o#=v6J_B;p zncAaXRBm4O;{tX7R_aFU58Hs&D`)i$9r@RSDz7=V@){rV+?gCCaIv1%$2MT~HQ&(T zhg7wUZZz?K|?<{~jj0QceB|q$~E$1=XJVFY4pT>aR4^ zw|Vgw=W#mvHh+2Vpfj9I!G9u{^u3o^Cw&i{1L=Cb?=k5-n2!E5-owrpYcH?9iu)sd z2i{&FuGj19kLl!d{Ll5|zNgZFMe{&sv(Aa^Jb(|_`N3J`=cJ=-f1|%({N?FedGyhN z_Dpc#>u=8YUu11Pz7>F1fw%iJ^j#fuYI7K;?-EY)k@-6U*i?1+Y$j$0Vnuz{z>% diff --git a/test/v7.3/simpleclass.mat b/test/v7.3/simpleclass.mat index 0bba86c494fa10a46291ebb24e806187a5fb8a53..fa64bf26c22fb2d76267aaf1dbee6df2ec7f0c74 100644 GIT binary patch literal 25512 zcmeHPJyaaY5$*+{TXX1cCCfHG|Fxg&^V&}*fj~0GKQ{TSeC_bUVgkR zKA$5(Ia8>_?-3FzBvhzSA&K83R7j}o+|^az{Ot}qJFFNurg=M6U0wZGU0q$>J;T(* zlSdPG^{;P?=&6Y(pDeA^s@L^&t=w2zudj|9+T_DKqu2Fhy;^Qm7xntuxSrWs(^KWT z9=@Y*-5MXcGd?_`Zw(EP>e9UjxAkUaxw=}`!#9S8^bOrID1@GeQ$RDAjs*T=2JKS+ zNBy?;d|q8p1#UMq#xw{SJXaDZ-Jh7I4$n%j9f7Yz&m^ig1*xuXG`=!;xn3_5Y%JHS)kUSQZEh`W zezn=Cu3od8*Xq@!O@l&zp{G)eq32nDg&`-JoOAV?IWJ0vUZdZ{O0$SnR@;<5DW@mb zV@t{_ruTAr|D)n4!T+O-S4=(%ME~#H$1?+JOyZYJN?sYGZS414rB>dwUH`U8eP8&A zab{(8qgI_nvKmkhkxhBuF}Rz5ec_90r7^cyt*kFrDZOB-^|znur_js5Jh@$n&V2s( zUQS(<)+x15jW7ZEOl3Cc4;U61sKIYB0zOCL^O7Iqu?GQ5zAzB0rJnVLQ-X1{T6-M{ zGDu7deDhw*KR5Btz`%~X;GZ0d5Q6kFDKT_ zUZtrbcV3dZ&>SU5^8)&MdgX?mo5tGqvK@QZ@*??=Z0JYwB0Z(4d-mkj+HVQlr*?kJ 
z;T6+)6N00`wCwrKmq6HWfi>Hf^$il3ntVJ%IOa(S2?=4&SSl(D>t{jy?&q@pqI}lh ze5Btk+0Jh?HW*wLxqdH&x5JOZOtx(9<0yZ?=WkUT{^0m2)J9sE8( zZi*ZVPA=oP*Ep22D4`-~<~R@xs@{6Oq9 zoyQQ}Pcu;y-yvYid9JptOYh}*O@*4U^CO1)KW2FNQ-;pZ8Seao;mab!*-scgyTNep zCd1cv7!Hmxd^*k$V}>z#ahK`(Gluj$YCc5mUoV+_B1ZMV97z7PUf$TK*5~F79qZVb z^tnWp*q{X-4OrjFDsU)WKAJyEZ7qRrog=_LJF*{-nm3LZYQLRk9pRB|w$mk>qgOdS zgwURC-u$-ZS2UUPCIRIdvZ_>TwYjC0YHe|DByez2SuWQlVVF;pBzB9kwG7hSmT4eD z$Nh1lh3Qll{lat-8TMNSbp!R)zehy^?ZxO9E{FDK{^J$20et>?h=#J!CZd4{?eVt( zbrK)d173>LnvL>kB#A~p@*VtqbU+P0oNo~y9V_|y$aa8_Oeo5uHS##2*^a^Qi9Uxz zzp37>m9++$&rbcN1-H@K#g0O9Msn0=O%D^o^ND0bL6VK^*gs0X4i#;A_Ppf#7FB~E zF`WJZ!{r|`q-PR2Vmv87%~67#%CSP3Y=GtG80=3zLSB#)Af49|ep!DiYlx#p-&dnw zlM+&Fwa33YlT2AWG5+v^+>www#RRm z$u95-_)+q>sMgh*GF`ACi97;&VN5g?{df-$!Z1>j)8h>Q+2wfd>CD{=7J@ zf9TDV!Znsq@n@c-j%}wC`h#D%O@Lp<1lZe?Tffuw2mkKM0hYie1dx-L3s;{+Kf2-9 z*iD9QLOZ$eek)2}y5T3EX1@mOh9l>du&1!sCD9_BK?an{7m;O0;f&Ik6hLPs23rzH z^hM?c_uCp@@|i&~@8a9*PWPv*-<}`C`w1f;hlG^Y{9B{SZHPDj;>)Ld>)F9CxrJ|k zB~3LTiM#^)j)dzO_C0)4;I#+(6J?*^mjQO}2N!H*-uXtldb8nny`A2$9-J4s{3E7Z zV7HfrA9qD=_7DPj<1Q(b>_-13819>IIf!af1-wZLz|dJOvr`wKnCIHTQL(vE)${8Aw7 zCcVQ>;ol6@guuH3Ck5UUcwgWHfw+=~KmJhYe-!v9fsX{jE)^vn1z_h+ZXM-$nncgK zF+VX*?dB)e(WfFfzmM2Tmd}KUcQ9J<5be1n;2n)9z8y)xI~-Abr=5P@^b7O9oqnA+ zxxgOAl?&ce4EtU5?f?bYxA^y#+UDz3-<5ky9fCIb%==+jZ=hfC{fp!JVYojD!nEu> z*4uXa#&-Uq+U4PQeItoBffqu7@6^N~ggro?Z`F$Tr4aFsUMn7=y^;jH!xzQ(S`zS% zUld=r>1q12c;^-GwEEv!O`?C@@Mq!YrGK82?)?P(+;;O2_Uh#P&~)Vjzdw@;TOxd4 zfPGM0xtye*aGLLNI(?rG`%KKc^!x{7{Dd9b z_Y}IB$xv|bafktF$z@t64sRX@0ug*#`26qiN3fQ zLOuTb^gK>6hneGdo2!PL&AqX!T#(?1zIydtQo$8 z3zp)=3WDMrI4>oLLHqG-Trky(X`u2g1ETL5V*X+M?dEr?kg>lHN%Y9?FKssJTNQJ8 z(W&2vdj|h^t-km4@zJV%tCri%nt(n4w)?*e>T~Wb^eFpo-9P%@d>r57!tV{+?S2sf z^eO&1FzoYHRWbMPWpe{RrK6EMvk(Bq-)SKoKrj!`N8gET@33f*<1?9-?^X8Bi<|jr-4hOC(hZ~ z4>Rk!!)XP9S1mm5IuYkPwWH+I&OgKdi3s}--grcRpV^J|4AWph{xe-pePGtE0{$t8Cad@B)l#fFA`wRG^ zR1Ssk_ZUo7>cxk^GMGsqlRzecOahq%G6`f7$Rvz)JTgPduoCLk&DS4;C 
z+m6i{7t)a!1{D@dyex555d@QbrQ^?iyAn6M;J9T)gBCmf?*BR_auRNank?FO{F=$MCFtZ&+mEjhHoMr_|sv)bq=fX&V5A-~C z!-G(A%9Odg5kp)u9?ao*0E$xFATIEl)XrI747Qh0R)v@r<_X>JDPRWgd!|UkUn`Gc z$c7EvgikbXe-CL0K89nd@9k4A)I`Mg-1c0s=bD^cjeUDI5s=6RtVeXK;sYb#BqwtIwa^$Lq3GVA(kv=g@*DN%xxzSrE1={t}=C)&$qb1{d-cR{;$ zYs``0-?9n$UAC;ICN5m+cVt?Pmf%3A(DVeml(y^)tinT)Lk6-lSvVM=0cIcOV^_9r z4xZDvRiV(z7#^p;O{+W$xyw`$@5Jlmub&ooqE@esHijrh@N+t@o- zL!zaA;W#%`CEkr$<(qIipnB9Hv7-gGnWSdw0#RYmUo9?v-Nh7x{Gb8N>+8JGz2p?{>=7i=feTrOOKtMi2* R8WuFq+|S+N`m1?`{Q`DA)ZYLA diff --git a/test/v7/simpleclass.mat b/test/v7/simpleclass.mat index 39bca54c12c0563075530c4067205162b74ec4d3..0ec60b2bf22496abdc563317de72f809a7644816 100644 GIT binary patch delta 879 zcmdnXvY2Cn9j}p*m9d$Xp|OIIfuYI7K;?-EY)royCzhqwhchrRRLpstoRBbs;Yg0d z8O9?`JOT$ej;wK*Gb=hEi1XQIgCADE*_)5-Y_za2-maj2)lz-q4v97M_&aPToO#$f z>oC{T%}!@iTDqFAbUMr^pD7{0pvKRzKY~k?A82$8!gxkB<0C?*L~z#QF`pS`ez60@ zd`*z~3m%^|dBSV9PZLWP<_| zD3p$dZ}dBCAYeP&WJ&8ZB?%yq1167ll%x8svFZ2FsDn)J9UpRrf@nQ8wt zU*hwGADZXu^XI33lu5p1HHmrQ`W3sGo)ns{d#cs5=;B$Go^|f*26OKCE#4r{%Fkb5 z%9kW_qgkP^S^Ud4MgRJWcOF`29vOSjSv>2bYKZZ>1+zY~t&%Jc{QS)Ew?WYC$u}l7 zE-uaxn3Mfr8fV|Ty~`T21@CYjTr0K1>|iNJ9`7md_8Yp>_Qz|QOVx8|-rM>>^Sp15 z_D{P>KW!(ykNEWRS9xoBy?EFg?z)-#`u)ELr0)AS=jWxr_kD_j=Y5&H=5syc(bvwe zuWYmvty4J%KaKu&f7aITHJ49S z#Rr^`UmO1R@7}IIS0}%BHvF$W;c5J@M?LQsoU72-&-ZK3^0JMu1HRt54wU>^I^n*+ zS?Ai<4BU47|M&8}s-4gMj`_|B^WQh$-SwNW^6{Tj-y;g`j6Z#q&4|u;CY8PO_`lTn ztqQx>N6p;z(nkOJhgVX2z8#oS60f;<|L)i=vpN6gFX?}4|Dx&3!xxwTEA0Be`VYJ3 KPex^xyy*ao#iOqP delta 389 zcmV;00eb$U2)zW5J`FK9Ix{yqG&UeIFflZdQ6rIH1_IXrk#=Q&UI73Ac$}?Nu};G< z5Iwt2lp+xoiGhKEPbjc-YotmDhKivBY-v=C8p+VeMFKWPK8MfX6Bzk~egbxU!A@`j z0-kibJo}x`IiDQ^z$gF+m5-G5!5~0Rv=5BepvZxoO*M~|Et!p{p7_elI4kD}YNnI( zS+&lAv(66c$){(3rB?H8Whc4d7cwgfF4Gj_-+MOR<};i%_M!C=Y5(uHhMwXW#kJ!>+{dL2h#^`S@a=I!IS!Dg)EhfX~6#P^)|fnuv)L(Jq5C{SsGr9Wn%Io- jBQY*QKR~}*p4&5YzgL$|9*W$3uTJ5A-Ybm{C?Y@#vkt*i