@@ -141,51 +141,248 @@ internal static List<MatlabMatrix> ParseFile(Stream stream)
/// <summary>
/// Parses the raw bytes of a numeric MATLAB array into a matrix.
/// Sparse arrays are read by the sparse populator; every array class that is not
/// explicitly rejected below is read by the dense populator.
/// </summary>
/// <typeparam name="T">Element type of the resulting matrix.</typeparam>
/// <param name="data">Raw bytes of the array element, starting at the array flags tag.</param>
/// <returns>The populated matrix.</returns>
/// <exception cref="NotSupportedException">
/// The array class is Function, Character, Object, Structure, Cell or Unknown.
/// </exception>
internal static Matrix<T> ParseMatrix<T>(byte[] data)
    where T : struct, IEquatable<T>, IFormattable
{
    return ParseObject<Matrix<T>>(data, (reader, arrayClass, isComplex, rowCount, columnCount) =>
    {
        switch (arrayClass)
        {
            // Non-numeric classes cannot be represented as a Matrix<T>.
            case ArrayClass.Function:
            case ArrayClass.Character:
            case ArrayClass.Object:
            case ArrayClass.Structure:
            case ArrayClass.Cell:
            case ArrayClass.Unknown:
                throw new NotSupportedException();

            case ArrayClass.Sparse:
                return PopulateSparseMatrix<T>(reader, isComplex, rowCount, columnCount);

            default:
                return PopulateDenseMatrix<T>(reader, isComplex, rowCount, columnCount);
        }
    });
}
155165
156- // Dimensions Array tag (8 bytes)
157- reader . BaseStream . Seek ( 4 , SeekOrigin . Current ) ;
158- var numDimensions = reader . ReadInt32 ( ) / 8 ;
159- if ( numDimensions > 2 )
/// <summary>
/// Parses anything that cannot be mapped to a MathNet.Numerics matrix:
/// character arrays, structures and cell arrays.
/// </summary>
/// <param name="data">Raw bytes of the array element, starting at the array flags tag.</param>
/// <returns>The parsed character matrix, structure or cell matrix wrapped in a nested object.</returns>
/// <exception cref="NotSupportedException">
/// The array class is anything other than Character, Structure or Cell.
/// </exception>
internal static NestedObject ParseNonNumeric(byte[] data)
{
    return ParseObject<NestedObject>(data, (reader, arrayClass, isComplex, rowCount, columnCount) =>
    {
        if (arrayClass == ArrayClass.Character)
        {
            return PopulateCharacterMatrix(reader, rowCount, columnCount);
        }

        if (arrayClass == ArrayClass.Structure)
        {
            return PopulateStructure(reader);
        }

        if (arrayClass == ArrayClass.Cell)
        {
            return PopulateCellMatrix(reader, isComplex, rowCount, columnCount);
        }

        // Unknown and every remaining (numeric or unsupported) class end up here.
        throw new NotSupportedException();
    });
}
167194
168- // Array name
169- ReadElementTag ( reader , out _ , out var size , out var isSmallBlock ) ;
170- reader . BaseStream . Seek ( size , SeekOrigin . Current ) ;
171- SkipElementPadding ( reader , size , isSmallBlock ) ;
/// <summary>
/// Reads the character data element of a MATLAB char array into a <see cref="MatlabCharMatrix"/>.
/// </summary>
/// <param name="reader">Reader positioned at the tag of the character data element.</param>
/// <param name="rows">Number of rows of the char array.</param>
/// <param name="columns">Number of columns of the char array.</param>
/// <returns>The character matrix wrapped in a nested object.</returns>
/// <exception cref="NotImplementedException">
/// The data element is not stored as UTF-8, UTF-16 or UTF-32.
/// </exception>
private static NestedObject PopulateCharacterMatrix(BinaryReader reader, int rows, int columns)
{
    // Only the element type is needed; the amount of data to read follows from rows * columns.
    ReadElementTag(reader, out var type, out _, out _);

    Encoding encoding;
    int bytesPerChar;

    // The width of one stored character is derived from the element type.
    // Encoding.IsSingleByte must not be used for this: it is false for UTF-8,
    // and it cannot tell 2-byte UTF-16 from 4-byte UTF-32.
    switch (type)
    {
        case DataType.Utf8:
            encoding = Encoding.UTF8;
            bytesPerChar = 1;
            break;
        case DataType.Utf16:
            encoding = Encoding.Unicode;
            bytesPerChar = 2;
            break;
        case DataType.Utf32:
            encoding = Encoding.UTF32;
            bytesPerChar = 4;
            break;
        default:
            throw new NotImplementedException($"Could not parse char array due to unsupported encoding: {type}");
    }

    MatlabCharMatrix result = new MatlabCharMatrix(rows, columns, encoding);

    // The data is consumed column by column (inner loop over rows).
    // NOTE(review): each cell is decoded from exactly one code unit, so characters that need
    // several code units (multi-byte UTF-8, UTF-16 surrogate pairs) are not reassembled.
    for (int col = 0; col < columns; col++)
    {
        for (int row = 0; row < rows; row++)
        {
            byte[] newChar = reader.ReadBytes(bytesPerChar);
            result.Data[row, col] = encoding.GetString(newChar);
        }
    }

    return new NestedObject(result);
}
239+
/// <summary>
/// Wraps the raw bytes of an array element in a reader, parses the common array header
/// and hands the reader, positioned right after the header, to <paramref name="parser"/>.
/// </summary>
/// <typeparam name="T">Type produced by the parser.</typeparam>
/// <param name="data">Raw bytes of the array element, starting at the array flags tag.</param>
/// <param name="parser">
/// Callback receiving the reader, the array class, the complex flag and the row and column counts.
/// </param>
/// <returns>Whatever <paramref name="parser"/> returns.</returns>
internal static T ParseObject<T>(byte[] data, Func<BinaryReader, ArrayClass, bool, int, int, T> parser)
{
    using (var buffer = new MemoryStream(data))
    using (var binary = new BinaryReader(buffer))
    {
        // The array name is part of the header but is not forwarded to the parser.
        var header = ParseObjectHeader(binary);

        return parser(binary, header.arrayClass, header.complex, header.rows, header.columns);
    }
}
253+
/// <summary>
/// Reads the common header of an array element (array flags, dimensions and array name)
/// and skips the padding that follows the name.
/// </summary>
/// <param name="reader">Reader positioned at the array flags tag.</param>
/// <returns>The array class, the complex flag, the row and column counts and the array name.</returns>
/// <exception cref="NotSupportedException">The array has more than two dimensions.</exception>
/// <exception cref="EndOfStreamException">The data ends before the array name has been read completely.</exception>
private static (ArrayClass arrayClass, bool complex, int rows, int columns, string name) ParseObjectHeader(BinaryReader reader)
{
    // Array Flags tag (8 bytes)
    reader.BaseStream.Seek(8, SeekOrigin.Current);

    // Array Flags data (8 bytes): the first byte read is the class, the second the flags,
    // the remaining 6 bytes are skipped.
    ArrayClass arrayClass = (ArrayClass)reader.ReadByte();
    var flags = reader.ReadByte();
    bool complex = (flags & (byte)ArrayFlags.Complex) == (byte)ArrayFlags.Complex;
    reader.BaseStream.Seek(6, SeekOrigin.Current);

    // Dimensions Array tag (8 bytes): skip the type, read the byte count.
    // NOTE(review): the division by 8 matches the check inherited from the previous code;
    // confirm it against the size of one dimension entry.
    reader.BaseStream.Seek(4, SeekOrigin.Current);
    var numDimensions = reader.ReadInt32() / 8;
    if (numDimensions > 2)
    {
        throw new NotSupportedException("Only 1 and 2 dimensional arrays are supported.");
    }

    // Dimensions Array data: row and column count (8 bytes)
    int rows = reader.ReadInt32();
    int columns = reader.ReadInt32();

    // Array name
    ReadElementTag(reader, out _, out var size, out var isSmallBlock);

    // ReadBytes keeps reading until the requested count or the end of the stream is reached,
    // whereas Stream.Read may legally return fewer bytes; a short result therefore means the
    // data is truncated, which is reported the same way the ReadByte/ReadInt32 calls above do.
    byte[] nameBytes = reader.ReadBytes(size);
    if (nameBytes.Length != size)
    {
        throw new EndOfStreamException("Unexpected end of data while reading the array name.");
    }

    string name = Encoding.UTF8.GetString(nameBytes);

    SkipElementPadding(reader, size, isSmallBlock);

    return (arrayClass, complex, rows, columns, name);
}
293+
/// <summary>
/// Reads the body of a structure array: the field name length, the field names and then
/// one array element per field.
/// </summary>
/// <param name="reader">Reader positioned right after the common array header of the structure.</param>
/// <returns>The structure wrapped in a nested object.</returns>
/// <exception cref="NotSupportedException">
/// The field name length is not positive although field names are present.
/// </exception>
private static NestedObject PopulateStructure(BinaryReader reader)
{
    // After the common fields for all arrays a structure has the length for the field names
    // as a short data element. According to the docs MATLAB always sets this to 32 bytes
    // (31 chars + NULL); the stored value is used below to split the field names.
    ReadElementTag(reader, out _, out _, out _);

    int nameLength = reader.ReadInt32();

    // Field names are saved as an miINT8 data element;
    // each name is padded to the field name length and NULL terminated.
    ReadElementTag(reader, out _, out var size, out var isSmallBlock);

    // A non-positive length would never advance the loop below (or make ReadBytes fail).
    if (nameLength <= 0 && size > 0)
    {
        throw new NotSupportedException("The field name length of a structure must be positive.");
    }

    List<string> fieldNames = new List<string>();
    int bytesRead = 0;

    while (bytesRead < size)
    {
        byte[] currentName = reader.ReadBytes(nameLength);
        fieldNames.Add(Encoding.UTF8.GetString(currentName).TrimEnd((char)0));
        bytesRead += nameLength;
    }

    SkipElementPadding(reader, size, isSmallBlock);

    // Each field of the structure could be any type supported by a MATLAB file.
    // NOTE(review): exactly one value is read per field name, i.e. the structure is
    // treated as a single element; confirm behaviour for structure arrays.
    MatlabStructure result = new MatlabStructure();

    foreach (string fieldName in fieldNames)
    {
        // To use the regular array parsing methods we need to know how much data to give them.
        ReadElementTag(reader, out _, out var fieldSize, out _);

        // Remember where the field's array data starts. The header length depends on the
        // array name, so rewinding by a fixed byte count is not reliable.
        long fieldStart = reader.BaseStream.Position;

        // We also need to know what the array class is (maybe a nested structure or a cell).
        (ArrayClass arrayClass, _, _, _, string name) = ParseObjectHeader(reader);

        // Reset the reader to the start of the field's data for the downstream parsers.
        reader.BaseStream.Position = fieldStart;

        byte[] arrayData = reader.ReadBytes(fieldSize);

        switch (arrayClass)
        {
            case ArrayClass.Structure:
            case ArrayClass.Cell:
            case ArrayClass.Character:
                result.Add(fieldName, ParseNonNumeric(arrayData));
                break;
            default:
                // Everything else is kept as raw matrix data under the name from its own header.
                result.Add(fieldName, new NestedObject(new MatlabMatrix(name, arrayData)));
                break;
        }
    }

    return new NestedObject(result);
}
350+
/// <summary>
/// Reads the body of a cell array: one array element per cell, consumed column by column.
/// </summary>
/// <param name="reader">Reader positioned right after the common array header of the cell array.</param>
/// <param name="complex">Complex flag from the array header; not used by this method.</param>
/// <param name="rows">Number of rows of the cell array.</param>
/// <param name="columns">Number of columns of the cell array.</param>
/// <returns>The cell matrix wrapped in a nested object.</returns>
private static NestedObject PopulateCellMatrix(BinaryReader reader, bool complex, int rows, int columns)
{
    MatlabCellMatrix result = new MatlabCellMatrix(rows, columns);

    for (int col = 0; col < columns; col++)
    {
        for (int row = 0; row < rows; row++)
        {
            // To use the regular array parsing methods we need to know how much data to give them.
            ReadElementTag(reader, out _, out var fieldSize, out _);

            // Remember where the cell's array data starts. The header length depends on the
            // array name, so rewinding by a fixed byte count is not reliable.
            long cellStart = reader.BaseStream.Position;

            // We also need to know what the array class is (maybe a nested structure or a cell).
            (ArrayClass arrayClass, _, _, _, string name) = ParseObjectHeader(reader);

            // Reset the reader to the start of the cell's data for the downstream parsers.
            reader.BaseStream.Position = cellStart;

            byte[] arrayData = reader.ReadBytes(fieldSize);

            switch (arrayClass)
            {
                case ArrayClass.Structure:
                case ArrayClass.Cell:
                case ArrayClass.Character:
                    result.Data[row, col] = ParseNonNumeric(arrayData);
                    break;
                default:
                    // Everything else is kept as raw matrix data under the name from its own header.
                    result.Data[row, col] = new NestedObject(new MatlabMatrix(name, arrayData));
                    break;
            }
        }
    }

    return new NestedObject(result);
}
190387
191388 /// <summary>
0 commit comments