55using System . Xml . Serialization ;
66using IO . Mgf ;
77using NUnit . Framework ;
8+ using Parquet ;
89using ThermoRawFileParser ;
910using ThermoRawFileParser . Writer . MzML ;
1011
@@ -283,7 +284,7 @@ public void TestMzML_MS2()
283284 }
284285
285286 [ Test ]
286- public void TestParquet ( )
287+ public void TestParquetCentroid ( )
287288 {
288289 // Get temp path for writing the test mzML
289290 var tempFilePath = Path . GetTempPath ( ) ;
@@ -294,17 +295,45 @@ public void TestParquet()
294295 RawFileParser . Parse ( parseInput ) ;
295296
296297 // Actual test
297- //var xmlSerializer = new XmlSerializer(typeof(mzMLType));
298- //var testMzMl = (mzMLType)xmlSerializer.Deserialize(new FileStream(
299- // Path.Combine(tempFilePath, "small.mzML"), FileMode.Open, FileAccess.Read, FileShare.ReadWrite));
298+ var parquetFilePath = Path . Combine ( tempFilePath , "small.mzparquet" ) ;
300299
301- //Assert.That(testMzMl.run.spectrumList.count, Is.EqualTo("48"));
302- //Assert.That(testMzMl.run.spectrumList.spectrum.Length, Is.EqualTo(48));
300+ using ( var parquetReader = ParquetReader . CreateAsync ( parquetFilePath ) . Result )
301+ {
302+ var groupReader = parquetReader . OpenRowGroupReader ( 0 ) ;
303+ var schema = parquetReader . Schema ;
304+ var scanColumn = groupReader . ReadColumnAsync ( schema . FindDataField ( "scan" ) ) . Result ;
303305
304- //Assert.That(testMzMl.run.chromatogramList.count, Is.EqualTo("1"));
305- //Assert.That(testMzMl.run.chromatogramList.chromatogram.Length, Is.EqualTo(1));
306+ Assert . That ( scanColumn . NumValues , Is . EqualTo ( 48520 ) ) ;
307+ Assert . That ( scanColumn . Statistics . DistinctCount , Is . EqualTo ( 48 ) ) ;
308+ Assert . That ( ( from int p in scanColumn . Data where p == 22 select p ) . Count ( ) , Is . EqualTo ( 1632 ) ) ;
309+ }
310+ }
311+
/// <summary>
/// Parses Data/small.RAW to Parquet with <c>NoPeakPicking</c> set to { 1, 2 }
/// (presumably the MS levels to keep in profile mode; confirm against ParseInput)
/// and verifies the "scan" column of the first row group of the written file.
/// </summary>
[Test]
public void TestParquetProfile()
{
    // Get temp path for writing the test Parquet file
    var tempFilePath = Path.GetTempPath();

    var testRawFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"Data/small.RAW");
    var parseInput = new ParseInput(testRawFile, null, tempFilePath, OutputFormat.Parquet);
    parseInput.NoPeakPicking = new HashSet<int> { 1, 2 };

    RawFileParser.Parse(parseInput);

    // Actual test
    var parquetFilePath = Path.Combine(tempFilePath, "small.mzparquet");

    // GetAwaiter().GetResult() rethrows the original exception on failure, whereas
    // .Result would wrap it in an AggregateException and obscure the test report.
    using (var parquetReader = ParquetReader.CreateAsync(parquetFilePath).GetAwaiter().GetResult())
    {
        var groupReader = parquetReader.OpenRowGroupReader(0);
        var schema = parquetReader.Schema;
        var scanColumn = groupReader
            .ReadColumnAsync(schema.FindDataField("scan"))
            .GetAwaiter()
            .GetResult();

        // Total number of values stored in the "scan" column
        Assert.That(scanColumn.NumValues, Is.EqualTo(305213));

        // Number of distinct scan values reported by the column statistics
        Assert.That(scanColumn.Statistics.DistinctCount, Is.EqualTo(48));

        // Number of entries whose scan value is 22
        var scan22Count = scanColumn.Data.Cast<int>().Count(scan => scan == 22);
        Assert.That(scan22Count, Is.EqualTo(17758));
    }
}
309338 }
310339}
0 commit comments