99using ThermoFisher . CommonCore . Data . Business ;
1010using ThermoFisher . CommonCore . Data . FilterEnums ;
1111using ThermoFisher . CommonCore . Data . Interfaces ;
12+ using ThermoRawFileParser . Writer . MzML ;
1213
1314namespace ThermoRawFileParser . Writer
1415{
@@ -60,9 +61,6 @@ public override void Write(IRawDataPlus raw, int firstScanNumber, int lastScanNu
6061 throw new RawFileParserException ( "No MS data in RAW file, no output will be produced" ) ;
6162 }
6263
63- //TODO: Correct iterator based on MS-level filter
64- var enumerator = raw . GetFilteredScanEnumerator ( " " ) ;
65-
6664 ConfigureWriter ( ".mzparquet" ) ;
6765
6866 ParquetSerializerOptions opts = new ParquetSerializerOptions ( ) ;
@@ -71,195 +69,225 @@ public override void Write(IRawDataPlus raw, int firstScanNumber, int lastScanNu
7169
7270 var data = new List < MzParquet > ( ) ;
7371
74- foreach ( var scanNumber in enumerator )
75- {
76- var scanFilter = raw . GetFilterForScanNumber ( scanNumber ) ;
77-
78- // Get the scan event for this scan number
79- var scanEvent = raw . GetScanEventForScanNumber ( scanNumber ) ;
80-
81- // Get scan ms level
82- var msLevel = ( int ) scanFilter . MSOrder ;
72+ var lastScanProgress = 0 ;
8373
84- // Get Scan
85- var scan = Scan . FromFile ( raw , scanNumber ) ;
74+ Log . Info ( String . Format ( "Processing {0} MS scans" , + ( 1 + lastScanNumber - firstScanNumber ) ) ) ;
8675
87- ScanTrailer trailerData ;
76+ for ( var scanNumber = firstScanNumber ; scanNumber <= lastScanNumber ; scanNumber ++ )
77+ {
78+ if ( ParseInput . LogFormat == LogFormat . DEFAULT )
79+ {
80+ var scanProgress = ( int ) ( ( double ) scanNumber / ( lastScanNumber - firstScanNumber + 1 ) * 100 ) ;
81+ if ( scanProgress % ProgressPercentageStep == 0 )
82+ {
83+ if ( scanProgress != lastScanProgress )
84+ {
85+ Console . Write ( "" + scanProgress + "% " ) ;
86+ lastScanProgress = scanProgress ;
87+ }
88+ }
89+ }
8890
8991 try
9092 {
91- trailerData = new ScanTrailer ( raw . GetTrailerExtraInformation ( scanNumber ) ) ;
93+ int level = ( int ) raw . GetScanEventForScanNumber ( scanNumber ) . MSOrder ; //applying MS level filter
94+ if ( ParseInput . MsLevel . Contains ( level ) )
95+ AddScan ( raw , scanNumber , data ) ;
9296 }
9397 catch ( Exception ex )
9498 {
95- Log . WarnFormat ( "Cannot load trailer infromation for scan {0} due to following exception\n {1}" , scanNumber , ex . Message ) ;
96- ParseInput . NewWarn ( ) ;
97- trailerData = new ScanTrailer ( ) ;
99+ Log . Error ( $ "Scan #{ scanNumber } cannot be processed because of the following exception: { ex . Message } \n { ex . StackTrace } ") ;
100+ ParseInput . NewError ( ) ;
98101 }
99102
100- int ? trailer_charge = trailerData . AsPositiveInt ( "Charge State:" ) ;
101- double ? trailer_mz = trailerData . AsDouble ( "Monoisotopic M/Z:" ) ;
102- double ? trailer_isolationWidth = trailerData . AsDouble ( "MS" + msLevel + " Isolation Width:" ) ;
103- double ? FAIMSCV = null ;
104- if ( trailerData . AsBool ( "FAIMS Voltage On:" ) . GetValueOrDefault ( false ) )
105- FAIMSCV = trailerData . AsDouble ( "FAIMS CV:" ) ;
106-
107- double rt = raw . RetentionTimeFromScanNumber ( scanNumber ) ;
108- int precursor_scan = 0 ;
109- PrecursorData precursor_data = new PrecursorData
110- {
111- isolation_lower = null ,
112- isolation_upper = null ,
113- mz = null
114-
115- } ;
116-
117- if ( msLevel == 1 )
103+ // If we have enough ions to write a row group, do so
104+ // - some row groups might have more than this number of ions
105+ // but this ensures that all ions from a single scan are always
106+ // present in the same row group (critical property of mzparquet)
107+ if ( data . Count >= 1_048_576 )
118108 {
119- // Keep track of scan number for precursor reference
120- _precursorScanNumbers [ "" ] = scanNumber ;
121- _precursorTree [ scanNumber ] = new PrecursorInfo ( ) ;
109+ var task = ParquetSerializer . SerializeAsync ( data , Writer . BaseStream , opts ) ;
110+ task . Wait ( ) ;
111+ opts . Append = true ;
112+ data . Clear ( ) ;
113+ Log . Debug ( "Writing next row group" ) ;
122114 }
123- else if ( msLevel > 1 )
124- {
125- // Keep track of scan number and isolation m/z for precursor reference
126- var result = _filterStringIsolationMzPattern . Match ( scanEvent . ToString ( ) ) ;
127- if ( result . Success )
128- {
129- if ( _precursorScanNumbers . ContainsKey ( result . Groups [ 1 ] . Value ) )
130- {
131- _precursorScanNumbers . Remove ( result . Groups [ 1 ] . Value ) ;
132- }
115+ }
133116
134- _precursorScanNumbers . Add ( result . Groups [ 1 ] . Value , scanNumber ) ;
135- }
117+ // serialize any remaining ions into the final row group
118+ if ( data . Count > 0 )
119+ {
120+ var task = ParquetSerializer . SerializeAsync ( data , Writer . BaseStream , opts ) ;
121+ task . Wait ( ) ;
122+ Log . Debug ( "Writing final row group" ) ;
123+ }
124+ }
136125
137- //update precursor scan if it is provided in trailer data
138- var trailerMasterScan = trailerData . AsPositiveInt ( "Master Scan Number:" ) ;
139- if ( trailerMasterScan . HasValue )
140- {
141- precursor_scan = trailerMasterScan . Value ;
142- }
143- else //try getting it from the scan filter
144- {
145- precursor_scan = GetParentFromScanString ( result . Groups [ 1 ] . Value ) ;
146- }
126+ private void AddScan ( IRawDataPlus raw , int scanNumber , List < MzParquet > data )
127+ {
128+ var scanFilter = raw . GetFilterForScanNumber ( scanNumber ) ;
147129
148- //finding precursor scan failed
149- if ( precursor_scan == - 2 )
150- {
151- Log . Warn ( $ "Cannot find precursor scan for scan# { scanNumber } ") ;
152- _precursorTree [ - 2 ] = new PrecursorInfo ( 0 , msLevel , FindLastReaction ( scanEvent , msLevel ) , null ) ;
153- }
130+ // Get the scan event for this scan number
131+ var scanEvent = raw . GetScanEventForScanNumber ( scanNumber ) ;
154132
155- //Parsing the last reaction
156- try
157- {
158- try //since there is no direct way to get the number of reactions available, it is necessary to try and fail
159- {
160- scanEvent . GetReaction ( _precursorTree [ precursor_scan ] . ReactionCount ) ;
161- }
162- catch ( ArgumentOutOfRangeException ex )
163- {
164- Log . Debug ( $ "Using Tribrid decision tree fix for scan# { scanNumber } ") ;
165- //Is it a decision tree scheduled scan on tribrid?
166- if ( msLevel == _precursorTree [ precursor_scan ] . MSLevel )
167- {
168- precursor_scan = GetParentFromScanString ( result . Groups [ 1 ] . Value ) ;
169- }
170- else
171- {
172- throw new RawFileParserException (
173- $ "Tribrid decision tree fix failed - cannot get reaction# { _precursorTree [ precursor_scan ] . ReactionCount } from { scanEvent . ToString ( ) } ",
174- ex ) ;
175- }
176- }
133+ // Get scan ms level
134+ var msLevel = ( int ) scanFilter . MSOrder ;
177135
178- // Get Precursor m/z and isolation window borders
179- precursor_data = GetPrecursorData ( precursor_scan , scanEvent , trailer_mz , trailer_isolationWidth , out var reactionCount ) ;
136+ // Get Scan
137+ var scan = Scan . FromFile ( raw , scanNumber ) ;
138+ ScanTrailer trailerData ;
180139
181- //save precursor information for later reference
182- _precursorTree [ scanNumber ] = new PrecursorInfo ( precursor_scan , msLevel , reactionCount , null ) ;
183- }
184- catch ( Exception e )
185- {
186- var extra = ( e . InnerException is null ) ? "" : $ "\n { e . InnerException . StackTrace } ";
140+ try
141+ {
142+ trailerData = new ScanTrailer ( raw . GetTrailerExtraInformation ( scanNumber ) ) ;
143+ }
144+ catch ( Exception ex )
145+ {
146+ Log . WarnFormat ( "Cannot load trailer infromation for scan {0} due to following exception\n {1}" , scanNumber , ex . Message ) ;
147+ ParseInput . NewWarn ( ) ;
148+ trailerData = new ScanTrailer ( ) ;
149+ }
150+
151+ int ? trailer_charge = trailerData . AsPositiveInt ( "Charge State:" ) ;
152+ double ? trailer_mz = trailerData . AsDouble ( "Monoisotopic M/Z:" ) ;
153+ double ? trailer_isolationWidth = trailerData . AsDouble ( "MS" + msLevel + " Isolation Width:" ) ;
154+ double ? FAIMSCV = null ;
155+ if ( trailerData . AsBool ( "FAIMS Voltage On:" ) . GetValueOrDefault ( false ) )
156+ FAIMSCV = trailerData . AsDouble ( "FAIMS CV:" ) ;
187157
188- Log . Warn ( $ "Could not get precursor data for scan# { scanNumber } - precursor information for this and dependent scans will be empty\n Exception details:{ e . Message } \n { e . StackTrace } \n { extra } ") ;
189- ParseInput . NewWarn ( ) ;
158+ double rt = raw . RetentionTimeFromScanNumber ( scanNumber ) ;
159+ int precursor_scan = 0 ;
160+ PrecursorData precursor_data = new PrecursorData
161+ {
162+ isolation_lower = null ,
163+ isolation_upper = null ,
164+ mz = null
165+
166+ } ;
190167
191- _precursorTree [ scanNumber ] = new PrecursorInfo ( precursor_scan , 1 , 0 , null ) ;
168+ if ( msLevel == 1 )
169+ {
170+ // Keep track of scan number for precursor reference
171+ _precursorScanNumbers [ "" ] = scanNumber ;
172+ _precursorTree [ scanNumber ] = new PrecursorInfo ( ) ;
173+ }
174+ else if ( msLevel > 1 )
175+ {
176+ // Keep track of scan number and isolation m/z for precursor reference
177+ var result = _filterStringIsolationMzPattern . Match ( scanEvent . ToString ( ) ) ;
178+ if ( result . Success )
179+ {
180+ if ( _precursorScanNumbers . ContainsKey ( result . Groups [ 1 ] . Value ) )
181+ {
182+ _precursorScanNumbers . Remove ( result . Groups [ 1 ] . Value ) ;
192183 }
193184
185+ _precursorScanNumbers . Add ( result . Groups [ 1 ] . Value , scanNumber ) ;
194186 }
195187
196- double [ ] masses ;
197- double [ ] intensities ;
188+ //update precursor scan if it is provided in trailer data
189+ var trailerMasterScan = trailerData . AsPositiveInt ( "Master Scan Number:" ) ;
190+ if ( trailerMasterScan . HasValue )
191+ {
192+ precursor_scan = trailerMasterScan . Value ;
193+ }
194+ else //try getting it from the scan filter
195+ {
196+ precursor_scan = GetParentFromScanString ( result . Groups [ 1 ] . Value ) ;
197+ }
198198
199- if ( ! ParseInput . NoPeakPicking . Contains ( msLevel ) )
199+ //finding precursor scan failed
200+ if ( precursor_scan == - 2 )
200201 {
201- // Check if the scan has a centroid stream
202- if ( scan . HasCentroidStream )
202+ Log . Warn ( $ "Cannot find precursor scan for scan# { scanNumber } ") ;
203+ _precursorTree [ - 2 ] = new PrecursorInfo ( 0 , msLevel , FindLastReaction ( scanEvent , msLevel ) , null ) ;
204+ }
205+
206+ //Parsing the last reaction
207+ try
208+ {
209+ try //since there is no direct way to get the number of reactions available, it is necessary to try and fail
203210 {
204- masses = scan . CentroidScan . Masses ;
205- intensities = scan . CentroidScan . Intensities ;
211+ scanEvent . GetReaction ( _precursorTree [ precursor_scan ] . ReactionCount ) ;
206212 }
207- else // otherwise take the segmented (low res) scan
213+ catch ( ArgumentOutOfRangeException ex )
208214 {
209- // If the spectrum is profile perform centroiding
210- var segmentedScan = scanEvent . ScanData == ScanDataType . Profile
211- ? Scan . ToCentroid ( scan ) . SegmentedScan
212- : scan . SegmentedScan ;
213-
214- masses = segmentedScan . Positions ;
215- intensities = segmentedScan . Intensities ;
215+ Log . Debug ( $ "Using Tribrid decision tree fix for scan# { scanNumber } ") ;
216+ //Is it a decision tree scheduled scan on tribrid?
217+ if ( msLevel == _precursorTree [ precursor_scan ] . MSLevel )
218+ {
219+ precursor_scan = GetParentFromScanString ( result . Groups [ 1 ] . Value ) ;
220+ }
221+ else
222+ {
223+ throw new RawFileParserException (
224+ $ "Tribrid decision tree fix failed - cannot get reaction# { _precursorTree [ precursor_scan ] . ReactionCount } from { scanEvent . ToString ( ) } ",
225+ ex ) ;
226+ }
216227 }
228+
229+ // Get Precursor m/z and isolation window borders
230+ precursor_data = GetPrecursorData ( precursor_scan , scanEvent , trailer_mz , trailer_isolationWidth , out var reactionCount ) ;
231+
232+ //save precursor information for later reference
233+ _precursorTree [ scanNumber ] = new PrecursorInfo ( precursor_scan , msLevel , reactionCount , null ) ;
217234 }
218- else // use the segmented data as is
235+ catch ( Exception e )
219236 {
220- masses = scan . SegmentedScan . Positions ;
221- intensities = scan . SegmentedScan . Intensities ;
222- }
237+ var extra = ( e . InnerException is null ) ? "" : $ "\n { e . InnerException . StackTrace } ";
223238
224- // Add a row to parquet file for every m/z value in this scan
225- for ( int i = 0 ; i < masses . Length ; i ++ )
226- {
227- MzParquet m ;
228- m . rt = ( float ) rt ;
229- m . scan = ( uint ) scanNumber ;
230- m . level = ( uint ) msLevel ;
231- m . intensity = ( float ) intensities [ i ] ;
232- m . mz = ( float ) masses [ i ] ;
233- m . isolation_lower = precursor_data . isolation_lower ;
234- m . isolation_upper = precursor_data . isolation_upper ;
235- m . precursor_scan = precursor_scan ;
236- m . precursor_mz = precursor_data . mz ;
237- m . precursor_charge = ( uint ? ) trailer_charge ;
238- m . ion_mobility = ( float ? ) FAIMSCV ;
239- data . Add ( m ) ;
239+ Log . Warn ( $ "Could not get precursor data for scan# { scanNumber } - precursor information for this and dependent scans will be empty\n Exception details:{ e . Message } \n { e . StackTrace } \n { extra } ") ;
240+ ParseInput . NewWarn ( ) ;
241+
242+ _precursorTree [ scanNumber ] = new PrecursorInfo ( precursor_scan , 1 , 0 , null ) ;
240243 }
241244
242- // If we have enough ions to write a row group, do so
243- // - some row groups might have more than this number of ions
244- // but this ensures that all ions from a single scan are always
245- // present in the same row group (critical property of mzparquet)
246- if ( data . Count >= 1_048_576 )
245+ }
246+
247+ double [ ] masses ;
248+ double [ ] intensities ;
249+
250+ if ( ! ParseInput . NoPeakPicking . Contains ( msLevel ) )
251+ {
252+ // Check if the scan has a centroid stream
253+ if ( scan . HasCentroidStream )
247254 {
248- var task = ParquetSerializer . SerializeAsync ( data , Writer . BaseStream , opts ) ;
249- task . Wait ( ) ;
250- opts . Append = true ;
251- data . Clear ( ) ;
252- Log . Debug ( "Writing next row group" ) ;
255+ masses = scan . CentroidScan . Masses ;
256+ intensities = scan . CentroidScan . Intensities ;
253257 }
258+ else // otherwise take the segmented (low res) scan
259+ {
260+ // If the spectrum is profile perform centroiding
261+ var segmentedScan = scanEvent . ScanData == ScanDataType . Profile
262+ ? Scan . ToCentroid ( scan ) . SegmentedScan
263+ : scan . SegmentedScan ;
254264
265+ masses = segmentedScan . Positions ;
266+ intensities = segmentedScan . Intensities ;
267+ }
268+ }
269+ else // use the segmented data as is
270+ {
271+ masses = scan . SegmentedScan . Positions ;
272+ intensities = scan . SegmentedScan . Intensities ;
255273 }
256274
257- // serialize any remaining ions into the final row group
258- if ( data . Count > 0 )
275+ // Add a row to parquet file for every m/z value in this scan
276+ for ( int i = 0 ; i < masses . Length ; i ++ )
259277 {
260- var task = ParquetSerializer . SerializeAsync ( data , Writer . BaseStream , opts ) ;
261- task . Wait ( ) ;
262- Log . Debug ( "Writing final row group" ) ;
278+ MzParquet m ;
279+ m . rt = ( float ) rt ;
280+ m . scan = ( uint ) scanNumber ;
281+ m . level = ( uint ) msLevel ;
282+ m . intensity = ( float ) intensities [ i ] ;
283+ m . mz = ( float ) masses [ i ] ;
284+ m . isolation_lower = precursor_data . isolation_lower ;
285+ m . isolation_upper = precursor_data . isolation_upper ;
286+ m . precursor_scan = precursor_scan ;
287+ m . precursor_mz = precursor_data . mz ;
288+ m . precursor_charge = ( uint ? ) trailer_charge ;
289+ m . ion_mobility = ( float ? ) FAIMSCV ;
290+ data . Add ( m ) ;
263291 }
264292 }
265293
0 commit comments