Skip to content

Commit 653ca74

Browse files
MgfSpectrumWriter optimization
1 parent 6ce17b6 commit 653ca74

File tree

5 files changed

+164
-145
lines changed

5 files changed

+164
-145
lines changed

MainClass.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,13 +430,13 @@ private static void RegularParametersParsing(string[] args)
430430
},
431431
{
432432
"L=|msLevel=",
433-
"Select MS levels (MS1, MS2, etc) included in the output, should be a comma-separated list of integers ( 1,2,3 ) and/or intervals ( 1-3 ), open-end intervals ( 1- ) are allowed",
433+
"Select MS levels (MS1, MS2, etc) included in the output, should be a comma-separated list of integers (1,2,3) and/or intervals (1-3), open-end intervals (1-) are allowed",
434434
v => parseInput.MsLevel = ParseMsLevel(v)
435435
},
436436
{
437437
"P|mgfPrecursor",
438438
"Include precursor scan number in MGF file TITLE",
439-
v => parseInput.MGFPrecursor = v != null
439+
v => parseInput.MgfPrecursor = v != null
440440
},
441441
{
442442
"u:|s3_url:",

ParseInput.cs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@ namespace ThermoRawFileParser
77
{
88
public class ParseInput
99
{
10-
//all ms levels
11-
private readonly HashSet<int> allLevels = new HashSet<int>(new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 });
10+
// All MS levels
11+
private readonly HashSet<int> _allLevels = new HashSet<int>(new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
1212

1313
/// <summary>
1414
/// The RAW file path.
1515
/// </summary>
16-
private string rawFilePath;
16+
private string _rawFilePath;
1717

1818
/// <summary>
1919
/// The RAW folder path.
@@ -22,15 +22,15 @@ public class ParseInput
2222

2323
public string RawFilePath
2424
{
25-
get => rawFilePath;
25+
get => _rawFilePath;
2626
set
2727
{
28-
rawFilePath = value;
28+
_rawFilePath = value;
2929
if (value != null)
3030
{
3131
RawFileNameWithoutExtension = Path.GetFileNameWithoutExtension(value);
3232
var splittedPath = value.Split('/');
33-
rawFileName = splittedPath[splittedPath.Length - 1];
33+
_rawFileName = splittedPath[splittedPath.Length - 1];
3434
}
3535
}
3636
}
@@ -79,7 +79,7 @@ public string RawFilePath
7979

8080
public HashSet<int> MsLevel { get; set; }
8181

82-
public bool MGFPrecursor { get; set; }
82+
public bool MgfPrecursor { get; set; }
8383

8484
private S3Loader S3Loader { get; set; }
8585

@@ -94,7 +94,7 @@ public string RawFilePath
9494
/// <summary>
9595
/// The raw file name.
9696
/// </summary>
97-
private string rawFileName;
97+
private string _rawFileName;
9898

9999
/// <summary>
100100
/// The RAW file name without extension.
@@ -111,7 +111,8 @@ public ParseInput()
111111
LogFormat = LogFormat.DEFAULT;
112112
IgnoreInstrumentErrors = false;
113113
AllDetectors = false;
114-
MsLevel = allLevels;
114+
MsLevel = _allLevels;
115+
MgfPrecursor = false;
115116
}
116117

117118
public ParseInput(string rawFilePath, string rawDirectoryPath, string outputDirectory, OutputFormat outputFormat
@@ -121,6 +122,7 @@ public ParseInput(string rawFilePath, string rawDirectoryPath, string outputDire
121122
RawDirectoryPath = rawDirectoryPath;
122123
OutputDirectory = outputDirectory;
123124
OutputFormat = outputFormat;
125+
MgfPrecursor = true;
124126
}
125127

126128
public void InitializeS3Bucket()

Writer/MgfSpectrumWriter.cs

Lines changed: 87 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,20 @@ public class MgfSpectrumWriter : SpectrumWriter
2020
private const string PositivePolarity = "+";
2121
private const string NegativePolarity = "-";
2222

23-
//filter string
23+
// Filter string
2424
private const string FilterStringIsolationMzPattern = @"ms2 (.*?)@";
2525

26-
//precursor scan number for MS2 scans
26+
// Precursor scan number for MS2 scans
2727
private int _precursorMs1ScanNumber;
2828

29-
// Precursor scan number (value) and isolation m/z (key) for reference in the precursor element of an MS3 spectrum
30-
private readonly LimitedSizeDictionary<string, int> _precursorMs2ScanNumbers = new LimitedSizeDictionary<string, int>(40);
31-
32-
// Precursor scan number for reference in the precursor element of an MS2 spectrum
29+
// Dictionary with isolation m/z (key) and precursor scan number (value) entries
30+
// for reference in the precursor element of an MS3 spectrum
31+
private readonly LimitedSizeDictionary<string, int> _isolationMzToPrecursorScanNumberMapping =
32+
new LimitedSizeDictionary<string, int>(40);
3333

3434
public MgfSpectrumWriter(ParseInput parseInput) : base(parseInput)
3535
{
36-
ParseInput.MsLevel.Remove(1); //MS1 spectra are not supposed to be in MGF
36+
ParseInput.MsLevel.Remove(1); // MS1 spectra are not supposed to be in MGF
3737
}
3838

3939
/// <inheritdoc />
@@ -49,7 +49,7 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
4949
{
5050
if (ParseInput.LogFormat == LogFormat.DEFAULT)
5151
{
52-
var scanProgress = (int)((double)scanNumber / (lastScanNumber - firstScanNumber + 1) * 100);
52+
var scanProgress = (int) ((double) scanNumber / (lastScanNumber - firstScanNumber + 1) * 100);
5353
if (scanProgress % ProgressPercentageStep == 0)
5454
{
5555
if (scanProgress != lastScanProgress)
@@ -60,79 +60,54 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
6060
}
6161
}
6262

63-
// Get each scan from the RAW file
63+
// Get the scan from the RAW file
6464
var scan = Scan.FromFile(rawFile, scanNumber);
6565

66-
// Check to see if the RAW file contains label (high-res) data and if it is present
67-
// then look for any data that is out of order
68-
var time = rawFile.RetentionTimeFromScanNumber(scanNumber);
66+
// Get the retention time
67+
var retentionTime = rawFile.RetentionTimeFromScanNumber(scanNumber);
6968

7069
// Get the scan filter for this scan number
7170
var scanFilter = rawFile.GetFilterForScanNumber(scanNumber);
7271

7372
// Get the scan event for this scan number
7473
var scanEvent = rawFile.GetScanEventForScanNumber(scanNumber);
7574

76-
// precursor reference
77-
var spectrumRef = "";
78-
79-
//keeping track of precursor scan
80-
switch (scanFilter.MSOrder)
75+
// Construct the precursor reference string for the title
76+
var precursorReference = "";
77+
if (ParseInput.MgfPrecursor)
8178
{
82-
case MSOrderType.Ms:
83-
84-
// Keep track of scan number for precursor reference
79+
if (scanFilter.MSOrder == MSOrderType.Ms)
80+
{
81+
// Keep track of the MS1 scan number for precursor reference
8582
_precursorMs1ScanNumber = scanNumber;
86-
87-
break;
88-
case MSOrderType.Ms2:
89-
// Keep track of scan number and isolation m/z for precursor reference
90-
var result = Regex.Match(scanEvent.ToString(), FilterStringIsolationMzPattern);
91-
if (result.Success)
92-
{
93-
if (_precursorMs2ScanNumbers.ContainsKey(result.Groups[1].Value))
94-
{
95-
_precursorMs2ScanNumbers.Remove(result.Groups[1].Value);
96-
}
97-
98-
_precursorMs2ScanNumbers.Add(result.Groups[1].Value, scanNumber);
99-
}
100-
101-
spectrumRef = ConstructSpectrumTitle((int)Device.MS, 1, _precursorMs1ScanNumber);
102-
break;
103-
104-
case MSOrderType.Ms3:
105-
var precursorMs2ScanNumber = _precursorMs2ScanNumbers.Keys.FirstOrDefault(
106-
isolationMz => scanEvent.ToString().Contains(isolationMz));
107-
if (!precursorMs2ScanNumber.IsNullOrEmpty())
108-
{
109-
spectrumRef = ConstructSpectrumTitle((int)Device.MS, 1, _precursorMs2ScanNumbers[precursorMs2ScanNumber]);
110-
}
111-
else
112-
{
113-
throw new InvalidOperationException("Couldn't find a MS2 precursor scan for MS3 scan " + scanEvent);
114-
}
115-
break;
116-
117-
default:
118-
break;
83+
}
84+
else
85+
{
86+
precursorReference = ConstructPrecursorReference(scanFilter.MSOrder, scanNumber, scanEvent);
87+
}
11988
}
12089

121-
122-
// don't include MS1 spectra
123-
if (ParseInput.MsLevel.Contains((int)scanFilter.MSOrder))
90+
// Don't include MS1 spectra
91+
if (ParseInput.MsLevel.Contains((int) scanFilter.MSOrder))
12492
{
125-
IReaction reaction = GetReaction(scanEvent, scanNumber);
93+
var reaction = GetReaction(scanEvent, scanNumber);
12694

12795
Writer.WriteLine("BEGIN IONS");
128-
if
129-
(ParseInput.MGFPrecursor) Writer.WriteLine($"TITLE={ConstructSpectrumTitle((int)Device.MS, 1, scanNumber)} [PRECURSOR={spectrumRef}]");
96+
if (!ParseInput.MgfPrecursor)
97+
{
98+
Writer.WriteLine($"TITLE={ConstructSpectrumTitle((int) Device.MS, 1, scanNumber)}");
99+
}
130100
else
131-
Writer.WriteLine($"TITLE={ConstructSpectrumTitle((int)Device.MS, 1, scanNumber)}");
101+
{
102+
Writer.WriteLine(
103+
$"TITLE={ConstructSpectrumTitle((int) Device.MS, 1, scanNumber)} [PRECURSOR={precursorReference}]");
104+
}
105+
132106
Writer.WriteLine($"SCANS={scanNumber}");
133107
Writer.WriteLine(
134-
$"RTINSECONDS={(time * 60).ToString(CultureInfo.InvariantCulture)}");
135-
// trailer extra data list
108+
$"RTINSECONDS={(retentionTime * 60).ToString(CultureInfo.InvariantCulture)}");
109+
110+
// Trailer extra data list
136111
var trailerData = rawFile.GetTrailerExtraInformation(scanNumber);
137112
int? charge = null;
138113
double? monoisotopicMz = null;
@@ -153,7 +128,7 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
153128
CultureInfo.CurrentCulture);
154129
}
155130

156-
if (trailerData.Labels[i] == "MS" + (int)scanFilter.MSOrder + " Isolation Width:")
131+
if (trailerData.Labels[i] == "MS" + (int) scanFilter.MSOrder + " Isolation Width:")
157132
{
158133
isolationWidth = double.Parse(trailerData.Values[i], NumberStyles.Any,
159134
CultureInfo.CurrentCulture);
@@ -169,7 +144,7 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
169144
selectedIonMz.ToString(CultureInfo.InvariantCulture));
170145
}
171146

172-
// charge
147+
// Charge
173148
if (charge != null)
174149
{
175150
// Scan polarity
@@ -182,12 +157,12 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
182157
Writer.WriteLine($"CHARGE={charge}{polarity}");
183158
}
184159

185-
// write the filter string
160+
// Write the filter string
186161
//Writer.WriteLine($"SCANEVENT={scanEvent.ToString()}");
187162

188163
if (!ParseInput.NoPeakPicking)
189164
{
190-
// check if the scan has a centroid stream
165+
// Check if the scan has a centroid stream
191166
if (scan.HasCentroidStream)
192167
{
193168
if (scan.CentroidScan.Length > 0)
@@ -203,9 +178,9 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
203178
}
204179
}
205180
}
206-
else // otherwise take segmented (low res) scan data
181+
else // Otherwise take segmented (low res) scan data
207182
{
208-
// if the spectrum is profile perform centroiding
183+
// If the spectrum is profile perform centroiding
209184
var segmentedScan = scanEvent.ScanData == ScanDataType.Profile
210185
? Scan.ToCentroid(scan).SegmentedScan
211186
: scan.SegmentedScan;
@@ -221,7 +196,7 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
221196
}
222197
}
223198
}
224-
else // use the segmented data as is
199+
else // Use the segmented data as is
225200
{
226201
for (var i = 0; i < scan.SegmentedScan.Positions.Length; i++)
227202
{
@@ -246,5 +221,48 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
246221
}
247222
}
248223
}
224+
225+
private string ConstructPrecursorReference(MSOrderType msOrder, int scanNumber, IScanEvent scanEvent)
226+
{
227+
// Precursor reference
228+
var precursorReference = "";
229+
230+
switch (msOrder)
231+
{
232+
case MSOrderType.Ms2:
233+
// Keep track of the MS2 scan number and isolation m/z for precursor reference
234+
var result = Regex.Match(scanEvent.ToString(), FilterStringIsolationMzPattern);
235+
if (result.Success)
236+
{
237+
if (_isolationMzToPrecursorScanNumberMapping.ContainsKey(result.Groups[1].Value))
238+
{
239+
_isolationMzToPrecursorScanNumberMapping.Remove(result.Groups[1].Value);
240+
}
241+
242+
_isolationMzToPrecursorScanNumberMapping.Add(result.Groups[1].Value, scanNumber);
243+
}
244+
245+
precursorReference = ConstructSpectrumTitle((int) Device.MS, 1, _precursorMs1ScanNumber);
246+
break;
247+
248+
case MSOrderType.Ms3:
249+
var precursorScanNumber = _isolationMzToPrecursorScanNumberMapping.Keys.FirstOrDefault(
250+
isolationMz => scanEvent.ToString().Contains(isolationMz));
251+
if (!precursorScanNumber.IsNullOrEmpty())
252+
{
253+
precursorReference = ConstructSpectrumTitle((int) Device.MS, 1,
254+
_isolationMzToPrecursorScanNumberMapping[precursorScanNumber]);
255+
}
256+
else
257+
{
258+
throw new InvalidOperationException("Couldn't find a MS2 precursor scan for MS3 scan " +
259+
scanEvent);
260+
}
261+
262+
break;
263+
}
264+
265+
return precursorReference;
266+
}
249267
}
250268
}

0 commit comments

Comments
 (0)