Skip to content

Commit 1e9b500

Browse files
committed
Move PrecursorTree to general SpectrumWriter
1 parent 8d880ad commit 1e9b500

File tree

4 files changed

+81
-178
lines changed

4 files changed

+81
-178
lines changed

Writer/MgfSpectrumWriter.cs

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
using System;
2-
using System.Collections.Generic;
32
using System.Globalization;
4-
using System.Linq;
53
using System.Reflection;
6-
using System.Text.RegularExpressions;
74
using log4net;
85
using ThermoFisher.CommonCore.Data.Business;
96
using ThermoFisher.CommonCore.Data.FilterEnums;
@@ -19,15 +16,9 @@ public class MgfSpectrumWriter : SpectrumWriter
1916
private const string PositivePolarity = "+";
2017
private const string NegativePolarity = "-";
2118

22-
// Filter string
23-
private readonly Regex _filterStringIsolationMzPattern = new Regex(@"ms\d+ (.+?) \[");
24-
2519
// Precursor scan number for MSn scans
2620
private int _precursorScanNumber;
2721

28-
// Precursor scan number (value) and isolation m/z (key) for reference in the precursor element of an MSn spectrum
29-
private readonly Dictionary<string, int> _precursorScanNumbers = new Dictionary<string, int>();
30-
3122
public MgfSpectrumWriter(ParseInput parseInput) : base(parseInput)
3223
{
3324
ParseInput.MsLevel.Remove(1); // MS1 spectra are not supposed to be in MGF
@@ -126,23 +117,7 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
126117
}
127118
else //try getting it from the scan filter
128119
{
129-
var parts = Regex.Split(result.Groups[1].Value, " ");
130-
131-
//find the position of the first (from the end) precursor with a different mass
132-
//to account for possible supplementary activations written in the filter
133-
var lastIonMass = parts.Last().Split('@').First();
134-
int last = parts.Length;
135-
while (last > 0 &&
136-
parts[last - 1].Split('@').First() == lastIonMass)
137-
{
138-
last--;
139-
}
140-
141-
string parentFilter = String.Join(" ", parts.Take(last));
142-
if (_precursorScanNumbers.ContainsKey(parentFilter))
143-
{
144-
_precursorScanNumber = _precursorScanNumbers[parentFilter];
145-
}
120+
_precursorScanNumber = GetParentFromScanString(result.Groups[1].Value);
146121
}
147122

148123
if (_precursorScanNumber > 0)
@@ -151,7 +126,8 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
151126
}
152127
else
153128
{
154-
Log.Error($"Failed finding precursor for {scanNumber}");
129+
Log.Error($"Cannot find precursor scan for scan# {scanNumber}");
130+
_precursorTree[-2] = new PrecursorInfo(0, msLevel, FindLastReaction(scanEvent, msLevel), null);
155131
ParseInput.NewError();
156132
}
157133
}

Writer/MzMlSpectrumWriter.cs

Lines changed: 1 addition & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@ public class MzMlSpectrumWriter : SpectrumWriter
2727
private static readonly ILog Log =
2828
LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);
2929

30-
private readonly Regex _filterStringIsolationMzPattern = new Regex(@"ms\d+ (.+?) \[");
31-
3230
// Tune version < 3 produces multiple trailer entries like "SPS Mass [number]"
3331
private readonly Regex _spSentry = new Regex(@"SPS Mass\s+\d+:");
3432

@@ -45,12 +43,6 @@ public class MzMlSpectrumWriter : SpectrumWriter
4543
private readonly Dictionary<IonizationModeType, CVParamType> _ionizationTypes =
4644
new Dictionary<IonizationModeType, CVParamType>();
4745

48-
// Precursor scan number (value) and isolation m/z (key) for reference in the precursor element of an MSn spectrum
49-
private readonly Dictionary<string, int> _precursorScanNumbers = new Dictionary<string, int>();
50-
51-
//Precursor information for scans
52-
private Dictionary<int, PrecursorInfo> _precursorTree = new Dictionary<int, PrecursorInfo>();
53-
5446
private const string SourceFileId = "RAW1";
5547
private readonly XmlSerializerFactory _factory = new XmlSerializerFactory();
5648
private const string Ns = "http://psi.hupo.org/ms/mzml";
@@ -68,8 +60,6 @@ public MzMlSpectrumWriter(ParseInput parseInput) : base(parseInput)
6860
_mzMlNamespace.Add(string.Empty, "http://psi.hupo.org/ms/mzml");
6961
_doIndexing = ParseInput.OutputFormat == OutputFormat.IndexMzML;
7062
_osOffset = Environment.NewLine == "\n" ? 0 : 1;
71-
_precursorScanNumbers[""] = -1;
72-
_precursorTree[-1] = new PrecursorInfo();
7363
}
7464

7565
/// <inheritdoc />
@@ -639,7 +629,6 @@ public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastSc
639629

640630
_writer.WriteValue(BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant());
641631
_writer.WriteEndElement(); // fileChecksum
642-
643632
_writer.WriteEndElement(); // indexedmzML
644633
}
645634

@@ -1374,6 +1363,7 @@ private SpectrumType ConstructMSSpectrum(int scanNumber)
13741363
{
13751364
Log.Warn($"Cannot find precursor scan for scan# {scanNumber}");
13761365
_precursorTree[-2] = new PrecursorInfo(0, msLevel, FindLastReaction(scanEvent, msLevel), new PrecursorType[0]);
1366+
ParseInput.NewWarn();
13771367
}
13781368

13791369
try
@@ -1938,46 +1928,6 @@ private SpectrumType ConstructMSSpectrum(int scanNumber)
19381928

19391929
return spectrum;
19401930
}
1941-
1942-
private int FindLastReaction(IScanEvent scanEvent, int msLevel)
1943-
{
1944-
int lastReactionIndex = msLevel - 2;
1945-
1946-
//iteratively try to find the last available reaction index
1947-
while(true)
1948-
{
1949-
try
1950-
{
1951-
scanEvent.GetReaction(lastReactionIndex + 1);
1952-
}
1953-
catch (ArgumentOutOfRangeException)
1954-
{
1955-
//stop trying
1956-
break;
1957-
}
1958-
1959-
lastReactionIndex++;
1960-
}
1961-
1962-
//supplemental activation flag is on -> one of the levels (not necessarily the last one) used supplemental activation
1963-
//check last two activations
1964-
if (scanEvent.SupplementalActivation == TriState.On)
1965-
{
1966-
var lastActivation = scanEvent.GetReaction(lastReactionIndex).ActivationType;
1967-
var beforeLastActivation = scanEvent.GetReaction(lastReactionIndex - 1).ActivationType;
1968-
1969-
if ((beforeLastActivation == ActivationType.ElectronTransferDissociation || beforeLastActivation == ActivationType.ElectronCaptureDissociation) &&
1970-
(lastActivation == ActivationType.CollisionInducedDissociation || lastActivation == ActivationType.HigherEnergyCollisionalDissociation))
1971-
return lastReactionIndex - 1; //ETD or ECD followed by HCD or CID -> supplemental activation in the last level (move the last reaction one step back)
1972-
else
1973-
return lastReactionIndex;
1974-
}
1975-
else //just use the last one
1976-
{
1977-
return lastReactionIndex;
1978-
}
1979-
}
1980-
19811931
private SpectrumType ConstructPDASpectrum(int scanNumber, int instrumentNumber)
19821932
{
19831933
// Get each scan from the RAW file
@@ -2558,29 +2508,6 @@ private PrecursorListType ConstructPrecursorList(int precursorScanNumber, IScanE
25582508

25592509
}
25602510

2561-
private int GetParentFromScanString(string scanString)
2562-
{
2563-
var parts = Regex.Split(scanString, " ");
2564-
2565-
//find the position of the first (from the end) precursor with a different mass
2566-
//to account for possible supplementary activations written in the filter
2567-
var lastIonMass = parts.Last().Split('@').First();
2568-
int last = parts.Length;
2569-
while (last > 0 &&
2570-
parts[last - 1].Split('@').First() == lastIonMass)
2571-
{
2572-
last--;
2573-
}
2574-
2575-
string parentFilter = String.Join(" ", parts.Take(last));
2576-
if (_precursorScanNumbers.ContainsKey(parentFilter))
2577-
{
2578-
return _precursorScanNumbers[parentFilter];
2579-
}
2580-
2581-
return -2; //unsuccessful parsing
2582-
}
2583-
25842511
/// <summary>
25852512
/// Populate the scan list element. Full version used for mass spectra,
25862513
/// having Scan Event, scan Filter etc

Writer/ParquetSpectrumWriter.cs

Lines changed: 1 addition & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,11 @@
11
using System;
22
using System.Collections.Generic;
3-
using System.Linq;
43
using System.Reflection;
5-
using System.Text.RegularExpressions;
64
using log4net;
75
using Parquet.Serialization;
8-
using ThermoFisher.CommonCore.Data;
96
using ThermoFisher.CommonCore.Data.Business;
107
using ThermoFisher.CommonCore.Data.FilterEnums;
118
using ThermoFisher.CommonCore.Data.Interfaces;
12-
using ThermoRawFileParser.Writer.MzML;
139

1410
namespace ThermoRawFileParser.Writer
1511
{
@@ -40,18 +36,9 @@ public class ParquetSpectrumWriter : SpectrumWriter
4036
private static readonly ILog Log =
4137
LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);
4238

43-
private readonly Regex _filterStringIsolationMzPattern = new Regex(@"ms\d+ (.+?) \[");
44-
45-
// Precursor scan number (value) and isolation m/z (key) for reference in the precursor element of an MSn spectrum
46-
private readonly Dictionary<string, int> _precursorScanNumbers = new Dictionary<string, int>();
47-
48-
//Precursor information for scans
49-
private Dictionary<int, PrecursorInfo> _precursorTree = new Dictionary<int, PrecursorInfo>();
50-
5139
public ParquetSpectrumWriter(ParseInput parseInput) : base(parseInput)
5240
{
53-
_precursorScanNumbers[""] = -1;
54-
_precursorTree[-1] = new PrecursorInfo();
41+
//nothing to do here
5542
}
5643

5744
public override void Write(IRawDataPlus raw, int firstScanNumber, int lastScanNumber)
@@ -291,69 +278,6 @@ private void AddScan(IRawDataPlus raw, int scanNumber, List<MzParquet> data)
291278
}
292279
}
293280

294-
private int GetParentFromScanString(string scanString)
295-
{
296-
var parts = Regex.Split(scanString, " ");
297-
298-
//find the position of the first (from the end) precursor with a different mass
299-
//to account for possible supplementary activations written in the filter
300-
var lastIonMass = parts.Last().Split('@').First();
301-
int last = parts.Length;
302-
while (last > 0 &&
303-
parts[last - 1].Split('@').First() == lastIonMass)
304-
{
305-
last--;
306-
}
307-
308-
string parentFilter = String.Join(" ", parts.Take(last));
309-
if (_precursorScanNumbers.ContainsKey(parentFilter))
310-
{
311-
return _precursorScanNumbers[parentFilter];
312-
}
313-
314-
return -2; //unsuccessful parsing
315-
}
316-
317-
318-
private int FindLastReaction(IScanEvent scanEvent, int msLevel)
319-
{
320-
int lastReactionIndex = msLevel - 2;
321-
322-
//iteratively try to find the last available reaction index
323-
while (true)
324-
{
325-
try
326-
{
327-
scanEvent.GetReaction(lastReactionIndex + 1);
328-
}
329-
catch (ArgumentOutOfRangeException)
330-
{
331-
//stop trying
332-
break;
333-
}
334-
335-
lastReactionIndex++;
336-
}
337-
338-
//supplemental activation flag is on -> one of the levels (not necessarily the last one) used supplemental activation
339-
//check last two activations
340-
if (scanEvent.SupplementalActivation == TriState.On)
341-
{
342-
var lastActivation = scanEvent.GetReaction(lastReactionIndex).ActivationType;
343-
var beforeLastActivation = scanEvent.GetReaction(lastReactionIndex - 1).ActivationType;
344-
345-
if ((beforeLastActivation == ActivationType.ElectronTransferDissociation || beforeLastActivation == ActivationType.ElectronCaptureDissociation) &&
346-
(lastActivation == ActivationType.CollisionInducedDissociation || lastActivation == ActivationType.HigherEnergyCollisionalDissociation))
347-
return lastReactionIndex - 1; //ETD or ECD followed by HCD or CID -> supplemental activation in the last level (move the last reaction one step back)
348-
else
349-
return lastReactionIndex;
350-
}
351-
else //just use the last one
352-
{
353-
return lastReactionIndex;
354-
}
355-
}
356-
357281
private PrecursorData GetPrecursorData(int precursorScanNumber, IScanEventBase scanEvent,
358282
double? monoisotopicMz, double? isolationWidth, out int reactionCount)
359283
{

0 commit comments

Comments
 (0)