Skip to content

Commit 6ce17b6

Browse files
Merge branch 'SPS' into master
2 parents 3a8507e + 806f582 commit 6ce17b6

File tree

10 files changed

+115
-36
lines changed

10 files changed

+115
-36
lines changed

MainClass.cs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public static class MainClass
1919
private static readonly ILog Log =
2020
LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);
2121

22-
public const string Version = "1.3.1";
22+
public const string Version = "1.3.2";
2323

2424
public static void Main(string[] args)
2525
{
@@ -424,6 +424,10 @@ private static void RegularParametersParsing(string[] args)
424424
"e|ignoreInstrumentErrors", "Ignore missing properties by the instrument.",
425425
v => parseInput.IgnoreInstrumentErrors = v != null
426426
},
427+
{
428+
"x|includeExceptionData", "Include reference and exception data",
429+
v => parseInput.ExData = v != null
430+
},
427431
{
428432
"L=|msLevel=",
429433
"Select MS levels (MS1, MS2, etc) included in the output, should be a comma-separated list of integers ( 1,2,3 ) and/or intervals ( 1-3 ), open-end intervals ( 1- ) are allowed",
@@ -778,7 +782,7 @@ private static HashSet<int> ParseMsLevel(string inputString)
778782
if (!valid.IsMatch(inputString))
779783
throw new OptionException("Invalid characters in msLevel key", "msLevel");
780784

781-
foreach (var piece in inputString.Split(new char[] { ',' }))
785+
foreach (var piece in inputString.Split(new char[] {','}))
782786
{
783787
try
784788
{
@@ -821,7 +825,8 @@ private static HashSet<int> ParseMsLevel(string inputString)
821825

822826
catch (Exception ex)
823827
{
824-
throw new OptionException(String.Format("Cannot parse part of msLevel input: '{0}'", piece), "msLevel", ex);
828+
throw new OptionException(String.Format("Cannot parse part of msLevel input: '{0}'", piece),
829+
"msLevel", ex);
825830
}
826831
}
827832

ParseInput.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ public string RawFilePath
7575

7676
public bool IgnoreInstrumentErrors { get; set; }
7777

78+
public bool ExData { get; set; }
79+
7880
public HashSet<int> MsLevel { get; set; }
7981

8082
public bool MGFPrecursor { get; set; }

Properties/AssemblyInfo.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
// set of attributes. Change these attribute values to modify the information
66
// associated with an assembly.
77
[assembly: AssemblyTitle("ThermoRawFileParser")]
8-
[assembly: AssemblyDescription("")]
8+
[assembly: AssemblyDescription("Modular, Scalable, and Cross-Platform RAW File Conversion [PMID 31755270]")]
99
[assembly: AssemblyConfiguration("")]
1010
[assembly: AssemblyCompany("")]
1111
[assembly: AssemblyProduct("ThermoRawFileParser")]
@@ -31,7 +31,7 @@
3131
// You can specify all the values or you can default the Build and Revision Numbers
3232
// by using the '*' as shown below:
3333
// [assembly: AssemblyVersion("1.0.*")]
34-
[assembly: AssemblyVersion("1.3.1.0")]
35-
[assembly: AssemblyFileVersion("1.3.1.0")]
34+
[assembly: AssemblyVersion("1.3.2.0")]
35+
[assembly: AssemblyFileVersion("1.3.2.0")]
3636

3737
[assembly: log4net.Config.XmlConfigurator(ConfigFile = "log4net.config")]

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
Wrapper around the .NET (C#) ThermoFisher ThermoRawFileReader library for running on Linux with Mono (works on Windows too). It takes a Thermo RAW file as input and outputs a metadata file and the spectra in 3 possible formats:
44
* MGF: MS2 and MS3 spectra
5-
* mzML and indexed mzML: both MS1, MS2 and MS3 spectra
5+
* mzML and indexed mzML: MS1, MS2 and MS3 spectra
66
* Apache Parquet: under development
77

88
As of version 1.2.0, 2 subcommands are available (shoutout to the [eubic 2020 developers meeting](https://eubic-ms.org/events/2020-developers-meeting/), see [usage](#usage) for examples):
@@ -76,6 +76,7 @@ optional subcommands are xic|query (use [subcommand] -h for more info]):
7676
verbose.
7777
-e, --ignoreInstrumentErrors
7878
Ignore missing properties by the instrument.
79+
-x, --includeExceptionData Include reference and exception data
7980
-L, --msLevel=VALUE Select MS levels (MS1, MS2, etc) included in the
8081
output, should be a comma-separated list of
8182
integers ( 1,2,3 ) and/or intervals ( 1-3 ),
@@ -112,7 +113,7 @@ usage is:
112113
Thermo library. By default peak picking is
113114
enabled.
114115
-s, --stdout Pipes the output into standard output. Logging is
115-
being turned off
116+
being turned off.
116117
```
117118
### xic subcommand
118119
Return one or more chromatograms based on query JSON input.

RawFileParser.cs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.IO;
3+
using System.Linq;
34
using ThermoFisher.CommonCore.Data;
45
using ThermoFisher.CommonCore.Data.Business;
56
using ThermoFisher.CommonCore.Data.Interfaces;
@@ -23,9 +24,12 @@ public static void Parse(ParseInput parseInput)
2324
{
2425
Log.Info("Started analyzing folder " + parseInput.RawDirectoryPath);
2526

26-
var rawFilesPath =
27-
Directory.EnumerateFiles(parseInput.RawDirectoryPath);
28-
if (Directory.GetFiles(parseInput.RawDirectoryPath, "*", SearchOption.TopDirectoryOnly).Length == 0)
27+
var rawFilesPath = Directory
28+
.EnumerateFiles(parseInput.RawDirectoryPath, "*", SearchOption.TopDirectoryOnly)
29+
.Where(s => s.ToLower().EndsWith("raw")).ToArray();
30+
Log.Info(String.Format("The folder contains {0} RAW files", rawFilesPath.Length));
31+
32+
if (rawFilesPath.Length == 0)
2933
{
3034
Log.Debug("No raw files found in folder");
3135
throw new RawFileParserException("No raw files found in folder!");
@@ -99,6 +103,8 @@ private static void ProcessFile(ParseInput parseInput)
99103
// selected instrument to the MS instrument, first instance of it
100104
rawFile.SelectInstrument(Device.MS, 1);
101105

106+
rawFile.IncludeReferenceAndExceptionData = parseInput.ExData;
107+
102108
// Get the first and last scan from the RAW file
103109
var firstScanNumber = rawFile.RunHeaderEx.FirstSpectrum;
104110
var lastScanNumber = rawFile.RunHeaderEx.LastSpectrum;

ThermoRawFileParser.csproj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@
5656
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
5757
<Prefer32Bit>false</Prefer32Bit>
5858
</PropertyGroup>
59+
<PropertyGroup>
60+
<StartupObject>ThermoRawFileParser.MainClass</StartupObject>
61+
</PropertyGroup>
5962
<ItemGroup>
6063
<Reference Include="AWS.Logger.Core, Version=1.4.0.0, Culture=neutral, PublicKeyToken=885c28607f98e604">
6164
<HintPath>packages\AWS.Logger.Core.1.4.0\lib\net45\AWS.Logger.Core.dll</HintPath>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
NOTRAWFILE

ThermoRawFileParserTest/WriterTests.cs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,25 @@ public void TestMgf()
3535
public void TestFolderMgfs()
3636
{
3737
// Get temp path for writing the test MGF
38-
var tempFilePath = Path.GetTempPath();
38+
var tempFilePath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
39+
40+
Directory.CreateDirectory(tempFilePath);
3941

4042
var testRawFolder = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"Data/TestFolderMgfs");
4143
var parseInput = new ParseInput(null, testRawFolder, tempFilePath, OutputFormat.MGF);
4244

4345
RawFileParser.Parse(parseInput);
4446

47+
var numFiles = Directory.GetFiles(tempFilePath, "*.mgf");
48+
Assert.AreEqual(numFiles.Length, 2);
49+
4550
var mgfData = Mgf.LoadAllStaticData(Path.Combine(tempFilePath, "small1.mgf"));
4651
Assert.AreEqual(34, mgfData.NumSpectra);
4752

4853
var mgfData2 = Mgf.LoadAllStaticData(Path.Combine(tempFilePath, "small2.mgf"));
4954
Assert.AreEqual(34, mgfData2.NumSpectra);
55+
56+
Directory.Delete(tempFilePath, true);
5057
}
5158

5259
[Test]

Writer/MzMlSpectrumWriter.cs

Lines changed: 70 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,12 @@ public class MzMlSpectrumWriter : SpectrumWriter
2727
private static readonly ILog Log =
2828
LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);
2929

30-
private const string FilterStringIsolationMzPattern = @"ms2 (.*?)@";
30+
private readonly Regex FilterStringIsolationMzPattern = new Regex(@"ms2 (.*?)@");
31+
32+
//tune version < 3 produces multiple trailer entries like "SPS Mass [number]"
33+
private readonly Regex SPSentry = new Regex(@"SPS Mass\s+\d+:");
34+
//tune version == 3 produces trailer entries "SPS Masses" and "SPS Masses Continued"
35+
private readonly Regex SPSentry3 = new Regex(@"SPS Masses(?:\s+Continued)?:");
3136

3237
private IRawDataPlus _rawFile;
3338

@@ -1146,7 +1151,7 @@ private SpectrumType ConstructMSSpectrum(int scanNumber)
11461151
int? charge = null;
11471152
double? monoisotopicMz = null;
11481153
double? ionInjectionTime = null;
1149-
double? isolationWidth = null;
1154+
List<double> SPSMasses = new List<double>();
11501155
for (var i = 0; i < trailerData.Length; i++)
11511156
{
11521157
if (trailerData.Labels[i] == "Charge State:")
@@ -1169,10 +1174,21 @@ private SpectrumType ConstructMSSpectrum(int scanNumber)
11691174
CultureInfo.CurrentCulture);
11701175
}
11711176

1172-
if (trailerData.Labels[i] == "MS" + (int) scanFilter.MSOrder + " Isolation Width:")
1177+
//tune version < 3 produces trailer entries like "SPS Mass #", one entry per mass
1178+
if (SPSentry.IsMatch(trailerData.Labels[i]))
11731179
{
1174-
isolationWidth = double.Parse(trailerData.Values[i], NumberStyles.Any,
1175-
CultureInfo.CurrentCulture);
1180+
var mass = double.Parse(trailerData.Values[i]);
1181+
if (mass > 0) SPSMasses.Add(mass); //zero means mass does not exist
1182+
}
1183+
1184+
//tune version == 3 produces trailer entry "SPS Masses", a comma-separated list of masses
1185+
if (SPSentry3.IsMatch(trailerData.Labels[i]))
1186+
{
1187+
foreach (var mass in trailerData.Values[i].Trim().Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
1188+
{
1189+
SPSMasses.Add(double.Parse(mass));
1190+
}
1191+
11761192
}
11771193
}
11781194

@@ -1206,7 +1222,7 @@ private SpectrumType ConstructMSSpectrum(int scanNumber)
12061222
});
12071223

12081224
// Keep track of scan number and isolation m/z for precursor reference
1209-
var result = Regex.Match(scanEvent.ToString(), FilterStringIsolationMzPattern);
1225+
var result = FilterStringIsolationMzPattern.Match(scanEvent.ToString());
12101226
if (result.Success)
12111227
{
12121228
if (_precursorMs2ScanNumbers.ContainsKey(result.Groups[1].Value))
@@ -1219,7 +1235,7 @@ private SpectrumType ConstructMSSpectrum(int scanNumber)
12191235

12201236
// Construct and set the precursor list element of the spectrum
12211237
var precursorListType =
1222-
ConstructPrecursorList(scanEvent, charge, scanFilter.MSOrder, monoisotopicMz, isolationWidth);
1238+
ConstructPrecursorList(scanEvent, charge, scanFilter.MSOrder, monoisotopicMz, SPSMasses);
12231239
spectrum.precursorList = precursorListType;
12241240
break;
12251241
case MSOrderType.Ms3:
@@ -1230,8 +1246,7 @@ private SpectrumType ConstructMSSpectrum(int scanNumber)
12301246
name = "MSn spectrum",
12311247
value = ""
12321248
});
1233-
precursorListType = ConstructPrecursorList(scanEvent, charge, scanFilter.MSOrder, monoisotopicMz,
1234-
isolationWidth);
1249+
precursorListType = ConstructPrecursorList(scanEvent, charge, scanFilter.MSOrder, monoisotopicMz, SPSMasses);
12351250
spectrum.precursorList = precursorListType;
12361251
break;
12371252
default:
@@ -1772,19 +1787,20 @@ private SpectrumType ConstructPDASpectrum(int scanNumber, int instrumentNumber)
17721787
/// <param name="SPSMasses">the SPS masses collected from the scan trailer; each mass after the first yields an additional precursor entry</param>
17731788
/// <returns>the precursor list</returns>
17741789
private PrecursorListType ConstructPrecursorList(IScanEventBase scanEvent, int? charge, MSOrderType msLevel,
1775-
double? monoisotopicMz, double? isolationWidth)
1790+
double? monoisotopicMz, List<double> SPSMasses)
17761791
{
17771792
// Construct the precursor
17781793
var precursorList = new PrecursorListType
17791794
{
1780-
count = "1",
1781-
precursor = new PrecursorType[1]
1795+
count = (Math.Max(SPSMasses.Count, 1)).ToString(),
1796+
precursor = new PrecursorType[Math.Max(SPSMasses.Count, 1)]
17821797
};
17831798

17841799
var spectrumRef = "";
17851800
int precursorScanNumber = _precursorMs1ScanNumber;
17861801
IReaction reaction = null;
17871802
var precursorMz = 0.0;
1803+
double? isolationWidth = null;
17881804
try
17891805
{
17901806
switch (msLevel)
@@ -1814,6 +1830,7 @@ private PrecursorListType ConstructPrecursorList(IScanEventBase scanEvent, int?
18141830
}
18151831

18161832
precursorMz = reaction.PrecursorMass;
1833+
isolationWidth = reaction.IsolationWidth;
18171834
}
18181835
catch (ArgumentOutOfRangeException)
18191836
{
@@ -1823,15 +1840,11 @@ private PrecursorListType ConstructPrecursorList(IScanEventBase scanEvent, int?
18231840
var precursor = new PrecursorType
18241841
{
18251842
selectedIonList =
1826-
new SelectedIonListType {count = 1.ToString(), selectedIon = new ParamGroupType[1]},
1843+
new SelectedIonListType {count = "1", selectedIon = new ParamGroupType[1]},
18271844
spectrumRef = spectrumRef
18281845
};
18291846

1830-
precursor.selectedIonList.selectedIon[0] =
1831-
new ParamGroupType
1832-
{
1833-
cvParam = new CVParamType[3]
1834-
};
1847+
precursor.selectedIonList.selectedIon[0] = new ParamGroupType();
18351848

18361849
// Selected ion MZ
18371850
var selectedIonMz = CalculateSelectedIonMz(reaction, monoisotopicMz, isolationWidth);
@@ -1886,6 +1899,7 @@ private PrecursorListType ConstructPrecursorList(IScanEventBase scanEvent, int?
18861899
{
18871900
cvParam = new CVParamType[3]
18881901
};
1902+
18891903
precursor.isolationWindow.cvParam[0] =
18901904
new CVParamType
18911905
{
@@ -2022,6 +2036,44 @@ private PrecursorListType ConstructPrecursorList(IScanEventBase scanEvent, int?
20222036

20232037
precursorList.precursor[0] = precursor;
20242038

2039+
//the first SPS mass seems to be the same as the one from reaction or scan filter
2040+
for (int n = 1; n < SPSMasses.Count; n++)
2041+
{
2042+
var SPSPrecursor = new PrecursorType
2043+
{
2044+
selectedIonList =
2045+
new SelectedIonListType { count = "1", selectedIon = new ParamGroupType[1] },
2046+
spectrumRef = spectrumRef
2047+
};
2048+
2049+
// Selected ion MZ only
2050+
SPSPrecursor.selectedIonList.selectedIon[0] =
2051+
new ParamGroupType
2052+
{
2053+
cvParam = new CVParamType[]
2054+
{
2055+
new CVParamType {
2056+
name = "selected ion m/z",
2057+
value = SPSMasses[n].ToString(),
2058+
accession = "MS:1000744",
2059+
cvRef = "MS",
2060+
unitCvRef = "MS",
2061+
unitAccession = "MS:1000040",
2062+
unitName = "m/z"
2063+
}
2064+
}
2065+
};
2066+
2067+
//All SPS masses have the same activation (i.e. it was calculated above)
2068+
SPSPrecursor.activation =
2069+
new ParamGroupType
2070+
{
2071+
cvParam = activationCvParams.ToArray()
2072+
};
2073+
2074+
precursorList.precursor[n] = SPSPrecursor;
2075+
}
2076+
20252077
return precursorList;
20262078
}
20272079

Writer/OntologyMapping.cs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -612,12 +612,8 @@ public static List<CVParamType> GetDetectors(string instrumentAccession)
612612
case "MS:1002732":
613613
// ORBITRAP ECLIPSE
614614
case "MS:1003029":
615-
// ORBITRAP EXPLORIS 120
616-
case "MS:1003095":
617-
// ORBITRAP EXPLORIS 240
618-
case "MS:1003094":
619-
// ORBITRAP EXPLORIS 480
620-
case "MS:1003028":
615+
// ORBITRAP ID-X
616+
case "MS:1003112":
621617
detectors = new List<CVParamType>
622618
{
623619
new CVParamType
@@ -648,6 +644,12 @@ public static List<CVParamType> GetDetectors(string instrumentAccession)
648644
case "MS:1002877":
649645
// Q EXACTIVE PLUS
650646
case "MS:1002634":
647+
// ORBITRAP EXPLORIS 120
648+
case "MS:1003095":
649+
// ORBITRAP EXPLORIS 240
650+
case "MS:1003094":
651+
// ORBITRAP EXPLORIS 480
652+
case "MS:1003028":
651653
detectors = new List<CVParamType>
652654
{
653655
new CVParamType

0 commit comments

Comments
 (0)