Skip to content

Commit 1193034

Browse files
committed
2 parents b148f2d + 29d4e59 commit 1193034

29 files changed

+1178
-1089
lines changed

MainClass.cs

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public static class MainClass
2020
private static readonly ILog Log =
2121
LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);
2222

23-
public const string Version = "1.4.2";
23+
public const string Version = "1.4.4";
2424
public static void Main(string[] args)
2525
{
2626
// Set Invariant culture as default for all further processing
@@ -83,15 +83,15 @@ private static void XicParametersParsing(string[] args)
8383
v => parameters.printJsonExample = v != null
8484
},
8585
{
86-
"o=|output=",
87-
"The output directory. Specify this or an output file. Specifying neither writes to the input directory.",
88-
v => outputDirectory = v
89-
},
90-
{
91-
"b=|output_file",
86+
"b=|output=",
9287
"The output file. Specify this or an output directory. Specifying neither writes to the input directory.",
9388
v => outputFile = v
9489
},
90+
{
91+
"o=|output_directory=",
92+
"The output directory. Specify this or an output file. Specifying neither writes to the input directory.",
93+
v => outputDirectory = v
94+
},
9595
{
9696
"6|base64",
9797
"Encodes the content of the xic vectors as base 64 encoded string.",
@@ -103,7 +103,7 @@ private static void XicParametersParsing(string[] args)
103103
v => parameters.stdout = v != null
104104
},
105105
{
106-
"w|warningsAreErrors", "Return non-zero exit code for warnings; default only for errors",
106+
"w|warningsAreErrors", "Return non-zero exit code for warnings; default only for errors",
107107
v => parameters.Vigilant = v != null
108108
},
109109
{
@@ -338,7 +338,7 @@ private static void SpectrumQueryParametersParsing(string[] args)
338338
v => parameters.scans = v
339339
},
340340
{
341-
"b=|output_file",
341+
"b=|output=",
342342
"The output file. Specifying none writes the output file to the input file parent directory.",
343343
v => parameters.outputFile = v
344344
},
@@ -510,27 +510,27 @@ private static void RegularParametersParsing(string[] args)
510510
v => parseInput.RawDirectoryPath = v
511511
},
512512
{
513-
"o=|output=",
514-
"The output directory. Specify this or an output file -b. Specifying neither writes to the input directory.",
515-
v => parseInput.OutputDirectory = v
516-
},
517-
{
518-
"b=|output_file",
513+
"b=|output=",
519514
"The output file. Specify this or an output directory -o. Specifying neither writes to the input directory.",
520515
v => parseInput.OutputFile = v
521516
},
517+
{
518+
"o=|output_directory=",
519+
"The output directory. Specify this or an output file -b. Specifying neither writes to the input directory.",
520+
v => parseInput.OutputDirectory = v
521+
},
522522
{
523523
"s|stdout",
524524
"Write to standard output. Cannot be combined with file or directory output. Implies silent logging, i.e. logging level 0",
525525
v => parseInput.StdOut = v != null
526526
},
527527
{
528528
"f=|format=",
529-
"The spectra output format: 0 for MGF, 1 for mzML, 2 for indexed mzML, 3 for Parquet; both numeric and text (case insensitive) value recognized. Defaults to indexed mzML if no format is specified.",
529+
"The spectra output format: 0 for MGF, 1 for mzML, 2 for indexed mzML, 3 for Parquet, 4 for None (no output); both numeric and text (case insensitive) value recognized. Defaults to indexed mzML if no format is specified.",
530530
v => outputFormatString = v
531531
},
532532
{
533-
"m=|metadata=", "The metadata output format: 0 for JSON, 1 for TXT; both numeric and text (case insensitive) value recognized",
533+
"m=|metadata=", "The metadata output format: 0 for JSON, 1 for TXT, 2 for None (no output); both numeric and text (case insensitive) value recognized. Defaults to None",
534534
v => metadataFormatString = v
535535
},
536536
{
@@ -740,7 +740,7 @@ private static void RegularParametersParsing(string[] args)
740740
"-c, --metadata_output_file");
741741
}
742742

743-
if (parseInput.MetadataOutputFile != null && parseInput.MetadataFormat == MetadataFormat.NONE)
743+
if (parseInput.MetadataOutputFile != null && parseInput.MetadataFormat == MetadataFormat.None)
744744
{
745745
throw new OptionException("specify a metadata format (0 for JSON, 1 for TXT)",
746746
"-m, --metadata");
@@ -766,6 +766,8 @@ private static void RegularParametersParsing(string[] args)
766766
if (parseInput.OutputFormat == OutputFormat.IndexMzML) parseInput.OutputFormat = OutputFormat.MzML;
767767
}
768768

769+
parseInput.MaxLevel = parseInput.MsLevel.Max();
770+
769771
if (parseInput.S3Url != null && parseInput.S3AccessKeyId != null &&
770772
parseInput.S3SecretAccessKey != null && parseInput.BucketName != null)
771773
if (Uri.IsWellFormedUriString(parseInput.S3Url, UriKind.Absolute))

OutputFormat.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ public enum OutputFormat
66
MzML,
77
IndexMzML,
88
Parquet,
9-
NONE
9+
None
1010
}
1111

1212
public enum MetadataFormat
1313
{
1414
JSON,
1515
TXT,
16-
NONE
16+
None
1717
}
1818
}

ParseInput.cs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ public int Warnings
9696

9797
public HashSet<int> MsLevel { get; set; }
9898

99+
public int MaxLevel { get; set; }
100+
99101
public bool MgfPrecursor { get; set; }
100102

101103
public bool NoiseData { get; set; }
@@ -121,8 +123,8 @@ public int Warnings
121123

122124
public ParseInput()
123125
{
124-
MetadataFormat = MetadataFormat.NONE;
125-
OutputFormat = OutputFormat.NONE;
126+
MetadataFormat = MetadataFormat.None;
127+
OutputFormat = OutputFormat.None;
126128
Gzip = false;
127129
NoPeakPicking = new HashSet<int>();
128130
NoZlibCompression = false;
@@ -136,6 +138,7 @@ public ParseInput()
136138
Vigilant = false;
137139
_errors = 0;
138140
_warnings = 0;
141+
MaxLevel = 10;
139142
}
140143

141144
public ParseInput(string rawFilePath, string rawDirectoryPath, string outputDirectory, OutputFormat outputFormat

README.md

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,9 @@ or
4747
mono ThermoRawFileParser.exe -d=/home/user/data_input/
4848
```
4949

50-
For running on Windows, omit `mono`. The optional parameters only work in the -option=value format. The tool can output some RAW file metadata `-m=0|1` (0 for JSON, 1 for TXT) and the spectra file `-f=0|1|2|3` (0 for MGF, 1 for mzML, 2 for indexed mzML, 3 for Parquet) or both. Use the `-p` flag to disable the thermo native peak picking.
50+
For running on Windows, omit `mono`. The optional parameters only work in the -option=value format. The tool can output some RAW file metadata `-m=0|1|2` (0 for JSON, 1 for TXT, 2 for no output) and the spectra file `-f=0|1|2|3|4` (0 for MGF, 1 for mzML, 2 for indexed mzML, 3 for Parquet, 4 for no output) or both. Use the `-p` flag to disable the thermo native peak picking.
5151

5252
```
53-
ThermoRawFileParser.exe --help
5453
Usage is ThermoRawFileParser.exe [subcommand] [options]
5554
optional subcommands are xic|query (use [subcommand] -h for more info]):
5655
-h, --help Prints out the options.
@@ -60,23 +59,25 @@ optional subcommands are xic|query (use [subcommand] -h for more info]):
6059
-d, --input_directory=VALUE
6160
The directory containing the raw files (Required).
6261
Specify this or an input raw file -i.
63-
-o, --output=VALUE The output directory. Specify this or an output
64-
file -b. Specifying neither writes to the input
65-
directory.
66-
-b, --output_file=VALUE The output file. Specify this or an output
62+
-b, --output=VALUE The output file. Specify this or an output
6763
directory -o. Specifying neither writes to the
6864
input directory.
65+
-o, --output_directory=VALUE
66+
The output directory. Specify this or an output
67+
file -b. Specifying neither writes to the input
68+
directory.
6969
-s, --stdout Write to standard output. Cannot be combined with
7070
file or directory output. Implies silent logging,
7171
i.e. logging level 0
7272
-f, --format=VALUE The spectra output format: 0 for MGF, 1 for mzML,
73-
2 for indexed mzML, 3 for Parquet; both numeric
74-
and text (case insensitive) value recognized.
75-
Defaults to indexed mzML if no format is
76-
specified.
77-
-m, --metadata=VALUE The metadata output format: 0 for JSON, 1 for TXT;
78-
both numeric and text (case insensitive) value
79-
recognized
73+
2 for indexed mzML, 3 for Parquet, 4 for None (
74+
no output); both numeric and text (case
75+
insensitive) value recognized. Defaults to
76+
indexed mzML if no format is specified.
77+
-m, --metadata=VALUE The metadata output format: 0 for JSON, 1 for TXT,
78+
2 for None (no output); both numeric and text (
79+
case insensitive) value recognized. Defaults to
80+
None
8081
-c, --metadata_output_file=VALUE
8182
The metadata output file. By default the metadata
8283
file is written to the output directory.
@@ -120,7 +121,7 @@ optional subcommands are xic|query (use [subcommand] -h for more info]):
120121
S3 bucket name
121122
```
122123

123-
Output file extension is determined by the used output format and (optional) gzip compression, for example, if format is MGF without gzip compression, the output file will receive `.mgf` extension, if format is mzML with gzip compression the output file will have `.mzML.gz` extension. All user input will be standardized to fulfill abovementioned requrements.
124+
The output file extension is determined by the output format used and the (optional) gzip compression. For example, if the format is MGF without gzip compression, the output file will receive the `.mgf` extension; if the format is mzML with gzip compression, the output file will have the `.mzML.gz` extension. All user input will be standardized to fulfill the above-mentioned requirements.
124125

125126
A (Java) graphical user interface is also available [here](https://github.com/compomics/ThermoRawFileParserGUI) that enables the selection of an input RAW directory or one or more RAW files.
126127

@@ -137,7 +138,7 @@ usage is:
137138
-h, --help Prints out the options.
138139
-i, --input=VALUE The raw file input (Required).
139140
-n, --scans=VALUE The scan numbers. e.g. "1-5, 20, 25-30"
140-
-b, --output_file=VALUE The output file. Specifying none writes the output
141+
-b, --output=VALUE The output file. Specifying none writes the output
141142
file to the input file parent directory.
142143
-p, --noPeakPicking Don't use the peak picking provided by the native
143144
Thermo library. By default peak picking is
@@ -169,12 +170,13 @@ ThermoRawFileParser.exe xic --help
169170
Specify this or an input file -i.
170171
-j, --json=VALUE The json input file (Required).
171172
-p, --print_example Show a json input file example.
172-
-o, --output=VALUE The output directory. Specify this or an output
173-
file. Specifying neither writes to the input
174-
directory.
175-
-b, --output_file=VALUE The output file. Specify this or an output
173+
-b, --output=VALUE The output file. Specify this or an output
176174
directory. Specifying neither writes to the
177175
input directory.
176+
-o, --output_directory=VALUE
177+
The output directory. Specify this or an output
178+
file. Specifying neither writes to the input
179+
directory.
178180
-6, --base64 Encodes the content of the xic vectors as base 64
179181
encoded string.
180182
-s, --stdout Pipes the output into standard output. Logging is
@@ -192,7 +194,8 @@ Provide one of the following filters:
192194
* M/Z start and end
193195
* sequence and tolerance (tolerance unit optional, defaults to `ppm`)
194196

195-
optionally one can define starting and ending retention times and thermo filter string (defaults to `ms`)
197+
Optionally, one can define starting and ending retention times, provide a filter string (defaults to `ms`, i.e. only MS1 scans), and add a comment (free text) field; any valid filter string is supported,
198+
however, only basic validation is performed; see [issue #158](https://github.com/compomics/ThermoRawFileParser/issues/158) for details. The comment can contain any text and will be preserved in the output.
196199

197200
An example input JSON file:
198201

@@ -219,7 +222,17 @@ An example input JSON file:
219222
{
220223
"sequence":"TRANNEL",
221224
"tolerance":10
225+
},
226+
{
227+
"mz":1014.5099732499732,
228+
"rt_start":14.0600881872,
229+
"rt_end":14.4167198290667,
230+
"tolerance":5,
231+
"tolerance_unit":"ppm",
232+
"comment":"Only ion trap scans",
233+
"scan_filter":"ITMS"
222234
}
235+
}
223236
]
224237
225238
```

RawFileParser.cs

100644100755
Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -136,27 +136,32 @@ private static void ProcessFile(ParseInput parseInput)
136136

137137
// Get the number of instruments (controllers) present in the RAW file and set the
138138
// selected instrument to the MS instrument, first instance of it
139-
rawFile.SelectInstrument(Device.MS, 1);
139+
var firstScanNumber = -1;
140+
var lastScanNumber = -1;
140141

141-
rawFile.IncludeReferenceAndExceptionData = !parseInput.ExData;
142+
if (rawFile.GetInstrumentCountOfType(Device.MS) != 0)
143+
{
144+
rawFile.SelectInstrument(Device.MS, 1);
145+
rawFile.IncludeReferenceAndExceptionData = !parseInput.ExData;
142146

143-
// Get the first and last scan from the RAW file
144-
var firstScanNumber = rawFile.RunHeaderEx.FirstSpectrum;
145-
var lastScanNumber = rawFile.RunHeaderEx.LastSpectrum;
147+
// Get the first and last scan from the RAW file
148+
firstScanNumber = rawFile.RunHeaderEx.FirstSpectrum;
149+
lastScanNumber = rawFile.RunHeaderEx.LastSpectrum;
146150

147-
// Check for empty file
148-
if (lastScanNumber < 1)
149-
{
150-
throw new RawFileParserException("Empty RAW file, no output will be produced");
151+
// Check for empty file
152+
if (lastScanNumber < 1)
153+
{
154+
throw new RawFileParserException("Empty RAW file, no output will be produced");
155+
}
151156
}
152157

153-
if (parseInput.MetadataFormat != MetadataFormat.NONE)
158+
if (parseInput.MetadataFormat != MetadataFormat.None)
154159
{
155160
MetadataWriter metadataWriter = new MetadataWriter(parseInput);
156161
metadataWriter.WriteMetadata(rawFile, firstScanNumber, lastScanNumber);
157162
}
158163

159-
if (parseInput.OutputFormat != OutputFormat.NONE)
164+
if (parseInput.OutputFormat != OutputFormat.None)
160165
{
161166
SpectrumWriter spectrumWriter;
162167
switch (parseInput.OutputFormat)

ThermoRawFileParser.csproj

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -176,11 +176,11 @@
176176
<Reference Include="System.Data.DataSetExtensions" />
177177
<Reference Include="System.Data" />
178178
<Reference Include="System.Xml" />
179-
<Reference Include="ThermoFisher.CommonCore.Data, Version=5.0.0.88, Culture=neutral, PublicKeyToken=1aef06afb5abd953, processorArchitecture=MSIL">
180-
<HintPath>packages\ThermoFisher.CommonCore.Data.5.0.0.88\lib\ThermoFisher.CommonCore.Data.dll</HintPath>
179+
<Reference Include="ThermoFisher.CommonCore.Data, Version=5.0.0.93, Culture=neutral, PublicKeyToken=1aef06afb5abd953, processorArchitecture=MSIL">
180+
<HintPath>packages\ThermoFisher.CommonCore.Data.5.0.0.93\lib\netstandard2.0\ThermoFisher.CommonCore.Data.dll</HintPath>
181181
</Reference>
182-
<Reference Include="ThermoFisher.CommonCore.RawFileReader, Version=5.0.0.88, Culture=neutral, PublicKeyToken=1aef06afb5abd953, processorArchitecture=MSIL">
183-
<HintPath>packages\ThermoFisher.CommonCore.RawFileReader.5.0.0.88\lib\ThermoFisher.CommonCore.RawFileReader.dll</HintPath>
182+
<Reference Include="ThermoFisher.CommonCore.RawFileReader, Version=5.0.0.93, Culture=neutral, PublicKeyToken=1aef06afb5abd953, processorArchitecture=MSIL">
183+
<HintPath>packages\ThermoFisher.CommonCore.RawFileReader.5.0.0.93\lib\netstandard2.0\ThermoFisher.CommonCore.RawFileReader.dll</HintPath>
184184
</Reference>
185185
<Reference Include="zlib.net, Version=1.0.3.0, Culture=neutral, PublicKeyToken=47d7877cb3620160">
186186
<HintPath>packages\zlib.net.1.0.4.0\lib\zlib.net.dll</HintPath>
@@ -199,6 +199,7 @@
199199
<Compile Include="Query\ProxiSpectrumReader.cs" />
200200
<Compile Include="RawFileParserException.cs" />
201201
<Compile Include="RawFileParser.cs" />
202+
<Compile Include="Util\CVHelpers.cs" />
202203
<Compile Include="Util\LimitedSizeDictionary.cs" />
203204
<Compile Include="Util\MZArray.cs" />
204205
<Compile Include="Util\NativeMethods.cs" />

ThermoRawFileParserTest/OntologyMappingTests.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,13 @@ public class OntologyMappingTests
1212
public void TestGetInstrumentModel()
1313
{
1414
// exact match
15-
var match = OntologyMapping.getInstrumentModel("LTQ Orbitrap");
15+
var match = OntologyMapping.GetInstrumentModel("LTQ Orbitrap");
1616
Assert.AreEqual("MS:1000449", match.accession);
1717
// partial match, should return the longest partial match
18-
var partialMatch = OntologyMapping.getInstrumentModel("LTQ Orbitrap XXL");
18+
var partialMatch = OntologyMapping.GetInstrumentModel("LTQ Orbitrap XXL");
1919
Assert.AreEqual("MS:1000449", partialMatch.accession);
2020
// no match, should return the generic thermo instrument
21-
var noMatch = OntologyMapping.getInstrumentModel("non existing model");
21+
var noMatch = OntologyMapping.GetInstrumentModel("non existing model");
2222
Assert.AreEqual("MS:1000483", noMatch.accession);
2323
}
2424
}

0 commit comments

Comments
 (0)