Skip to content

Commit 089ad04

Browse files
authored
Merge pull request crux-toolkit#698 from crux-toolkit/new_app
2 parents 880299c + 1d1220e commit 089ad04

File tree

11 files changed

+341
-5
lines changed

11 files changed

+341
-5
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ rules.ninja
4343
/doc/user/subtract-index.html
4444
/doc/user/tide-index.html
4545
/doc/user/tide-search.html
46+
/doc/user/spectrum-converter.html
4647
/doc/user/commands/assign-confidence.html
4748
/doc/user/commands/bullseye.html
4849
/doc/user/commands/cascade-search.html
@@ -69,6 +70,7 @@ rules.ninja
6970
/doc/user/commands/subtract-index.html
7071
/doc/user/commands/tide-index.html
7172
/doc/user/commands/tide-search.html
73+
/doc/user/commands/spectrum-converter.html
7274

7375
# /ext
7476
/ext/Makefile

doc/user/release-notes.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ <h4>Major changes</h4>
7171
</li>
7272
<h4>Minor changes</h4>
7373
<ul>
74+
<li>6 Feb 2025: Created the spectrum-converter application.</li>
7475
<li>28 Jan 2025: Bug fix in tide-search when searching without decoy peptides. </li>
7576
<li>25 Oct 2024: Fixed bug in parameter handling for spectral-counts inside of the pipeline command.</li>
7677
<li>25 Oct 2024: add .gz format as read fasta file option.

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ set (
202202
io/SQTWriter.cpp
203203
app/TideIndexApplication.cpp
204204
app/TideMatchSet.cpp
205+
app/SpectrumConvertApplication.cpp
205206
app/TideSearchApplication.cpp
206207
io/DIAmeterFeatureScaler.cpp
207208
io/DIAmeterPSMFilter.cpp

src/app/CreateDocs.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
#include "app/AssignConfidenceApplication.h"
3434
#include "app/SubtractIndexApplication.h"
3535
#include "DIAmeterApplication.h"
36-
36+
#include "app/SpectrumConvertApplication.h"
3737
using namespace std;
3838

3939
CreateDocs::CreateDocs() {
@@ -67,6 +67,7 @@ int CreateDocs::main(int argc, char** argv) {
6767
apps.add(new TideIndexApplication());
6868
apps.add(new TideSearchApplication());
6969
apps.add(new DIAmeterApplication());
70+
apps.add(new SpectrumConvertApplication());
7071

7172
string targetApp = Params::GetString("tool-name");
7273
if (targetApp == "list") {
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
#include <cstddef>
2+
#include <cstdio>
3+
#include "app/tide/abspath.h"
4+
#include "app/tide/records_to_vector-inl.h"
5+
6+
#include "io/carp.h"
7+
#include "parameter.h"
8+
#include "io/SpectrumRecordWriter.h"
9+
#include "SpectrumConvertApplication.h"
10+
#include "util/Params.h"
11+
#include "util/FileUtils.h"
12+
#include "util/StringUtils.h"
13+
#include <math.h>
14+
#include <map>
15+
#include "crux_version.h"
16+
using namespace std;
17+
#define CHECK(x) GOOGLE_CHECK(x)
18+
19+
/**
 * Constructs the application and allocates one mutex per lock type.
 *
 * The scalar bookkeeping members are given defined starting values here:
 * total_spectra_num_ is accumulated with += by the worker threads and tested
 * in main(), so it must not start out indeterminate.
 */
SpectrumConvertApplication::SpectrumConvertApplication()
  : num_threads_(1),
    spectrum_flag_(NULL),
    total_spectra_num_(0) {
  // LOCK_TYPES are defined in model/objects.h
  locks_array_.reserve(NUMBER_LOCK_TYPES);
  for (int i = 0; i < NUMBER_LOCK_TYPES; i++) {
    locks_array_.push_back(new boost::mutex());
  }
}
24+
25+
/**
 * Destructor: frees the NUMBER_LOCK_TYPES mutexes held in locks_array_.
 */
SpectrumConvertApplication::~SpectrumConvertApplication() {
  int lock_index = 0;
  while (lock_index < NUMBER_LOCK_TYPES) {
    boost::mutex* lock = locks_array_[lock_index];
    delete lock;
    ++lock_index;
  }
}
30+
31+
int SpectrumConvertApplication::main(int argc, char** argv) {
32+
return main(Params::GetStrings("tide spectra file"));
33+
}
34+
35+
int SpectrumConvertApplication::main(const vector<string>& input_files) {
36+
37+
carp(CARP_INFO, "Running spectrum-convert...");
38+
39+
num_threads_ = Params::GetInt("num-threads");
40+
if (Params::IsDefault("spectrum-outdir")) {
41+
output_folder_ = Params::GetString("output-dir");
42+
} else {
43+
output_folder_ = Params::GetString("spectrum-outdir");
44+
}
45+
bool overwrite = Params::GetBool("overwrite");
46+
if (create_output_directory(output_folder_, overwrite)) {
47+
carp(CARP_FATAL, "Couldn't create output directory");
48+
}
49+
50+
if (num_threads_ < 1) {
51+
num_threads_ = boost::thread::hardware_concurrency(); // MINIMUM # = 1.
52+
// (Meaning just main thread) Do not make this value below 1.
53+
} else if (num_threads_ > 64) {
54+
// make sure that number of threads are reasonable, e.g. user did not specify millions of threads...
55+
carp(CARP_FATAL, "Requested more than 64 threads.");
56+
}
57+
carp(CARP_INFO, "Number of Threads: %d", num_threads_);
58+
59+
60+
// Convert the original file names into spectrum records if needed
61+
// Update the file names in the variable inputFiles_ locally.
62+
// Run spectrum file convertion in parallel.
63+
for (vector<string>::const_iterator original_file_name = input_files.begin(); original_file_name != input_files.end(); ++original_file_name) {
64+
inputFiles_.push_back(SpectrumConvertApplication::InputFile(*original_file_name, *original_file_name, false));
65+
}
66+
// Launch threads to convert files
67+
boost::thread_group threadgroup_input_files;
68+
for (int t = 1; t < num_threads_; ++t) {
69+
boost::thread * currthread = new boost::thread(boost::bind(&SpectrumConvertApplication::getInputFiles, this, t));
70+
threadgroup_input_files.add_thread(currthread);
71+
}
72+
getInputFiles(0);
73+
// Join threads
74+
threadgroup_input_files.join_all();
75+
76+
if (total_spectra_num_ > 0) {
77+
carp(CARP_INFO, "There were a total of %d spectrum conversions from %d input spectrum files.",
78+
total_spectra_num_, inputFiles_.size());
79+
}
80+
carp(CARP_INFO, "Elapsed time: %.3g s", wall_clock() / 1e6);
81+
82+
return 0;
83+
}
84+
85+
86+
// In order to add more options, you need to add them to ./src/util/Params.cpp
87+
/**
 * Returns the names of the options accepted by this command, in the order
 * they are listed here.
 */
vector<string> SpectrumConvertApplication::getOptions() const {
  static const char* const kOptionNames[] = {
    "fileroot",
    "num-threads",
    "output-dir",
    "spectrum-outdir",
    "overwrite",
    "parameter-file",
    "verbosity"
  };
  const size_t option_count = sizeof(kOptionNames) / sizeof(kOptionNames[0]);
  return vector<string>(kOptionNames, kOptionNames + option_count);
}
99+
100+
/**
 * Returns the command name, "spectrum-converter".
 */
string SpectrumConvertApplication::getName() const {
  const string command_name("spectrum-converter");
  return command_name;
}
103+
104+
/**
 * Returns the command description. The text carries two variants, wrapped in
 * [[nohtml:...]] and [[html:...]] markers.
 */
string SpectrumConvertApplication::getDescription() const {
  const char* const description_text =
    "[[nohtml:This command converts spectrum files into the binary spectrumrecords format]]"
    "[[html:<p>This command converts spectrum files into the binary spectrumrecords format "
    "used by the tide-search command. Most people will not need to use this command at all, "
    "because tide-search will do the conversions automatically as needed. However, if you plan "
    "to run multiple searches using the same input file, then you can save some time by using this "
    "command to pre-convert the spectra into spectrumrecords format.</p>]]";
  return string(description_text);
}
113+
114+
/**
 * Returns the command arguments: a single entry, "tide spectra file+".
 */
vector<string> SpectrumConvertApplication::getArgs() const {
  return vector<string>(1, string("tide spectra file+"));
}
120+
121+
122+
/**
 * Returns the (file name, description) pairs describing the files this
 * command produces.
 */
vector< pair<string, string> > SpectrumConvertApplication::getOutputs() const {
  typedef pair<string, string> OutputEntry;

  vector<OutputEntry> entries;
  entries.push_back(OutputEntry(
    "<fileroot>.spectrumrecords",
    "The spectra from the input file, written in spectrumrecords format. "
    "The <fileroot> is taken from the input file."));
  entries.push_back(OutputEntry(
    "spectrum-converter.params.txt",
    "a file containing the name and value of all parameters/options for the "
    "current operation. Not all parameters in the file may have been used in "
    "the operation. The resulting file can be used with the --parameter-file "
    "option for other Crux programs."));
  entries.push_back(OutputEntry(
    "spectrum-converter.log.txt",
    "a log file containing a copy of all messages that were printed to the "
    "screen during execution."));
  return entries;
}
137+
/**
 * Reports that this command needs the output directory (always true).
 */
bool SpectrumConvertApplication::needsOutputDirectory() const {
  const bool requires_output_dir = true;
  return requires_output_dir;
}
140+
141+
/**
 * Returns the command ID for this application, SPECTRUM_CONVERT_COMMAND.
 */
COMMAND_T SpectrumConvertApplication::getCommand() const {
  const COMMAND_T command_id = SPECTRUM_CONVERT_COMMAND;
  return command_id;
}
144+
145+
/**
 * Parameter-processing hook; this command performs no work here.
 */
void SpectrumConvertApplication::processParams() {
  // Intentionally empty.
}
147+
148+
void SpectrumConvertApplication::getInputFiles(int thread_id) {
149+
// Try to read all spectrum files as spectrumrecords, convert those that fail
150+
if (thread_id > inputFiles_.size())
151+
return;
152+
for (vector<SpectrumConvertApplication::InputFile>::iterator original_file_name = inputFiles_.begin()+thread_id;
153+
original_file_name < inputFiles_.begin() + (inputFiles_.size());
154+
original_file_name = original_file_name + num_threads_)
155+
{
156+
carp(CARP_DEBUG, "Start processing input files");
157+
bool keepSpectrumrecords = true;
158+
string original_name = (*original_file_name).OriginalName;
159+
string spectrumrecords = original_name;
160+
// Check if the input file is spectrum records of google protocol buffer
161+
pb::Header header;
162+
HeadedRecordReader reader(original_name, &header);
163+
if (header.file_type() != pb::Header::SPECTRA) {
164+
// converting to spectrumrecords file
165+
166+
carp(CARP_INFO, "Converting %s to spectrumrecords format", original_name.c_str());
167+
carp(CARP_DEBUG, "Elapsed time starting conversion: %.3g s", wall_clock() / 1e6);
168+
169+
spectrumrecords = Params::GetString("store-spectra");
170+
keepSpectrumrecords = !spectrumrecords.empty();
171+
if (!keepSpectrumrecords) {
172+
spectrumrecords = make_file_path(FileUtils::BaseName( original_name) + ".spectrumrecords", output_folder_);
173+
} else if (inputFiles_.size() > 1) {
174+
carp(CARP_FATAL, "Cannot use store-spectra option with multiple input "
175+
"spectrum files");
176+
}
177+
carp(CARP_DEBUG, "New spectrumrecords filename: %s", spectrumrecords.c_str());
178+
int spectra_num = 0;
179+
if (!SpectrumRecordWriter::convert(original_name, spectrumrecords, spectra_num)) {
180+
carp(CARP_FATAL, "Error converting %s to spectrumrecords format", original_name.c_str());
181+
}
182+
locks_array_[LOCK_SPECTRUM_READING]->lock();
183+
total_spectra_num_ += spectra_num;
184+
locks_array_[LOCK_SPECTRUM_READING]->unlock();
185+
186+
}
187+
(*original_file_name).SpectrumRecords = spectrumrecords;
188+
(*original_file_name).Keep = keepSpectrumrecords;
189+
carp(CARP_DEBUG, "Finish converting");
190+
}
191+
}
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#ifndef TIDECONVERTAPPLICATION_H
2+
#define TIDECONVERTAPPLICATION_H
3+
4+
#include "CruxApplication.h"
5+
#include "TideMatchSet.h"
6+
7+
#include <iostream>
8+
#include <fstream>
9+
#include <iomanip>
10+
#include <gflags/gflags.h>
11+
#include "peptides.pb.h"
12+
#include "spectrum.pb.h"
13+
#include "tide/theoretical_peak_set.h"
14+
#include "tide/max_mz.h"
15+
#include "util/MathUtil.h"
16+
#include "tide/ActivePeptideQueue.h"
17+
#include "TideIndexApplication.h"
18+
#include "TideMatchSet.h"
19+
20+
using namespace std;
21+
22+
23+
24+
class SpectrumConvertApplication : public CruxApplication {
25+
private:
26+
struct InputFile {
27+
std::string OriginalName;
28+
std::string SpectrumRecords;
29+
bool Keep;
30+
InputFile(const std::string& name,
31+
const std::string& spectrumrecords,
32+
bool keep):
33+
OriginalName(name), SpectrumRecords(spectrumrecords), Keep(keep) {}
34+
};
35+
protected:
36+
37+
int num_threads_;
38+
std::string output_folder_ = "crux-output";
39+
40+
map<pair<string, unsigned int>, bool>* spectrum_flag_;
41+
42+
int total_spectra_num_;
43+
44+
vector<boost::mutex *> locks_array_;
45+
46+
//vector<SpectrumConvertApplication::InputFile> getInputFiles(const vector<string>& filepaths) const;
47+
void getInputFiles(int thread_id);
48+
49+
vector<InputFile> inputFiles_;
50+
51+
// sprectrum search executed in parallel threads
52+
53+
// comparition of Spectrum data, based on neutral mass
54+
struct compare_spectrum{
55+
bool operator()(pair<pb::Spectrum, int> &spec_1, pair<pb::Spectrum, int> &spec_2){
56+
57+
return spec_1.first.neutral_mass() > spec_2.first.neutral_mass();
58+
}
59+
};
60+
61+
public:
62+
63+
/**
64+
* Constructor
65+
*/
66+
SpectrumConvertApplication();
67+
68+
/**
69+
* Destructor
70+
*/
71+
~SpectrumConvertApplication();
72+
73+
/**
74+
* Main methods
75+
*/
76+
virtual int main(int argc, char** argv);
77+
78+
int main(const vector<string>& input_files);
79+
80+
/**
81+
* Returns the command name
82+
*/
83+
virtual string getName() const;
84+
85+
/**
86+
* Returns the command description
87+
*/
88+
virtual string getDescription() const;
89+
90+
/**
91+
* Returns the command arguments
92+
*/
93+
virtual vector<string> getArgs() const;
94+
95+
/**
96+
* Returns the command options
97+
*/
98+
virtual vector<string> getOptions() const;
99+
100+
/**
101+
* Returns the command outputs
102+
*/
103+
virtual vector< pair<string, string> > getOutputs() const;
104+
105+
/**
106+
* Returns whether the application needs the output directory or not. (default false)
107+
*/
108+
virtual bool needsOutputDirectory() const;
109+
110+
/**
111+
* Returns the command ID
112+
*/
113+
virtual COMMAND_T getCommand() const;
114+
115+
/**
116+
* Processes the output file names
117+
*/
118+
string getOutputFileName();
119+
120+
/**
121+
* Processes the parameters
122+
*/
123+
virtual void processParams();
124+
125+
};
126+
127+
#endif
128+
129+
/*
130+
* Local Variables:
131+
* mode: c
132+
* c-basic-offset: 2
133+
* End:
134+
*/

src/crux-main.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
#include "app/CascadeSearchApplication.h"
3838
#include "app/AssignConfidenceApplication.h"
3939
#include "app/SubtractIndexApplication.h"
40-
40+
#include "app/SpectrumConvertApplication.h"
4141
#include "app/DIAmeterApplication.h"
4242
#include "app/KojakApplication.h"
4343

@@ -65,6 +65,7 @@ int main(int argc, char** argv) {
6565
applications.add(new CruxBullseyeApplication());
6666
applications.add(new TideIndexApplication());
6767
applications.add(new TideSearchApplication());
68+
applications.add(new SpectrumConvertApplication());
6869
applications.add(new ReadSpectrumRecordsApplication());
6970
applications.add(new ReadTideIndex());
7071
applications.add(new CometApplication());

0 commit comments

Comments
 (0)