-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscan.h
More file actions
149 lines (125 loc) · 3.74 KB
/
scan.h
File metadata and controls
149 lines (125 loc) · 3.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#pragma once
#include <string>
#include <vector>
#include <unordered_map>
#include <fstream>
/**
 * One spectrum ("scan") together with its identification results.
 *
 * go_through_mgf() in this header assigns id, mass, charge, peaks and, for
 * Thermo_Xtract input, mass_2. The remaining fields are not written anywhere
 * in this header; presumably parse_tsv_line() fills them from a search-result
 * table -- confirm against its definition.
 *
 * Every scalar member carries a default initializer so that a
 * default-constructed Scan (for example the working record inside
 * go_through_mgf) never holds indeterminate values.
 */
struct Scan {
    int id = 0;                     // scan identifier; used as the key of the scan maps in this header
    double mass = 0.0;              // PEPMASS value minus DA when read from an MGF file
    int isotope_error = 0;
    double precursor_error = 0.0;
    int charge = 0;                 // leading digits of the CHARGE= line when read from an MGF file
    std::string peptide;
    std::string protein;
    int de_novo_scope = 0;          // NOTE(review): name suggests a de novo *score* -- confirm
    int msgf_scope = 0;             // NOTE(review): name suggests an MS-GF *score* -- confirm
    double spec_e_value = 0.0;
    double e_value = 0.0;
    std::vector < std::pair < double, double > > peaks; // one pair per peak line: (first number, second number)
    double mass_2 = 0.0;            // second number of the PEPMASS line (Thermo_Xtract input only)

    // Assignment is defined outside this header. It takes a non-const
    // reference and returns void, so the right-hand side must be a
    // modifiable lvalue and assignments cannot be chained.
    void operator= (Scan &other);
};
// Selects the MGF dialect handled by go_through_mgf(): it decides which
// helper parses the TITLE= line and whether a second PEPMASS value is read.
enum deconv_program {
    MS_Deconv,      // TITLE= line is parsed with msalign_id()
    Thermo_Xtract   // TITLE= line is parsed with thermo_xtract_id(); PEPMASS may carry a second value
};
// --- MGF markers: go_through_mgf() compares whole lines / line prefixes against these ---
const std::string MGF_SCAN_BEGINNING("BEGIN IONS");   // line that opens a scan
const std::string MGF_SCAN_ENDING("END IONS");        // line that closes a scan
const std::string ID_PREF("TITLE=");                  // prefix of the line carrying the scan id
const std::string PEPMASS_PREF("PEPMASS=");           // prefix of the line carrying the precursor mass
const std::string CHARGE_PREF("CHARGE=");             // prefix of the line carrying the charge

// Subtracted from the PEPMASS value in go_through_mgf().
// NOTE(review): the value matches the proton mass in daltons -- confirm intent.
const double DA(1.007276);

// Not referenced in this header; presumably an upper bound on peptide length used elsewhere.
const int MAX_PEPTIDE_LENGTH(70);

// --- File-name suffixes (not referenced in this header) ---
const std::string TSV_SUF(".tsv");
const std::string XTRACT_SUF("_xtract.mgf");
const std::string MSDECONV_SUF("_msdeconv.mgf");
// Helper functions declared here and defined outside this header.

// Called by go_through_mgf() on every "BEGIN IONS" line, before the fields of
// the next scan are read into `scan`.
// Presumably returns `scan` to an empty state -- confirm against the definition.
void reset_scan(Scan &scan);
// Builds a Scan from one line of a TSV file. go_through_tsv() calls it for
// every line after the first one. Takes the line by non-const reference.
Scan parse_tsv_line(std::string &line);
// Receives a whole "TITLE=..." line; go_through_mgf() stores the result in
// Scan::id when the format is Thermo_Xtract.
int thermo_xtract_id(std::string);
// Receives a whole "TITLE=..." line; go_through_mgf() stores the result in
// Scan::id when the format is MS_Deconv.
int msalign_id(std::string);
/**
 * Reads an MGF file line by line and hands every completed scan to `action`.
 *
 * Line handling:
 *   - "BEGIN IONS"      -> reset_scan() is applied to the working record;
 *   - "END IONS"        -> action(record) is invoked;
 *   - "TITLE=..."       -> id, parsed with msalign_id() for MS_Deconv and
 *                          thermo_xtract_id() otherwise;
 *   - "PEPMASS=..."     -> mass = first number minus DA; for Thermo_Xtract the
 *                          number after the first space, if any, goes to mass_2;
 *   - "CHARGE=..."      -> charge = the run of digits right after the prefix;
 *   - line starting with a digit -> one peak: two numbers separated by a
 *                          space or a tab.
 * All other lines are ignored.
 *
 * @param format   which dialect the file uses (see deconv_program).
 * @param filename path of the MGF file. If it cannot be opened no scan is
 *                 reported and the function returns immediately.
 * @param action   callable invoked as action(Scan&) once per "END IONS" line.
 *                 The same Scan object is reused for every scan, so the
 *                 callable must copy whatever it wants to keep.
 *
 * @throws std::invalid_argument / std::out_of_range from std::stod, std::stoi
 *         or std::string::substr when a numeric field is malformed (for
 *         example a peak line without a separator).
 */
template < class Func >
void go_through_mgf(deconv_program format, std::string filename, Func &action) {
    const std::string DIGITS = "1234567890";
    std::ifstream file(filename);
    Scan cur_scan;
    std::string line;
    // Let getline itself drive the loop. Testing eof() before reading never
    // terminates when the stream failed to open (failbit is set, eofbit is
    // not) and also processes one phantom empty line after the last real one.
    while (std::getline(file, line)) {
        if (line == MGF_SCAN_BEGINNING) {
            reset_scan(cur_scan);
        }
        else if (line == MGF_SCAN_ENDING) {
            action(cur_scan);
        }
        else if (line.compare(0, ID_PREF.size(), ID_PREF) == 0) {
            cur_scan.id = (format == MS_Deconv) ? msalign_id(line)
                                                : thermo_xtract_id(line);
        }
        else if (line.compare(0, PEPMASS_PREF.size(), PEPMASS_PREF) == 0) {
            // std::stod stops at the first character that is not part of the
            // number, so a trailing second value does not disturb the first.
            cur_scan.mass = std::stod(line.substr(PEPMASS_PREF.size())) - DA;
            if (format == Thermo_Xtract) {
                // Keep the position as size_type: storing it in an int
                // narrows the value and makes the npos test depend on
                // implementation-defined conversions.
                const std::string::size_type space_pos = line.find(' ');
                if (space_pos != std::string::npos) {
                    // DA is not subtracted from the second value.
                    cur_scan.mass_2 = std::stod(line.substr(space_pos));
                }
            }
        }
        else if (line.compare(0, CHARGE_PREF.size(), CHARGE_PREF) == 0) {
            const std::string::size_type charge_beginning = CHARGE_PREF.size();
            const std::string::size_type charge_end =
                line.find_first_not_of(DIGITS, charge_beginning);
            // When only digits follow the prefix take the rest of the line
            // explicitly instead of relying on npos arithmetic wrapping.
            const std::string::size_type charge_len =
                (charge_end == std::string::npos) ? std::string::npos
                                                  : charge_end - charge_beginning;
            cur_scan.charge = std::stoi(line.substr(charge_beginning, charge_len));
        }
        else if (!line.empty() && line[0] >= '0' && line[0] <= '9') {
            // Peak line. Accept a tab as well as a space between the two
            // numbers; previously a tab-separated peak raised out_of_range.
            const std::string::size_type sep_pos = line.find_first_of(" \t");
            std::pair < double, double > peak;
            peak.first = std::stod(line.substr(0, sep_pos));
            // With no separator sep_pos is npos and substr throws
            // std::out_of_range, so a malformed peak is still reported.
            peak.second = std::stod(line.substr(sep_pos));
            cur_scan.peaks.push_back(peak);
        }
    }
    // `file` is closed by its destructor.
}
template < class Func >
void go_through_tsv(std::string filename, Func &f) {
std::ifstream file(filename);
std::string line;
getline(file, line);
while (getline(file, line)) {
Scan cur_scan = parse_tsv_line(line);
f(cur_scan);
}
file.close();
}
// Function object with the action(Scan&) shape that go_through_mgf() and
// go_through_tsv() expect. It owns a map from int to Scan; the member
// functions are defined outside this header.
// Presumably every scan passed in is stored under its id -- confirm against
// the definition.
class ScansMapCreator {
private:
// Scans held by this object, keyed by int (presumably Scan::id).
std::unordered_map < int, Scan > scans_map;
public:
// Receives one scan; takes it by non-const reference.
void operator() (Scan &scan);
// Returns the map by value, i.e. the caller gets a copy.
std::unordered_map < int, Scan > get_map();
};
// Predicate over scans: operator() returns bool, so it can serve as the
// checker of ScansCollector. The member functions are defined outside this
// header.
// Presumably it compares a scan's e-value with E_VALUE_BORDER and consults
// theoretic_map -- confirm against the definition.
class EValueTester {
private:
// Threshold fixed at construction (const member).
const double E_VALUE_BORDER;
// Reference member: the tester does not own this map, so the referenced map
// must outlive the tester.
std::unordered_map < int, Scan > &theoretic_map;
public:
// Presumably binds theoretic_map to new_map and stores new_e_value_border
// in E_VALUE_BORDER -- confirm against the definition.
EValueTester(std::unordered_map < int, Scan > &new_map, double new_e_value_border);
// Returns the verdict for one scan; takes it by non-const reference.
bool operator()(Scan &scan);
};
/**
 * Function object that stores every scan accepted by a predicate.
 *
 * It has the action(Scan&) shape expected by go_through_mgf() and
 * go_through_tsv(). Accepted scans are written into a map supplied by the
 * caller; the collector holds only a reference to that map, so the map must
 * outlive the collector.
 *
 * @tparam Func predicate type, callable as bool-convertible check(Scan&).
 */
template < class Func >
class ScansCollector {
private:
    std::unordered_map < int, Scan > &good_scans; // destination map, owned by the caller
    Func check_scan;                              // copy of the predicate
public:
    /**
     * @param new_map     map that receives the accepted scans.
     * @param new_checker predicate; copied into the collector.
     */
    // The initializers are listed in declaration order. Members are always
    // initialized in that order regardless of how the list is written, so
    // listing them differently only misleads the reader and triggers
    // -Wreorder.
    ScansCollector(std::unordered_map < int, Scan > &new_map, Func new_checker) :
        good_scans(new_map), check_scan(new_checker) {}

    // Stores `scan` under scan.id when the predicate accepts it. An entry
    // already present under the same id is overwritten.
    void operator() (Scan &scan) {
        if (check_scan(scan)) {
            good_scans[scan.id] = scan;
        }
    }

    // Gives access to the caller-supplied destination map.
    std::unordered_map < int, Scan >& get_map() {
        return good_scans;
    }
};