1- // Copyright 2019 Nadia Davidson for Murdoch Childrens Research
2- // Institute Australia. This program is distributed under the GNU
1+ // Copyright 2019 Nadia Davidson. This program is distributed under the GNU
32// General Public License. We also ask that you cite this software in
43// publications where you made use of it for any part of the data
54// analysis.
@@ -35,6 +34,7 @@ void print_usage(){
3534 cerr << endl;
3635}
3736
37+ // struct for fusion information
3838struct fusion_candidate {
3939 string read ;
4040 int break_min ;
@@ -174,16 +174,13 @@ int main(int argc, char **argv){
174174 reads.erase ( unique ( reads.begin (), reads.end () ), reads.end () );
175175 // now loop over the reads and separate the read start and ends
176176 for (int r=0 ; r<reads.size (); r++){
177- /* *smatch m;
178- regex_search(reads.at(r),m,regex("(.*)/([12])$"));
179- if(m[2].str()=="1")**/
180177 // separate the read id and pair end
181178 string read_id=reads.at (r).substr (0 ,reads.at (r).size ()-1 );
182179 char read_end=reads.at (r).back ();
183180 if (read_end==' 1' ) // First of pair (assumes the read IDs ends with 1).
184- gene_reads[gt_itr->first ].first .push_back (read_id); // m[1].str());
181+ gene_reads[gt_itr->first ].first .push_back (read_id);
185182 else
186- gene_reads[gt_itr->first ].second .push_back (read_id); // m[1].str());
183+ gene_reads[gt_itr->first ].second .push_back (read_id);
187184 }
188185 }
189186 trans_read_map_fixed.clear ();
@@ -196,39 +193,20 @@ int main(int argc, char **argv){
196193 // loop over the fusion list
197194 map<pair<string,string >, int > spanning_reads;
198195 for (int f=0 ; f<fusion_list.size (); f++){
199- // cout << "Up to " << f << endl;
200196 // check from intersection of read ids.
201197 vector<string> g1_r1=gene_reads[fusion_list.at (f).first ].first ;
202198 vector<string> g1_r2=gene_reads[fusion_list.at (f).first ].second ;
203199 vector<string> g2_r1=gene_reads[fusion_list.at (f).second ].first ;
204200 vector<string> g2_r2=gene_reads[fusion_list.at (f).second ].second ;
205- // cout << fusion_list.at(f) << endl;
206201 int total=0 ;
207- // cout << g1_r1.size() << " " << g2_r2.size() << endl;
208202 unordered_set<string> temp_set1 (g1_r1.begin (),g1_r1.end ());
209203 temp_set1.insert (g2_r2.begin (), g2_r2.end ());
210- // cout << "Number=" << g1_r1.size() + g2_r2.size() - temp_set1.size() << endl;
211204 total+=g1_r1.size () + g2_r2.size () - temp_set1.size ();
212- /* * for(vector<string>::iterator i = g1_r1.begin(); i!=g1_r1.end(); ++i){
213- if (find(g2_r2.begin(), g2_r2.end(), *i) != g2_r2.end()){
214- //cout << *i << endl;
215- total++;
216- }
217- }**/
218- // cout <<"Total=" << total << endl;
219- // cout << g2_r1.size() << " " << g1_r2.size() << endl;
205+
220206 unordered_set<string> temp_set2 (g2_r1.begin (),g2_r1.end ());
221207 temp_set2.insert (g1_r2.begin (), g1_r2.end ());
222- // cout << "Number=" << g1_r2.size() + g2_r1.size() - temp_set2.size() << endl;
223208 total+=g1_r2.size () + g2_r1.size () - temp_set2.size ();
224- /* * for(vector<string>::iterator i = g2_r1.begin(); i!=g2_r1.end(); ++i){
225- if (find(g1_r2.begin(), g1_r2.end(), *i) != g1_r2.end()){
226- //cout << *i << endl;
227- total++;
228- }
229- }**/
230209 spanning_reads[fusion_list.at (f)]=total;
231- // cout << total << endl;
232210 }
233211 cerr << " Done calculating spanning pairs" << endl;
234212
0 commit comments