@@ -83,6 +83,10 @@ class doc_queries : ri::r_index<sparse_bv_type, rle_string_t>
8383 size_t log_n = bitsize (uint64_t (this ->bwt .size ()));
8484
8585 FORCE_LOG (" build_profiles" , " bwt statistics: n = %ld, r = %ld\n " , this ->bwt .size (), this ->r );
86+
87+
88+ // for (size_t i = 0; i < n; i++)
89+ // std::cout << "i = " << i << " bwt[i] = " << this->bwt[i] << std::endl;
8690
8791 // Determine the number of documents and verify that the file
8892 // sizes are correct.
@@ -230,27 +234,21 @@ class doc_queries : ri::r_index<sparse_bv_type, rle_string_t>
230234 found_one = true ;
231235 }
232236 }
237+
233238 if (found_one)
234239 output_str = " {" + output_str.substr (2 ) + " } " ;
235240 else {
236241 output_str = " {} " ;
237- // std::cout << "length = " << length << std::endl;
238- // for (size_t i = 0; i < profile.size();i++)
239- // std::cout << profile[i] << " ";
240- // std::cout << "\n";
241- // std::exit(1);
242- }
243- // for (size_t i = 0; i < profile.size();i++)
244- // std::cout << profile[i] << " ";
245- // std::cout << "\n";
242+ }
246243 listings_fd << output_str;
247244 };
248245
249246 // Process each read, and print out the document lists
250247 while (kseq_read (seq)>=0 ) {
251248
252249 // Uppercase every character in read
253- for (size_t i = 0 ; i < seq->seq .l ; ++i) {
250+ for (size_t i = 0 ; i < seq->seq .l ; ++i)
251+ {
254252 seq->seq .s [i] = static_cast <char >(std::toupper (seq->seq .s [i]));
255253 }
256254
@@ -267,8 +265,6 @@ class doc_queries : ri::r_index<sparse_bv_type, rle_string_t>
267265 // Tell us what type of profile to grab based on pointer variables
268266 bool use_start = false , use_end = false ;
269267
270- // std::cout << "\n";
271-
272268 // Perform backward search and report document listings when
273269 // range goes empty or we reach the end
274270 for (int i = (seq->seq .l -1 ); i >= 0 ; i--) {
@@ -292,10 +288,9 @@ class doc_queries : ri::r_index<sparse_bv_type, rle_string_t>
292288 curr_profile = end_doc_profiles[curr_prof_ch][curr_prof_pos];
293289 else
294290 curr_profile = start_doc_profiles[curr_prof_ch][curr_prof_pos];
295- std::for_each (curr_profile.begin (), curr_profile.end (), [&](uint16_t &x){x+= num_LF_steps;});
291+ std::for_each (curr_profile.begin (), curr_profile.end (), [&](uint16_t &x){x = std::min (( size_t ) MAXLCPVALUE, x+ num_LF_steps) ;});
296292
297293 listings_fd << " [" << (i+1 ) << " ," << end_pos_of_match << " ] " ;
298- // std::cout << "[" << (i+1) << "," << end_pos_of_match << "] " << std::endl;
299294
300295 length = std::min ((size_t ) MAXLCPVALUE, (end_pos_of_match-i));
301296 process_profile (curr_profile, length);
@@ -313,6 +308,7 @@ class doc_queries : ri::r_index<sparse_bv_type, rle_string_t>
313308 num_LF_steps = 0 ;
314309 use_start = false ; use_end = false ;
315310
311+ // DEBUG
316312 // std::cout << "case 1: next_ch = " << next_ch << std::endl;
317313
318314 // If the start position run is the same as query
@@ -322,6 +318,20 @@ class doc_queries : ri::r_index<sparse_bv_type, rle_string_t>
322318 use_end = true ;
323319 else
324320 use_start = true ;
321+
322+
323+ // DEBUG:
324+ // if (use_end) {
325+ // for (auto x: end_doc_profiles[curr_prof_ch][curr_prof_pos])
326+ // std::cout << x << " ";
327+ // std::cout << "\n";
328+ // }
329+ // else {
330+ // for (auto x: start_doc_profiles[curr_prof_ch][curr_prof_pos])
331+ // std::cout << x << " ";
332+ // std::cout << "\n";
333+ // }
334+
325335 }
326336 // range is within BWT run, but wrong character
327337 else if (this ->bwt [start] != next_ch)
@@ -331,10 +341,9 @@ class doc_queries : ri::r_index<sparse_bv_type, rle_string_t>
331341 curr_profile = end_doc_profiles[curr_prof_ch][curr_prof_pos];
332342 else
333343 curr_profile = start_doc_profiles[curr_prof_ch][curr_prof_pos];
334- std::for_each (curr_profile.begin (), curr_profile.end (), [&](uint16_t &x){x+= num_LF_steps;});
344+ std::for_each (curr_profile.begin (), curr_profile.end (), [&](uint16_t &x){x = std::min (( size_t ) MAXLCPVALUE, x+ num_LF_steps) ;});
335345
336346 listings_fd << " [" << (i+1 ) << " ," << end_pos_of_match << " ] " ;
337- // std::cout << "[" << (i+1) << "," << end_pos_of_match << "] " << std::endl;
338347
339348 length = std::min ((size_t ) MAXLCPVALUE, (end_pos_of_match-i));
340349 process_profile (curr_profile, length);
@@ -365,9 +374,7 @@ class doc_queries : ri::r_index<sparse_bv_type, rle_string_t>
365374 else
366375 {
367376 num_LF_steps++;
368- // std::cout << "case 3" << std::endl;
369- // std::transform(curr_profile.begin(), curr_profile.end(), curr_profile.begin(),
370- // [](size_t x) { return (++x); });
377+ // std::cout << "case 3" << std::endl;
371378 }
372379
373380 // Perform an LF step
@@ -379,12 +386,35 @@ class doc_queries : ri::r_index<sparse_bv_type, rle_string_t>
379386 curr_profile = end_doc_profiles[curr_prof_ch][curr_prof_pos];
380387 else
381388 curr_profile = start_doc_profiles[curr_prof_ch][curr_prof_pos];
382- std::for_each (curr_profile.begin (), curr_profile.end (), [&](uint16_t &x){x+=num_LF_steps;});
383389
384- listings_fd << " [" << 0 << " ," << end_pos_of_match << " ] " ;
385- // std::cout << "[" << 0 << "," << end_pos_of_match << "] " << std::endl;
390+ // DEBUG:
391+ // if (use_end) {
392+ // for (auto x: curr_profile)
393+ // std::cout << x << " ";
394+ // std::cout << "\n";
395+ // }
396+ // else {
397+ // for (auto x: curr_profile)
398+ // std::cout << x << " ";
399+ // std::cout << "\n";
400+ // }
401+
402+ // Update profile based on LF steps
403+ std::for_each (curr_profile.begin (), curr_profile.end (), [&](uint16_t &x){x = std::min ((size_t ) MAXLCPVALUE, x+num_LF_steps);});
404+
405+ // DEBUG:
406+ // if (use_end) {
407+ // for (auto x: curr_profile)
408+ // std::cout << x << " ";
409+ // std::cout << "\n";
410+ // }
411+ // else {
412+ // for (auto x: curr_profile)
413+ // std::cout << x << " ";
414+ // std::cout << "\n";
415+ // }
386416
387- // std::cout << num_LF_steps << std::endl ;
417+ listings_fd << " [ " << 0 << " , " << end_pos_of_match << " ] " ;
388418 length = std::min ((size_t ) MAXLCPVALUE, end_pos_of_match+1 );
389419 process_profile (curr_profile, length);
390420 listings_fd << " \n " ;
0 commit comments