Skip to content

Commit 7426ae2

Browse files
author
sena
committed
Reduce the number of reads sampled for k-mer stats to 1 in 50, as FastQC does
1 parent e115b55 commit 7426ae2

File tree

3 files changed

+8
-2
lines changed

3 files changed

+8
-2
lines changed

src/Module.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1999,7 +1999,7 @@ ModuleKmerContent::summarize_module(FastqStats &stats) {
19991999
stats.kmer_count[(i << Constants::bit_shift_kmer) | kmer];
20002000

20012001
expected_count = pos_kmer_count[i] / dividend;
2002-
obs_exp_ratio = observed_count / expected_count;
2002+
obs_exp_ratio = (expected_count > 0) ? (observed_count / expected_count) : 0;
20032003

20042004
if (i == 0 || obs_exp_ratio > obs_exp_max[kmer]) {
20052005
obs_exp_max[kmer] = obs_exp_ratio;
@@ -2059,6 +2059,7 @@ ModuleKmerContent::make_html_data() {
20592059
size_t xlim = 0;
20602060
for (size_t i = 0; i < lim; ++i)
20612061
xlim = max(xlim, where_obs_exp_is_max[kmers_to_report[i].first]);
2062+
xlim += kmer_size;
20622063

20632064
for (size_t i = 0; i < lim; ++i) {
20642065
const size_t kmer = kmers_to_report[i].first;

src/StreamReader.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ StreamReader::StreamReader(FalcoConfig &config,
8787

8888
// keep track of which reads to do tile
8989
next_tile_read = 0;
90+
next_kmer_read = 0;
9091
do_tile_read = true;
9192

9293
// Subclasses will use this to deflate if necessary
@@ -256,7 +257,7 @@ StreamReader::process_sequence_base_from_buffer(FastqStats &stats) {
256257
cur_kmer = ((cur_kmer << Constants::bit_shift_base) | base_ind);
257258

258259
// registers k-mer if seen at least k nucleotides since the last n
259-
if (do_kmer && (num_bases_after_n >= Constants::kmer_size)) {
260+
if (do_kmer && do_kmer_read && (num_bases_after_n >= Constants::kmer_size)) {
260261

261262
stats.kmer_count[(read_pos << Constants::bit_shift_kmer)
262263
| (cur_kmer & Constants::kmer_mask)]++;
@@ -354,6 +355,7 @@ StreamReader::read_sequence_line(FastqStats &stats) {
354355
num_bases_after_n = 1;
355356
still_in_buffer = true;
356357
next_truncation = 100;
358+
do_kmer_read = (stats.num_reads == next_kmer_read);
357359

358360
/*********************************************************/
359361
/********** THIS LOOP MUST BE ALWAYS OPTIMIZED ***********/
@@ -537,6 +539,7 @@ StreamReader::postprocess_fastq_record(FastqStats &stats) {
537539
next_tile_read += num_reads_for_tile;
538540
}
539541
}
542+
next_kmer_read += do_kmer_read*num_reads_for_kmer;
540543
}
541544

542545
/*******************************************************/

src/StreamReader.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,14 @@ class StreamReader{
7272

7373
// keep track of reads for which to do kmer and tile count
7474
static const size_t num_reads_for_tile = 10;
75+
static const size_t num_reads_for_kmer = 50;
7576

7677
bool continue_storing_sequences;
7778
bool do_kmer_read;
7879
bool do_tile_read;
7980

8081
size_t next_tile_read;
82+
size_t next_kmer_read;
8183

8284
// Whether or not we have passed the buffer while reading and need to allocate
8385
// more space / use dynamically allocated space to process the base

0 commit comments

Comments
 (0)