Skip to content

Commit 36ac79b

Browse files
committed
use native vector to keep pair counts internally; safer approach to populate pair counts vector
1 parent d575177 commit 36ac79b

File tree

1 file changed

+30
-18
lines changed

1 file changed

+30
-18
lines changed

src/read_mismatchbam_cpp.cpp

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ int process_mismatch_bam_record_pair(
4040
unsigned long long &n_total, // total number of modified bases
4141
// vectors for return values (per modification)
4242
// ... mode == MISMATCHBAM_MODE_STATE
43-
Rcpp::NumericMatrix &pair_counts) {
43+
double *pair_counts_ptr,
44+
int windowSize) {
4445

4546
// allocate variable only used inside process_mismatch_bam_record_pair()
4647
unsigned int i = 0, j = 0, k = 0, mod_pos = 0;
@@ -162,13 +163,15 @@ int process_mismatch_bam_record_pair(
162163
}
163164

164165
// process modposref and modstate to update counter in pair_counts
165-
int maxdist = pair_counts.nrow() - 1, currdist = 0;
166+
int maxdist = windowSize - 1, currdist = 0;
166167
for (i = 0; i < modposref.size(); i++) {
167168
for (j = i; j < modposref.size(); j++) {
168-
currdist = modposref[j] - modposref[i];
169-
if (currdist > maxdist)
170-
break;
171-
pair_counts(currdist, 2 * modstate[i] + modstate[j])++;
169+
// use abs() to protect against situation where modposref
170+
// is not sorted
171+
currdist = std::abs(modposref[j] - modposref[i]);
172+
if (currdist <= maxdist) {
173+
pair_counts_ptr[((2 * modstate[i] + modstate[j]) * windowSize) + currdist]++;
174+
}
172175
}
173176
}
174177

@@ -207,7 +210,8 @@ int process_mismatch_bam_record(
207210
std::vector<int> &ref_position,
208211
std::vector<double> &mod_prob,
209212
// ... mode == MISMATCHBAM_MODE_STATE
210-
Rcpp::NumericMatrix &pair_counts,
213+
double *pair_counts_ptr,
214+
int windowSize,
211215
// vectors for return values (per alignment)
212216
std::vector<std::string> &df_read_id,
213217
std::vector<double> &df_qscore,
@@ -335,13 +339,13 @@ int process_mismatch_bam_record(
335339

336340
} else if (mode == MISMATCHBAM_MODE_STATE) {
337341
// process modposref and modstate to update counter in pair_counts
338-
int maxdist = pair_counts.nrow() - 1, currdist = 0;
342+
int maxdist = windowSize - 1, currdist = 0;
339343
for (i = 0; i < modposref.size(); i++) {
340344
for (j = i; j < modposref.size(); j++) {
341-
currdist = modposref[j] - modposref[i];
342-
if (currdist > maxdist)
343-
break;
344-
pair_counts(currdist, 2 * modstate[i] + modstate[j])++;
345+
currdist = std::abs(modposref[j] - modposref[i]);
346+
if (currdist <= maxdist) {
347+
pair_counts_ptr[((2 * modstate[i] + modstate[j]) * windowSize) + currdist]++;
348+
}
345349
}
346350
}
347351
}
@@ -523,8 +527,11 @@ Rcpp::List read_mismatchbam_cpp(std::string inname_str,
523527
Rcpp::CharacterVector df_variant_label;
524528
Rcpp::CharacterVector df_ref_strand;
525529

526-
// ... return value for mode 3
530+
// ... return value for mode 3, and native vector to hold
531+
// counts while processing
527532
Rcpp::NumericMatrix pair_counts;
533+
std::vector<double> native_counts(windowSize * 4, 0.0);
534+
528535
// random number generation
529536
// first generate a single random number from R
530537
// to link the C++ RNG to the R session's current seed state
@@ -569,7 +576,6 @@ Rcpp::List read_mismatchbam_cpp(std::string inname_str,
569576
// start reading according to analysis mode
570577
if (windowSize > 0) {
571578
// Modes 3 or 4 (state-pair counting)
572-
pair_counts = Rcpp::NumericMatrix(windowSize, 4);
573579

574580
if (n_alns_to_sample > 0) {
575581
// Mode 4: random-sampling-based counting of pairs of bases by distance and modification state
@@ -639,7 +645,8 @@ Rcpp::List read_mismatchbam_cpp(std::string inname_str,
639645
chrom,
640646
ref_position,
641647
mod_prob,
642-
pair_counts,
648+
native_counts.data(),
649+
windowSize,
643650
// vectors for return values (per alignment)
644651
df_read_id,
645652
df_qscore,
@@ -691,7 +698,8 @@ Rcpp::List read_mismatchbam_cpp(std::string inname_str,
691698
n_unaligned, // number of unaligned positions
692699
n_total, // total number of positions
693700
// vectors for return values (per modification)
694-
pair_counts);
701+
native_counts.data(),
702+
windowSize);
695703
}
696704

697705
// remove now processed record from map
@@ -746,7 +754,8 @@ Rcpp::List read_mismatchbam_cpp(std::string inname_str,
746754
chrom,
747755
ref_position,
748756
mod_prob,
749-
pair_counts,
757+
native_counts.data(),
758+
windowSize,
750759
// vectors for return values (per alignment)
751760
df_read_id,
752761
df_qscore,
@@ -837,7 +846,8 @@ Rcpp::List read_mismatchbam_cpp(std::string inname_str,
837846
chrom,
838847
ref_position,
839848
mod_prob,
840-
pair_counts,
849+
native_counts.data(),
850+
windowSize,
841851
// vectors for return values (per alignment)
842852
df_read_id,
843853
df_qscore,
@@ -911,6 +921,8 @@ Rcpp::List read_mismatchbam_cpp(std::string inname_str,
911921
if (windowSize > 0) {
912922
// Mode 3 or 4
913923
// create return list
924+
pair_counts = Rcpp::NumericMatrix(windowSize, 4);
925+
std::copy(native_counts.begin(), native_counts.end(), pair_counts.begin());
914926
res = Rcpp::List::create(
915927
Rcpp::_["pair_counts"] = pair_counts
916928
);

0 commit comments

Comments
 (0)