Skip to content

Commit 6ee7bad

Browse files
committed
fix reversed pam (e.g., cpf1)
1 parent 2405b88 commit 6ee7bad

File tree

1 file changed

+73
-20
lines changed

1 file changed

+73
-20
lines changed

cas-offinder.cpp

Lines changed: 73 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,8 @@ void Cas_OFFinder::writeHeaders(const char* outfilename) {
329329

330330
void Cas_OFFinder::compareAll(const char* outfilename, bool issummary) {
331331
unsigned int i, j, dev_index;
332-
unsigned int bulge_size;
332+
unsigned int bulge_size, bulge_index;
333+
bool is_reversed_pam;
333334
cl_ushort threshold;
334335
string compare;
335336
string seq_rna;
@@ -399,22 +400,47 @@ void Cas_OFFinder::compareAll(const char* outfilename, bool issummary) {
399400
for (j = 0; ((j < m_chrpos.size()) && (loci >= m_chrpos[j])); j++) idx = j;
400401
for (auto &bi: ci.second.second) {
401402
seq_dna = string(strbuf);
403+
if (bi.second < 0) {
404+
is_reversed_pam = true;
405+
bulge_index = -bi.second;
406+
} else {
407+
is_reversed_pam = false;
408+
bulge_index = bi.second;
409+
}
402410
if (isnumeric(bi.first)) {
403411
// dna bulge or none
404412
bulge_size = (unsigned int)stoi(bi.first);
405-
offset = m_dnabulgesize - bulge_size;
406-
seq_rna = compare.substr(offset, bi.second) + string(bulge_size, '-') + compare.substr(offset + bi.second + bulge_size);
407-
seq_dna = seq_dna.substr(offset);
408-
if (bulge_size == 0)
413+
if (is_reversed_pam) {
414+
offset = 0;
415+
seq_rna = compare.substr(0, bulge_index) + string(bulge_size, '-') + compare.substr(bulge_index + bulge_size);
416+
} else {
417+
offset = m_dnabulgesize - bulge_size;
418+
seq_rna = compare.substr(offset, bulge_index) + string(bulge_size, '-') + compare.substr(offset + bulge_index + bulge_size);
419+
seq_dna = seq_dna.substr(offset);
420+
}
421+
if (bulge_size == 0) {
409422
bulge_type = "X";
410-
else
423+
if (is_reversed_pam) {
424+
seq_rna = seq_rna.substr(0, seq_rna.size() - m_dnabulgesize);
425+
seq_dna = seq_dna.substr(0, seq_dna.size() - m_dnabulgesize);
426+
}
427+
} else {
411428
bulge_type = "DNA";
429+
}
412430
} else {
413431
// rna bulge
414432
bulge_size = (unsigned int)bi.first.size();
415-
offset = m_dnabulgesize + bulge_size;
416-
seq_rna = compare.substr(offset, bi.second) + bi.first + compare.substr(bi.second + bulge_size);
417-
seq_dna = seq_dna.substr(offset, bi.second) + string(bulge_size, '-') + seq_dna.substr(bi.second + bulge_size);
433+
if (is_reversed_pam) {
434+
offset = 0;
435+
seq_rna = compare.substr(0, bulge_index) + bi.first + compare.substr(bulge_index);
436+
seq_dna = seq_dna.substr(0, bulge_index) + string(bulge_size, '-') + seq_dna.substr(bulge_index);
437+
seq_rna = seq_rna.substr(0, seq_rna.size() - m_dnabulgesize - bulge_size);
438+
seq_dna = seq_dna.substr(0, seq_dna.size() - m_dnabulgesize - bulge_size);
439+
} else {
440+
offset = m_dnabulgesize + bulge_size;
441+
seq_rna = compare.substr(offset, bulge_index) + bi.first + compare.substr(offset + bulge_index);
442+
seq_dna = seq_dna.substr(offset, bulge_index) + string(bulge_size, '-') + seq_dna.substr(offset + bulge_index);
443+
}
418444
bulge_type = "RNA";
419445
}
420446
(*fo) << id << "\t" << bulge_type << "\t" << seq_rna << "\t" << seq_dna << "\t" << m_chrnames[idx] << "\t" << loci - m_chrpos[idx] + offset << "\t" << m_directions[dev_index][i] << "\t" << m_mmcounts[dev_index][i] << "\t" << bulge_size << endl;
@@ -566,6 +592,8 @@ void Cas_OFFinder::parseInput(istream& input) {
566592
m_compare_t::iterator it; // used in `add_compare`
567593
compareinfo ci;
568594
bulgeinfo bi;
595+
bool is_reversed_pam = false;
596+
string compare, tmp;
569597

570598
try {
571599
if (!input.good())
@@ -584,7 +612,12 @@ void Cas_OFFinder::parseInput(istream& input) {
584612
m_dnabulgesize = atoi(sline[1].c_str());
585613
m_rnabulgesize = atoi(sline[2].c_str());
586614
}
587-
m_pattern = string(m_dnabulgesize, 'N') + sline[0];
615+
if (sline[0][0] != 'N') { // As of today (2021.07.29), there is no reversed PAM starts with 'N'
616+
is_reversed_pam = true;
617+
m_pattern = sline[0] + string(m_dnabulgesize, 'N');
618+
} else {
619+
m_pattern = string(m_dnabulgesize, 'N') + sline[0];
620+
}
588621
transform(m_pattern.begin(), m_pattern.end(), m_pattern.begin(), ::toupper);
589622

590623
size_t linecnt = 0;
@@ -607,20 +640,40 @@ void Cas_OFFinder::parseInput(istream& input) {
607640
else
608641
id = to_string(linecnt);
609642
ci = make_pair(id, threshold);
610-
bi = make_pair("0", 0);
611-
add_compare(string(m_dnabulgesize, 'N') + sline[0], ci, bi);
612-
for (i = 1; i <= m_dnabulgesize; i++) {
613-
preNcnt = m_dnabulgesize - i;
614-
for (j = 1; j < sline[0].find_last_not_of('N') + 1; j++) {
615-
bi = make_pair(to_string(i), j);
616-
add_compare(string(preNcnt, 'N') + sline[0].substr(0, j) + string(i, 'N') + sline[0].substr(j), ci, bi);
643+
if (m_dnabulgesize == 0) {
644+
bi = make_pair("0", 0);
645+
add_compare(sline[0], ci, bi);
646+
} else {
647+
if (is_reversed_pam)
648+
tmp = sline[0] + string(m_dnabulgesize, 'N');
649+
else
650+
tmp = string(m_dnabulgesize, 'N') + sline[0];
651+
for (i = 1; i <= m_dnabulgesize; i++) {
652+
preNcnt = m_dnabulgesize - i;
653+
for (j = sline[0].find_first_not_of('N'); j < sline[0].find_last_not_of('N') + 2; j++) {
654+
if (is_reversed_pam) {
655+
compare = sline[0].substr(0, j) + string(i, 'N') + sline[0].substr(j) + string(preNcnt, 'N');
656+
} else {
657+
compare = string(preNcnt, 'N') + sline[0].substr(0, j) + string(i, 'N') + sline[0].substr(j);
658+
}
659+
if (compare == tmp)
660+
bi = make_pair("0", (is_reversed_pam?-j:j));
661+
else
662+
bi = make_pair(to_string(i), (is_reversed_pam?-j:j));
663+
add_compare(compare, ci, bi);
664+
}
617665
}
618666
}
619667
for (i = 1; i <= m_rnabulgesize; i++) {
620668
preNcnt = m_dnabulgesize + i;
621-
for (j = 1; j < sline[0].find_last_not_of('N') + 1 - i; j++) {
622-
bi = make_pair(sline[0].substr(j, i), j);
623-
add_compare(string(preNcnt, 'N') + sline[0].substr(0, j) + sline[0].substr(j + i), ci, bi);
669+
for (j = sline[0].find_first_not_of('N'); j < sline[0].find_last_not_of('N') + 1 - i; j++) {
670+
bi = make_pair(sline[0].substr(j, i), (is_reversed_pam?-j:j));
671+
if (is_reversed_pam) {
672+
compare = sline[0].substr(0, j) + sline[0].substr(j + i) + string(preNcnt, 'N');
673+
} else {
674+
compare = string(preNcnt, 'N') + sline[0].substr(0, j) + sline[0].substr(j + i);
675+
}
676+
add_compare(compare, ci, bi);
624677
}
625678
}
626679
if (entrycnt == 0) {

0 commit comments

Comments
 (0)