Skip to content

Commit f3efa4f

Browse files
committed
pre-v0.0.3
1 parent f09ba2e commit f3efa4f

File tree

4 files changed

+23
-78
lines changed

4 files changed

+23
-78
lines changed

README.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@
88
<!-- [![BioConda Install](https://img.shields.io/conda/dn/bioconda/longcallD.svg?style=flag&label=BioConda%20install)](https://anaconda.org/bioconda/longcallD) -->
99
<!-- [![Published in Bioinformatics](https://img.shields.io/badge/Published%20in-Bioinformatics-blue.svg)](https://dx.doi.org/10.1093/bioinformatics/btaa963) -->
1010
<!-- [![GitHub Issues](https://img.shields.io/github/issues/yangao07/longcallD.svg?label=Issues)](https://github.com/yangao07/longcallD/issues) -->
11-
## Updates (pre-release v0.0.2)
11+
## Updates (pre-release v0.0.3)
1212

13-
* Add -M/--min-mapq
14-
* Fix check_eqx_cigar
13+
* Fix a couple of corner cases
1514

1615
## Getting Started
1716
```sh

src/align.c

Lines changed: 17 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,7 @@ void wfa_trim_aln_str(int full_cover, aln_str_t *aln_str, int tlen, int qlen) {
357357
}
358358
if (target_end != -1 && query_end != -1) break;
359359
}
360+
if (query_end == -1) query_end = target_end;
360361
assert(query_end <= target_end);
361362
aln_str->aln_len = target_end+1;
362363
aln_str->target_beg = 0; aln_str->target_end = target_end;
@@ -379,6 +380,7 @@ void wfa_trim_aln_str(int full_cover, aln_str_t *aln_str, int tlen, int qlen) {
379380
}
380381
if (target_start != -1 && query_start != -1) break;
381382
}
383+
if (query_start == -1) query_start = target_start; // no = operation
382384
assert(query_start >= target_start);
383385
aln_str->aln_len = aln_str->aln_len - target_start;
384386
if (target_start != 0) {
@@ -630,73 +632,6 @@ int abpoa_partial_aln_msa_cons(const call_var_opt_t *opt, abpoa_t *ab, int wb, i
630632
return n_cons;
631633
}
632634

633-
// reads are sorted by full-cover and length before calling abPOA
634-
// use reads with full_cover == 3 as backbone for poa
635-
// skip reads with full_cover == 0
636-
int abpoa_partial_aln(const call_var_opt_t *opt, int n_reads, uint8_t **read_seqs, int *read_lens, int *read_full_cover, char **names, int max_n_cons, int *cons_lens, uint8_t **cons_seqs) {
637-
abpoa_t *ab = abpoa_init();
638-
abpoa_para_t *abpt = abpoa_init_para();
639-
// abpt->wb = -1;
640-
abpt->out_msa = 0;
641-
// abp->cons_algrm = ABPOA_MF;
642-
abpt->inc_path_score = 1;
643-
abpt->max_n_cons = max_n_cons;
644-
// msa
645-
abpt->out_cons = 1;
646-
abpt->match = opt->match; abpt->mismatch = opt->mismatch;
647-
abpt->gap_open1 = opt->gap_open1; abpt->gap_ext1 = opt->gap_ext1;
648-
abpt->gap_open2 = opt->gap_open2; abpt->gap_ext2 = opt->gap_ext2;
649-
if (LONGCALLD_VERBOSE >= 2) abpt->out_msa = 1;
650-
abpoa_post_set_para(abpt);
651-
ab->abs->n_seq = n_reads;
652-
for (int i = 0; i < n_reads; ++i) {
653-
if (LONGCALLD_VERBOSE >= 2) {
654-
fprintf(stderr, ">%s %d %d\n", names[i], read_lens[i], read_full_cover[i]);
655-
for (int j = 0; j < read_lens[i]; ++j) {
656-
fprintf(stderr, "%c", "ACGTN"[read_seqs[i][j]]);
657-
} fprintf(stderr, "\n");
658-
}
659-
abpoa_res_t res;
660-
res.graph_cigar = 0, res.n_cigar = 0;
661-
int exc_beg = 0, exc_end = 1, seq_beg_cut = 0, seq_end_cut = 0;
662-
if (i != 0) {
663-
int ref_beg, ref_end, read_beg, read_end, beg_id, end_id;
664-
if (collect_partial_aln_beg_end(opt->gap_aln, opt->match, opt->mismatch, opt->gap_open1, opt->gap_ext1, opt->gap_open2, opt->gap_ext2,
665-
read_seqs[0], read_lens[0], read_full_cover[0], read_seqs[i], read_lens[i], read_full_cover[i], &ref_beg, &ref_end, &read_beg, &read_end) == 0) {
666-
if (LONGCALLD_VERBOSE >= 2) fprintf(stderr, "Skipped in POA: %s\n", names[i]);
667-
continue;
668-
}
669-
beg_id = ref_beg+1, end_id = ref_end+1;
670-
seq_beg_cut = read_beg - 1, seq_end_cut = read_lens[i] - read_end;
671-
abpoa_subgraph_nodes(ab, abpt, beg_id, end_id, &exc_beg, &exc_end);
672-
}
673-
if (LONGCALLD_VERBOSE >= 2) fprintf(stderr, "ExcBeg: %d, ExcEnd: %d, SeqBegCut: %d, SeqEndCut: %d\n", exc_beg, exc_end, seq_beg_cut, seq_end_cut);
674-
abpoa_align_sequence_to_subgraph(ab, abpt, exc_beg, exc_end, read_seqs[i]+seq_beg_cut, read_lens[i]-seq_beg_cut-seq_end_cut, &res);
675-
abpoa_add_subgraph_alignment(ab, abpt, exc_beg, exc_end, read_seqs[i]+seq_beg_cut, NULL, read_lens[i]-seq_beg_cut-seq_end_cut, NULL, res, i, n_reads, 0);
676-
if (res.n_cigar) free(res.graph_cigar);
677-
}
678-
if (LONGCALLD_VERBOSE >= 2) abpoa_output(ab, abpt, stderr);
679-
else abpoa_output(ab, abpt, NULL);
680-
abpoa_cons_t *abc = ab->abc;
681-
682-
int n_cons = 0;
683-
if (abc->n_cons > 0) {
684-
for (int i = 0; i < abc->n_cons; ++i) {
685-
cons_lens[i] = abc->cons_len[i];
686-
cons_seqs[i] = (uint8_t*)malloc(abc->cons_len[i] * sizeof(uint8_t));
687-
for (int j = 0; j < abc->cons_len[i]; ++j) {
688-
cons_seqs[i][j] = abc->cons_base[i][j];
689-
}
690-
if (LONGCALLD_VERBOSE >= 2) fprintf(stderr, "ConsLen: %d\n", abc->cons_len[i]);
691-
}
692-
n_cons = abc->n_cons;
693-
} else {
694-
fprintf(stderr, "Unable to call consensus: %s\n", names[0]);
695-
}
696-
abpoa_free(ab); abpoa_free_para(abpt);
697-
return n_cons;
698-
}
699-
700635
// XXX limit abpoa memory usage, avoid memory allocation failure
701636
int abpoa_aln_msa_cons(const call_var_opt_t *opt, int wb, int n_reads, int *read_ids, uint8_t **read_seqs, int *read_lens, int max_n_cons,
702637
int *cons_lens, uint8_t **cons_seqs,
@@ -1254,6 +1189,7 @@ int collect_noisy_read_info(bam_chunk_t *chunk, hts_pos_t reg_beg, hts_pos_t reg
12541189
int reg_read_beg = 0, reg_read_end = bam_cigar2qlen(chunk->reads[read_i]->core.n_cigar, bam_get_cigar(chunk->reads[read_i]))-1;
12551190
(*read_names)[i] = bam_get_qname(chunk->reads[read_i]);
12561191
(*strands)[i] = bam_is_rev(chunk->reads[read_i]);
1192+
int beg_is_del = 0, end_is_del = 0;
12571193
for (int i = 0; i < n_digar; ++i) {
12581194
hts_pos_t digar_beg = digars[i].pos, digar_end;
12591195
int op = digars[i].type, len = digars[i].len, qi = digars[i].qi;
@@ -1266,6 +1202,7 @@ int collect_noisy_read_info(bam_chunk_t *chunk, hts_pos_t reg_beg, hts_pos_t reg
12661202
if (op == BAM_CDEL) {
12671203
reg_digar_beg = reg_beg;
12681204
reg_read_beg = qi;
1205+
beg_is_del = 1;
12691206
} else {
12701207
reg_digar_beg = reg_beg;
12711208
reg_read_beg = qi + (reg_beg - digar_beg);
@@ -1275,16 +1212,25 @@ int collect_noisy_read_info(bam_chunk_t *chunk, hts_pos_t reg_beg, hts_pos_t reg
12751212
if (op == BAM_CDEL) {
12761213
reg_digar_end = reg_end;
12771214
reg_read_end = qi-1;
1215+
end_is_del = 1;
12781216
} else {
12791217
reg_digar_end = reg_end;
12801218
reg_read_end = qi + (reg_end - digar_beg);
12811219
}
12821220
}
12831221
}
1284-
if (reg_digar_beg == reg_beg && reg_digar_end == reg_end) (*fully_covers)[i] = 3;
1285-
else if (reg_digar_beg == reg_beg) (*fully_covers)[i] = 1;
1286-
else if (reg_digar_end == reg_end) (*fully_covers)[i] = 2;
1287-
else (*fully_covers)[i] = 0;
1222+
if (reg_digar_beg == reg_beg && reg_digar_end == reg_end) {
1223+
if (beg_is_del == 0 && end_is_del == 0) (*fully_covers)[i] = 3;
1224+
else if (beg_is_del == 0 && end_is_del == 1) (*fully_covers)[i] = 1;
1225+
else if (beg_is_del == 1 && end_is_del == 0) (*fully_covers)[i] = 2;
1226+
else (*fully_covers)[i] = 0;
1227+
} else if (reg_digar_beg == reg_beg) {
1228+
if (beg_is_del) (*fully_covers)[i] = 0;
1229+
else (*fully_covers)[i] = 1;
1230+
} else if (reg_digar_end == reg_end) {
1231+
if (end_is_del) (*fully_covers)[i] = 0;
1232+
else (*fully_covers)[i] = 2;
1233+
} else (*fully_covers)[i] = 0;
12881234
// if (2*(reg_read_end-reg_read_beg+1) < (reg_end-reg_beg+1)) return 0;
12891235
(*read_seqs)[i] = (uint8_t*)malloc((reg_read_end - reg_read_beg + 1) * sizeof(uint8_t));
12901236
(*read_quals)[i] = (uint8_t*)malloc((reg_read_end - reg_read_beg + 1) * sizeof(uint8_t));

src/collect_var.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ int var_is_repeat_region(char *ref_seq, hts_pos_t ref_beg, hts_pos_t ref_end, ca
199199
int del_len = ref_len;
200200
len = del_len * 3; // see if del seq is 3-fold repeat
201201
if (pos < ref_beg || pos+del_len+len >= ref_end) {
202-
fprintf(stderr, "DelLen: %d, RefLen: %d, Pos: %" PRId64 ", RefBeg: %" PRId64 ", RefEnd: %" PRId64 "\n", del_len, ref_len, pos, ref_beg, ref_end);
202+
// fprintf(stderr, "DelLen: %d, RefLen: %d, Pos: %" PRId64 ", RefBeg: %" PRId64 ", RefEnd: %" PRId64 "\n", del_len, ref_len, pos, ref_beg, ref_end);
203203
return 0;
204204
}
205205
ref_bseq = get_bseq(ref_seq+pos-ref_beg, len);
@@ -213,7 +213,7 @@ int var_is_repeat_region(char *ref_seq, hts_pos_t ref_beg, hts_pos_t ref_end, ca
213213
int ins_len = var->alt_len;
214214
len = ins_len * 3; // see if ins seq is 3-fold repeat
215215
if (pos < ref_beg || pos+len >= ref_end) {
216-
fprintf(stderr, "InsLen: %d, RefLen: %d, Pos: %" PRId64 ", RefBeg: %" PRId64 ", RefEnd: %" PRId64 "\n", ins_len, ref_len, pos, ref_beg, ref_end);
216+
// fprintf(stderr, "InsLen: %d, RefLen: %d, Pos: %" PRId64 ", RefBeg: %" PRId64 ", RefEnd: %" PRId64 "\n", ins_len, ref_len, pos, ref_beg, ref_end);
217217
return 0;
218218
}
219219
ref_bseq = get_bseq(ref_seq+pos-ref_beg, len);

src/main.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
const char PROG[20] = "longcallD";
1010
const char DESCRIP[100] = "local-haplotagging-based small and structural variant calling";
11-
const char VERSION[20] = "0.0.2";
11+
const char VERSION[20] = "0.0.3";
1212
const char CONTACT[30] = "yangao@ds.dfci.harvard.edu";
1313
int LONGCALLD_VERBOSE = 0;
1414
char *CMD;
@@ -49,4 +49,4 @@ int main(int argc, char *argv[]) {
4949
}
5050
}
5151
free(CMD); return ret;
52-
}
52+
}

0 commit comments

Comments
 (0)