Skip to content

Commit ba09d5d

Browse files
committed
Merge branch 'dev-static-reg'
2 parents f9f1300 + d70ee36 commit ba09d5d

File tree

14 files changed

+1287
-1249
lines changed

14 files changed

+1287
-1249
lines changed

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ ifeq ($(UNAME_S),Linux) # Linux
4545
LIB += -static-libgcc -static-libstdc++
4646
ifneq ($(opt_lib),)
4747
LIB = $(HTSLIB) $(ABPOA_LIB) $(WFA2_LIB) -static-libgcc -static-libstdc++ -L${opt_lib} -lm -lz -lpthread -llzma -lbz2 -lcurl -lssl -lcrypto -lssh2 -ldeflate -lzstd
48+
else
49+
ifneq ($(OPT_LIB),)
50+
LIB = $(HTSLIB) $(ABPOA_LIB) $(WFA2_LIB) -static-libgcc -static-libstdc++ -L${OPT_LIB} -lm -lz -lpthread -llzma -lbz2 -lcurl -lssl -lcrypto -lssh2 -ldeflate -lzstd
51+
endif
4852
endif
4953
endif
5054
endif

README.md

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
<!-- # LongcallD: local-haplotagging-based small and structural variant calling -->
22

33
<!-- [![Latest Release](https://img.shields.io/github/release/yangao07/longcallD.svg?label=Release)](https://github.com/yangao07/longcallD/releases/latest) -->
4+
<!-- [![Github All Releases](https://img.shields.io/github/downloads/yangao07/longcallD/total.svg?label=Download)](https://github.com/yangao07/longcallD/releases) -->
45
[![C/C++ CI](https://github.com/yangao07/longcallD/actions/workflows/linux-CI.yml/badge.svg)](https://github.com/yangao07/longcallD/actions/workflows/linux-CI.yml)
56
[![C/C++ CI](https://github.com/yangao07/longcallD/actions/workflows/macos-CI.yml/badge.svg)](https://github.com/yangao07/longcallD/actions/workflows/macos-CI.yml)
67
[![License](https://img.shields.io/badge/License-MIT-black.svg)](https://github.com/yangao07/longcallD/blob/main/LICENSE)
7-
<!-- [![Github All Releases](https://img.shields.io/github/downloads/yangao07/longcallD/total.svg?label=Download)](https://github.com/yangao07/longcallD/releases) -->
88
<!-- [![BioConda Install](https://img.shields.io/conda/dn/bioconda/longcallD.svg?style=flag&label=BioConda%20install)](https://anaconda.org/bioconda/longcallD) -->
99
<!-- [![Published in Bioinformatics](https://img.shields.io/badge/Published%20in-Bioinformatics-blue.svg)](https://dx.doi.org/10.1093/bioinformatics/btaa963) -->
1010
<!-- [![GitHub Issues](https://img.shields.io/github/issues/yangao07/longcallD.svg?label=Issues)](https://github.com/yangao07/longcallD/issues) -->
11-
## Updates (pre-release v0.0.3)
11+
## Updates (pre-release v0.0.4)
1212

13-
* Fix a couple of corner cases
13+
* Use static regions for multi-threaded computing
14+
* Extended phase set
15+
* Fixed HP/PS tags in the output phased BAM
16+
* Fixed a few edge cases
1417

1518
## Getting Started
1619
```sh
@@ -19,11 +22,11 @@
1922

2023
# Download pre-built executables and test data (recommended)
2124
# Linux
22-
wget https://github.com/yangao07/longcallD/releases/download/v0.0.3/longcallD-v0.0.3_x64-linux.tar.gz
23-
tar -zxvf longcallD-v0.0.3_x64-linux.tar.gz && cd longcallD-v0.0.3_x64-linux
25+
wget https://github.com/yangao07/longcallD/releases/download/v0.0.4/longcallD-v0.0.4_x64-linux.tar.gz
26+
tar -zxvf longcallD-v0.0.4_x64-linux.tar.gz && cd longcallD-v0.0.4_x64-linux
2427
# MacOS
25-
wget https://github.com/yangao07/longcallD/releases/download/v0.0.3/longcallD-v0.0.3_arm64-macos.tar.gz
26-
tar -zxvf longcallD-v0.0.3_arm64-macos.tar.gz && cd longcallD-v0.0.3_arm64-macos
28+
wget https://github.com/yangao07/longcallD/releases/download/v0.0.4/longcallD-v0.0.4_arm64-macos.tar.gz
29+
tar -zxvf longcallD-v0.0.4_arm64-macos.tar.gz && cd longcallD-v0.0.4_arm64-macos
2730

2831
# PacBio HiFi reads
2932
./longcallD call ./test_data/chr11_2M.fa ./test_data/HG002_chr11_hifi_test.bam --hifi > HG002_hifi_test.vcf
@@ -35,7 +38,7 @@ man ./longcallD.1
3538
``` -->
3639

3740
## Table of Contents
38-
- [Updates (pre-release v0.0.3)](#updates-pre-release-v003)
41+
- [Updates (pre-release v0.0.4)](#updates-pre-release-v004)
3942
- [Getting Started](#getting-started)
4043
- [Table of Contents](#table-of-contents)
4144
- [Introduction](#introduction)
@@ -60,22 +63,22 @@ LongcallD phases long reads into haplotypes using SNPs and small indels before c
6063
### Pre-built executables (recommended)
6164
**For Linux:**
6265
```
63-
wget https://github.com/yangao07/longcallD/releases/download/v0.0.3/longcallD-v0.0.3_x64-linux.tar.gz
64-
tar -zxvf longcallD-v0.0.3_x64-linux.tar.gz
66+
wget https://github.com/yangao07/longcallD/releases/download/v0.0.4/longcallD-v0.0.4_x64-linux.tar.gz
67+
tar -zxvf longcallD-v0.0.4_x64-linux.tar.gz
6568
```
6669
**For macOS:**
6770
```
68-
wget https://github.com/yangao07/longcallD/releases/download/v0.0.3/longcallD-v0.0.3_arm64-macos.tar.gz
69-
tar -zxvf longcallD-v0.0.3_arm64-macos.tar.gz
71+
wget https://github.com/yangao07/longcallD/releases/download/v0.0.4/longcallD-v0.0.4_arm64-macos.tar.gz
72+
tar -zxvf longcallD-v0.0.4_arm64-macos.tar.gz
7073
```
7174

7275
### Build from source
7376
To compile longcallD from source, ensure you have **GCC/clang (9.0+)** and **zlib** installed.
7477
It is recommended to use the [latest release](https://github.com/yangao07/longcallD/releases).
7578
```
76-
wget https://github.com/yangao07/longcallD/releases/download/v0.0.3/longcallD-v0.0.3.tar.gz
77-
tar -zxvf longcallD-v0.0.3.tar.gz
78-
cd longcallD-v0.0.3; make
79+
wget https://github.com/yangao07/longcallD/releases/download/v0.0.4/longcallD-v0.0.4.tar.gz
80+
tar -zxvf longcallD-v0.0.4.tar.gz
81+
cd longcallD-v0.0.4; make
7982
```
8083

8184
## Usage
@@ -91,6 +94,8 @@ LongcallD supports region-based variant calling, similar to `samtools view`.
9194
```
9295
longcallD call -t16 ref.fa hifi.bam chr11:10,229,956-10,256,221 > hifi_reg.vcf
9396
longcallD call -t16 ref.fa hifi.bam chr11:10,229,956-10,256,221 chr12:10,576,356-10,583,438 > hifi_regs.vcf
97+
longcallD call -t16 ref.fa hifi.bam --region-file reg.bed > hifi_regs.vcf
98+
longcallD call -t16 ref.fa hifi.bam --autosome > hifi_autosome.vcf
9499
```
95100

96101
### Variant calling and output phased long reads

src/align.c

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ void wfa_trim_aln_str(int full_cover, aln_str_t *aln_str, int tlen, int qlen) {
357357
}
358358
if (target_end != -1 && query_end != -1) break;
359359
}
360-
if (query_end == -1) query_end = target_end;
360+
if (query_end == -1) query_end = target_end; // no = operation, query_end is the last position
361361
assert(query_end <= target_end);
362362
aln_str->aln_len = target_end+1;
363363
aln_str->target_beg = 0; aln_str->target_end = target_end;
@@ -380,7 +380,7 @@ void wfa_trim_aln_str(int full_cover, aln_str_t *aln_str, int tlen, int qlen) {
380380
}
381381
if (target_start != -1 && query_start != -1) break;
382382
}
383-
if (query_start == -1) query_start = target_start; // no = operation
383+
if (query_start == -1) query_start = target_start; // no = operation, query_start is the first position
384384
assert(query_start >= target_start);
385385
aln_str->aln_len = aln_str->aln_len - target_start;
386386
if (target_start != 0) {
@@ -407,7 +407,7 @@ int wfa_collect_aln_str(const call_var_opt_t *opt, uint8_t *target, int tlen, ui
407407
aln_str->target_aln = 0; aln_str->query_aln = 0; aln_str->aln_len = 0;
408408
int gap_aln = opt->gap_aln, a = opt->match, b = opt->mismatch, q = opt->gap_open1, e = opt->gap_ext1, q2 = opt->gap_open2, e2 = opt->gap_ext2;
409409
if (full_cover == 3) {
410-
wfa_end2end_aln(target, tlen, query, qlen, opt->gap_aln, a, b, q, e, q2, e2,
410+
wfa_end2end_aln(target, tlen, query, qlen, gap_aln, a, b, q, e, q2, e2,
411411
NULL, NULL, &aln_str->target_aln, &aln_str->query_aln, &aln_str->aln_len);
412412
aln_str->target_beg = 0; aln_str->target_end = aln_str->aln_len-1;
413413
aln_str->query_beg = 0; aln_str->query_end = aln_str->aln_len-1;
@@ -430,8 +430,9 @@ int wfa_collect_aln_str(const call_var_opt_t *opt, uint8_t *target, int tlen, ui
430430

431431
int end2end_aln(const call_var_opt_t *opt, char *tseq, int tlen, uint8_t *qseq, int qlen, uint32_t **cigar_buf) {
432432
if (qlen <= 0 || tlen <= 0) return 0;
433-
int min_len = MIN_OF_TWO(tlen, qlen), max_len = MAX_OF_TWO(tlen, qlen);
434-
int delta_len = MAX_OF_TWO(1, max_len - min_len);
433+
// int min_len = MIN_OF_TWO(tlen, qlen)
434+
int max_len = MAX_OF_TWO(tlen, qlen);
435+
// int delta_len = MAX_OF_TWO(1, max_len - min_len);
435436

436437
uint8_t *tseq2 = (uint8_t*)malloc(max_len);
437438
for (int i = 0; i < tlen; ++i) tseq2[i] = nst_nt4_table[(uint8_t)tseq[i]];
@@ -614,7 +615,7 @@ int abpoa_partial_aln_msa_cons(const call_var_opt_t *opt, abpoa_t *ab, int wb, i
614615
}
615616
n_cons = abc->n_cons;
616617
} else {
617-
fprintf(stderr, "Unable to call consensus: %s\n", names[0]);
618+
if (LONGCALLD_VERBOSE >= 2) fprintf(stderr, "Unable to call consensus: %s\n", names[0]);
618619
}
619620
}
620621

@@ -753,7 +754,7 @@ void sort_by_full_cover_and_length(int n_reads, int *read_ids, int *read_lens, u
753754
}
754755
}
755756

756-
int add_phase_set(hts_pos_t ps, hts_pos_t *uniq_phase_sets, int *n_uniq_phase_sets) {
757+
static int add_phase_set(hts_pos_t ps, hts_pos_t *uniq_phase_sets, int *n_uniq_phase_sets) {
757758
int i;
758759
for (i = 0; i < *n_uniq_phase_sets; ++i) {
759760
if (uniq_phase_sets[i] == ps) return i;
@@ -842,7 +843,7 @@ int wfa_collect_noisy_aln_str_no_ps_hap(const call_var_opt_t *opt, int n_reads,
842843
int n_cons = 0;
843844
if (n_full_reads == 0) goto collect_noisy_msa_cons_no_ps_hap_end;
844845

845-
int hp_flank_start, hp_flank_end, hp_len;
846+
// int hp_flank_start, hp_flank_end, hp_len;
846847
// if (opt->is_ont) { //opt->ont_hp_profile != NULL) {
847848
// XXX two cons
848849
// if (opt->is_ont && cons_is_homopolymer(ref_seq, ref_seq_len, opt->noisy_reg_flank_len, &hp_flank_start, &hp_flank_end, &hp_len)) { // for ont & homopolymer, do Bayesian inference instead of consensus calling
@@ -942,7 +943,7 @@ hts_pos_t collect_phase_set_with_both_haps(int n_reads, int *read_haps, int *rea
942943
}
943944
}
944945
hts_pos_t max_ps = -1; int max_ps_i = -1;
945-
int max_ps_full_read_count1 = -1, max_ps_full_read_count2 = -1, max_ps_all_read_count1 = -1, max_ps_all_read_count2 = -1;
946+
int max_ps_full_read_count1 = -1, max_ps_full_read_count2 = -1;
946947
for (int i = 0; i < n_uniq_phase_sets; ++i) {
947948
int phase_set_full_read_count1 = phase_set_to_hap_full_read_count[i][0] < phase_set_to_hap_full_read_count[i][1] ? phase_set_to_hap_full_read_count[i][0] : phase_set_to_hap_full_read_count[i][1];
948949
int phase_set_full_read_count2 = phase_set_to_hap_full_read_count[i][0] > phase_set_to_hap_full_read_count[i][1] ? phase_set_to_hap_full_read_count[i][0] : phase_set_to_hap_full_read_count[i][1];
@@ -1156,7 +1157,6 @@ int wfa_collect_noisy_aln_str_with_ps_hap(const call_var_opt_t *opt, int n_reads
11561157
if (lens[i] <= 0 || phase_sets[i] != ps || haps[i] != hap) continue;
11571158
if (use_non_full == 0 && fully_covers[i] != 3) continue;
11581159
// cons vs read
1159-
int read_beg, read_end;
11601160
wfa_collect_aln_str(opt, cons_seqs[hap-1], cons_lens[hap-1], seqs[i], lens[i], fully_covers[i], LONGCALLD_CONS_READ_ALN_STR(clu_aln_str, n_ps_hap_reads));
11611161
n_ps_hap_reads++;
11621162
}
@@ -1186,9 +1186,10 @@ int collect_noisy_read_info(const call_var_opt_t *opt, bam_chunk_t *chunk, hts_p
11861186
int read_i = noisy_reg_reads[i];
11871187
digar_t *read_digars = chunk->digars+read_i; int n_digar = read_digars->n_digar; digar1_t *digars = read_digars->digars;
11881188
hts_pos_t reg_digar_beg = -1, reg_digar_end = -1;
1189-
int reg_read_beg = 0, reg_read_end = bam_cigar2qlen(chunk->reads[read_i]->core.n_cigar, bam_get_cigar(chunk->reads[read_i]))-1;
1190-
(*read_names)[i] = bam_get_qname(chunk->reads[read_i]);
1191-
(*strands)[i] = bam_is_rev(chunk->reads[read_i]);
1189+
int reg_read_beg = 0, reg_read_end = digar2qlen(read_digars)-1;
1190+
if (LONGCALLD_VERBOSE >= 2) (*read_names)[i] = bam_get_qname(chunk->reads[read_i]);
1191+
else (*read_names)[i] = NULL;
1192+
(*strands)[i] = read_digars->is_rev;
11921193
int beg_is_del = 0, end_is_del = 0;
11931194
for (int i = 0; i < n_digar; ++i) {
11941195
hts_pos_t digar_beg = digars[i].pos, digar_end;
@@ -1240,7 +1241,7 @@ int collect_noisy_read_info(const call_var_opt_t *opt, bam_chunk_t *chunk, hts_p
12401241
}
12411242
(*read_lens)[i] = reg_read_end - reg_read_beg + 1;
12421243
(*read_haps)[i] = chunk->haps[read_i];
1243-
(*phase_sets)[i] = chunk->PS[read_i];
1244+
(*phase_sets)[i] = chunk->phase_sets[read_i];
12441245
}
12451246
return 0;
12461247
}

0 commit comments

Comments
 (0)