Skip to content

Commit 89c9f8c

Browse files
committed
merge branch develop
2 parents 61a8173 + d039f9a commit 89c9f8c

File tree

3 files changed

+18
-19
lines changed

3 files changed

+18
-19
lines changed

Makefile

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ SOURCE := source
33
STATIC_LIBS := $(shell mkdir -p libraries && echo libraries)
44

55
# compiler flags
6-
CXX := g++
76
CXXFLAGS := -Wall -Wno-parentheses -pthread -std=c++0x -O2
87

98
# make a statically linked binary by default and a dynamically linked one for bioconda
@@ -24,19 +23,19 @@ $(STATIC_LIBS)/tsl/htrie_map.h:
2423
$(WGET) 'https://github.com/Tessil/hat-trie/archive/v0.6.0.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
2524
cp -r $(STATIC_LIBS)/hat-trie-*/include/tsl $(STATIC_LIBS)
2625
$(STATIC_LIBS)/libdeflate.a:
27-
$(WGET) 'https://github.com/ebiggers/libdeflate/archive/v1.14.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
28-
cd $(STATIC_LIBS)/libdeflate-*/ && $(MAKE) libdeflate.a && cp libdeflate.a libdeflate.h ..
26+
$(WGET) 'https://github.com/ebiggers/libdeflate/archive/v1.24.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
27+
cd $(STATIC_LIBS)/libdeflate-*/ && $(CC) -O2 -Ilib -c lib/*.c lib/*/*.c && $(AR) rcs libdeflate.a *.o && cp libdeflate.a libdeflate.h ..
2928
$(STATIC_LIBS)/libz.a:
3029
$(WGET) 'https://zlib.net/fossils/zlib-1.3.1.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
3130
cd $(STATIC_LIBS)/zlib-*/ && ./configure && $(MAKE) libz.a && cp zlib.h zconf.h libz.a ..
3231
$(STATIC_LIBS)/libbz2.a:
3332
$(WGET) 'https://sourceware.org/pub/bzip2/bzip2-1.0.8.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
3433
cd $(STATIC_LIBS)/bzip2-*/ && $(MAKE) libbz2.a bzip2 && cp libbz2.a bzlib.h ..
3534
$(STATIC_LIBS)/liblzma.a:
36-
$(WGET) 'https://sourceforge.net/projects/lzmautils/files/xz-5.6.4.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
35+
$(WGET) 'https://sourceforge.net/projects/lzmautils/files/xz-5.8.1.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
3736
cd $(STATIC_LIBS)/xz-*/ && ./configure && $(MAKE) && cp -r src/liblzma/.libs/liblzma.a src/liblzma/api/lzma src/liblzma/api/lzma.h ..
3837
$(STATIC_LIBS)/libhts.a: $(STATIC_LIBS)/libdeflate.a $(STATIC_LIBS)/libz.a $(STATIC_LIBS)/libbz2.a $(STATIC_LIBS)/liblzma.a
39-
$(WGET) 'https://github.com/samtools/htslib/releases/download/1.19.1/htslib-1.19.1.tar.bz2' | $(STATIC_LIBS)/bzip2-*/bzip2 -d -c - | tar -xf - -C $(STATIC_LIBS) && \
38+
$(WGET) 'https://github.com/samtools/htslib/releases/download/1.22.1/htslib-1.22.1.tar.bz2' | $(STATIC_LIBS)/bzip2-*/bzip2 -d -c - | tar -xf - -C $(STATIC_LIBS) && \
4039
cd $(STATIC_LIBS)/htslib-*/ && $(MAKE) config.h && sed -i -e 's/CURL/DEFLATE/' config.h && $(MAKE) NONCONFIGURE_OBJS="" CPPFLAGS="$(CPPFLAGS) -I.." libhts.a && cp -r libhts.a htslib ..
4140

4241
# cleanup routine

scripts/quantify_virus_expression.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ KMER_LENGTH="${MAX_LENGTH-12}" # identify related viral strains by looking for c
1515
MAX_SHARED_KMERS_PCT="${MAX_SHARED_KMERS-10}" # consider two viral strains to be related when they share this fraction (%) of kmers
1616
MIN_COVERED_GENOME_PCT="${MIN_COVERED_GENOME_PCT-5}" # ignore viruses with only a small fraction (%) of their genome covered
1717
MIN_COVERED_GENOME_BASES="${MIN_COVERED_GENOME_BASES-100}" # ignore viruses with only few bases of their genome covered
18+
TANDEM_REPEAT_REGEX='AA.?AA.?AA.?AA.?AA.?AA.?AA.?AA|AC.?AC.?AC.?AC.?AC.?AC.?AC.?AC.?|AG.?AG.?AG.?AG.?AG.?AG.?AG.?AG.?|AT.?AT.?AT.?AT.?AT.?AT.?AT.?AT.?|CA.?CA.?CA.?CA.?CA.?CA.?CA.?CA.?|CC.?CC.?CC.?CC.?CC.?CC.?CC.?CC.?|CG.?CG.?CG.?CG.?CG.?CG.?CG.?CG.?|CT.?CT.?CT.?CT.?CT.?CT.?CT.?CT.?|GA.?GA.?GA.?GA.?GA.?GA.?GA.?GA.?|GC.?GC.?GC.?GC.?GC.?GC.?GC.?GC.?|GG.?GG.?GG.?GG.?GG.?GG.?GG.?GG.?|GT.?GT.?GT.?GT.?GT.?GT.?GT.?GT.?|TA.?TA.?TA.?TA.?TA.?TA.?TA.?TA.?|TC.?TC.?TC.?TC.?TC.?TC.?TC.?TC.?|TG.?TG.?TG.?TG.?TG.?TG.?TG.?TG.?|TT.?TT.?TT.?TT.?TT.?TT.?TT.?TT.?'
1819

1920
# tell bash to abort on error
2021
set -e -u -o pipefail
@@ -36,7 +37,7 @@ echo "${EXPRESSED_VIRAL_CONTIGS-}" |
3637
samtools view -F 4 -h ${EXAMINE_ONLY_VIRAL_CONTIGS-} "$INPUT" |
3738

3839
# quantify expression of viral contigs
39-
awk -F '\t' -v OFS='\t' -v kmer_length="$KMER_LENGTH" -v max_shared_kmers_pct="$MAX_SHARED_KMERS_PCT" -v viral_contigs="$VIRAL_CONTIGS" -v min_covered_genome_pct="$MIN_COVERED_GENOME_PCT" -v min_covered_genome_bases="$MIN_COVERED_GENOME_BASES" -v total_mapped_reads="${TOTAL_MAPPED_READS-0}" '
40+
awk -F '\t' -v OFS='\t' -v kmer_length="$KMER_LENGTH" -v max_shared_kmers_pct="$MAX_SHARED_KMERS_PCT" -v viral_contigs="$VIRAL_CONTIGS" -v min_covered_genome_pct="$MIN_COVERED_GENOME_PCT" -v min_covered_genome_bases="$MIN_COVERED_GENOME_BASES" -v total_mapped_reads="${TOTAL_MAPPED_READS-0}" -v TANDEM_REPEAT_REGEX="$TANDEM_REPEAT_REGEX" '
4041
{
4142
if (/^@SQ\t/) { # this is a header line => remember sizes of viral genomes
4243
@@ -53,7 +54,7 @@ awk -F '\t' -v OFS='\t' -v kmer_length="$KMER_LENGTH" -v max_shared_kmers_pct="$
5354
if (($2 % 4 >= 2 || $2 % 2 == 0) && # reads must be aligned in proper pair, unless they are single-end
5455
match($3, viral_contigs) && # read must map to a viral contig
5556
$6 ~ /^[0-9NMX]+$/ && # only count fully aligned reads (i.e., CIGAR string contains only N, M, or X)
56-
$10 !~ /(AA.?){8}|(AC.?){8}|(AG.?){8}|(AT.?){8}|(CC.?){8}|(CG.?){8}|(CT.?){8}|(GG.?){8}|(GT.?){8}|(TT.?){8}/) { # ignore tandem repeat regions
57+
!match($10, TANDEM_REPEAT_REGEX)) { # ignore tandem repeat regions
5758
5859
id = ids[$3]
5960
viral_mapped_reads[id]++

source/read_chimeric_alignments.cpp

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -420,22 +420,21 @@ unsigned int remove_malformed_alignments(chimeric_alignments_t& chimeric_alignme
420420
chimeric_alignment->second[SPLIT_READ].supplementary = false;
421421

422422
// set strands like they would be set if we had paired-end data
423+
bool flip_mate1_strand;
423424
if (chimeric_alignment->second[SPLIT_READ].sequence.length() - chimeric_alignment->second[SPLIT_READ].preclipping() - ((chimeric_alignment->second[SPLIT_READ].strand == chimeric_alignment->second[SUPPLEMENTARY].strand) ? chimeric_alignment->second[SUPPLEMENTARY].postclipping() : chimeric_alignment->second[SUPPLEMENTARY].preclipping()) <
424425
chimeric_alignment->second[SPLIT_READ].sequence.length() - chimeric_alignment->second[SPLIT_READ].postclipping() - ((chimeric_alignment->second[SPLIT_READ].strand == chimeric_alignment->second[SUPPLEMENTARY].strand) ? chimeric_alignment->second[SUPPLEMENTARY].preclipping() : chimeric_alignment->second[SUPPLEMENTARY].postclipping())) {
425-
if (chimeric_alignment->second[SPLIT_READ].strand == FORWARD) {
426-
chimeric_alignment->second[MATE1].strand = complement_strand(chimeric_alignment->second[MATE1].strand);
427-
} else {
428-
chimeric_alignment->second[SPLIT_READ].strand = complement_strand(chimeric_alignment->second[SPLIT_READ].strand);
429-
chimeric_alignment->second[SUPPLEMENTARY].strand = complement_strand(chimeric_alignment->second[SUPPLEMENTARY].strand);
430-
}
426+
flip_mate1_strand = chimeric_alignment->second[SPLIT_READ].strand == FORWARD;
431427
} else {
432-
if (chimeric_alignment->second[SPLIT_READ].strand == REVERSE) {
433-
chimeric_alignment->second[MATE1].strand = complement_strand(chimeric_alignment->second[MATE1].strand);
434-
} else {
435-
chimeric_alignment->second[SPLIT_READ].strand = complement_strand(chimeric_alignment->second[SPLIT_READ].strand);
436-
chimeric_alignment->second[SUPPLEMENTARY].strand = complement_strand(chimeric_alignment->second[SUPPLEMENTARY].strand);
437-
}
428+
flip_mate1_strand = chimeric_alignment->second[SPLIT_READ].strand == REVERSE;
438429
}
430+
chimeric_alignment->second[MATE1].strand = complement_strand_if(chimeric_alignment->second[MATE1].strand, flip_mate1_strand);
431+
chimeric_alignment->second[SPLIT_READ].strand = complement_strand_if(chimeric_alignment->second[SPLIT_READ].strand, !flip_mate1_strand);
432+
chimeric_alignment->second[SUPPLEMENTARY].strand = complement_strand_if(chimeric_alignment->second[SUPPLEMENTARY].strand, !flip_mate1_strand);
433+
434+
// set first-in-pair flag like it would be set if we had paired-end data
435+
chimeric_alignment->second[MATE1].first_in_pair = !flip_mate1_strand;
436+
chimeric_alignment->second[SPLIT_READ].first_in_pair = flip_mate1_strand;
437+
chimeric_alignment->second[SUPPLEMENTARY].first_in_pair = flip_mate1_strand;
439438

440439
// if possible, try to fix overlapping segments between split read and supplementary
441440
if (!disjoin_split_read_segments(chimeric_alignment->second[SPLIT_READ], chimeric_alignment->second[SUPPLEMENTARY]))

0 commit comments

Comments
 (0)