Skip to content

Commit 2de486c

Browse files
committed
merge branch develop
2 parents 3492d2c + 3bb245b commit 2de486c

File tree

84 files changed

+1524
-616
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+1524
-616
lines changed

Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ apt-get install -y --no-install-recommends wget ca-certificates samtools r-base
88
apt-get clean && rm -rf /var/lib/apt/lists/*
99

1010
# install version of STAR that supports --chimMultimapNmax and --chimOutType WithinBAM
11-
RUN wget -qO - 'https://github.com/alexdobin/STAR/archive/2.7.6a.tar.gz' | \
12-
tar --strip-components=3 -C /usr/local/bin -xzf - 'STAR-2.7.6a/bin/Linux_x86_64/STAR'
11+
RUN wget -qO - 'https://github.com/alexdobin/STAR/archive/2.7.10a.tar.gz' | \
12+
tar --strip-components=3 -C /usr/local/bin -xzf - 'STAR-2.7.10a/bin/Linux_x86_64/STAR'
1313

1414
# install arriba
1515
RUN wget -qO - 'https://github.com/suhrig/arriba/releases/download/v2.1.0/arriba_v2.1.0.tar.gz' | tar -xzf - --exclude='arriba*/.git'

Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,24 @@ arriba: $(SOURCE)/arriba.cpp $(SOURCE)/annotation.o $(SOURCE)/assembly.o $(SOURC
1919
$(CXX) -c $(CXXFLAGS) $(CPPFLAGS) -I$(STATIC_LIBS)/htslib -I$(STATIC_LIBS)/tsl -o $@ $<
2020

2121
# download and compile dependencies for a static build
22-
WGET := $(shell (which wget && echo " -O -") || echo "curl -L")
22+
WGET := $(shell (which wget && echo "--no-check-certificate -O -") || echo "curl -k -L")
2323
$(STATIC_LIBS)/tsl/htrie_map.h:
2424
$(WGET) 'https://github.com/Tessil/hat-trie/archive/v0.6.0.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
2525
cp -r $(STATIC_LIBS)/hat-trie-*/include/tsl $(STATIC_LIBS)
2626
$(STATIC_LIBS)/libdeflate.a:
27-
$(WGET) 'https://github.com/ebiggers/libdeflate/archive/v1.7.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
27+
$(WGET) 'https://github.com/ebiggers/libdeflate/archive/v1.8.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
2828
cd $(STATIC_LIBS)/libdeflate-*/ && $(MAKE) libdeflate.a && cp libdeflate.a libdeflate.h ..
2929
$(STATIC_LIBS)/libz.a:
3030
$(WGET) 'https://zlib.net/zlib-1.2.11.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
3131
cd $(STATIC_LIBS)/zlib-*/ && ./configure && $(MAKE) libz.a && cp zlib.h zconf.h libz.a ..
3232
$(STATIC_LIBS)/libbz2.a:
3333
$(WGET) 'https://sourceware.org/pub/bzip2/bzip2-1.0.8.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
34-
cd $(STATIC_LIBS)/bzip2-*/ && $(MAKE) libbz2.a && cp libbz2.a bzlib.h ..
34+
cd $(STATIC_LIBS)/bzip2-*/ && $(MAKE) libbz2.a bzip2 && cp libbz2.a bzlib.h ..
3535
$(STATIC_LIBS)/liblzma.a:
3636
$(WGET) 'https://sourceforge.net/projects/lzmautils/files/xz-5.2.5.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
3737
cd $(STATIC_LIBS)/xz-*/ && ./configure && $(MAKE) && cp -r src/liblzma/.libs/liblzma.a src/liblzma/api/lzma src/liblzma/api/lzma.h ..
3838
$(STATIC_LIBS)/libhts.a: $(STATIC_LIBS)/libdeflate.a $(STATIC_LIBS)/libz.a $(STATIC_LIBS)/libbz2.a $(STATIC_LIBS)/liblzma.a
39-
$(WGET) 'https://github.com/samtools/htslib/archive/1.11.tar.gz' | tar -xzf - -C $(STATIC_LIBS) && \
39+
$(WGET) 'https://github.com/samtools/htslib/releases/download/1.14/htslib-1.14.tar.bz2' | $(STATIC_LIBS)/bzip2-*/bzip2 -d -c - | tar -xf - -C $(STATIC_LIBS) && \
4040
cd $(STATIC_LIBS)/htslib-*/ && $(MAKE) config.h && sed -i -e 's/CURL/DEFLATE/' config.h && $(MAKE) NONCONFIGURE_OBJS="" CPPFLAGS="$(CPPFLAGS) -I.." libhts.a && cp -r libhts.a htslib ..
4141

4242
# cleanup routine

README.md

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,29 @@
11
About
2-
=====
2+
-----
33

4-
Arriba is a command-line tool for the detection of gene fusions from RNA-Seq data. It was developed for the use in a clinical research setting. Therefore, short runtimes and high sensitivity were important design criteria. It is based on the ultrafast [STAR aligner](https://github.com/alexdobin/STAR) and the post-alignment runtime is typically just ~2 minutes. In contrast to many other fusion detection tools which build on STAR, Arriba does not require to reduce the `alignIntronMax` parameter of STAR to detect fusions arising from focal deletions.
4+
Arriba is a command-line tool for the detection of gene fusions from RNA-Seq data. It was developed for use in a clinical research setting. Therefore, short runtimes and high sensitivity were important design criteria. It is based on the ultrafast [STAR aligner](https://github.com/alexdobin/STAR), and the post-alignment runtime is typically just ~2 minutes. Arriba's workflow produces fully reusable alignments, which can serve as input to other common analyses, such as quantification of gene expression. In contrast to many other fusion detection tools which build on STAR, Arriba does not require reducing the STAR parameter `--alignIntronMax` to detect fusions arising from focal deletions. Reducing this parameter impairs mapping of reads to genes with long introns and may therefore affect expression quantification.
55

6-
Apart from gene fusions, Arriba can detect other structural rearrangements with potential clinical relevance, such as viral integration sites, internal tandem duplications, whole exon duplications, truncations of genes (i.e., breakpoints in introns and intergenic regions).
6+
Apart from gene fusions, Arriba can detect other structural rearrangements with potential clinical relevance, including viral integration sites, internal tandem duplications, whole exon duplications, intragenic inversions, enhancer hijacking events involving immunoglobulin/T-cell receptor loci, translocations affecting genes with many paralogs such as DUX4, and truncations of genes (i.e., breakpoints in introns or intergenic regions).
77

8-
Arriba is the winner of the [DREAM SMC-RNA Challenge](https://www.synapse.org/SMC_RNA), an international competition organized by ICGC, TCGA, IBM, and Sage Bionetworks to determine the current gold standard for the detection of gene fusions from RNA-Seq data. The final results of the challenge are posted on the [Round 5 Leaderboard](https://www.synapse.org/#!Synapse:syn2813589/wiki/588511).
8+
Arriba is the winner of the [DREAM SMC-RNA Challenge](https://www.synapse.org/SMC_RNA), an international competition organized by ICGC, TCGA, IBM, and Sage Bionetworks to determine the current gold standard for the detection of gene fusions from RNA-Seq data. The final results of the challenge are posted on the [Round 5 Leaderboard](https://www.synapse.org/#!Synapse:syn2813589/wiki/588511) and discussed in the accompanying [publication](https://doi.org/10.1016/j.cels.2021.05.021).
99

1010
Get help
11-
========
11+
--------
1212

1313
Use the [GitHub issue tracker](https://github.com/suhrig/arriba/issues) to get help or to report bugs.
1414

1515
Citation
16-
========
16+
--------
1717

18-
Sebastian Uhrig, Julia Ellermann, Tatjana Walther, Pauline Burkhardt, Martina Fröhlich, Barbara Hutter, Umut H. Toprak, Olaf Neumann, Albrecht Stenzinger, Claudia Scholl, Stefan Fröhling and Benedikt Brors: *Accurate and efficient detection of gene fusions from RNA sequencing data.* Genome Research. Published in Advance January 13, 2021. doi: [10.1101/gr.257246.119](https://doi.org/10.1101/gr.257246.119)
18+
Sebastian Uhrig, Julia Ellermann, Tatjana Walther, Pauline Burkhardt, Martina Fröhlich, Barbara Hutter, Umut H. Toprak, Olaf Neumann, Albrecht Stenzinger, Claudia Scholl, Stefan Fröhling and Benedikt Brors: *Accurate and efficient detection of gene fusions from RNA sequencing data.* Genome Research. March 2021 31: 448-460; Published in Advance January 13, 2021. doi: [10.1101/gr.257246.119](https://doi.org/10.1101/gr.257246.119)
1919

2020
License
2121
-------
2222

2323
The code, software and database files of Arriba are distributed under the MIT/Expat License, with the exception of the script `draw_fusions.R`, which is distributed under the GNU GPL v3 due to dependencies on GPL-licensed R packages. The terms and conditions of both licenses can be found in the [LICENSE file](https://raw.githubusercontent.com/suhrig/arriba/master/LICENSE).
2424

2525
User manual
26-
===========
26+
-----------
2727

2828
Please refer to the [user manual](http://arriba.readthedocs.io/en/latest/) for installation instructions and information about usage. **Note: You should not use `git clone` to download Arriba, because the git repository does not include the blacklist and other database files!**
2929

@@ -75,12 +75,24 @@ Please refer to the [user manual](http://arriba.readthedocs.io/en/latest/) for i
7575
- [Multiple transcript variants](https://arriba.readthedocs.io/en/latest/interpretation-of-results/#multiple-transcript-variants)
7676
- [Cohort analysis](https://arriba.readthedocs.io/en/latest/interpretation-of-results/#cohort-analysis)
7777

78-
8. [Current limitations](https://arriba.readthedocs.io/en/latest/current-limitations/)
78+
8. [Utility scripts](https://arriba.readthedocs.io/en/latest/utility-scripts/)
79+
80+
- [Extract fusion-supporting alignments](https://arriba.readthedocs.io/en/latest/utility-scripts/#extract-fusion-supporting-alignments)
81+
- [Convert fusions.tsv to VCF format](https://arriba.readthedocs.io/en/latest/utility-scripts/#convert-fusionstsv-to-vcf-format)
82+
- [Run Arriba on prealigned BAM file](https://arriba.readthedocs.io/en/latest/utility-scripts/#run-arriba-on-prealigned-bam-file)
83+
- [Quantify virus expression](https://arriba.readthedocs.io/en/latest/utility-scripts/#quantify-virus-expression)
84+
85+
9. [Current limitations](https://arriba.readthedocs.io/en/latest/current-limitations/)
7986

8087
- [Intragenic deletions](https://arriba.readthedocs.io/en/latest/current-limitations/#intragenic-deletions)
88+
- [RefSeq annotation](https://arriba.readthedocs.io/en/latest/current-limitations/#refseq-annotation)
8189
- [Memory consumption](https://arriba.readthedocs.io/en/latest/current-limitations/#memory-consumption)
90+
- [Adapter trimming](https://arriba.readthedocs.io/en/latest/current-limitations/#adapter-trimming)
91+
- [Small insert size](https://arriba.readthedocs.io/en/latest/current-limitations/#small-insert-size)
92+
- [Viral detection](https://arriba.readthedocs.io/en/latest/current-limitations/#viral-detection)
93+
- [Targeted sequencing](https://arriba.readthedocs.io/en/latest/current-limitations/#targeted-sequencing)
8294

83-
9. [Internal algorithm](https://arriba.readthedocs.io/en/latest/internal-algorithm/)
95+
10. [Internal algorithm](https://arriba.readthedocs.io/en/latest/internal-algorithm/)
8496

8597
- [Read-level filters](https://arriba.readthedocs.io/en/latest/internal-algorithm/#read-level-filters)
8698
- [Event-level filters](https://arriba.readthedocs.io/en/latest/internal-algorithm/#event-level-filters)

0 commit comments

Comments
 (0)