Skip to content

Commit b06afba

Browse files
authored
Merge pull request #146 from LANL-Bioinformatics/nf-gene-family-analysis
Nf gene family analysis
2 parents 972f1d7 + cbbb80a commit b06afba

File tree

12 files changed

+991
-1
lines changed

12 files changed

+991
-1
lines changed

workflows/Nextflow/configs/container.config

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ process {
6161
withLabel: 'phyl' {
6262
container = 'ghcr.io/lanl-bioinformatics/edge_snp_tree:0.6.0'
6363
}
64+
withLabel: 'gfa' {
65+
container = 'apwat/gene_family_analysis:pf2_0.3.3'
66+
}
67+
withLabel: 'vf' {
68+
container = 'apwat/mvf:0.3'
69+
}
6470
withLabel: 'report' {
6571
container = 'ghcr.io/lanl-bioinformatics/edge_report:0.5.0'
6672
}

workflows/Nextflow/main.nf

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ include {ANTISMASH} from './modules/runAntiSmash/runAntiSmash.nf'
1515
include {BINNING} from './modules/contigBinning/contigBinning.nf'
1616
include {REFERENCEBASEDANALYSIS} from './modules/referenceBasedAnalysis/refBasedAnalysis.nf'
1717
include {PHYLOGENETICANALYSIS} from './modules/SNPtree/SNPtree.nf'
18+
include {READSGENEFAMILYANALYSIS} from './modules/geneFamilyAnalysis/geneFamilyAnalysis.nf'
19+
include {CONTIGSGENEFAMILYANALYSIS} from './modules/geneFamilyAnalysis/geneFamilyAnalysis.nf'
1820
include {REPORT} from './modules/report/report.nf'
1921

2022
workflow {
@@ -145,12 +147,16 @@ workflow {
145147
//Annotation and PhageFinder
146148
antismashInput = contigs
147149
annStats = channel.empty()
150+
annGFF = channel.empty()
151+
annFAA = channel.empty()
148152
if(params.modules.annotation) {
149153
ANNOTATION(baseSettings.plus(params.annotation), annContigs)
150154
annStats = ANNOTATION.out.annStats
155+
annGFF = ANNOTATION.out.gff
156+
annFAA = ANNOTATION.out.faa
151157

152158
if(params.modules.phageFinder && (params.annotation.taxKingdom == null || !(params.annotation.taxKingdom.equalsIgnoreCase("viruses")))) {
153-
PHAGEFINDER(baseSettings, ANNOTATION.out.gff, ANNOTATION.out.faa, ANNOTATION.out.fna)
159+
PHAGEFINDER(baseSettings, annGFF, annFAA, ANNOTATION.out.fna)
154160
}
155161

156162
antismashInput = ANNOTATION.out.gbk
@@ -171,6 +177,36 @@ workflow {
171177
PHYLOGENETICANALYSIS(baseSettings.plus(params.snpTree).plus(params.annotation), paired.ifEmpty(["${projectDir}/nf_assets/NO_FILE"]), unpaired.ifEmpty("${projectDir}/nf_assets/NO_FILE2"), contigs.ifEmpty("${projectDir}/nf_assets/NO_FILE3"))
172178
}
173179

180+
// Gene family analysis on reads.
// Runs only when the readsGeneFamilyAnalysis module is switched on in params.modules.
if (params.modules.readsGeneFamilyAnalysis) {
    // Module settings are the shared base settings merged with the geneFamily parameter block.
    def readsGeneFamilySettings = baseSettings.plus(params.geneFamily)

    // ifEmpty() substitutes a placeholder path when a read channel emits nothing,
    // so the process always receives a value for each input.
    def readsGeneFamilyPaired = paired.ifEmpty(["${projectDir}/nf_assets/NO_FILE"])
    def readsGeneFamilyUnpaired = unpaired.ifEmpty("${projectDir}/nf_assets/NO_FILE2")

    READSGENEFAMILYANALYSIS(readsGeneFamilySettings, readsGeneFamilyPaired, readsGeneFamilyUnpaired)
}
187+
188+
// Gene family analysis on contigs.
// Runs only when the contigsGeneFamilyAnalysis module is switched on in params.modules.
if (params.modules.contigsGeneFamilyAnalysis) {
    // Protein FASTA input: when the inputFAA parameter ends with "NO_FILE3"
    // (presumably the "not provided" default -- confirm against the params file),
    // use the annotation module's FAA channel, itself falling back to the placeholder
    // when empty; otherwise read the path given in the parameter.
    geneFamilyFAA = params.geneFamily.inputFAA.endsWith("NO_FILE3") ?
        annFAA.ifEmpty("${projectDir}/nf_assets/NO_FILE3") :
        channel.fromPath(params.geneFamily.inputFAA)

    // GFF input: same selection logic, keyed on the "NO_FILE4" placeholder.
    geneFamilyGFF = params.geneFamily.inputGFF.endsWith("NO_FILE4") ?
        annGFF.ifEmpty("${projectDir}/nf_assets/NO_FILE4") :
        channel.fromPath(params.geneFamily.inputGFF)

    // Contigs fall back to their own placeholder when the channel is empty.
    def geneFamilyContigs = contigs.ifEmpty("${projectDir}/nf_assets/NO_FILE5")

    CONTIGSGENEFAMILYANALYSIS(
        baseSettings.plus(params.geneFamily),
        geneFamilyFAA,
        geneFamilyGFF,
        geneFamilyContigs
    )
}
209+
174210
//report generation
175211
REPORT(
176212
baseSettings,
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# syntax=docker/dockerfile:1
# Image for the MetaVF_toolkit: clones the toolkit, builds its conda environment,
# unpacks the bundled databases and exposes both the environment and metaVF.py on PATH.
FROM continuumio/miniconda3:main AS build
# NOTE(review): the workflow's container.config references tag apwat/mvf:0.3 -- confirm
# whether this version value is meant to track the image tag.
ENV version=0.1

ENV container=docker

RUN git clone --depth=1 https://github.com/aw-watson/MetaVF_toolkit.git

# add conda channels
RUN conda config --add channels conda-forge \
    && conda config --add channels bioconda

# Create the toolkit environment. `conda init bash` is kept so login/interactive bash
# shells in the container get conda set up. No `conda activate` here: activation inside
# a RUN step is lost when the layer ends, so the environment is exposed through PATH below.
RUN conda init bash \
    && conda env create --name MetaVF_toolkit -f /MetaVF_toolkit/env/MetaVF_toolkit_no_builds.yaml

# Disabled: conda-pack based multi-stage build. If this is re-enabled, the environment
# moves to /venv and the PATH entry below must point at /venv/bin instead.
# #pack environment
# RUN conda install -c conda-forge conda-pack

# RUN conda-pack -n MetaVF_toolkit -o /tmp/env.tar && \
#     mkdir /venv && cd /venv && tar xf /tmp/env.tar && \
#     rm /tmp/env.tar

# RUN /venv/bin/conda-unpack

# FROM debian:latest AS runtime

# COPY --from=build /venv /venv
# COPY --from=build /MetaVF_toolkit /MetaVF_toolkit

# Decompress the bundled databases and make the entry script executable.
RUN gunzip /MetaVF_toolkit/databases/*.gz && chmod 755 /MetaVF_toolkit/metaVF.py

# The environment is created under the conda root (/opt/conda/envs), not /venv:
# /venv only exists in the disabled conda-pack variant above.
ENV PATH=/opt/conda/envs/MetaVF_toolkit/bin:/MetaVF_toolkit:$PATH

# procps is installed in this image; apt package lists are removed in the same layer
# so they do not add to the image size.
RUN apt-get update && apt-get install procps -y && apt-get clean && rm -rf /var/lib/apt/lists/*

SHELL ["/bin/bash", "-c"]
# Exec form so bash runs directly as the container process rather than under `bash -c`.
CMD ["/bin/bash"]
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# syntax=docker/dockerfile:1
# Image for PathoFact2: creates the conda environments the pipeline uses, installs
# DeepARG, clones PathoFact2 and pre-builds the Snakemake-managed conda environments.
FROM continuumio/miniconda3:main AS build

ENV container=docker

# add conda channels
RUN conda config --add channels conda-forge && \
    conda config --add channels bioconda

#TODO: evaluate if all environments here are required for running PathoFact2, given that our interest is in VF prediction
RUN conda create -n rgi_env -c conda-forge -c bioconda rgi=6.0.3 -y
RUN conda create -n PathoFact_env -c conda-forge -c bioconda snakemake=7.25.0 python=3.11.4 pandas=2.2.2 -y
RUN conda create -n deeparg_env -c defaults -c conda-forge -c bioconda python=2.7.18 diamond=0.9.24 cxx-compiler pip -y
RUN conda create -n amrfinder_plus_env -c conda-forge -c bioconda ncbi-amrfinderplus=3.12.8 -y
RUN conda create -n genomad -c conda-forge -c bioconda genomad=1.8.0 -y

# Write conda's shell setup into ~/.bashrc for later bash sessions. Sourcing ~/.bashrc
# here would have no lasting effect, because shell state does not survive the RUN step.
RUN conda init bash

# Update databases for amrfinder_plus. The database update doesn't support a custom
# location, so it is done inside the container, as opposed to putting the DBs elsewhere,
# which is done for other tools.
#TODO: evaluate impact of running this command during NF processes instead
RUN /opt/conda/envs/amrfinder_plus_env/bin/amrfinder -u

#install DeepARG
RUN /opt/conda/envs/deeparg_env/bin/pip install git+https://github.com/gaarangoa/deeparg.git

#clone PF2
RUN git clone https://gitlab.lcsb.uni.lu/ESB/PathoFact2.git --depth 1


# Create empty files to satisfy Snakemake rules, then run Snakemake to generate the needed conda environments.
# Without this step, running the Snakemake pipeline attempts to create those environments at runtime, and will create
# errors due to running out of space in non-writeable containers.
RUN mkdir -p /abs/path/to/PathoFact2/DATABASES/HMM \
    && mkdir -p /dry/run \
    && mkdir /prefix \
    && echo "#sample_name/folder,abs_path_to_contigs,input_type,extension(if folder)" > /abs/path/to/PathoFact2/list_of_samples.csv \
    && echo "Test_contigs,/dry/run/contigs.fa,contigs" >> /abs/path/to/PathoFact2/list_of_samples.csv \
    && echo "#end" >> /abs/path/to/PathoFact2/list_of_samples.csv \
    && touch /dry/run/contigs.fa \
    && touch /abs/path/to/PathoFact2/DATABASES/HMM/virulence_factors_CDD.hmm \
    && touch /abs/path/to/PathoFact2/DATABASES/HMM/TOX_CDD.hmm \
    && touch /abs/path/to/PathoFact2/DATABASES/cddid_all.tbl \
    && conda run -n PathoFact_env snakemake -s /PathoFact2/Main.smk -c4 --configfile /PathoFact2/Config.yaml --use-conda --conda-frontend conda --conda-prefix /prefix --conda-create-envs-only


# deeparg download_data -o "<path_to_database_dir>/deeparg_db" #TODO: download DBs for DeepARG


# procps is installed in this image; apt package lists are removed in the same layer
# so they do not add to the image size.
RUN apt-get update && apt-get install procps -y && apt-get clean && rm -rf /var/lib/apt/lists/*
ENV PATH=/PathoFact2:$PATH


SHELL ["/bin/bash", "-c"]
CMD ["/bin/bash"]

0 commit comments

Comments
 (0)