-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup_env.Snakefile
More file actions
129 lines (111 loc) · 3.83 KB
/
setup_env.Snakefile
File metadata and controls
129 lines (111 loc) · 3.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
################################################################################
# Snakefile for preparing all the resources before being able to run the
# mitoBench ancient DNA MT pipeline.
#
# Alex Huebner, 01/03/19
################################################################################
from snakemake.utils import R
# Run the workflow with /tmp as working directory; relative paths used by the
# jobs are resolved against it.
workdir: "/tmp"
# Constants
# Directory that contains this Snakefile; resources and conda environment
# definitions are addressed relative to it.
PATH = workflow.basedir
# Gzip-compressed FastA file of the human MT genome (NC_012920)
HUMAN_MT_FAS = f"{PATH}/resources/NC_012920.fa.gz"
# Rules that are always run locally rather than submitted as cluster jobs
localrules: decompress_fasta, bwa_index, samtools_index
# Default target: request every resource the setup workflow has to provide.
# All targets live in the "resources" directory next to this Snakefile.
rule all:
    input:
        [
            f"{PATH}/resources/{target}"
            for target in (
                "NC_012920.fa.fai",
                "NC_012920_1000.fa.fai",
                "NC_012920_1000.fa.ann",
                "haplogrep",
                "contamMix/exec/estimate.R",
                "install_contamMix.done",
                "install_summary.done",
            )
        ]
# Prepare MT genome for analysis
# Decompress the gzipped FastA file of the human MT genome.
# NOTE: "{PATH}" in the output is a Snakemake wildcard (the string is not an
# f-string); it is matched against the absolute paths requested by rule all.
rule decompress_fasta:
    input:
        # Declared as input so that Snakemake tracks the compressed file and
        # reports it as missing before gunzip is started.
        HUMAN_MT_FAS
    output:
        "{PATH}/resources/NC_012920.fa"
    message: "De-compress the FastA sequence of the human MT genome"
    conda: f"{PATH}/env/mitoBench_bioconda.yaml"
    version: "0.3"
    shell:
        "gunzip -c {input} > {output}"
# Write a copy of the MT genome in which the first 1000 bases of the sequence
# are appended to its end (presumably to allow alignments across the junction
# of the circular genome - confirm against the main pipeline).
rule extend_fasta:
    input:
        "{PATH}/resources/NC_012920.fa"
    output:
        "{PATH}/resources/NC_012920_1000.fa"
    message: "Extend the FastA sequence of the human MT genome by 1000 bp"
    conda: f"{PATH}/env/mitoBench_bioconda.yaml"
    version: "0.3"
    shell:
        # Doubled braces are escapes for Snakemake's string formatting and
        # reach bioawk as single braces.
        """
        bioawk -c fastx '{{
            print ">" $name;
            print $seq substr($seq,1,1000);
        }}' {input} > {output}
        """
# Build the BWA index of the extended MT genome.
# All five files written by "bwa index" are declared, so that an incomplete
# or partially deleted index is detected and rebuilt; rule all only requests
# the .ann file.
rule bwa_index:
    input:
        "{PATH}/resources/NC_012920_1000.fa"
    output:
        "{PATH}/resources/NC_012920_1000.fa.amb",
        "{PATH}/resources/NC_012920_1000.fa.ann",
        "{PATH}/resources/NC_012920_1000.fa.bwt",
        "{PATH}/resources/NC_012920_1000.fa.pac",
        "{PATH}/resources/NC_012920_1000.fa.sa"
    message: "BWA index the FastA sequence of the human MT genome with 1000 bp extension"
    conda: f"{PATH}/env/mitoBench_bioconda.yaml"
    version: "0.3"
    shell:
        "bwa index {input}"
# Create the samtools FastA index (.fai) of the extended MT genome.
rule samtools_index_extended:
    message: "Samtools faidx the FastA sequence of the human MT genome with 1000 bp extension"
    conda: f"{PATH}/env/mitoBench_bioconda.yaml"
    version: "0.3"
    input:
        fasta = "{PATH}/resources/NC_012920_1000.fa"
    output:
        "{PATH}/resources/NC_012920_1000.fa.fai"
    shell:
        "samtools faidx {input.fasta}"
# Create the samtools FastA index (.fai) of the original MT genome.
rule samtools_index:
    message: "Samtools faidx the FastA sequence of the human MT genome"
    conda: f"{PATH}/env/mitoBench_bioconda.yaml"
    version: "0.3"
    input:
        fasta = "{PATH}/resources/NC_012920.fa"
    output:
        "{PATH}/resources/NC_012920.fa.fai"
    shell:
        "samtools faidx {input.fasta}"
# Download software not available via conda
# Fetch the haplogrep-cmd release archive and unpack it into the resources
# directory; the archive is expected to contain a file named "haplogrep"
# (TODO confirm against the release contents).
rule download_haplogrep:
    output:
        "{PATH}/resources/haplogrep"
    message: "Download haplogrep-cmd from GitHub"
    params:
        url = "https://github.com/seppinho/haplogrep-cmd/releases/download/v2.2.5/haplogrep.zip"
    shell:
        # {PATH} inside the shell command is the module-level constant.
        # "unzip -o" overwrites files left over from a previous run instead of
        # prompting, which would stall a non-interactive job.
        """
        wget -O {PATH}/resources/haplogrep.zip {params.url}
        unzip -o {PATH}/resources/haplogrep.zip -d {PATH}/resources
        rm {PATH}/resources/haplogrep.zip
        """
# Unpack the contamMix tar ball shipped with the pipeline so that its
# estimate.R script becomes available in the resources directory.
rule uncompress_contamMix:
    output:
        "{PATH}/resources/contamMix/exec/estimate.R"
    message: "Uncompress the tar ball of contamMix"
    conda: f"{PATH}/env/mitoBench_bioconda.yaml"
    params:
        tarball = f"{workflow.basedir}/resources/contamMix_1.0-10.tar.gz"
    shell:
        # The workflow runs with workdir /tmp, so tar has to be told where to
        # extract; otherwise the files end up in /tmp and the declared output
        # is never created. Assumes the archive has a top-level "contamMix"
        # directory, as the output path implies.
        "tar xvf {params.tarball} -C {PATH}/resources"
# Install the contamMix R package via an R script. The ".done" file is only a
# flag that Snakemake touches after the script has finished successfully.
rule install_contammix:
    message: "Install R package of contamMix"
    conda: f"{PATH}/env/mitoBench_bioconda.yaml"
    params:
        # Location of the package tar ball; presumably read by the R script
        # through the snakemake object - verify in install_contamMix.R.
        tarball = f"{PATH}/resources/contamMix_1.0-10.tar.gz"
    output:
        touch("{PATH}/resources/install_contamMix.done")
    script:
        "scripts/install_contamMix.R"
# Install the R package(s) needed for the summary via an R script. The ".done"
# file is only a flag that Snakemake touches after the script has finished
# successfully.
rule install_summary:
    message: "Install R package used for the summary"
    conda: f"{PATH}/env/mitoBench_bioconda.yaml"
    output:
        touch("{PATH}/resources/install_summary.done")
    script:
        "scripts/install_summary.R"