Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
1e63fb0
feat: added VEP annotation with REVEL, Sift and PolyPhen Scores to fp…
BiancaStoecker Dec 19, 2025
30894c7
Merge branch 'main' into feat/add-revel-scores
BiancaStoecker Dec 19, 2025
e991968
fix: also add sift and polyphen to unique fp fn.
BiancaStoecker Dec 19, 2025
b6e3955
fix: update curl version
BiancaStoecker Dec 19, 2025
ba99965
Merge branch 'feat/add-revel-scores' of github.com:snakemake-workflow…
BiancaStoecker Dec 19, 2025
9777d13
Merge branch 'main' into feat/add-revel-scores
famosab Jan 16, 2026
e9657ce
fix: Update wrapper to latest version
BiancaStoecker Jan 20, 2026
4812629
fix: use existing env tools
BiancaStoecker Jan 20, 2026
49ef78e
fix: Update wrapper to latest version
BiancaStoecker Jan 20, 2026
2aba570
fix: Update wrapper to latest version
BiancaStoecker Jan 20, 2026
90d50f1
fix: Update wrapper to latest version
BiancaStoecker Jan 20, 2026
45a13e3
fix: Update wrapper to latest version
BiancaStoecker Jan 20, 2026
1caa2e0
fix: use existing env tools
BiancaStoecker Jan 20, 2026
8725eb2
test: Add disk space cleanup step for github action
BiancaStoecker Jan 20, 2026
24d716f
test: remove docker-images in free disc space action
BiancaStoecker Jan 20, 2026
8f63d86
try: using downsampled VEP cache for CI tests
famosab Jan 22, 2026
44f5fe5
fix: use downsampled cache for both annotations
famosab Jan 22, 2026
bf82976
fix: gunzip vep tar file
famosab Jan 22, 2026
66888a2
fix: utilize chr22 revel for testing
famosab Jan 22, 2026
8c2a640
fix: linting
famosab Jan 22, 2026
dcb06ca
test: use downsampled revel score file
famosab Jan 22, 2026
e5ff56b
fix: linting
famosab Jan 22, 2026
7b5b5dc
fix: correct path
famosab Jan 22, 2026
a05f625
fix: use locally created tbi
famosab Jan 22, 2026
99441a4
fix: use locally created tbi
famosab Jan 23, 2026
9bf2d89
fix: linting
famosab Jan 23, 2026
da06c03
test: move resources to test folder
famosab Jan 23, 2026
6a0db08
test: change to source_path
famosab Jan 23, 2026
02b09b7
fix: move to resources folder
famosab Jan 23, 2026
8772120
fix: gitignore
famosab Jan 23, 2026
e469e50
fix: tar
famosab Jan 23, 2026
bb8f426
fix: path
famosab Jan 23, 2026
8b23d23
fix: path
famosab Jan 23, 2026
0181ccf
fix: path
famosab Jan 23, 2026
7e291c7
fix: get tar from git
famosab Jan 23, 2026
92af514
Merge branch 'main' into feat/add-revel-scores
famosab Jan 27, 2026
f73d896
fix: use source path for vep cache
famosab Jan 29, 2026
a8ee11e
feat: add vembrane table
famosab Jan 29, 2026
01b6293
feat: add note in readme
famosab Jan 29, 2026
ada1039
test: setting snakemake min version
famosab Jan 29, 2026
0557fb3
fix: add bcftools/norm and special expression
famosab Jan 30, 2026
ce735e0
feat: more entries for variant table
famosab Jan 30, 2026
89ac4c8
fix: update vep cache to 113
famosab Jan 30, 2026
594a52d
fix: update vep cache to 115
famosab Jan 30, 2026
b7118fe
perf: autobump snakemake wrappers (#158)
johanneskoester Jan 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ if "variant-calls" in config:

include: "rules/compare-vcfs.smk"
include: "rules/eval-results.smk"
include: "rules/annotation.smk"

rule all:
input:
Expand All @@ -30,11 +31,11 @@ if "variant-calls" in config:
),
get_fp_fn_reports,
expand(
"results/fp-fn/vcf/{benchmark}/{benchmark}.shared_fn.sorted.vcf.gz",
"results/fp-fn/annotated_vcf/{benchmark}/{benchmark}.shared_fn.annotated.vcf.gz",
benchmark=used_benchmarks,
),
expand(
"results/fp-fn/vcf/{benchmark_callset[0]}/{benchmark_callset[1]}.unique_{classification}.sorted.vcf.gz",
"results/fp-fn/annotated_vcf/{benchmark_callset[0]}/{benchmark_callset[1]}.unique_{classification}.annotated.vcf.gz",
benchmark_callset=used_benchmarks_callsets,
classification=["fp", "fn"],
),
Expand Down
5 changes: 5 additions & 0 deletions workflow/envs/curl.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conda environment providing the curl command line client.
# Referenced by the download_revel rule via "../envs/curl.yaml".
channels:
- conda-forge
- nodefaults
dependencies:
# Exact version pin.
- curl =7.86.0
6 changes: 6 additions & 0 deletions workflow/envs/htslib.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Conda environment providing bgzip (htslib) and unzip.
# Referenced by the process_revel_scores rule via "../envs/htslib.yaml".
channels:
- conda-forge
- bioconda
# Keep the implicit "defaults" channel out of the solve, matching curl.yaml.
- nodefaults
dependencies:
- htslib =1.12
- unzip =6.0
125 changes: 125 additions & 0 deletions workflow/rules/annotation.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Populate resources/vep/cache through the vep/cache wrapper for the genome
# build reported by get_reference_genome_build(). The release is pinned here
# and in get_vep_plugins; keep both values in sync when bumping.
rule get_vep_cache:
    output:
        directory("resources/vep/cache"),
    log:
        "logs/vep/cache.log",
    params:
        release="111",
        build=get_reference_genome_build(),
        species="homo_sapiens",
    # Between-workflow caching of the output, with the software environment
    # left out of the cache key.
    cache: "omit-software"
    wrapper:
        "v8.0.2/bio/vep/cache"


# Populate resources/vep/plugins through the vep/plugins wrapper. The release
# is pinned to the same value as in get_vep_cache.
rule get_vep_plugins:
    output:
        directory("resources/vep/plugins"),
    log:
        "logs/vep/plugins.log",
    params:
        release="111",
    wrapper:
        "v8.0.2/bio/vep/plugins"


# Download the REVEL v1.3 score archive from Zenodo into a temporary zip that
# Snakemake removes once all consumers have finished.
rule download_revel:
    output:
        temp("resources/revel_scores.zip"),
    log:
        "logs/vep_plugins/download_revel.log",
    conda:
        "../envs/curl.yaml"
    shell:
        # --fail: exit non-zero on HTTP errors instead of saving the error
        #         page as the zip and letting the rule succeed.
        # --location: follow redirects issued by the file server.
        "curl --fail --location "
        "https://zenodo.org/records/7072866/files/revel-v1.3_all_chromosomes.zip "
        "-o {output} &> {log}"


# Convert the zipped, comma-separated REVEL table into a bgzip-compressed,
# tab-separated file whose header line is prefixed with "#".
# For GRCh38 the rows without a value in column 3 (".") are dropped and the
# remainder is sorted by column 1 and numerically by column 3; for GRCh37 the
# converted table is used as is. Any other build aborts with exit code 125.
rule process_revel_scores:
    input:
        "resources/revel_scores.zip",
    output:
        "resources/revel_scores.tsv.gz",
    params:
        build=get_reference_genome_build(),
    log:
        "logs/vep_plugins/process_revel_scores.log",
    conda:
        "../envs/htslib.yaml"
    shell:
        """
        # Send stderr of every following command to the log file.
        exec 2> {log}

        # Fail fast on unsupported builds, before unpacking the archive.
        case "{params.build}" in
            GRCh37|GRCh38)
                ;;
            *)
                echo "Annotation of REVEL scores only supported for GRCh37 or GRCh38" >&2
                exit 125
                ;;
        esac

        tmpfile=$(mktemp {resources.tmpdir}/revel_scores.XXXXXX)
        # Remove the intermediate file on success and on failure.
        trap 'rm -f "$tmpfile"' EXIT

        unzip -p {input} | tr "," "\t" | sed '1s/.*/#&/' | bgzip -c > "$tmpfile"

        if [ "{params.build}" = "GRCh38" ] ; then
            zgrep -h -v ^#chr "$tmpfile" | awk '$3 != "." ' | sort -k1,1 -k3,3n - | cat <(zcat "$tmpfile" | head -n1) - | bgzip -c > {output}
        else
            cat "$tmpfile" > {output}
        fi
        """


# Create the tabix index for the processed REVEL score table. The option
# string comes from get_tabix_revel_params().
rule tabix_revel_scores:
    input:
        "resources/revel_scores.tsv.gz",
    output:
        "resources/revel_scores.tsv.gz.tbi",
    log:
        "logs/tabix/revel.log",
    params:
        get_tabix_revel_params(),
    wrapper:
        "v8.0.2/bio/tabix/index"


# Annotate the shared false-negative calls of a benchmark with VEP, including
# the REVEL plugin, and write an annotated VCF plus an HTML stats report.
rule annotate_shared_fn:
    input:
        calls="results/fp-fn/vcf/{benchmark}/{benchmark}.shared_fn.sorted.vcf.gz",
        cache=access.random("resources/vep/cache"),
        plugins=access.random("resources/vep/plugins"),
        # REVEL score table and its index, resolved through get_plugin_aux.
        revel=lambda wildcards: get_plugin_aux("REVEL"),
        revel_tbi=lambda wildcards: get_plugin_aux("REVEL", True),
        fasta=access.random("resources/reference/genome.fasta"),
        fai="resources/reference/genome.fasta.fai",
    output:
        calls="results/fp-fn/annotated_vcf/{benchmark}/{benchmark}.shared_fn.annotated.vcf.gz",
        stats="results/fp-fn/annotated_vcf/{benchmark}/{benchmark}.shared_fn.stats.html",
    log:
        "logs/vep/fp-fn/{benchmark}/{benchmark}.shared_fn.annotate.log",
    params:
        # List of VEP plugins to enable, see
        # https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html
        # Plugin arguments may be appended to an entry, e.g. "MyPlugin,1,FOO".
        plugins=["REVEL"],
        extra="--everything --check_existing --vcf_info_field ANN --hgvsg --sift b --polyphen b ",
    threads: 4
    group:
        "annotation"
    wrapper:
        "v8.0.2/bio/vep/annotate"


# Annotate the callset-specific (unique) false-positive / false-negative calls
# with VEP, including the REVEL plugin, and write an annotated VCF plus an
# HTML stats report.
rule annotate_unique_fp_fn:
    input:
        calls="results/fp-fn/vcf/{benchmark}/{callset}.unique_{classification}.sorted.vcf.gz",
        cache=access.random("resources/vep/cache"),
        plugins=access.random("resources/vep/plugins"),
        # REVEL score table and its index, resolved through get_plugin_aux.
        revel=lambda wc: get_plugin_aux("REVEL"),
        revel_tbi=lambda wc: get_plugin_aux("REVEL", True),
        fasta=access.random("resources/reference/genome.fasta"),
        fai="resources/reference/genome.fasta.fai",
    output:
        calls="results/fp-fn/annotated_vcf/{benchmark}/{callset}.unique_{classification}.annotated.vcf.gz",
        stats="results/fp-fn/annotated_vcf/{benchmark}/{callset}.unique_{classification}.stats.html",
    params:
        # Pass a list of plugins to use, see https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html
        # Plugin args can be added as well, e.g. via an entry "MyPlugin,1,FOO", see docs.
        plugins=["REVEL"],
        # Same option set as annotate_shared_fn so both annotated outputs are
        # produced with identical VEP settings (SIFT and PolyPhen requested
        # explicitly with both prediction and score).
        extra="--everything --check_existing --vcf_info_field ANN --hgvsg --sift b --polyphen b",
    log:
        "logs/vep/fp-fn/{benchmark}/{callset}.unique_{classification}.annotate.log",
    threads: 4
    group:
        "annotation"
    wrapper:
        "v8.0.2/bio/vep/annotate"
13 changes: 13 additions & 0 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -733,3 +733,16 @@ if "variant-calls" in config:
classification="fp|fn",
comparison="genotype|existence",
vartype="snvs|indels",


def get_tabix_revel_params():
    """Return the tabix option string used to index the REVEL score table.

    Column 1 is always the sequence column. The begin/end column is 2 when
    ``config["reference-genome"]`` equals ``"grch37"`` and 3 for every other
    value.
    """
    if config["reference-genome"] == "grch37":
        position_column = 2
    else:
        position_column = 3
    return "-f -s 1 -b {0} -e {0}".format(position_column)


def get_plugin_aux(plugin, index=False):
    """Return the auxiliary file path required by a VEP plugin.

    Args:
        plugin: Plugin name. Only ``"REVEL"`` has an auxiliary file.
        index: When true, return the path of the ``.tbi`` index instead of
            the score table itself.

    Returns:
        The path as a string for ``"REVEL"``, otherwise an empty list.
    """
    if plugin != "REVEL":
        return []
    table_path = "resources/revel_scores.tsv.gz"
    return (table_path + ".tbi") if index else table_path
Loading