-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathSnakefile
More file actions
70 lines (57 loc) · 2.44 KB
/
Snakefile
File metadata and controls
70 lines (57 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# import some python modules to define variables
import os
import shutil
from pathlib import Path
from os.path import isfile, join
# shell.prefix(config['bcftools'])
# shell.prefix(config['vcftools'])
##### set minimum snakemake version #####
#min_version("6.1")
##### setup report #####
#configfile: '../config/ancestral_Eddie.yaml'
# Configuration file for this workflow; the config[...] lookups further down
# read their values from it.
configfile: '../config/glob_cattle.yaml'
# reStructuredText file attached to the workflow via the report directive.
report: "reports/tsinfer.rst"
##### setup singularity #####
# Can we set conda env here???
##### Define some variables #####
# all files should be in the vcfDir/RawVCF folder
# files that need splitting are named: Combined_something.vcf.gz
# files already separated by chr are named: Chr{n}_something.vcf.gz
# the script will identify and (1) split, if necessary,
# (2) sort into the correct Chr folder in vcfDir.
# defining variables that will be used throughout the script
# Path to the VCF folder, taken from the config file.
vcfdir = config['vcf_dir']
# That folder can live anywhere but must be laid out as:
#   vcfDir:
#     - RawVCF
#     - one folder per chromosome, named like the chromosome wildcard
#       (chr1, chr2, ...)
# The final directory name is editable, so it can be named after the
# specific project.
# mapdir is where the genetic map for phasing is found (full path in the
# config).
Project = config['PROJECT']
projdir = '/'.join((config['work_dir'], Project))
mapdir = config['genmap']
# One 'chr<N>' label for every chromosome number from start_chromosome to
# end_chromosome (both included); these are the values used for the
# chromosome wildcard.
chromosomes = [
    f'chr{number}'
    for number in range(config['start_chromosome'], config['end_chromosome'] + 1)
]
# Constrain the chromosome wildcard to 'chr' followed by one or more digits,
# to avoid errors from the wildcard matching anything else.
wildcard_constraints:
    chromosome = r'chr\d+'
##### load rules #####
#include: "rules/ancestral_inference.smk"
# Rule files that make up the workflow, pulled in with the include directive
# in the order listed.
include: "rules/split.smk"
include: "rules/filter.smk"
include: "rules/merge_samples_vcf.smk"
include: "rules/phasing.smk"
include: "rules/ancestral_info_to_vcf.smk"
include: "rules/new_infer_trees.smk"
##### Conditional inputs for rule all, selected by config['phase_only'] #####
# phase_only set: request only the index (.csi) of the phased VCF of every
#   chromosome, located in that chromosome's folder under vcfdir.
# otherwise: request the dated tree sequence of every chromosome under
#   projdir/Tsinfer/trees.
# Only the config flag is consulted here; no check is made for phased VCFs
# that already exist on disk.
if config['phase_only']:
    _rule_all_pattern = f'{vcfdir}/{{chromosome}}/{{chromosome}}_phased.vcf.gz.csi'
else:
    _rule_all_pattern = f'{projdir}/Tsinfer/trees/{{chromosome}}.dated.trees'
# Fill the chromosome wildcard with every configured chromosome label.
rule_all_input = expand(_rule_all_pattern, chromosome=chromosomes)
##### target rules #####
# Default target: requests everything listed in rule_all_input, i.e. one
# dated tree sequence per chromosome or, when config['phase_only'] is set,
# the index of one phased VCF per chromosome. The phased VCFs are stored in
# the vcf folder declared in the configuration file.
# NOTE(review): filtering the VCFs before they are merged is presumably
# enforced by the included rule files rather than here -- confirm.
rule all:
    input: rule_all_input