11# Usage:
22
33# Run all stages (default, takes hours):
4- # snakemake
4+ # snakemake --cores 'all'
55# or
6- # snakemake --config stages="all"
6+ # snakemake --config stages="all" --cores 'all'
77
88# Skip retrieve and manubot, which will speed things up substantially (runs in seconds):
9- # snakemake --config stages="skip-refs"
9+ # snakemake --config stages="skip-refs" --cores 'all'
1010
1111# Fetches new citations but doesn't update existing ones. Slow, but faster than "all".
12- # snakemake --config stages="new-refs"
12+ # snakemake --config stages="new-refs" --cores 'all'
1313
1414
1515configfile : 'workflow/config.yaml'
@@ -40,6 +40,10 @@ if stages == "all" or stages == "new-refs":
4040 expand ("{base_dir}STRchive-disease-loci.hg38.TRGT.bed" , base_dir = base_dir ),
4141 expand ("{base_dir}STRchive-disease-loci.hg19.TRGT.bed" , base_dir = base_dir ),
4242 expand ("{base_dir}STRchive-disease-loci.T2T-chm13.TRGT.bed" , base_dir = base_dir ),
43+ # Atarva bed files
44+ expand ("{base_dir}STRchive-disease-loci.hg38.atarva.bed.gz" , base_dir = base_dir ),
45+ expand ("{base_dir}STRchive-disease-loci.hg19.atarva.bed.gz" , base_dir = base_dir ),
46+ expand ("{base_dir}STRchive-disease-loci.T2T-chm13.atarva.bed.gz" , base_dir = base_dir ),
4347 # Extended BED files
4448 expand ("{base_dir}STRchive-disease-loci.hg38.bed" , base_dir = base_dir ),
4549 expand ("{base_dir}STRchive-disease-loci.hg19.bed" , base_dir = base_dir ),
@@ -61,18 +65,22 @@ elif stages == "skip-refs":
6165 expand ("{base_dir}STRchive-disease-loci.hg38.TRGT.bed" , base_dir = base_dir ),
6266 expand ("{base_dir}STRchive-disease-loci.hg19.TRGT.bed" , base_dir = base_dir ),
6367 expand ("{base_dir}STRchive-disease-loci.T2T-chm13.TRGT.bed" , base_dir = base_dir ),
68+ # Atarva bed files
69+ expand ("{base_dir}STRchive-disease-loci.hg38.atarva.bed.gz" , base_dir = base_dir ),
70+ expand ("{base_dir}STRchive-disease-loci.hg19.atarva.bed.gz" , base_dir = base_dir ),
71+ expand ("{base_dir}STRchive-disease-loci.T2T-chm13.atarva.bed.gz" , base_dir = base_dir ),
6472 # Extended BED files
6573 expand ("{base_dir}STRchive-disease-loci.hg38.bed" , base_dir = base_dir ),
6674 expand ("{base_dir}STRchive-disease-loci.hg19.bed" , base_dir = base_dir ),
6775 expand ("{base_dir}STRchive-disease-loci.T2T-chm13.bed" , base_dir = base_dir ),
6876 # Plots
6977 # expand("{base_dir}plots/gnomad.json", base_dir = base_dir),
7078 expand ("{base_dir}plots/age-onset.json" , base_dir = base_dir ),
71- expand ("{base_dir}plots/path-size.json" , base_dir = base_dir ),
79+ expand ("{base_dir}plots/path-size.json" , base_dir = base_dir )# ,
7280 # Reference alleles
73- expand ("{base_dir}ref-alleles/ref-alleles.hg19.txt" , base_dir = base_dir ),
74- expand ("{base_dir}ref-alleles/ref-alleles.hg38.txt" , base_dir = base_dir ),
75- expand ("{base_dir}ref-alleles/ref-alleles.T2T-chm13.txt" , base_dir = base_dir )
81+ # expand("{base_dir}ref-alleles/ref-alleles.hg19.txt", base_dir = base_dir),
82+ # expand("{base_dir}ref-alleles/ref-alleles.hg38.txt", base_dir = base_dir),
83+ # expand("{base_dir}ref-alleles/ref-alleles.T2T-chm13.txt", base_dir = base_dir)
7684else :
7785 raise ValueError ("Invalid stages value. Must be 'all', 'new-refs', or 'skip-refs'" )
7886
@@ -181,6 +189,51 @@ rule TRGT_T2T:
181189 python {scripts_dir}make-catalog.py -f TRGT -g T2T {input.in_json} {output.results}
182190 """
183191
192+ rule atarva_hg38 :  # Build the hg38 atarva-format catalog: plain BED, bgzip-compressed BED, and its .tbi file.
193+ input :
194+ in_json = in_json ,  # in_json is defined outside this block
195+ check = "{base_dir}check-loci.txt"  # not referenced in the shell command; declared only as an input dependency
196+ output :
197+ bed = "{base_dir}STRchive-disease-loci.hg38.atarva.bed" ,  # written directly by make-catalog.py
198+ bed_gz = "{base_dir}STRchive-disease-loci.hg38.atarva.bed.gz" ,  # sorted with bedtools, then compressed with bgzip
199+ tbi = "{base_dir}STRchive-disease-loci.hg38.atarva.bed.gz.tbi"  # expected to be produced by the tabix step on bed_gz
200+ shell :  # three steps: make-catalog.py (-f atarva -g hg38) -> bedtools sort | bgzip -> tabix -p bed
201+ """
202+ python {scripts_dir}make-catalog.py -f atarva -g hg38 {input.in_json} {output.bed}
203+ bedtools sort -i {output.bed} | bgzip -c > {output.bed_gz}
204+ tabix -p bed {output.bed_gz}
205+ """
206+
207+ rule atarva_hg19 :  # Build the hg19 atarva-format catalog: plain BED, bgzip-compressed BED, and its .tbi file.
208+ input :
209+ in_json = in_json ,  # in_json is defined outside this block
210+ check = "{base_dir}check-loci.txt"  # not referenced in the shell command; declared only as an input dependency
211+ output :
212+ bed = "{base_dir}STRchive-disease-loci.hg19.atarva.bed" ,  # written directly by make-catalog.py
213+ bed_gz = "{base_dir}STRchive-disease-loci.hg19.atarva.bed.gz" ,  # sorted with bedtools, then compressed with bgzip
214+ tbi = "{base_dir}STRchive-disease-loci.hg19.atarva.bed.gz.tbi"  # expected to be produced by the tabix step on bed_gz
215+ shell :  # three steps: make-catalog.py (-f atarva -g hg19) -> bedtools sort | bgzip -> tabix -p bed
216+ """
217+ python {scripts_dir}make-catalog.py -f atarva -g hg19 {input.in_json} {output.bed}
218+ bedtools sort -i {output.bed} | bgzip -c > {output.bed_gz}
219+ tabix -p bed {output.bed_gz}
220+ """
221+
222+ rule atarva_T2T :  # Build the T2T-chm13 atarva-format catalog: plain BED, bgzip-compressed BED, and its .tbi file.
223+ input :
224+ in_json = in_json ,  # in_json is defined outside this block
225+ check = "{base_dir}check-loci.txt"  # not referenced in the shell command; declared only as an input dependency
226+ output :
227+ bed = "{base_dir}STRchive-disease-loci.T2T-chm13.atarva.bed" ,  # written directly by make-catalog.py
228+ bed_gz = "{base_dir}STRchive-disease-loci.T2T-chm13.atarva.bed.gz" ,  # sorted with bedtools, then compressed with bgzip
229+ tbi = "{base_dir}STRchive-disease-loci.T2T-chm13.atarva.bed.gz.tbi"  # expected to be produced by the tabix step on bed_gz
230+ shell :  # three steps: make-catalog.py (-f atarva -g T2T) -> bedtools sort | bgzip -> tabix -p bed
231+ """
232+ python {scripts_dir}make-catalog.py -f atarva -g T2T {input.in_json} {output.bed}
233+ bedtools sort -i {output.bed} | bgzip -c > {output.bed_gz}
234+ tabix -p bed {output.bed_gz}
235+ """
236+
184237rule bed_hg38 :
185238 input :
186239 in_json = in_json ,
0 commit comments