-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathmaggie.rules
More file actions
81 lines (74 loc) · 2.54 KB
/
maggie.rules
File metadata and controls
81 lines (74 loc) · 2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import itertools
import os
import collections
import json
import glob
from snakemake.utils import R
#from snakemake.utils import min_version
#min_version("3.2")
#module load snakemake/3.8.0
from snakemake.exceptions import MissingInputException
#Author: Rajesh Patidar
#Email: patidarr
# This will make an output file for every model containing the output of bcftools gtcheck
# Snakemake Base location
# Text that Snakemake prepends to the shell command of every rule:
# stop on the first failing command or pipeline stage, unload all
# environment modules, then wait 20 seconds before the rule's own commands.
shell.prefix(
    "\n"
    "set -e -o pipefail\n"
    "module purge\n"
    "sleep 20s\n"
)
# Directory that holds the per-sample VCFs scanned below; also interpolated
# into the PatientLevel shell command as {PATH}.
PATH = "/data/MoCha/processedDATA/MAGGIE/"
# patient ID -> one-element list with the VCF used as that patient's reference
PAT_FILES = {}
# patient ID -> one-element list with the patient-level report path
PATIENT = {}


def patient_id(vcf_path, sep):
    """Return the patient ID encoded at the start of a VCF file name.

    The ID is the part of the file name (directories stripped) that precedes
    the first occurrence of *sep*; when *sep* does not occur, the whole file
    name is returned. Working on the base name keeps the result independent
    of how deep PATH is in the directory tree.
    """
    return os.path.basename(vcf_path).split(sep)[0]


def maggie_report(patient):
    """Return the one-element list holding *patient*'s report path."""
    return [
        "/data/MoCha/processedDATA/" + patient
        + "/20170910/qc/" + patient + ".maggie.txt"
    ]


# Pass 1: germline WES files. The ID is the text before the first "_".
# When a patient has several matches, the last one in sorted order wins.
for vcf in sorted(glob.glob(PATH + "*germline*WES*gz")):
    pid = patient_id(vcf, "_")
    PATIENT[pid] = maggie_report(pid)
    PAT_FILES[pid] = [vcf]

# Pass 2: every WES file. The ID is the text before the first "~".
# Germline-named files were handled in pass 1 and are skipped here; a file
# is only recorded when the patient has no reference yet, so the first
# match in sorted order wins.
for vcf in sorted(glob.glob(PATH + "*WES*gz")):
    pid = patient_id(vcf, "~")
    if "_germline" in pid:
        continue
    PATIENT[pid] = maggie_report(pid)
    PAT_FILES.setdefault(pid, [vcf])

# Pass 3: RNASEQ files, used only for patients that have no WES reference.
for vcf in sorted(glob.glob(PATH + "*RNASEQ*gz")):
    pid = patient_id(vcf, "~")
    if pid not in PAT_FILES:
        PATIENT[pid] = maggie_report(pid)
        PAT_FILES[pid] = [vcf]
#################################
#localrules: Final
#################################
rule Final:
input: PATIENT.values()
#################################
# This rule will make the Patient Level file
#################################
rule PatientLevel:
input:
lambda wildcards: PAT_FILES[wildcards.patient]
output:
txt="/data/MoCha/processedDATA/{patient}/20170910/qc/{patient}.maggie.txt",
pdf="/data/MoCha/processedDATA/{patient}/20170910/qc/{patient}.maggie.pdf"
params:
rulename = "MAGGIE",
shell:"""
#################################
module load samtools/1.2
echo -e "Model\\tSample\\tScore\\tSites" >{output.txt}
for file2 in {PATH}/*.codingsynon.sorted.vcf.gz
do
name=`basename ${{file2}} .codingsynon.sorted.vcf.gz`
name=`echo ${{name}}|sed -e 's/.star//g'|sed -e 's/.bwa//g'`
score=`bcftools gtcheck -g {input} -G 1 ${{file2}} |grep ^CN |cut -f2,4 |awk '{{OFS="\\t"}}{{print +$1,$2}}'`
echo -e "{wildcards.patient}\\t${{name}}\\t${{score}}"
done >>{output.txt}
module load R
/data/MoCha/patidarr/ngs_pipeline/scripts/PlotMaggie.R --input={output.txt} --patient={wildcards.patient} --output={output.pdf}
#################################
"""