Skip to content

Commit 8d78eeb

Browse files
author
Lin Yang
committed
RIMA mouse module
1 parent edbc037 commit 8d78eeb

File tree

321 files changed

+44915
-503413
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

321 files changed

+44915
-503413
lines changed

RIMA.snakefile

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#!/usr/bin/env python
2+
#----------------main snakefile to run RIMA-------------------------#
3+
import os
4+
import sys
5+
import subprocess
6+
import pandas as pd
7+
import yaml
8+
9+
from string import Template
10+
#from snakemake_files.scripts.utils import getTargetInfo
11+
12+
def getRuns(config):
    """Populate ``config['runs']`` from the metasheet named in ``config['metasheet']``.

    The metasheet is a comma-separated file. It is read with pandas so that
    ``#`` comments are dropped and spaces following a delimiter are ignored.
    Every data row becomes one entry that maps the first column (as text) to
    the list of the remaining column values (as text, in column order).

    Args:
        config: dict holding at least the key ``'metasheet'`` (path to the file).

    Returns:
        The same ``config`` dict, with ``config['runs']`` set.
    """
    import csv
    import io

    metadata = pd.read_csv(config['metasheet'], index_col=0, sep=',',
                           comment='#', skipinitialspace=True)
    # Round-trip through CSV text so every value (index included) comes back
    # as a string, then parse that text with the csv module. Splitting the
    # text on whitespace instead would tear a row apart at any space inside a
    # cell, and splitting on "," would mis-handle quoted cells.
    rows = csv.reader(io.StringIO(metadata.to_csv()))
    next(rows, None)  # skip the header row
    config['runs'] = {row[0]: row[1:] for row in rows if row}
    return config
26+
27+
28+
def addCondaPaths_Config(config):
    """Record the conda root and the per-tool environment roots in ``config``.

    Runs ``conda info --root`` to discover the conda installation root and
    sets the keys ``conda_root``, ``stat_root``, ``centrifuge_root`` and
    ``rseqc_root``; the last three point at ``<conda_root>/envs/<env name>``.

    Args:
        config: dict that is updated in place.

    Returns:
        The same ``config`` dict, as ``getRuns`` and ``addExecPaths`` do.

    Raises:
        subprocess.CalledProcessError: if the conda command exits non-zero.
    """
    conda_root = subprocess.check_output('conda info --root', shell=True).decode('utf-8').strip()
    config['conda_root'] = conda_root
    # All three environment roots are built the same way. The root reported
    # by conda is used as-is; prefixing it with another "/" would produce a
    # path starting with "//".
    config['stat_root'] = "%s/envs/stat_perl_r" % conda_root
    config['centrifuge_root'] = "%s/envs/centrifuge_env" % conda_root
    config['rseqc_root'] = "%s/envs/rseqc_env" % conda_root
    return config
37+
38+
39+
def addExecPaths(config):
    """Make sure ``config['trust4_path']`` is set.

    When the key is missing or holds a falsy value, it defaults to the
    ``TRUST4`` directory inside the current working directory. A value
    already supplied in ``config`` is left untouched.

    Args:
        config: dict that is updated in place.

    Returns:
        The same ``config`` dict.
    """
    # No conda lookup here: the default depends only on the working directory,
    # so this helper must not fail just because conda is unavailable.
    if not config.get("trust4_path"):
        config["trust4_path"] = os.path.join(os.getcwd(), 'TRUST4')
    return config
47+
48+
def loadRef(config):
    """Merge the reference settings of the configured assembly into ``config``.

    Reads the YAML file named by ``config['ref']``, selects the mapping stored
    under ``config['assembly']`` and copies each of its entries into
    ``config``. Keys already present in ``config`` are never overwritten, so
    user-defined values win over the reference defaults.

    Args:
        config: dict holding at least the keys ``'ref'`` and ``'assembly'``;
            updated in place.

    Returns:
        The same ``config`` dict.

    Raises:
        KeyError: if ``'ref'`` or ``'assembly'`` is missing from ``config``,
            or the reference file has no entry for the assembly.
    """
    # The context manager closes the file even when YAML parsing fails.
    with open(config['ref']) as f:
        ref_info = yaml.safe_load(f)
    for k, v in ref_info[config['assembly']].items():
        # NO CLOBBERING what is user-defined!
        config.setdefault(k, v)
    return config
57+
58+
def load_config(config_file):
    """Parse the main pipeline configuration file.

    Args:
        config_file: path to a YAML file.

    Returns:
        Whatever ``yaml.safe_load`` yields for the file content.

    Raises:
        yaml.YAMLError: if the file is not valid YAML.
    """
    with open(config_file, 'r') as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Name the broken file and fail here. Returning None instead would
            # only surface later as an unrelated-looking TypeError when the
            # settings are indexed.
            print("Could not parse %s: %s" % (config_file, exc), file=sys.stderr)
            raise
65+
66+
def load_execution(execution_file):
    """Parse the execution file that holds the per-module on/off switches.

    Args:
        execution_file: path to a YAML file.

    Returns:
        Whatever ``yaml.safe_load`` yields for the file content.

    Raises:
        yaml.YAMLError: if the file is not valid YAML.
    """
    with open(execution_file, 'r') as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Name the broken file and fail here. Returning None instead would
            # only surface later as an unrelated-looking TypeError when a
            # module switch is looked up.
            print("Could not parse %s: %s" % (execution_file, exc), file=sys.stderr)
            raise
73+
74+
#--------- CONFIG set up ---------------
# Runs when the snakefile is parsed. Both YAML files are opened by relative
# path, i.e. they are looked up in the current working directory.
#   config    - pipeline settings; the three helper calls below then add the
#               conda environment roots, the reference entries of the selected
#               assembly and the TRUST4 path to it in place.
#   execution - per-module switches read by all_targets and by the conditional
#               includes further down.
75+
config = load_config('config.yaml')
76+
execution = load_execution('execution.yaml')
77+
addCondaPaths_Config(config)
78+
loadRef(config)
79+
addExecPaths(config)
80+
81+
82+
83+
84+
#------------------------------------------------------------------------------
85+
# TARGETS
86+
#------------------------------------------------------------------------------
87+
def all_targets(wildcards):
    """Gather the target files of every module switched on in ``execution``.

    The switches are checked in a fixed order and the lists produced by the
    matching ``*_targets`` functions are concatenated in that order.

    Args:
        wildcards: passed through unchanged to each ``*_targets`` function.

    Returns:
        A list with the targets of all enabled modules.
    """
    # The *_targets functions are not defined in this file, so each one is
    # wrapped in a lambda: its name is only resolved when the corresponding
    # switch is on and the function is actually called.
    module_targets = (
        ("preprocess_individual", lambda w: preprocess_individual_targets(w)),
        ("preprocess_cohort", lambda w: preprocess_cohort_targets(w)),
        ("differential_expression_cohort", lambda w: diffexpr_targets(w)),
        ("immune_infiltration_cohort", lambda w: immune_infiltration_targets(w)),
        ("immune_repertoire_individual", lambda w: immune_repertoire_individual_targets(w)),
        ("microbiome_individual", lambda w: microbiome_individual_targets(w)),
        ("microbiome_cohort", lambda w: microbiome_cohort_targets(w)),
    )
    collected = []
    for switch, targets_for in module_targets:
        if execution[switch]:
            collected.extend(targets_for(wildcards))
    return collected
104+
105+
106+
# Aggregate rule: its input list is computed by all_targets, so requesting
# this rule requests the outputs of every module enabled in `execution`.
# NOTE(review): this is the first rule in the file, which presumably makes it
# the default Snakemake target - confirm against the Snakemake documentation.
rule target:
107+
input:
108+
all_targets
109+
message: "Compiling all outputs"
110+
111+
112+
# Pull in a module snakefile only when its switch in `execution` is truthy.
# The switch names are the same ones all_targets checks.
# NOTE(review): the *_targets functions called by all_targets are presumably
# defined in these module files - verify against the modules.
if execution["preprocess_individual"]:
113+
include: "./modules/preprocess/preprocess_individual.snakefile"
114+
if execution["preprocess_cohort"]:
115+
include: "./modules/preprocess/preprocess_cohort.snakefile"
116+
if execution["differential_expression_cohort"]:
117+
include: "./modules/differential_expression/differential_expression_cohort.snakefile"
118+
if execution["immune_infiltration_cohort"]:
119+
include: "./modules/immune_infiltration/immune_infiltration_cohort.snakefile"
120+
if execution["immune_repertoire_individual"]:
121+
include: "./modules/immune_repertoire/immune_repertoire_individual.snakefile"
122+
if execution["microbiome_individual"]:
123+
include: "./modules/microbiome/microbiome_individual.snakefile"
124+
if execution["microbiome_cohort"]:
125+
include: "./modules/microbiome/microbiome_cohort.snakefile"

config.yaml

Lines changed: 53 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,100 +1,61 @@
1-
---
2-
############################################################
3-
# Data information #
4-
############################################################
5-
6-
ref: ref.yaml
7-
assembly: mm10 #hg38 or mm10
8-
9-
cancer_type: AUTO #short name of cancer type
10-
metasheet: metasheet.txt
11-
designs: [Condition] #the column from metasheet which is used to do comparison
12-
13-
############################################################
14-
# Data Processing #
15-
############################################################
16-
17-
### star
18-
#Possible values are [ff-firststrand, ff-secondstrand, ff-unstranded, fr-firststrand, fr-secondstrand, fr-unstranded (default), transfrags]
19-
library_type: 'fr-firststrand'
20-
stranded: true
21-
threads: 8
22-
23-
### rseqc parameters
24-
rseqc_ref: house_keeping #rseqc ref model
25-
26-
############################################################
27-
# Differential Gene Expression #
28-
############################################################
29-
30-
31-
### batch_removal and PCA analysis
32-
#If you don't need to do batch removal, just setting [no]
33-
batch_covariates: [no]
34-
35-
neoantigen_callers: "NetMHC"
36-
neoantigen_epitope_lengths: "8,9,10,11"
37-
38-
### deseq2
39-
batch: [no] #[Clinical Phenotype] or [no], Clinical phenotype could be the column from metasheet which accounts for batch effect
40-
comparison: between ##between or loop. between: compare any two phenotypes in a given condition column;loop: compare one phenotype and all the others
41-
42-
############################################################
43-
# level3 #
44-
############################################################
45-
46-
### microbiota
47-
centrifuge: true #run centrifuge or pathseq
48-
49-
### Trust4
50-
trust4_clinical_phenotype: [Condition]
51-
52-
### arcasHLA
53-
##specifying group information annotated in HLA oncoplot
54-
hla_annot_group: [Responder, Gender]
1+
#########Fixed and user-defined parameters################
2+
metasheet: metasheet.csv # Meta info
3+
ref: ref.yaml # Reference config
4+
assembly: mm10
5+
cancer_type: GBM #TCGA cancer type abbreviations
6+
rseqc_ref: house_keeping #Option: 'house_keeping' or 'false'.
7+
#By default, a subset of housekeeping genes is used by RSeQC to assess alignment quality.
8+
#This reduces the amount of time needed to run RSeQC.
9+
mate: [1,2] #paired-end fastq format, we recommend naming paired-end reads with _1.fq.gz and _2.fq.gz
10+
11+
12+
#########Cohort level analysis parameters################
13+
design: Responder # Condition on which to do comparison (as set up in metasheet.csv)
14+
Treatment: R # Treatment used in DESeq2, corresponding to positive log fold change
15+
Control: NR # Control used in DESeq2, corresponding to negative log fold change
16+
batch: syn_batch # Options: 'false' or a column name from the metasheet.csv.
17+
# If set to a column name in the metasheet.csv, the column name will be used for batch effect analysis (limma).
18+
# It will also be used as a covariate for differential analysis (DESeq2) to account for batch effect.
5519

5620
############################################################
5721
# list samples #
5822
############################################################
23+
5924
samples:
60-
SRR8281228:
61-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281228_1.fastq.gz
62-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281228_2.fastq.gz
63-
SRR8281222:
64-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281222_1.fastq.gz
65-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281222_2.fastq.gz
66-
SRR8281224:
67-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281224_1.fastq.gz
68-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281224_2.fastq.gz
69-
SRR8281225:
70-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281225_1.fastq.gz
71-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281225_2.fastq.gz
7225
SRR8281218:
73-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281218_1.fastq.gz
74-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281218_2.fastq.gz
26+
- /mnt/RIMA/data/SRR8281218_1.fastq.gz
27+
- /mnt/RIMA/data/SRR8281218_2.fastq.gz
7528
SRR8281219:
76-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281219_1.fastq.gz
77-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281219_2.fastq.gz
78-
SRR8281220:
79-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281220_1.fastq.gz
80-
- /liulab/yang/Immunotherapy/toDIGEST/rawRNASeq/fastq/Zhao2019_PD1_Glioblastoma_RNASeq/SRR8281220_2.fastq.gz
81-
82-
###########################################################
83-
# run settings #
84-
###########################################################
85-
runs:
86-
run1:
87-
- SRR8281228
88-
run3:
89-
- SRR8281222
90-
run4:
91-
- SRR8281224
92-
run5:
93-
- SRR8281225
94-
run6:
95-
- SRR8281218
96-
run7:
97-
- SRR8281219
98-
run8:
99-
- SRR8281220
29+
- /mnt/RIMA/data/SRR8281219_1.fastq.gz
30+
- /mnt/RIMA/data/SRR8281219_2.fastq.gz
31+
SRR8281226:
32+
- /mnt/RIMA/data/SRR8281226_1.fastq.gz
33+
- /mnt/RIMA/data/SRR8281226_2.fastq.gz
34+
SRR8281236:
35+
- /mnt/RIMA/data/SRR8281236_1.fastq.gz
36+
- /mnt/RIMA/data/SRR8281236_2.fastq.gz
37+
SRR8281230:
38+
- /mnt/RIMA/data/SRR8281230_1.fastq.gz
39+
- /mnt/RIMA/data/SRR8281230_2.fastq.gz
40+
SRR8281233:
41+
- /mnt/RIMA/data/SRR8281233_1.fastq.gz
42+
- /mnt/RIMA/data/SRR8281233_2.fastq.gz
43+
SRR8281244:
44+
- /mnt/RIMA/data/SRR8281244_1.fastq.gz
45+
- /mnt/RIMA/data/SRR8281244_2.fastq.gz
46+
SRR8281245:
47+
- /mnt/RIMA/data/SRR8281245_1.fastq.gz
48+
- /mnt/RIMA/data/SRR8281245_2.fastq.gz
49+
SRR8281243:
50+
- /mnt/RIMA/data/SRR8281243_1.fastq.gz
51+
- /mnt/RIMA/data/SRR8281243_2.fastq.gz
52+
SRR8281251:
53+
- /mnt/RIMA/data/SRR8281251_1.fastq.gz
54+
- /mnt/RIMA/data/SRR8281251_2.fastq.gz
55+
SRR8281238:
56+
- /mnt/RIMA/data/SRR8281238_1.fastq.gz
57+
- /mnt/RIMA/data/SRR8281238_2.fastq.gz
58+
SRR8281250:
59+
- /mnt/RIMA/data/SRR8281250_1.fastq.gz
60+
- /mnt/RIMA/data/SRR8281250_2.fastq.gz
10061

execution.yaml

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,20 @@
1-
##level1
2-
star: true
3-
salmon: true
4-
rseqc: true
5-
##level2
6-
batch_removal: false
7-
deseq2: false
8-
gsea: false
9-
ssgsea: false
10-
##level3
11-
microbiota: false
12-
trust4: false
13-
arcasHLA: false
14-
pvacseq: false
15-
##level4
16-
immunedeconv: false
17-
mMCP: false
18-
##level5
19-
msisensor2: false
20-
##Report
21-
report: false
221

2+
##Note: The preprocess individual and cohort modules are necessary to get the alignment and quality results.
3+
##Run the remaining modules only after these two modules.
4+
preprocess_individual: true
5+
preprocess_cohort: true
6+
7+
##Optional modules
8+
##Note: The below modules are specialized modules, each dealing with specific targets.
9+
##Make sure to run individual and cohort of each module to get all the results.
10+
11+
##Individual runs
12+
immune_repertoire_individual: false
13+
microbiome_individual: false
14+
15+
16+
##Cohort runs
17+
differential_expression_cohort: true
18+
immune_infiltration_cohort: false
19+
microbiome_cohort: false
2320

metasheet.txt

Lines changed: 0 additions & 8 deletions
This file was deleted.

modules/.DS_Store

10 KB
Binary file not shown.

0 commit comments

Comments
 (0)