Skip to content

Commit 81e5277

Browse files
committed
v1.1
1 parent 4227115 commit 81e5277

File tree

1 file changed

+86
-70
lines changed

1 file changed

+86
-70
lines changed

configuration.yaml

Lines changed: 86 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,86 @@
1-
# PinAPLPy: Platform-independent Analysis of Pooled Screens using Python
2-
# P. Spahn et al., UC San Diego (10/2016)
3-
4-
# ***********************************************************************
5-
# CONFIGURATION FILE
6-
# ***********************************************************************
7-
8-
# PROJECT PARAMETERS
9-
ScreenType: 'enrichment' # type of screen ('enrichment'/'depletion')
10-
LibFilename: 'GeCKOv2_library.tsv' # filename of library spreadsheet
11-
seq_5_end: 'TCTTGTGGAAAGGACGAAACACCG' # sequence 5' of sgRNA in read
12-
seq_3_end: 'GTTTTAGAGCTAGAAATAGCAAGTT' # sequence 3' of sgRNA in read
13-
CtrlPrefix: 'Control_' # name prefix of control samples
14-
NonTargetPrefix: 'NonTargeting' # prefix for non-targeting sgRNAs in library (keep at default if none present)
15-
sgRNAsPerGene: 6 # number of sgRNAs targeting each gene (not taking non-targeting / microRNAs into account)
16-
17-
# ANALYSIS OPTIONS
18-
AlnOutput: 'Compress' # keep raw alignment output? ('Keep'/'Compress'/'Delete')
19-
keepCutReads: False # keep files containing trimmed reads? ('True'/'False')
20-
VarEst: 'model' # method for count variance estimation ('model'/'sample')
21-
GeneMetric: 'aRRA' # metric for gene enrichment ('aRRA'/'STARS'/'ES')
22-
scatter_annotate: True # annotate scatterplot with sgRNA IDs? ('True'/'False')
23-
ClusterBy: 'variance' # clustering criterion ('variance'/'counts')
24-
TopN: 100 # number of top sgRNAs to take into account for clustering
25-
26-
# TECHNICAL PARAMETERS
27-
CutErrorTol: 0.25 # Cutadapt error tolerance
28-
R_min: 10 # minimal required read length after cutadapt trimming
29-
L_bw: 11 # Bowtie2 -L parameter (seed length)
30-
N_bw: 1 # Bowtie2 -N parameter (seed mismatch)
31-
i_bw: 'S,1,0.75' # Bowtie2 -i parameter (interval function)
32-
Theta: 2 # alignment ambiguity tolerance
33-
N0: 1000000 # read normalization
34-
max_q: 95 # maximum quantile for histogram plots
35-
alpha: 0.01 # significance level
36-
pcorr: 'fdr_bh' # method for p-value correction
37-
Np_ES: 100 # number of permutations for gene enrichment analysis (ES)
38-
Np_aRRA: 100 # number of permutations for gene enrichment analysis (aRRA)
39-
Np_STARS: 10 # number of permutations for gene enrichment analysis (STARS)
40-
thr_STARS: 10 # Threshold percentage for STARS analysis
41-
42-
# VISUALIZATION PARAMETERS
43-
delta_s: 0.01 # count shift before log transformation in scatterplots
44-
delta_p: 1 # count shift before log transformation in heatmap
45-
dpi: 300 # resolution of PNG plots
46-
dotsize: 10 # size of dots in scatterplot
47-
48-
# DIRECTORIES
49-
WorkingDir: '/workingdir/'
50-
DataDir: '/workingdir/Data/'
51-
LibDir: '/workingdir/Library/'
52-
IndexDir: '/workingdir/Library/Bowtie2_Index/'
53-
ScriptsDir: '/opt/PinAPL-Py/Scripts/'
54-
AnalysisDir: '/workingdir/Analysis/'
55-
AlignDir: '/workingdir/Alignments/'
56-
QCDir: '/workingdir/Analysis/QC/'
57-
bw2Dir: '/usr/bin/'
58-
CutAdaptDir: '/root/.local/bin/'
59-
STARSDir: '/opt/PinAPL-Py/Scripts/STARS_mod/'
60-
61-
# SCRIPT FILENAMES
62-
script00: 'BuildLibraryIndex'
63-
script01: 'AlignReads'
64-
script02: 'AnalyzeCounts'
65-
script03: 'AnalyzeControl'
66-
script04: 'ListCandidateGuides'
67-
script05: 'ListCandidateGenes'
68-
script06: 'PlotSample'
69-
script07: 'PlotReplicates'
70-
script08: 'PlotHeatmap'
1+
# PinAPLPy: Platform-independent Analysis of Pooled Screens using Python
2+
# P. Spahn et al., UC San Diego (10/2016)
3+
4+
# ***********************************************************************
5+
# CONFIGURATION FILE
6+
# ***********************************************************************
7+
8+
# PROJECT PARAMETERS
9+
ScreenType: 'enrichment' # type of screen ['enrichment'/'depletion']
10+
LibFilename: 'GeCKOv2_library.tsv' # filename of library spreadsheet
11+
seq_5_end: 'TCTTGTGGAAAGGACGAAACACCG' # sequence 5' of sgRNA in read
12+
seq_3_end: 'GTTTTAGAGCTAGAAATAGCAAGTT' # sequence 3' of sgRNA in read
13+
NonTargetPrefix: 'NonTargeting' # prefix for non-targeting sgRNAs in library (keep at default if none present)
14+
sgRNAsPerGene: 6 # number of sgRNAs targeting each gene (excluding non-targeting controls and miRNAs).
15+
# Only relevant if 'ES' is chosen as the gene ranking method (see GeneMetric).
16+
17+
# ANALYSIS OPTIONS
18+
AlnOutput: 'Compress' # keep raw alignment output? ['Keep'/'Compress'/'Delete']
19+
keepCutReads: False # keep files containing trimmed reads? [True/False] (unquoted boolean)
20+
VarEst: 'model' # method for count variance estimation ['model'/'sample']
21+
GeneMetric: 'aRRA' # metric for gene ranking ['aRRA'/'STARS'/'ES']
22+
scatter_annotate: False # annotate scatterplot with sgRNA IDs? [True/False] (unquoted boolean)
23+
ClusterBy: 'variance' # clustering criterion ['variance'/'counts']
24+
TopN: 25 # number of top sgRNAs to take into account for clustering
25+
HitListFormat: 'tsv' # Format of results spreadsheets (sgRNA hits and gene ranking) ['tsv'/'xlsx']
26+
27+
# TECHNICAL PARAMETERS
28+
CutErrorTol: 0.25 # Cutadapt error tolerance
29+
R_min: 10 # minimal required read length after cutadapt trimming
30+
L_bw: 11 # Bowtie2 -L parameter (seed length)
31+
N_bw: 1 # Bowtie2 -N parameter (seed mismatch)
32+
i_bw: 'S,1,0.75' # Bowtie2 -i parameter (interval function)
33+
Theta: 2 # alignment ambiguity tolerance
34+
max_q: 95 # maximum quantile for histogram plots
35+
alpha: 0.01 # significance level
36+
pcorr: 'fdr_bh' # method for p-value correction ['fdr_bh'/'fdr_tsbh']
37+
Np_ES: 10 # number of permutations for gene ranking analysis (ES)
38+
Np_aRRA: 100 # number of permutations for gene ranking analysis (aRRA)
39+
Np_STARS: 10 # number of permutations for gene ranking analysis (STARS)
40+
thr_STARS: 10 # Threshold percentage for STARS analysis
41+
42+
# VISUALIZATION PARAMETERS
43+
# Scatterplots
44+
dpi: 300 # resolution of PNG plots
45+
delta_s: 0.1 # count shift before log transformation in scatterplots
46+
dotsize: 10 # size of dots in scatterplot
47+
logbase: 10 # base for log transformation of counts in scatterplots
48+
# Heatmap
49+
delta_p: 1 # count shift before log transformation in heatmap
50+
width_p: 800 # width of heatmap image (pixels)
51+
height_p: 800 # height of heatmap image (pixels)
52+
fontsize_p: 14 # fontsize in heatmap image
53+
marginsize: 7 # size of margin in heatmap image (increase if sample names are clipped)
54+
55+
# DIRECTORIES
56+
WorkingDir: '/workingdir/'
57+
DataDir: '/workingdir/Data/'
58+
LibDir: '/workingdir/Library/'
59+
IndexDir: '/workingdir/Library/Bowtie2_Index/'
60+
ScriptsDir: '/opt/PinAPL-Py/Scripts/'
61+
AlignDir: '/workingdir/Alignments/'
62+
AnalysisDir: '/workingdir/Analysis/'
63+
HitDir: '/workingdir/Analysis/Hit Lists'
64+
GeneDir: '/workingdir/Analysis/Gene Rankings'
65+
ControlDir: '/workingdir/Analysis/Control/'
66+
HeatDir: '/workingdir/Analysis/Heatmap/'
67+
QCDir: '/workingdir/Analysis/QC/'
68+
ScatterDir: '/workingdir/Analysis/Scatterplots/'
69+
EffDir: '/workingdir/Analysis/sgRNA Efficiency/'
70+
DepthDir: '/workingdir/Analysis/Read Depth/'
71+
bw2Dir: '/usr/bin/'
72+
CutAdaptDir: '/root/.local/bin/'
73+
STARSDir: '/opt/PinAPL-Py/Scripts/STARS_mod/'
74+
75+
# SCRIPT FILENAMES
76+
script00: 'BuildLibraryIndex'
77+
script01: 'AlignReads'
78+
script02: 'AnalyzeReadCounts'
79+
script03: 'AnalyzeControl'
80+
script04: 'FindHits'
81+
script05: 'RankGenes'
82+
script06: 'PlotCounts'
83+
script07: 'PlotReplicates'
84+
script08: 'PlotHeatmap'
85+
86+

0 commit comments

Comments
 (0)