1- # PinAPLPy: Platform-independent Analysis of Pooled Screens using Python
2- # P. Spahn et al., UC San Diego (10/2016)
3-
4- # ***********************************************************************
5- # CONFIGURATION FILE
6- # ***********************************************************************
7-
8- # PROJECT PARAMETERS
9- ScreenType : ' enrichment' # type of screen ('enrichment'/'depletion')
10- LibFilename : ' GeCKOv2_library.tsv' # filename of library spreadsheet
11- seq_5_end : ' TCTTGTGGAAAGGACGAAACACCG' # sequence 5' of sgRNA in read
12- seq_3_end : ' GTTTTAGAGCTAGAAATAGCAAGTT' # sequence 3' of sgRNA in read
13- CtrlPrefix : ' Control_' # name prefix of control samples
14- NonTargetPrefix : ' NonTargeting' # prefix for non-targeting sgRNAs in library (keep at default if none present)
15- sgRNAsPerGene : 6 # number of sgRNAs targeting each gene (not taking non-targeting / microRNAs into account)
16-
17- # ANALYSIS OPTIONS
18- AlnOutput : ' Compress' # keep raw alignment output? ('Keep'/'Compress'/'Delete')
19- keepCutReads : False # keep files containing trimmed reads? (True/False)
20- VarEst : ' model' # method for count variance estimation ('model'/'sample')
21- GeneMetric : ' aRRA' # metric for gene enrichment ('aRRA'/'STARS'/'ES')
22- scatter_annotate : True # annotate scatterplot with sgRNA IDs? (True/False)
23- ClusterBy : ' variance' # clustering criterion ('variance'/'counts')
24- TopN : 100 # number of top sgRNAs to take into account for clustering
25-
26- # TECHNICAL PARAMETERS
27- CutErrorTol : 0.25 # Cutadapt error tolerance
28- R_min : 10 # minimal required read length after cutadapt trimming
29- L_bw : 11 # Bowtie2 -L parameter (seed length)
30- N_bw : 1 # Bowtie2 -N parameter (seed mismatch)
31- i_bw : ' S,1,0.75' # Bowtie2 -i parameter (interval function)
32- Theta : 2 # alignment ambiguity tolerance
33- N0 : 1000000 # read normalization
34- max_q : 95 # maximum quantile for histogram plots
35- alpha : 0.01 # significance level
36- pcorr : ' fdr_bh' # method for p-value correction
37- Np_ES : 100 # number of permutations for gene enrichment analysis (ES)
38- Np_aRRA : 100 # number of permutations for gene enrichment analysis (aRRA)
39- Np_STARS : 10 # number of permutations for gene enrichment analysis (STARS)
40- thr_STARS : 10 # Threshold percentage for STARS analysis
41-
42- # VISUALIZATION PARAMETERS
43- delta_s : 0.01 # count shift before log transformation in scatterplots
44- delta_p : 1 # count shift before log transformation in heatmap
45- dpi : 300 # resolution of PNG plots
46- dotsize : 10 # size of dots in scatterplot
47-
48- # DIRECTORIES
49- WorkingDir : ' /workingdir/'
50- DataDir : ' /workingdir/Data/'
51- LibDir : ' /workingdir/Library/'
52- IndexDir : ' /workingdir/Library/Bowtie2_Index/'
53- ScriptsDir : ' /opt/PinAPL-Py/Scripts/'
54- AnalysisDir : ' /workingdir/Analysis/'
55- AlignDir : ' /workingdir/Alignments/'
56- QCDir : ' /workingdir/Analysis/QC/'
57- bw2Dir : ' /usr/bin/'
58- CutAdaptDir : ' /root/.local/bin/'
59- STARSDir : ' /opt/PinAPL-Py/Scripts/STARS_mod/'
60-
61- # SCRIPT FILENAMES
62- script00 : ' BuildLibraryIndex'
63- script01 : ' AlignReads'
64- script02 : ' AnalyzeCounts'
65- script03 : ' AnalyzeControl'
66- script04 : ' ListCandidateGuides'
67- script05 : ' ListCandidateGenes'
68- script06 : ' PlotSample'
69- script07 : ' PlotReplicates'
70- script08 : ' PlotHeatmap'
1+ # PinAPLPy: Platform-independent Analysis of Pooled Screens using Python
2+ # P. Spahn et al., UC San Diego (10/2016)
3+
4+ # ***********************************************************************
5+ # CONFIGURATION FILE
6+ # ***********************************************************************
7+
8+ # PROJECT PARAMETERS
9+ ScreenType : ' enrichment' # type of screen ['enrichment'/'depletion']
10+ LibFilename : ' GeCKOv2_library.tsv' # filename of library spreadsheet
11+ seq_5_end : ' TCTTGTGGAAAGGACGAAACACCG' # sequence 5' of sgRNA in read
12+ seq_3_end : ' GTTTTAGAGCTAGAAATAGCAAGTT' # sequence 3' of sgRNA in read
13+ NonTargetPrefix : ' NonTargeting' # prefix for non-targeting sgRNAs in library (keep at default if none present)
14+ sgRNAsPerGene : 6 # number of sgRNAs targeting each gene (excluding non-targeting controls and miRNAs).
15+ # Only relevant if GeneMetric is set to 'ES'.
16+
17+ # ANALYSIS OPTIONS
18+ AlnOutput : ' Compress' # keep raw alignment output? ['Keep'/'Compress'/'Delete']
19+ keepCutReads : False # keep files containing trimmed reads? [True/False]
20+ VarEst : ' model' # method for count variance estimation ['model'/'sample']
21+ GeneMetric : ' aRRA' # metric for gene ranking ['aRRA'/'STARS'/'ES']
22+ scatter_annotate : False # annotate scatterplot with sgRNA IDs? [True/False]
23+ ClusterBy : ' variance' # clustering criterion ['variance'/'counts']
24+ TopN : 25 # number of top sgRNAs to take into account for clustering
25+ HitListFormat : ' tsv' # Format of results spreadsheets (sgRNA hits and gene ranking) ['tsv'/'xlsx']
26+
27+ # TECHNICAL PARAMETERS
28+ CutErrorTol : 0.25 # Cutadapt error tolerance
29+ R_min : 10 # minimal required read length after cutadapt trimming
30+ L_bw : 11 # Bowtie2 -L parameter (seed length)
31+ N_bw : 1 # Bowtie2 -N parameter (seed mismatch)
32+ i_bw : ' S,1,0.75' # Bowtie2 -i parameter (interval function)
33+ Theta : 2 # alignment ambiguity tolerance
34+ max_q : 95 # maximum quantile for histogram plots
35+ alpha : 0.01 # significance level
36+ pcorr : ' fdr_bh' # method for p-value correction ['fdr_bh'/'fdr_tsbh']
37+ Np_ES : 10 # number of permutations for gene ranking analysis (ES)
38+ Np_aRRA : 100 # number of permutations for gene ranking analysis (aRRA)
39+ Np_STARS : 10 # number of permutations for gene ranking analysis (STARS)
40+ thr_STARS : 10 # Threshold percentage for STARS analysis
41+
42+ # VISUALIZATION PARAMETERS
43+ # Scatterplots
44+ dpi : 300 # resolution of PNG plots
45+ delta_s : 0.1 # count shift before log transformation in scatterplots
46+ dotsize : 10 # size of dots in scatterplot
47+ logbase : 10 # base for log transformation of counts in scatterplots
48+ # Heatmap
49+ delta_p : 1 # count shift before log transformation in heatmap
50+ width_p : 800 # width of heatmap image (pixels)
51+ height_p : 800 # height of heatmap image (pixels)
52+ fontsize_p : 14 # fontsize in heatmap image
53+ marginsize : 7 # size of margin in heatmap image (increase if sample names are clipped)
54+
55+ # DIRECTORIES
56+ WorkingDir : ' /workingdir/'
57+ DataDir : ' /workingdir/Data/'
58+ LibDir : ' /workingdir/Library/'
59+ IndexDir : ' /workingdir/Library/Bowtie2_Index/'
60+ ScriptsDir : ' /opt/PinAPL-Py/Scripts/'
61+ AlignDir : ' /workingdir/Alignments/'
62+ AnalysisDir : ' /workingdir/Analysis/'
63+ HitDir : ' /workingdir/Analysis/Hit Lists'
64+ GeneDir : ' /workingdir/Analysis/Gene Rankings'
65+ ControlDir : ' /workingdir/Analysis/Control/'
66+ HeatDir : ' /workingdir/Analysis/Heatmap/'
67+ QCDir : ' /workingdir/Analysis/QC/'
68+ ScatterDir : ' /workingdir/Analysis/Scatterplots/'
69+ EffDir : ' /workingdir/Analysis/sgRNA Efficiency/'
70+ DepthDir : ' /workingdir/Analysis/Read Depth/'
71+ bw2Dir : ' /usr/bin/'
72+ CutAdaptDir : ' /root/.local/bin/'
73+ STARSDir : ' /opt/PinAPL-Py/Scripts/STARS_mod/'
74+
75+ # SCRIPT FILENAMES
76+ script00 : ' BuildLibraryIndex'
77+ script01 : ' AlignReads'
78+ script02 : ' AnalyzeReadCounts'
79+ script03 : ' AnalyzeControl'
80+ script04 : ' FindHits'
81+ script05 : ' RankGenes'
82+ script06 : ' PlotCounts'
83+ script07 : ' PlotReplicates'
84+ script08 : ' PlotHeatmap'
85+
86+
0 commit comments