v1.1

PhilippSpahn · PhilippSpahn · commit 81e527725909 · 2017-02-13T12:39:50.000-08:00
diff --git a/configuration.yaml b/configuration.yaml
@@ -1,70 +1,86 @@
-# PinAPLPy: Platform-independent Analysis of Pooled Screens using Python
-# P. Spahn et al., UC San Diego (10/2016) 
-
-# ***********************************************************************
-# CONFIGURATION FILE
-# ***********************************************************************
-
-# PROJECT PARAMETERS
-ScreenType: 'enrichment'                    # type of screen ('enrichment'/'depletion')
-LibFilename: 'GeCKOv2_library.tsv'            # filename of library spreadsheet  
-seq_5_end: 'TCTTGTGGAAAGGACGAAACACCG'       # sequence 5' of sgRNA in read
-seq_3_end: 'GTTTTAGAGCTAGAAATAGCAAGTT'       # sequence 3' of sgRNA in read
-CtrlPrefix: 'Control_'                      # name prefix of control samples
-NonTargetPrefix: 'NonTargeting'             # prefix for non-targeting sgRNAs in library (keep at default if none present)
-sgRNAsPerGene: 6                            # number of sgRNAs targeting each gene (not taking non-targeting / microRNAs into account) 
-
-# ANALYSIS OPTIONS
-AlnOutput: 'Compress'                  # keep raw alignment output? ('Keep'/'Compress'/'Delete')
-keepCutReads: False                    # keep files containing trimmed reads? ('True'/'False')
-VarEst: 'model'                        # method for count variance estimation ('model'/'sample') 
-GeneMetric: 'aRRA'                    # metric for gene enrichment ('aRRA'/'STARS'/'ES')
-scatter_annotate: True                 # annotate scatterplot with sgRNA IDs? ('True'/'False')
-ClusterBy: 'variance'                  # clustering criterion ('variance'/'counts')
-TopN: 100                              # number of top sgRNAs to take into account for clustering
-
-# TECHNICAL PARAMETERS
-CutErrorTol: 0.25                       # Cutadapt error tolerance
-R_min:  10                             # minimal required read length after cutadapt trimming
-L_bw: 11                               # Bowtie2 -L parameter (seed length)
-N_bw: 1                                # Bowtie2 -N parameter (seed mismatch)
-i_bw: 'S,1,0.75'                       # Bowtie2 -i parameter (interval function)
-Theta: 2                               # alignment ambiguity tolerance
-N0: 1000000                            # read normalization 
-max_q: 95                              # maximum quantile for histogram plots
-alpha: 0.01                            # significance level    
-pcorr: 'fdr_bh'                        # method for p-value correction
-Np_ES: 100                             # number of permutations for gene enrichment analysis (ES)
-Np_aRRA: 100                          # number of permutations for gene enrichment analysis (aRRA)
-Np_STARS: 10                          # number of permutations for gene enrichment analysis (STARS)
-thr_STARS: 10                          # Threshold percentage for STARS analysis
-
-# VISUALIZATION PARAMETERS
-delta_s: 0.01                          # count shift before log transformation in scatterplots
-delta_p: 1                             # count shift before log transformation in heatmap
-dpi: 300                               # resolution of PNG plots
-dotsize: 10                             # size of dots in scatterplot
-
-# DIRECTORIES
-WorkingDir: '/workingdir/'
-DataDir: '/workingdir/Data/'
-LibDir: '/workingdir/Library/'
-IndexDir: '/workingdir/Library/Bowtie2_Index/'
-ScriptsDir: '/opt/PinAPL-Py/Scripts/'
-AnalysisDir: '/workingdir/Analysis/'
-AlignDir: '/workingdir/Alignments/'
-QCDir: '/workingdir/Analysis/QC/'
-bw2Dir: '/usr/bin/'
-CutAdaptDir: '/root/.local/bin/'   
-STARSDir: '/opt/PinAPL-Py/Scripts/STARS_mod/'
-
-# SCRIPT FILENAMES
-script00: 'BuildLibraryIndex'
-script01: 'AlignReads'
-script02: 'AnalyzeCounts'
-script03: 'AnalyzeControl'
-script04: 'ListCandidateGuides'
-script05: 'ListCandidateGenes'
-script06: 'PlotSample'
-script07: 'PlotReplicates'
-script08: 'PlotHeatmap'
+# PinAPLPy: Platform-independent Analysis of Pooled Screens using Python
+# P. Spahn et al., UC San Diego (10/2016) 
+
+# ***********************************************************************
+# CONFIGURATION FILE
+# ***********************************************************************
+
+# PROJECT PARAMETERS
+ScreenType: 'enrichment'                    # type of screen ['enrichment'/'depletion']
+LibFilename: 'GeCKOv2_library.tsv'            # filename of library spreadsheet  
+seq_5_end: 'TCTTGTGGAAAGGACGAAACACCG'       # sequence 5' of sgRNA in read
+seq_3_end: 'GTTTTAGAGCTAGAAATAGCAAGTT'       # sequence 3' of sgRNA in read
+NonTargetPrefix: 'NonTargeting'             # prefix for non-targeting sgRNAs in library (keep at default if none present)
+sgRNAsPerGene: 6                       # number of sgRNAs targeting each gene (excluding non-targeting controls and miRNAs). 
+                                       #    ONLY IMPORTANT IF 'ES' is chosen for gene ranking method !
+
+# ANALYSIS OPTIONS
+AlnOutput: 'Compress'                  # keep raw alignment output? ['Keep'/'Compress'/'Delete']
+keepCutReads: False                    # keep files containing trimmed reads? ['True'/'False']
+VarEst: 'model'                        # method for count variance estimation ['model'/'sample')]
+GeneMetric: 'aRRA'                    # metric for gene ranking ['aRRA'/'STARS'/'ES']
+scatter_annotate: False                 # annotate scatterplot with sgRNA IDs? ['True'/'False']
+ClusterBy: 'variance'                  # clustering criterion ['variance'/'counts']
+TopN: 25                              # number of top sgRNAs to take into account for clustering
+HitListFormat: 'tsv'                  # Format of results spreadsheets (sgRNA hits and gene ranking) ['tsv'/'xlsx']
+
+# TECHNICAL PARAMETERS
+CutErrorTol: 0.25                       # Cutadapt error tolerance
+R_min:  10                             # minimal required read length after cutadapt trimming
+L_bw: 11                               # Bowtie2 -L parameter (seed length)
+N_bw: 1                                # Bowtie2 -N parameter (seed mismatch)
+i_bw: 'S,1,0.75'                       # Bowtie2 -i parameter (interval function)
+Theta: 2                               # alignment ambiguity tolerance
+max_q: 95                              # maximum quantile for histogram plots
+alpha: 0.01                            # significance level    
+pcorr: 'fdr_bh'                        # method for p-value correction ['fdr_bh'/'fdr_tsbh']
+Np_ES: 10                             # number of permutations for gene ranking analysis (ES)
+Np_aRRA: 100                         # number of permutations for gene ranking analysis (aRRA)
+Np_STARS: 10                          # number of permutations for gene ranking analysis (STARS)
+thr_STARS: 10                          # Threshold percentage for STARS analysis
+
+# VISUALIZATION PARAMETERS
+# Scatterplots
+dpi: 300                               # resolution of PNG plots
+delta_s: 0.1                         # count shift before log transformation in scatterplots
+dotsize: 10                            # size of dots in scatterplot
+logbase: 10                            # base for log transformation of counts in scatterplots
+# Heatmap
+delta_p: 1                             # count shift before log transformation in heatmap
+width_p: 800                            # width of heatmap image (pixels)
+height_p: 800                            # height of heatmap image (pixels)
+fontsize_p: 14                          # fontsize in heatmap image
+marginsize: 7                           # size of margin in heatmap image (increase if sample names are clipped)
+
+# DIRECTORIES
+WorkingDir: '/workingdir/'
+DataDir: '/workingdir/Data/'
+LibDir: '/workingdir/Library/'
+IndexDir: '/workingdir/Library/Bowtie2_Index/'
+ScriptsDir: '/opt/PinAPL-Py/Scripts/'
+AlignDir: '/workingdir/Alignments/'
+AnalysisDir: '/workingdir/Analysis/'
+HitDir: '/workingdir/Analysis/Hit Lists'
+GeneDir: '/workingdir/Analysis/Gene Rankings'
+ControlDir: '/workingdir/Analysis/Control/'
+HeatDir: '/workingdir/Analysis/Heatmap/'
+QCDir: '/workingdir/Analysis/QC/'
+ScatterDir: '/workingdir/Analysis/Scatterplots/'
+EffDir: '/workingdir/Analysis/sgRNA Efficiency/'
+DepthDir: '/workingdir/Analysis/Read Depth/' 
+bw2Dir: '/usr/bin/'
+CutAdaptDir: '/root/.local/bin/'   
+STARSDir: '/opt/PinAPL-Py/Scripts/STARS_mod/'
+
+# SCRIPT FILENAMES
+script00: 'BuildLibraryIndex'
+script01: 'AlignReads'
+script02: 'AnalyzeReadCounts'
+script03: 'AnalyzeControl'
+script04: 'FindHits'
+script05: 'RankGenes'
+script06: 'PlotCounts'
+script07: 'PlotReplicates'
+script08: 'PlotHeatmap'
+
+