v2.11

PhilippSpahn · PhilippSpahn · commit 1f232feaa915 · 2017-02-27T19:48:47.000-08:00
diff --git a/Scripts/FindHits.py b/Scripts/FindHits.py
@@ -153,8 +153,8 @@ def PrepareHitList(sample):
     Results_df = pandas.DataFrame(data = {'sgRNA': [sgIDs[k] for k in range(L)],
                                      'gene': [genes[k] for k in range(L)],
                                      'counts [norm.]': [x[k] for k in range(L)],
-                                     'control mean [norm.]': [numpy.rint(mu[k]) for k in range(L)],
-                                     'control stdev [norm.]': [numpy.rint(numpy.sqrt(sigma2[k])) for k in range(L)],
+                                     'control mean [norm.]': [mu[k] for k in range(L)],
+                                     'control stdev [norm.]': [numpy.sqrt(sigma2[k]) for k in range(L)],
                                      'fold change': [fc[k] for k in range(L)],   
                                      'p-value': ['%.2E' % Decimal(NBpval[k]) for k in range(L)],
                                      'adj. p-value': ['%.2E' % Decimal(NBpval_0[k]) for k in range(L)],                                                 
diff --git a/Scripts/NormalizeReadCounts.py b/Scripts/NormalizeReadCounts.py
@@ -66,7 +66,7 @@ def Normalization():
             GuideCounts0 = open(GuideCounts0_Filename,'w')
             ReadsPerGuide_0 = list()
             for k in range(len(sgIDs)):      
-                ReadsPerGuide_0 = int(numpy.ceil(ReadsPerGuide[k]/N * N0))
+                ReadsPerGuide_0 = int(numpy.round(ReadsPerGuide[k]/N * N0))
                 GuideCounts0.write(str(sgIDs[k]) + '\t' + str(geneIDs[k]) + '\t' + \
                     str(ReadsPerGuide_0) + '\n')
             GuideCounts0.close()
@@ -80,10 +80,56 @@ def Normalization():
             GeneCounts0 = open(GeneCounts0_Filename,'w')
             ReadsPerGene_0 = list()
             for j in range(len(geneIDs)):    
-                ReadsPerGene_0 = int(numpy.ceil(ReadsPerGene[j]/N * N0))
+                ReadsPerGene_0 = int(numpy.round(ReadsPerGene[j]/N * N0))
                 GeneCounts0.write(str(geneIDs[j]) + '\t' + str(ReadsPerGene_0) + '\n')
             GeneCounts0.close()            
+            os.chdir(AlnQCDir)   
+    
+    elif norm == 'total':
+        print('Normalizing to mean total read count ...')
+        TotalCounts = list()
+        for sample in SampleNames:
+            os.chdir(sample)
+            filename = glob.glob('*GuideCounts.tsv')[0]
+            SampleFile = pandas.read_table(filename, sep='\t',names=colnames_u)
+            x = list(SampleFile['counts'].values)
+            TotalCounts.append(numpy.sum(x))
             os.chdir(AlnQCDir)
+        MeanCount = numpy.mean(TotalCounts)
+        # Compute normalized counts
+        for sample in SampleNames:
+            print('Processing '+sample+' ...') 
+            os.chdir(sample)
+            # sgRNA counts            
+            GuideCountsFilename = glob.glob('*GuideCounts.tsv')[0]
+            GuideCounts = pandas.read_table(GuideCountsFilename,sep='\t',names=colnames_u)
+            sgIDs = list(GuideCounts['sgRNA'].values)        
+            geneIDs = list(GuideCounts['gene'].values)                            
+            ReadsPerGuide = list(GuideCounts['counts'].values)
+            N = sum(ReadsPerGuide)
+            GuideCounts0_Filename = GuideCountsFilename[0:-4] + NormSuffix
+            GuideCounts0 = open(GuideCounts0_Filename,'w')
+            ReadsPerGuide_0 = list()
+            for k in range(len(sgIDs)):      
+                ReadsPerGuide_0 = int(numpy.round(ReadsPerGuide[k]/N * MeanCount))
+                GuideCounts0.write(str(sgIDs[k]) + '\t' + str(geneIDs[k]) + '\t' + \
+                    str(ReadsPerGuide_0) + '\n')
+            GuideCounts0.close()
+            # gene counts
+            GeneCountsFilename = glob.glob('*GeneCounts.tsv')[0]
+            GeneCounts = pandas.read_table(GeneCountsFilename,sep='\t',names=colnames_g)            
+            geneIDs = list(GeneCounts['gene'].values)                            
+            ReadsPerGene = list(GeneCounts['counts'].values)    
+            N = sum(ReadsPerGene)                    
+            GeneCounts0_Filename = GeneCountsFilename[0:-4] + NormSuffix
+            GeneCounts0 = open(GeneCounts0_Filename,'w')
+            ReadsPerGene_0 = list()
+            for j in range(len(geneIDs)):    
+                ReadsPerGene_0 = int(numpy.round(ReadsPerGene[j]/N * MeanCount))
+                GeneCounts0.write(str(geneIDs[j]) + '\t' + str(ReadsPerGene_0) + '\n')
+            GeneCounts0.close()            
+            os.chdir(AlnQCDir)            
+    
     elif norm == 'size':
         print('Normalizing by size-factors ...')       
         # Establish data frame
diff --git a/Scripts/PlotCounts.py b/Scripts/PlotCounts.py
@@ -100,7 +100,7 @@ def GOI_Scatterplot(sample,GOI='None'):
     plt.scatter(control_rest,sample_rest,s=dotsize,facecolor='black',lw=0,alpha=0.35)
     plt.scatter(control_sig,sample_sig,s=dotsize,facecolor='green',lw=0,alpha=0.35,label='Significant')
     if GOI != 'None':
-        plt.scatter(control_goi,sample_goi,s=1.5*dotsize,facecolor='red',lw=0,alpha=0.35,label=GOI)
+        plt.scatter(control_goi,sample_goi,s=1.5*dotsize,facecolor='red',lw=0,alpha=1.00,label=GOI)
     if len(K_nonT)>0:
         plt.scatter(control_nonT,sample_nonT,s=dotsize,facecolor='orange',lw=0,alpha=0.75,\
             label='Non Targeting')
@@ -110,7 +110,7 @@ def GOI_Scatterplot(sample,GOI='None'):
     plt.title(sample+' log'+str(logbase)+' counts [norm.]', fontsize=14)
     plt.xlabel('Control (avg.)', fontsize=12)    
     plt.ylabel(sample, fontsize=12)
-    plt.legend(loc='upper left', prop={'size':10})
+    plt.legend(loc='upper left', prop={'size':8})
     if annotate:
         for label, x, y in zip(goi_sgIDs,control_goi,sample_goi):
             plt.annotate(label,xy=(x,y),color='red',fontsize=8)  
diff --git a/Scripts/PlotReplicates.py b/Scripts/PlotReplicates.py
@@ -118,10 +118,10 @@ def Repl_Scatterplot(Repl1,Repl2):
         fontsize=10) 
     plt.text(.6,.15,'Corr (Spearman) = '+str(round(CorrCoeffS*1000)/1000),transform=axes.transAxes,\
         fontsize=10)    
+    plt.tight_layout()  
     plt.savefig(Repl1+' '+Repl2+' correlation.png', dpi=res)  
     if svg:
-        plt.savefig(Repl1+' '+Repl2+' correlation.svg')
-    plt.tight_layout()      
+        plt.savefig(Repl1+' '+Repl2+' correlation.svg')        
     plt.close()
 
     # --------------------------------------