@@ -66,7 +66,7 @@ def Normalization():
6666 GuideCounts0 = open (GuideCounts0_Filename ,'w' )
6767 ReadsPerGuide_0 = list ()
6868 for k in range (len (sgIDs )):
69- ReadsPerGuide_0 = int (numpy .ceil (ReadsPerGuide [k ]/ N * N0 ))
69+ ReadsPerGuide_0 = int (numpy .round (ReadsPerGuide [k ]/ N * N0 ))
7070 GuideCounts0 .write (str (sgIDs [k ]) + '\t ' + str (geneIDs [k ]) + '\t ' + \
7171 str (ReadsPerGuide_0 ) + '\n ' )
7272 GuideCounts0 .close ()
@@ -80,10 +80,56 @@ def Normalization():
8080 GeneCounts0 = open (GeneCounts0_Filename ,'w' )
8181 ReadsPerGene_0 = list ()
8282 for j in range (len (geneIDs )):
83- ReadsPerGene_0 = int (numpy .ceil (ReadsPerGene [j ]/ N * N0 ))
83+ ReadsPerGene_0 = int (numpy .round (ReadsPerGene [j ]/ N * N0 ))
8484 GeneCounts0 .write (str (geneIDs [j ]) + '\t ' + str (ReadsPerGene_0 ) + '\n ' )
8585 GeneCounts0 .close ()
86+ os .chdir (AlnQCDir )
87+
88+ elif norm == 'total' :
89+ print ('Normalizing to mean total read count ...' )
90+ TotalCounts = list ()
91+ for sample in SampleNames :
92+ os .chdir (sample )
93+ filename = glob .glob ('*GuideCounts.tsv' )[0 ]
94+ SampleFile = pandas .read_table (filename , sep = '\t ' ,names = colnames_u )
95+ x = list (SampleFile ['counts' ].values )
96+ TotalCounts .append (numpy .sum (x ))
8697 os .chdir (AlnQCDir )
98+ MeanCount = numpy .mean (TotalCounts )
99+ # Compute normalized counts
100+ for sample in SampleNames :
101+ print ('Processing ' + sample + ' ...' )
102+ os .chdir (sample )
103+ # sgRNA counts
104+ GuideCountsFilename = glob .glob ('*GuideCounts.tsv' )[0 ]
105+ GuideCounts = pandas .read_table (GuideCountsFilename ,sep = '\t ' ,names = colnames_u )
106+ sgIDs = list (GuideCounts ['sgRNA' ].values )
107+ geneIDs = list (GuideCounts ['gene' ].values )
108+ ReadsPerGuide = list (GuideCounts ['counts' ].values )
109+ N = sum (ReadsPerGuide )
110+ GuideCounts0_Filename = GuideCountsFilename [0 :- 4 ] + NormSuffix
111+ GuideCounts0 = open (GuideCounts0_Filename ,'w' )
112+ ReadsPerGuide_0 = list ()
113+ for k in range (len (sgIDs )):
114+ ReadsPerGuide_0 = int (numpy .round (ReadsPerGuide [k ]/ N * MeanCount ))
115+ GuideCounts0 .write (str (sgIDs [k ]) + '\t ' + str (geneIDs [k ]) + '\t ' + \
116+ str (ReadsPerGuide_0 ) + '\n ' )
117+ GuideCounts0 .close ()
118+ # gene counts
119+ GeneCountsFilename = glob .glob ('*GeneCounts.tsv' )[0 ]
120+ GeneCounts = pandas .read_table (GeneCountsFilename ,sep = '\t ' ,names = colnames_g )
121+ geneIDs = list (GeneCounts ['gene' ].values )
122+ ReadsPerGene = list (GeneCounts ['counts' ].values )
123+ N = sum (ReadsPerGene )
124+ GeneCounts0_Filename = GeneCountsFilename [0 :- 4 ] + NormSuffix
125+ GeneCounts0 = open (GeneCounts0_Filename ,'w' )
126+ ReadsPerGene_0 = list ()
127+ for j in range (len (geneIDs )):
128+ ReadsPerGene_0 = int (numpy .round (ReadsPerGene [j ]/ N * MeanCount ))
129+ GeneCounts0 .write (str (geneIDs [j ]) + '\t ' + str (ReadsPerGene_0 ) + '\n ' )
130+ GeneCounts0 .close ()
131+ os .chdir (AlnQCDir )
132+
87133 elif norm == 'size' :
88134 print ('Normalizing by size-factors ...' )
89135 # Establish data frame
0 commit comments