Skip to content

Commit 1f232fe

Browse files
committed
v2.11
1 parent 8292ccf commit 1f232fe

File tree

4 files changed

+54
-8
lines changed

4 files changed

+54
-8
lines changed

Scripts/FindHits.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -153,8 +153,8 @@ def PrepareHitList(sample):
153153
Results_df = pandas.DataFrame(data = {'sgRNA': [sgIDs[k] for k in range(L)],
154154
'gene': [genes[k] for k in range(L)],
155155
'counts [norm.]': [x[k] for k in range(L)],
156-
'control mean [norm.]': [numpy.rint(mu[k]) for k in range(L)],
157-
'control stdev [norm.]': [numpy.rint(numpy.sqrt(sigma2[k])) for k in range(L)],
156+
'control mean [norm.]': [mu[k] for k in range(L)],
157+
'control stdev [norm.]': [numpy.sqrt(sigma2[k]) for k in range(L)],
158158
'fold change': [fc[k] for k in range(L)],
159159
'p-value': ['%.2E' % Decimal(NBpval[k]) for k in range(L)],
160160
'adj. p-value': ['%.2E' % Decimal(NBpval_0[k]) for k in range(L)],

Scripts/NormalizeReadCounts.py

Lines changed: 48 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ def Normalization():
6666
GuideCounts0 = open(GuideCounts0_Filename,'w')
6767
ReadsPerGuide_0 = list()
6868
for k in range(len(sgIDs)):
69-
ReadsPerGuide_0 = int(numpy.ceil(ReadsPerGuide[k]/N * N0))
69+
ReadsPerGuide_0 = int(numpy.round(ReadsPerGuide[k]/N * N0))
7070
GuideCounts0.write(str(sgIDs[k]) + '\t' + str(geneIDs[k]) + '\t' + \
7171
str(ReadsPerGuide_0) + '\n')
7272
GuideCounts0.close()
@@ -80,10 +80,56 @@ def Normalization():
8080
GeneCounts0 = open(GeneCounts0_Filename,'w')
8181
ReadsPerGene_0 = list()
8282
for j in range(len(geneIDs)):
83-
ReadsPerGene_0 = int(numpy.ceil(ReadsPerGene[j]/N * N0))
83+
ReadsPerGene_0 = int(numpy.round(ReadsPerGene[j]/N * N0))
8484
GeneCounts0.write(str(geneIDs[j]) + '\t' + str(ReadsPerGene_0) + '\n')
8585
GeneCounts0.close()
86+
os.chdir(AlnQCDir)
87+
88+
elif norm == 'total':
89+
print('Normalizing to mean total read count ...')
90+
TotalCounts = list()
91+
for sample in SampleNames:
92+
os.chdir(sample)
93+
filename = glob.glob('*GuideCounts.tsv')[0]
94+
SampleFile = pandas.read_table(filename, sep='\t',names=colnames_u)
95+
x = list(SampleFile['counts'].values)
96+
TotalCounts.append(numpy.sum(x))
8697
os.chdir(AlnQCDir)
98+
MeanCount = numpy.mean(TotalCounts)
99+
# Compute normalized counts
100+
for sample in SampleNames:
101+
print('Processing '+sample+' ...')
102+
os.chdir(sample)
103+
# sgRNA counts
104+
GuideCountsFilename = glob.glob('*GuideCounts.tsv')[0]
105+
GuideCounts = pandas.read_table(GuideCountsFilename,sep='\t',names=colnames_u)
106+
sgIDs = list(GuideCounts['sgRNA'].values)
107+
geneIDs = list(GuideCounts['gene'].values)
108+
ReadsPerGuide = list(GuideCounts['counts'].values)
109+
N = sum(ReadsPerGuide)
110+
GuideCounts0_Filename = GuideCountsFilename[0:-4] + NormSuffix
111+
GuideCounts0 = open(GuideCounts0_Filename,'w')
112+
ReadsPerGuide_0 = list()
113+
for k in range(len(sgIDs)):
114+
ReadsPerGuide_0 = int(numpy.round(ReadsPerGuide[k]/N * MeanCount))
115+
GuideCounts0.write(str(sgIDs[k]) + '\t' + str(geneIDs[k]) + '\t' + \
116+
str(ReadsPerGuide_0) + '\n')
117+
GuideCounts0.close()
118+
# gene counts
119+
GeneCountsFilename = glob.glob('*GeneCounts.tsv')[0]
120+
GeneCounts = pandas.read_table(GeneCountsFilename,sep='\t',names=colnames_g)
121+
geneIDs = list(GeneCounts['gene'].values)
122+
ReadsPerGene = list(GeneCounts['counts'].values)
123+
N = sum(ReadsPerGene)
124+
GeneCounts0_Filename = GeneCountsFilename[0:-4] + NormSuffix
125+
GeneCounts0 = open(GeneCounts0_Filename,'w')
126+
ReadsPerGene_0 = list()
127+
for j in range(len(geneIDs)):
128+
ReadsPerGene_0 = int(numpy.round(ReadsPerGene[j]/N * MeanCount))
129+
GeneCounts0.write(str(geneIDs[j]) + '\t' + str(ReadsPerGene_0) + '\n')
130+
GeneCounts0.close()
131+
os.chdir(AlnQCDir)
132+
87133
elif norm == 'size':
88134
print('Normalizing by size-factors ...')
89135
# Establish data frame

Scripts/PlotCounts.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ def GOI_Scatterplot(sample,GOI='None'):
100100
plt.scatter(control_rest,sample_rest,s=dotsize,facecolor='black',lw=0,alpha=0.35)
101101
plt.scatter(control_sig,sample_sig,s=dotsize,facecolor='green',lw=0,alpha=0.35,label='Significant')
102102
if GOI != 'None':
103-
plt.scatter(control_goi,sample_goi,s=1.5*dotsize,facecolor='red',lw=0,alpha=0.35,label=GOI)
103+
plt.scatter(control_goi,sample_goi,s=1.5*dotsize,facecolor='red',lw=0,alpha=1.00,label=GOI)
104104
if len(K_nonT)>0:
105105
plt.scatter(control_nonT,sample_nonT,s=dotsize,facecolor='orange',lw=0,alpha=0.75,\
106106
label='Non Targeting')
@@ -110,7 +110,7 @@ def GOI_Scatterplot(sample,GOI='None'):
110110
plt.title(sample+' log'+str(logbase)+' counts [norm.]', fontsize=14)
111111
plt.xlabel('Control (avg.)', fontsize=12)
112112
plt.ylabel(sample, fontsize=12)
113-
plt.legend(loc='upper left', prop={'size':10})
113+
plt.legend(loc='upper left', prop={'size':8})
114114
if annotate:
115115
for label, x, y in zip(goi_sgIDs,control_goi,sample_goi):
116116
plt.annotate(label,xy=(x,y),color='red',fontsize=8)

Scripts/PlotReplicates.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -118,10 +118,10 @@ def Repl_Scatterplot(Repl1,Repl2):
118118
fontsize=10)
119119
plt.text(.6,.15,'Corr (Spearman) = '+str(round(CorrCoeffS*1000)/1000),transform=axes.transAxes,\
120120
fontsize=10)
121+
plt.tight_layout()
121122
plt.savefig(Repl1+' '+Repl2+' correlation.png', dpi=res)
122123
if svg:
123-
plt.savefig(Repl1+' '+Repl2+' correlation.svg')
124-
plt.tight_layout()
124+
plt.savefig(Repl1+' '+Repl2+' correlation.svg')
125125
plt.close()
126126

127127
# --------------------------------------

0 commit comments

Comments
 (0)