Skip to content

Commit e463cd1

Browse files
committed
v2.7.7
1 parent 328010d commit e463cd1

File tree

6 files changed

+148
-99
lines changed

6 files changed

+148
-99
lines changed

Scripts/AnalyzeControl.py

Lines changed: 58 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
# Estimate mean counts and variance from control samples
99
# =======================================================================
1010
# Imports
11+
from __future__ import division # floating point division by default
1112
import numpy
1213
import matplotlib
1314
matplotlib.use('Agg')
@@ -39,8 +40,8 @@ def EstimateControlCounts():
3940
AlnQCDir = config['AlnQCDir']
4041
ControlDir = config['ControlDir']
4142
res = config['dpi']
43+
thr_overdisp = config['thr_overdisp']
4244
CtrlCounts_Filename = 'Control_GuideCounts_0.tsv'
43-
4445

4546
# --------------------------------
4647
# Generate table of control counts
@@ -58,7 +59,7 @@ def EstimateControlCounts():
5859
'gene': genes},
5960
columns = ['sgID','gene'])
6061
if len(ControlSamples) == 0:
61-
print('### ERROR: No control sample directories found! ###')
62+
print('### ERROR: No control samples found! ###')
6263
else:
6364
os.chdir(AlnQCDir)
6465
for controlsample in ControlSamples:
@@ -80,47 +81,70 @@ def EstimateControlCounts():
8081
Mean = list(Mean_array)
8182
Var_matrix = numpy.var(CtrlCounts_matrix,axis=1)
8283
Var_array = numpy.array(Var_matrix.T)[0]
83-
Var = list(Var_array)
84+
SampleVar = list(Var_array)
85+
86+
# --------------------------------------------------------------
87+
# Determine if the variance equals the mean (Poisson distribution)
88+
# --------------------------------------------------------------
89+
Svar0 = numpy.mean(SampleVar)
90+
if Svar0 == 0:
91+
Model = 'none'
92+
print('WARNING: Zero variance or no control replicates! Cannot choose statistical model.')
93+
else:
94+
L0_list = [1 if Mean[k]>0 else 0 for k in range(L)]
95+
overdisp_list = [1 if Mean[k]>0 and SampleVar[k]>Mean[k] else 0 for k in range(L)]
96+
overdisp = sum(overdisp_list)/sum(L0_list)
97+
print('Overdispersion fraction: '+str(overdisp))
98+
if overdisp >= thr_overdisp:
99+
Model = 'Neg. Binomial'
100+
print('Choosing negative binomial model ...')
101+
else:
102+
Model = 'Poisson'
103+
print('Choosing Poisson model ...')
84104

85105
# -----------------------------------------------
86-
# Estimate variance from negative binomial model
106+
# Model variance
87107
# -----------------------------------------------
88-
if max(Var)>0:
89-
x = [numpy.log(Mean[k]) for k in range(L) if Mean[k]>0 and Var[k]>Mean[k]]
90-
y = [numpy.log(Var[k]-Mean[k]) for k in range(L) if Mean[k]>0 and Var[k]>Mean[k]]
108+
if Model == 'none':
109+
Var = [0 for k in range(len(SampleVar))]
110+
n = 'N/A'
111+
p = 'N/A'
112+
elif Model == 'Neg. Binomial':
113+
x = [numpy.log(Mean[k]) for k in range(L) if Mean[k]>0 and SampleVar[k]>Mean[k]]
114+
y = [numpy.log(SampleVar[k]-Mean[k]) for k in range(L) if Mean[k]>0 and SampleVar[k]>Mean[k]]
91115
c = [y[k]-2*x[k] for k in range(len(x))]
92116
c_0 = numpy.mean(c)
93117
D = numpy.exp(c_0)
94-
Var_Model = [Mean[k] + D*Mean[k]**2 for k in range(L)]
95-
else: # no control replicates present
96-
print('WARNING: No control replicates found!')
97-
Var_Model = [0 for k in range(len(Var))]
98-
99-
# -----------------------------------------------
100-
# Compute parameters for neg. binom. distribution
101-
# n: number of failures, p: probability of failure
102-
# -----------------------------------------------
103-
print('Computing parameters of neg. binomial distribution ...')
104-
n = list(); p = list()
105-
for k in range(L):
106-
if Mean[k]==0 or Var_Model[k]==0:
107-
n.append(((Mean[k]+delta)**2/(Var_Model[k]+2*delta))/(1-(Mean[k]+delta)/(Var_Model[k]+2*delta)))
108-
p.append((Mean[k]+delta)/(Var_Model[k]+2*delta))
109-
else:
110-
n.append((Mean[k]**2/Var_Model[k])/(1-Mean[k]/Var_Model[k]))
111-
p.append(Mean[k]/Var_Model[k])
112-
113-
118+
Var = [Mean[k] + D*Mean[k]**2 for k in range(L)]
119+
# -----------------------------------------------
120+
# Compute parameters for neg. binom. distribution
121+
# n: number of failures, p: probability of failure
122+
# -----------------------------------------------
123+
print('Computing parameters of negative binomial distribution ...')
124+
n = list(); p = list()
125+
for k in range(L):
126+
if Mean[k]==0 or Var[k]==0 :
127+
n.append(((Mean[k]+delta)**2/(Var[k]+2*delta))/(1-(Mean[k]+delta)/(Var[k]+2*delta)))
128+
p.append((Mean[k]+delta)/(Var[k]+2*delta))
129+
else:
130+
n.append((Mean[k]**2/Var[k])/(1-Mean[k]/Var[k]))
131+
p.append(Mean[k]/Var[k])
132+
elif Model == 'Poisson':
133+
Var = [Mean[k] if Mean[k]>0 else 1 for k in range(L)]
134+
n = 'N/A'
135+
p = 'N/A'
136+
114137
# --------------------------------
115138
# Write data frame
116139
# --------------------------------
117140
if not os.path.exists(ControlDir):
118141
os.makedirs(ControlDir)
119142
os.chdir(ControlDir)
120143
print('Writing dataframe ...')
144+
CtrlCounts_df['Model'] = Model
121145
CtrlCounts_df['Mean'] = Mean
122-
CtrlCounts_df['Sample Variance'] = Var
123-
CtrlCounts_df['Model Variance'] = Var_Model
146+
CtrlCounts_df['Sample Variance'] = SampleVar
147+
CtrlCounts_df['Model Variance'] = Var
124148
CtrlCounts_df['n'] = n
125149
CtrlCounts_df['p'] = p
126150
CtrlCounts_df.to_csv(CtrlCounts_Filename,sep='\t')
@@ -132,10 +156,10 @@ def EstimateControlCounts():
132156
# Mean/Variance plot
133157
print('Generating dispersion plot ...')
134158
plt.subplot(121)
135-
if max(Var) > 0:
159+
if max(SampleVar) > 0:
136160
Mmax = numpy.percentile(Mean_array,99)
137161
x = [Mean[k] for k in range(L) if Mean[k] < Mmax]
138-
y = [Var[k] for k in range(L) if Mean[k] < Mmax]
162+
y = [SampleVar[k] for k in range(L) if Mean[k] < Mmax]
139163
plt.scatter(x,y,s=4,lw=0,alpha=0.25)
140164
plt.plot(x,x,'--',color='orange',label='Mean = Variance')
141165
leg = plt.legend(loc='upper left', prop={'size':8})
@@ -149,9 +173,9 @@ def EstimateControlCounts():
149173
# Log Plot with Regression
150174
print('Generating log regression plot ...')
151175
plt.subplot(122)
152-
if max(Var) > 0:
153-
logx = [numpy.log(Mean[k]) for k in range(L) if Mean[k]>0 and Var[k]>Mean[k]]
154-
logy = [numpy.log(Var[k]-Mean[k]) for k in range(L) if Mean[k]>0 and Var[k]>Mean[k]]
176+
if Model == 'Neg. Binomial' and max(SampleVar)>0:
177+
logx = [numpy.log(Mean[k]) for k in range(L) if Mean[k]>0 and SampleVar[k]>Mean[k]]
178+
logy = [numpy.log(SampleVar[k]-Mean[k]) for k in range(L) if Mean[k]>0 and SampleVar[k]>Mean[k]]
155179
plt.scatter(logx,logy,s=4,lw=0,alpha=0.25)
156180
logy_0 = [2*logx[k] + c_0 for k in range(len(logx))]
157181
plt.plot(logx,logy_0,'r--')

Scripts/FindHits.py

Lines changed: 64 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,12 @@ def PrepareHitList(sample):
6060
print('Loading read counts ...')
6161
os.chdir(CtrlDir)
6262
Ctrl_File = pandas.read_table(CtrlCounts_Filename, sep='\t')
63+
Model = Ctrl_File['Model'][0]
6364
sgIDs = list(Ctrl_File['sgID'])
6465
genes = list(Ctrl_File['gene'])
6566
mu = list(Ctrl_File['Mean'])
6667
L = len(sgIDs)
68+
SampleVar = list(Ctrl_File['Sample Variance'])
6769
sigma2 = list(Ctrl_File['Model Variance'])
6870
n = list(Ctrl_File['n'])
6971
p = list(Ctrl_File['p'])
@@ -74,57 +76,69 @@ def PrepareHitList(sample):
7476
x = list(SampleFile['counts'])
7577

7678
# -----------------------------------------------
77-
# Compute fold change and p-values
79+
# Compute fold change
7880
# -----------------------------------------------
79-
if max(sigma2) == 0: # check for control replicates
81+
print('Computing fold-changes ...')
82+
fc = list()
83+
for k in range(L):
84+
if x[k]==0 or mu[k]==0:
85+
fc.append((x[k]+delta)/(mu[k]+delta))
86+
else:
87+
fc.append(x[k]/mu[k])
88+
89+
# -----------------------------------------------
90+
# Compute p-values
91+
# -----------------------------------------------
92+
if Model == 'none':
8093
# -----------------------------------------------------------
81-
print('WARNING: No control replicates! No p-values computed...')
82-
print('Computing fold changes ...')
83-
fc = list()
84-
for k in range(L):
85-
if x[k]==0 or mu[k]==0:
86-
fc.append((x[k]+delta)/(mu[k]+delta))
87-
else:
88-
fc.append(x[k]/mu[k])
89-
NBpval = [1 for k in range(L)]
90-
NBpval_0 = [1 for k in range(L)]
94+
print('WARNING: Zero variance or no control replicates! Cannot compute p-values ...')
95+
pval = [1 for k in range(L)]
96+
pval0 = [1 for k in range(L)]
9197
significant = [False for k in range(L)]
9298
# -----------------------------------------------------------
9399
elif ScreenType == 'enrichment': # enrichment screen
94100
# -----------------------------------------------------------
95-
fc = list(); NBpval = list(); NBpval2 = list()
96-
print('Computing fold-changes and p-values...')
97-
for k in range(L):
98-
# fold-change
99-
if x[k]==0 or mu[k]==0:
100-
fc.append((x[k]+delta)/(mu[k]+delta))
101-
else:
102-
fc.append(x[k]/mu[k])
103-
# one-sided p-value
104-
if mu[k]==0 and x[k]==0:
105-
NBpval.append(1)
106-
elif x[k]<=mu[k]:
107-
NBpval.append(1)
108-
else:
109-
NBpval.append(1 - scipy.stats.nbinom.cdf(x[k],n[k],p[k]))
101+
pval = list();
102+
print('Computing p-values ...')
103+
# one-sided p-value
104+
if Model == 'Neg. Binomial':
105+
for k in range(L):
106+
if mu[k]==0 and x[k]==0:
107+
pval.append(1)
108+
elif x[k]<=mu[k]:
109+
pval.append(1)
110+
else:
111+
pval.append(1 - scipy.stats.nbinom.cdf(x[k],n[k],p[k]))
112+
elif Model == 'Poisson':
113+
for k in range(L):
114+
if mu[k]==0 and x[k]==0:
115+
pval.append(1)
116+
elif x[k]<=mu[k]:
117+
pval.append(1)
118+
else:
119+
pval.append(1 - scipy.stats.poisson.cdf(x[k],sigma2[k]))
110120
# -----------------------------------------------------------
111121
elif ScreenType == 'depletion': # depletion screen
112122
# -----------------------------------------------------------
113-
fc = list(); NBpval = list(); NBpval2 = list()
114-
print('Computing fold-changes and p-values...')
115-
for k in range(L):
116-
# fold-change
117-
if x[k]==0 or mu[k]==0:
118-
fc.append((x[k]+delta)/(mu[k]+delta))
119-
else:
120-
fc.append(x[k]/mu[k])
121-
# one-sided p-value
122-
if mu[k]==0 and x[k]==0:
123-
NBpval.append(1)
124-
elif x[k]>=mu[k]:
125-
NBpval.append(1)
126-
else:
127-
NBpval.append(scipy.stats.nbinom.cdf(x[k],n[k],p[k]))
123+
pval = list();
124+
print('Computing p-values...')
125+
# one-sided p-value
126+
if Model == 'Neg. Binomial':
127+
for k in range(L):
128+
if mu[k]==0 and x[k]==0:
129+
pval.append(1)
130+
elif x[k]>=mu[k]:
131+
pval.append(1)
132+
else:
133+
pval.append(scipy.stats.nbinom.cdf(x[k],n[k],p[k]))
134+
elif Model == 'Poisson':
135+
for k in range(L):
136+
if mu[k]==0 and x[k]==0:
137+
pval.append(1)
138+
elif x[k]<=mu[k]:
139+
pval.append(1)
140+
else:
141+
pval.append(scipy.stats.poisson.cdf(x[k],sigma2[k]))
128142
# -----------------------------------------------------------
129143
else: # error in scree type
130144
# -----------------------------------------------------------
@@ -133,17 +147,17 @@ def PrepareHitList(sample):
133147
# -----------------------------------------------
134148
# p-value Correction and Plots
135149
# -----------------------------------------------
136-
if max(sigma2) > 0:
150+
if max(SampleVar) > 0:
137151
# p-value correction for multiple tests
138152
print('p-value correction ...')
139-
multTest = multipletests(NBpval,alpha,padj)
153+
multTest = multipletests(pval,alpha,padj)
140154
significant = multTest[0]
141-
NBpval_0 = multTest[1]
155+
pval0 = multTest[1]
142156
# Plots
143157
print('Plotting p-values ...')
144-
pvalHist(NBpval,NBpval_0,pvalDir,sample,res,svg)
145-
VolcanoPlot(fc,NBpval,significant,pvalDir,ScreenType,sample,res,svg,alpha)
146-
QQPlot(NBpval,significant,pvalDir,sample,res,svg,alpha)
158+
pvalHist(pval,pval0,pvalDir,sample,res,svg)
159+
VolcanoPlot(fc,pval,significant,pvalDir,ScreenType,sample,res,svg,alpha)
160+
QQPlot(pval,significant,pvalDir,sample,res,svg,alpha)
147161
zScorePlot(fc,significant,pvalDir,ScreenType,sample,res,svg,alpha)
148162

149163

@@ -161,8 +175,8 @@ def PrepareHitList(sample):
161175
'control mean': [mu[k] for k in range(L)],
162176
'control stdev': [numpy.sqrt(sigma2[k]) for k in range(L)],
163177
'fold change': [fc[k] for k in range(L)],
164-
'p-value': [NBpval[k] for k in range(L)],
165-
'p-value (adj.)': [NBpval_0[k] for k in range(L)],
178+
'p-value': [pval[k] for k in range(L)],
179+
'p-value (adj.)': [pval0[k] for k in range(L)],
166180
'significant': [str(significant[k]) for k in range(L)]},
167181
columns = ['sgRNA','gene','counts','control mean',\
168182
'control stdev','fold change','p-value','p-value (adj.)','significant'])

Scripts/PinAPL.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,10 @@
9292
# Trim Adapters
9393
StatMsg = 'Trimming reads ...'
9494
os.system('python -u PrintStatus.py SubHeader "'+StatMsg+'" 2>&1 | tee -a PinAPL-Py.log')
95-
os.system('python -u '+TrimScript+'.py 2>&1 | tee -a PinAPL-Py.log')
95+
if os.path.exists(AlignDir):
96+
os.system('python -u PrintStatus.py SkipTrim blank 2>&1 | tee -a PinAPL-Py.log')
97+
else:
98+
os.system('python -u '+TrimScript+'.py 2>&1 | tee -a PinAPL-Py.log')
9699
DoneMsg = 'Read trimming completed.'
97100
os.system('python -u PrintStatus.py Done "'+DoneMsg+'" 2>&1 | tee -a PinAPL-Py.log')
98101

Scripts/PrintStatus.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
def PrintStatus_Header():
1414
print('**************************************************')
15-
print('Launching PinAPL-Py v2.7.6..')
15+
print('Launching PinAPL-Py v2.7.7..')
1616
print('P. Spahn et al., UC San Diego (09/2017)')
1717
print('**************************************************')
1818

@@ -29,6 +29,9 @@ def PrintStatus_Done(msg):
2929
def PrintStatus_ProcessSample(sample):
3030
print('Processing sample '+sample+' ... ')
3131

32+
def PrintStatus_SkipTrim():
33+
print('Alignment data found. Skipping read trimming ... ')
34+
3235
def PrintStatus_SkipSample(sample):
3336
print('Alignment data found for sample '+sample+'. Skipping alignment ... ')
3437

@@ -56,6 +59,8 @@ def PrintStatus_TimeStamp(msg):
5659
PrintStatus_Done(input2)
5760
elif input1 == 'ProcessSample':
5861
PrintStatus_ProcessSample(input2)
62+
elif input1 == 'SkipTrim':
63+
PrintStatus_SkipTrim()
5964
elif input1 == 'SkipSample':
6065
PrintStatus_SkipSample(input2)
6166
elif input1 == 'SkipSeqQC':

0 commit comments

Comments
 (0)