Skip to content

Commit 4316620

Browse files
committed
v2.7.6
1 parent 2924ddf commit 4316620

15 files changed

+169
-49
lines changed

Scripts/AlignReads.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,6 @@ def MapAndCount(sample):
8383
AlnStemDir = config['AlignDir']
8484
AlnDir = AlnStemDir+sample+'/'
8585
OutputDir = config['AlnQCDir']+sample
86-
seq_5_end = config['seq_5_end']
87-
CutErrorTol = config['CutErrorTol']
88-
R_min = config['R_min']
8986
minN = config['Cutoff']
9087
LibFilename = config['LibFilename']
9188
LibFormat = LibFilename[-3:]
@@ -160,6 +157,8 @@ def MapAndCount(sample):
160157
# ------------------------------------------
161158
start = time.time()
162159
print('Analyzing alignment ...')
160+
print('Applying matching threshold ...')
161+
print('Applying ambiguity threshold ...')
163162
# CLASSIFY ALIGNMENTS
164163
os.chdir(AlnDir)
165164
bw2outputFilename = ReadsFilename0 + '_bw2output.sam'
@@ -211,16 +210,19 @@ def MapAndCount(sample):
211210
NFail += 1
212211
AlnStatus.append('Fail')
213212
bw2sam.close();
214-
215-
# ------------------------------------------
216-
# Text output and plots
217-
# ------------------------------------------
218-
print('Writing alignment logfile ...')
219213
NReads = NTol + NAmb + NUnique + NFail
220214
FracUnique = round(NUnique/NReads*1000)/10
221215
FracTol = round(NTol/NReads*1000)/10
222216
FracAmb = round(NAmb/NReads*1000)/10
223-
FracFail = round(NFail/NReads*1000)/10
217+
FracFail = round(NFail/NReads*1000)/10
218+
print('*** Successfully mapped reads: '\
219+
+str(NUnique+NTol)+' ('+str(FracUnique+FracTol)+'%) ***')
220+
221+
222+
# ------------------------------------------
223+
# Text output and plots
224+
# ------------------------------------------
225+
print('Writing alignment logfile ...')
224226
if aln_time < 60:
225227
time_elapsed = aln_time
226228
time_unit = ' [secs]'
@@ -356,9 +358,6 @@ def MapAndCount(sample):
356358
for k in range(L):
357359
GuideCounts.write(str(sgIDs[k]) + '\t'+ str(geneIDs[k]) + '\t' + str(ReadsPerGuide[k]) + '\n')
358360
GuideCounts.close()
359-
# No-mapping warning
360-
if sum(ReadsPerGuide) == 0:
361-
print('!! ERROR: Zero total read counts! Check library file and index !!')
362361
# Read counts per gene in library
363362
print('Counting reads per gene ...')
364363
global GeneList
@@ -373,6 +372,9 @@ def MapAndCount(sample):
373372
GeneCounts.close()
374373
end = time.time()
375374
print('Read counting completed.')
375+
# No-mapping warning
376+
if sum(ReadsPerGuide) == 0:
377+
print('### ERROR: Zero read counts! Check library and alignment ###')
376378
# Time stamp
377379
sec_elapsed = end-start
378380
if sec_elapsed < 60:

Scripts/AnalyzeControl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def EstimateControlCounts():
5858
'gene': genes},
5959
columns = ['sgID','gene'])
6060
if len(ControlSamples) == 0:
61-
print('ERROR: No control sample directories found!')
61+
print('### ERROR: No control sample directories found! ###')
6262
else:
6363
os.chdir(AlnQCDir)
6464
for controlsample in ControlSamples:

Scripts/Bowtie2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
def BuildIndex(LibFastA,IndexDir,bw2Dir):
1616
os.chdir(IndexDir)
17-
bw2_cmdline = bw2Dir+'bowtie2-build -f library.fasta Library'
17+
bw2_cmdline = bw2Dir+'bowtie2-build -q -f library.fasta Library'
1818
os.system(bw2_cmdline)
1919

2020
def RunBowtie2(ReadsFilename0,TempDataDir,AlnDir,bw2Dir,IndexDir,L_bw,N_bw,i_bw):

Scripts/CheckCharacters.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#!/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Mon Oct 10 10:22:56 2016
5+
6+
@author: philipp
7+
"""
8+
# Library sanity check
9+
# =======================================================================
10+
# Imports
11+
import yaml
12+
import sys
13+
import os
14+
import pandas
15+
16+
def RunSanityCheck():
    """Replace special characters in library and sample names by '_'.

    Reads 'configuration.yaml' from the current working directory and then

    1. rewrites the library file (LibDir/LibFilename) if any gene name or
       sgRNA ID contains one of the special characters listed below,
    2. renames the read files in DataDir whose names (as listed in the
       FILENAME column of WorkingDir/DataSheet.xlsx) contain such characters,
    3. converts the TREATMENT column of the data sheet to strings, replaces
       special characters in it, and rewrites DataSheet.xlsx if a file name
       or treatment name changed.

    The function changes the current working directory as a side effect and
    reports what it did via print().

    Raises:
        ValueError: if the library file name does not end in 'tsv' or 'csv'.
    """
    # Characters that are replaced by '_' everywhere (defined once and shared
    # by the library, file name and treatment name checks)
    BadCharacters = [' ','>','<',';',':',',','|','/','\\','(',')','[',']',
                     '$','%','*','?','{','}','=','+','@']

    def _Sanitize(name):
        # Return `name` with every special character replaced by '_'
        for bad_char in BadCharacters:
            name = name.replace(bad_char,'_')
        return name

    # ------------------------------------------------
    # Get parameters
    # ------------------------------------------------
    # safe_load: yaml.load() without an explicit Loader fails on PyYAML >= 6
    # and can construct arbitrary objects on older versions.
    # NOTE(review): assumes configuration.yaml only uses standard YAML types.
    with open('configuration.yaml','r') as configFile:
        config = yaml.safe_load(configFile)
    LibDir = config['LibDir']
    LibFilename = config['LibFilename']
    DataDir = config['DataDir']
    WorkingDir = config['WorkingDir']
    LibFormat = LibFilename[-3:]
    if LibFormat == 'tsv':
        libsep = '\t'
    elif LibFormat == 'csv':
        libsep = ','
    else:
        # Fail with a clear message instead of a NameError on 'libsep' below
        raise ValueError("Unsupported library format '"+LibFormat+
                         "': LibFilename must end in 'tsv' or 'csv'")

    # --------------------------------------------------------------------
    # Replace special characters in library (...these cause problems in PlotCount.py)
    # --------------------------------------------------------------------
    os.chdir(LibDir)
    LibCols = ['gene','ID','seq']
    # The first row of the file is skipped and replaced by the LibCols names
    LibFile = pandas.read_table(LibFilename, sep = libsep, skiprows = 1, names = LibCols)
    GeneNames = list(LibFile['gene'])
    ID = list(LibFile['ID'])
    seq = list(LibFile['seq'])
    GeneNames0 = [_Sanitize(gene) for gene in GeneNames]
    ID0 = [_Sanitize(sgRNA) for sgRNA in ID]
    if GeneNames != GeneNames0 or ID != ID0:
        # Overwrite the library file in place with the sanitized names
        LibFile0 = pandas.DataFrame(data = {'gene': GeneNames0,
                                            'ID': ID0,
                                            'seq': seq},
                                    columns = LibCols)
        LibFile0.to_csv(LibFilename, sep = libsep, index = False)
        print("WARNING: Special characters in library file have been replaced by '_' ")

    # --------------------------------------------------------------------
    # Load Data Sheet
    # --------------------------------------------------------------------
    os.chdir(WorkingDir)
    DataSheet = pandas.read_excel('DataSheet.xlsx')
    Filenames = list(DataSheet['FILENAME'])
    TreatmentList = list(DataSheet['TREATMENT'])
    BadCharFound = False

    # --------------------------------------------------------------------
    # Replace special characters in filenames
    # --------------------------------------------------------------------
    os.chdir(DataDir)
    Filenames0 = [_Sanitize(Filename) for Filename in Filenames]
    for Filename, Filename0 in zip(Filenames, Filenames0):
        if Filename0 != Filename:
            BadCharFound = True
            # os.rename instead of a shell 'mv' command line: file names that
            # contain quotes or shell metacharacters are handled correctly
            # and cannot inject commands.
            try:
                os.rename(Filename, Filename0)
            except OSError as err:
                # Best effort, like the former 'mv' call: report and go on
                print('WARNING: Could not rename '+Filename+' ('+str(err)+')')
    if Filenames0 != Filenames:
        # Assign the whole column at once; element-wise chained assignment
        # (DataSheet['FILENAME'][j] = ...) is not guaranteed to modify DataSheet
        DataSheet['FILENAME'] = Filenames0

    # --------------------------------------------------------------------
    # Replace special characters in treatment names
    # --------------------------------------------------------------------
    # Treatments are converted to strings first, so a non-string entry
    # (e.g. a number) also counts as a change and triggers a sheet update.
    TreatmentList0 = [_Sanitize(str(treatment)) for treatment in TreatmentList]
    if TreatmentList0 != TreatmentList:
        BadCharFound = True
        DataSheet['TREATMENT'] = TreatmentList0

    # --------------------------------------------------------------------
    # Update Data Sheet
    # --------------------------------------------------------------------
    if BadCharFound:
        os.chdir(WorkingDir)
        DataSheet.to_excel('DataSheet.xlsx',columns=['FILENAME','TREATMENT'])
        print("WARNING: Special characters in sample names replaced by '_'")
    else:
        print('No special characters found.')
108+
109+
110+
111+
112+
if __name__ == "__main__":
113+
RunSanityCheck()

Scripts/FindHits.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def PrepareHitList(sample):
4646
ListDir = config['HitDir']
4747
CtrlCounts_Filename = 'Control_GuideCounts_0.tsv'
4848
ScreenType = config['ScreenType']
49-
alpha = config['alpha']
49+
alpha = config['alpha_s']
5050
padj = config['padj']
5151
SheetFormat = config['HitListFormat']
5252
delta = config['delta']
@@ -128,7 +128,7 @@ def PrepareHitList(sample):
128128
# -----------------------------------------------------------
129129
else: # error in screen type
130130
# -----------------------------------------------------------
131-
print('ERROR: Check spelling of ScreenType in configuration file!')
131+
print('### ERROR: Check spelling of ScreenType in configuration file! ###')
132132

133133
# -----------------------------------------------
134134
# p-value Correction and Plots
@@ -162,10 +162,10 @@ def PrepareHitList(sample):
162162
'control stdev': [numpy.sqrt(sigma2[k]) for k in range(L)],
163163
'fold change': [fc[k] for k in range(L)],
164164
'p-value': [NBpval[k] for k in range(L)],
165-
'FDR': [NBpval_0[k] for k in range(L)],
165+
'p-value (adj.)': [NBpval_0[k] for k in range(L)],
166166
'significant': [str(significant[k]) for k in range(L)]},
167167
columns = ['sgRNA','gene','counts','control mean',\
168-
'control stdev','fold change','p-value','FDR','significant'])
168+
'control stdev','fold change','p-value','p-value (adj.)','significant'])
169169
if ScreenType == 'enrichment':
170170
Results_df_0 = Results_df.sort_values(['significant','p-value','fold change','sgRNA'],ascending=[0,1,0,1])
171171
elif ScreenType == 'depletion':

Scripts/LoadDataSheet.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@ def LoadExcelDataSheet():
2424
os.chdir(WorkingDir)
2525
DataSheet = pandas.read_excel('DataSheet.xlsx')
2626
FileNames = list(DataSheet['FILENAME'].values)
27-
TreatmentList = list(DataSheet['TREATMENT'].values)
28-
TreatmentList = [str(treatment).replace(' ','_') for treatment in TreatmentList] # replace spaces
27+
TreatmentList = list(DataSheet['TREATMENT'])
2928
Treatments = list(set(TreatmentList))
3029
if 'Control' in Treatments:
3130
N = len(FileNames)
@@ -42,7 +41,7 @@ def LoadExcelDataSheet():
4241
DataSheet.to_excel('DataSheet.xlsx',columns=['FILENAME','TREATMENT','SAMPLE NAME'])
4342
os.chdir(ScriptsDir)
4443
else:
45-
print('ERROR: No control treatment defined!')
44+
print('### ERROR: No control treatment defined! ###')
4645

4746

4847
if __name__ == "__main__":

Scripts/NormalizeReadCounts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ def Normalization():
214214
header=False,index=False)
215215
os.chdir(AlnQCDir)
216216
else:
217-
print('ERROR: Check spelling of Normalization parameter in configuration file!')
217+
print('### ERROR: Check spelling of Normalization parameter in configuration file! ###')
218218

219219
# --------------------------------------
220220
# Time stamp

Scripts/PinAPL.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,12 @@
5656
os.system('python -u PrintStatus.py Header blank 2>&1 | tee PinAPL-Py.log')
5757
start = time.time()
5858

59-
# Library sanity check
59+
# Character sanity check
60+
StatMsg = 'Running character sanity check ...'
61+
os.system('python -u PrintStatus.py SubHeader "'+StatMsg+'" 2>&1 | tee -a PinAPL-Py.log')
6062
os.system('python -u '+SanityScript+'.py 2>&1 | tee -a PinAPL-Py.log')
63+
DoneMsg = 'Character sanity check completed.'
64+
os.system('python -u PrintStatus.py Done "'+DoneMsg+'" 2>&1 | tee -a PinAPL-Py.log')
6165

6266
# Generate index if not present
6367
if not os.path.exists(IndexDir):

Scripts/PlotCounts.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def GOI_Scatterplot(sample,GOI='None',Annot='none',NonT='none',Transp='none'):
4242
PlotDir = config['ScatterDir']
4343
HiLiteDir = config['HiLiteDir']
4444
ScreenType = config['ScreenType']
45-
alpha = config['alpha']
45+
alpha = config['alpha_s']
4646
delta = config['delta']
4747
NonTPrefix = config['NonTargetPrefix']
4848
res = config['dpi']
@@ -117,7 +117,7 @@ def GOI_Scatterplot(sample,GOI='None',Annot='none',NonT='none',Transp='none'):
117117
os.chdir(PlotDir)
118118
fig,ax = plt.subplots(figsize=(4,4.25))
119119
plt.scatter(control_rest,sample_rest,s=dotsize,facecolor='black',lw=0,alpha=TransparencyLevel)
120-
plt.scatter(control_sig,sample_sig,s=dotsize,facecolor='green',lw=0,alpha=tpcy,label='FDR<'+str(alpha))
120+
plt.scatter(control_sig,sample_sig,s=dotsize,facecolor='green',lw=0,alpha=tpcy,label='p < '+str(alpha))
121121
if len(K_nonT)>0 and ShowNonTargets:
122122
plt.scatter(control_nonT,sample_nonT,s=dotsize,facecolor='orange',lw=0,alpha=0.35,\
123123
label='Non Targeting')
@@ -167,7 +167,7 @@ def GOI_Scatterplot(sample,GOI='None',Annot='none',NonT='none',Transp='none'):
167167
print('sgID\t\tCounts\tControl\tSignificant')
168168
print('-----------------------------------------------')
169169
if not K_goi:
170-
print('ERROR: Gene name not found!')
170+
print('### ERROR: Gene name not found! ###')
171171
else:
172172
for k in K_goi:
173173
println = str(sgIDs[k])+'\t'+str(int(sample_counts[k]))+'\t'+ \

Scripts/PlotReplicates.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ def Repl_Scatterplot(Repl1,Repl2,GOI='None',Annot='none',NonT='none',Transp='non
4242
AlnQCDir = config['AlnQCDir']
4343
PlotDir = config['CorrelDir']
4444
HiLiteDir2 = config['HiLiteDir2']
45-
alpha = config['alpha']
4645
delta = config['delta']
4746
NonTPrefix = config['NonTargetPrefix']
4847
res = config['dpi']

0 commit comments

Comments
 (0)