Skip to content

Commit 0584337

Browse files
committed
v2.7.5
1 parent 58ff77b commit 0584337

File tree

5 files changed

+75
-72
lines changed

5 files changed

+75
-72
lines changed

Scripts/CheckLibrary.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#!/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Mon Oct 10 10:22:56 2016
5+
6+
@author: philipp
7+
"""
8+
# Library sanity check
9+
# =======================================================================
10+
# Imports
11+
import yaml
12+
import sys
13+
import os
14+
import pandas
15+
16+
# Characters that are substituted in gene names, paired with their replacement.
# The source comment notes that these cause problems in PlotCount.py.
_GENE_CHAR_REPLACEMENTS = (
    ('|', '_'),
    ('(', '_'),
    (')', '_'),
    (';', '_'),
    ('"', ''),
    ('/', '_'),
    ('\\', '_'),
)

# Column separator for each supported library file extension.
_LIB_SEPARATORS = {'tsv': '\t', 'csv': ','}


def _sanitize_gene_name(gene):
    """Return `gene` with every problematic character substituted.

    Values without a ``replace`` method (e.g. the NaN pandas yields for an
    empty cell) are returned unchanged instead of raising AttributeError.
    """
    if not hasattr(gene, 'replace'):
        return gene
    for old, new in _GENE_CHAR_REPLACEMENTS:
        gene = gene.replace(old, new)
    return gene


def LibrarySanityCheck():
    """Sanitize the gene names of the sgRNA library file in place.

    Reads 'LibDir' and 'LibFilename' from 'configuration.yaml' in the current
    working directory, changes the working directory to 'LibDir', and loads
    the library as a three-column table (gene, ID, seq), skipping the first
    row of the file. If any gene name contains one of the characters listed in
    `_GENE_CHAR_REPLACEMENTS`, the library file is overwritten with the
    sanitized names and a warning is printed.

    Raises:
        ValueError: if the last three characters of 'LibFilename' are neither
            'tsv' nor 'csv' (compared case-insensitively).
    """
    # ------------------------------------------------
    # Get parameters
    # ------------------------------------------------
    # safe_load: the configuration only needs plain YAML types, and a bare
    # yaml.load() without a Loader is unsafe and rejected by recent PyYAML.
    with open('configuration.yaml', 'r') as configFile:
        config = yaml.safe_load(configFile)
    LibDir = config['LibDir']
    LibFilename = config['LibFilename']
    LibFormat = LibFilename[-3:].lower()
    if LibFormat not in _LIB_SEPARATORS:
        # Fail with a clear message rather than an unbound 'libsep' later on.
        raise ValueError("ERROR: Unsupported library file format '" + LibFormat
                         + "'. Library file must be .tsv or .csv")
    libsep = _LIB_SEPARATORS[LibFormat]

    # ----------------------------------
    # Read library
    # ----------------------------------
    os.chdir(LibDir)
    LibCols = ['gene', 'ID', 'seq']
    # skiprows = 1 drops the first row of the file; columns are named here.
    LibFile = pandas.read_table(LibFilename, sep = libsep, skiprows = 1, names = LibCols)
    GeneNames = list(LibFile['gene'])
    ID = list(LibFile['ID'])
    seq = list(LibFile['seq'])

    # ----------------------------------
    # Replace problematic characters (...these cause problems in PlotCount.py)
    # ----------------------------------
    GeneNames0 = [_sanitize_gene_name(gene) for gene in GeneNames]
    if GeneNames != GeneNames0:
        # Rewrite the library only when at least one name actually changed.
        LibFile0 = pandas.DataFrame(data = {'gene': GeneNames0,
                                            'ID': ID,
                                            'seq': seq},
                                    columns = LibCols)
        LibFile0.to_csv(LibFilename, sep = libsep, index = False)
        print("WARNING: Found non-printable characters in library file. Replaced by '_' ")
61+
62+
63+
64+
# Script entry point: run the library sanity check only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
65+
LibrarySanityCheck()

Scripts/FindHits.py

Lines changed: 1 addition & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -146,70 +146,7 @@ def PrepareHitList(sample):
146146
QQPlot(NBpval,significant,pvalDir,sample,res,svg,alpha)
147147
zScorePlot(fc,significant,pvalDir,ScreenType,sample,res,svg,alpha)
148148

149-
150-
# # Compute fold change (compared to control)
151-
# print('Computing fold changes ...')
152-
# fc = list()
153-
# for k in range(L):
154-
# if x[k]==0 or mu[k]==0:
155-
# fc.append((x[k]+delta)/(mu[k]+delta))
156-
# else:
157-
# fc.append(x[k]/mu[k])
158-
# # Compute negative binomial p-values
159-
# if max(sigma2) > 0:
160-
# print('Computing p-values ...')
161-
# # Neg. Binom. Parameters n: number of failures, p: probability of failure
162-
# n = list(); p = list()
163-
# for i in range(L):
164-
# if mu[i]==0 or sigma2[i]==0:
165-
# n.append(((mu[i]+delta)**2/(sigma2[i]+2*delta))/(1-(mu[i]+delta)/(sigma2[i]+2*delta)))
166-
# p.append((mu[i]+delta)/(sigma2[i]+2*delta))
167-
# else:
168-
# n.append((mu[i]**2/sigma2[i])/(1-mu[i]/sigma2[i]))
169-
# p.append(mu[i]/sigma2[i])
170-
# NBpval = list();
171-
# if ScreenType == 'enrichment':
172-
# for i in range(L):
173-
# if mu[i]==0 and x[i]==0:
174-
# NBpval.append(1)
175-
# elif x[i]<=mu[i]:
176-
# NBpval.append(1)
177-
# else:
178-
# NBpval.append(1 - scipy.stats.nbinom.cdf(x[i],n[i],p[i]))
179-
# elif ScreenType == 'depletion':
180-
# for i in range(L):
181-
# if mu[i]==0 and x[i]==0:
182-
# NBpval.append(1)
183-
# elif x[i]>=mu[i]:
184-
# NBpval.append(1)
185-
# else:
186-
# NBpval.append(scipy.stats.nbinom.cdf(x[i],n[i],p[i]))
187-
# else:
188-
# print('ERROR: Check spelling of ScreenType in configuration file!')
189-
# # Compute two-sided pvalues (for volcano plot only!)
190-
# NBpval2 = list()
191-
# for i in range(L):
192-
# if x[i]<=mu[i]:
193-
# NBpval2.append(scipy.stats.nbinom.cdf(x[i],n[i],p[i]))
194-
# else:
195-
# NBpval2.append(1 - scipy.stats.nbinom.cdf(x[i],n[i],p[i]))
196-
# # p-value correction for multiple tests
197-
# print('p-value correction ...')
198-
# multTest = multipletests(NBpval,alpha,padj)
199-
# significant = multTest[0]
200-
# NBpval_0 = multTest[1]
201-
# # Plots
202-
# print('Plotting p-values ...')
203-
# pvalHist(NBpval,NBpval_0,pvalDir,sample,res,svg)
204-
# VolcanoPlot(fc,NBpval2,significant,pvalDir,ScreenType,sample,res,svg,alpha)
205-
# QQPlot(NBpval,significant,pvalDir,sample,res,svg,alpha)
206-
# zScorePlot(fc,significant,pvalDir,ScreenType,sample,res,svg,alpha)
207-
# else: # no control replicates
208-
# print('WARNING: No control replicates! No p-values computed...')
209-
# NBpval = [1 for k in range(L)]
210-
# NBpval_0 = [1 for k in range(L)]
211-
# significant = [False for k in range(L)]
212-
149+
213150
# -----------------------------------------------
214151
# Save sgRNA dataframe
215152
# -----------------------------------------------
@@ -235,7 +172,6 @@ def PrepareHitList(sample):
235172
Results_df_0 = Results_df.sort_values(['significant','p-value','fold change','sgRNA'],ascending=[0,1,1,1])
236173
ListFilename = sample+'_'+str(alpha)+'_'+padj+'_sgRNAList.tsv'
237174
Results_df_0.to_csv(ListFilename, sep = '\t', index = False)
238-
#Results_df_0.to_hdf(ListFilename+'.hdf','df')
239175
if SheetFormat == 'xlsx':
240176
print('Converting to xlsx ...')
241177
ListFilename = sample+'_'+str(alpha)+'_'+padj+'_sgRNAList.xlsx'

Scripts/PinAPL.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
AlnQCDir = config['AlnQCDir']
3333
SeqQCDir = config['SeqQCDir']
3434
LogFileDir = config['LogFileDir']
35+
SanityScript = config['SanityScript']
3536
IndexScript = config['IndexScript']
3637
LoaderScript = config['LoaderScript']
3738
ReadDepthScript = config['ReadDepthScript']
@@ -55,6 +56,9 @@
5556
os.system('python -u PrintStatus.py Header blank 2>&1 | tee PinAPL-Py.log')
5657
start = time.time()
5758

59+
# Library sanity check
60+
os.system('python -u '+SanityScript+'.py 2>&1 | tee -a PinAPL-Py.log')
61+
5862
# Generate index if not present
5963
if not os.path.exists(IndexDir):
6064
StatMsg = 'Building library index ...'
@@ -196,7 +200,4 @@
196200
os.system('cp configuration.yaml '+LogFileDir)
197201
os.system('cp PinAPL-Py.log '+LogFileDir)
198202
os.chdir(WorkingDir)
199-
os.system('cp DataSheet.xlsx '+LogFileDir)
200-
201-
# Status message
202-
print('LOADING RESULTS PAGE. PLEASE REFRESH PERIODICALLY...')
203+
os.system('cp DataSheet.xlsx '+LogFileDir)

Scripts/PrintStatus.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212

1313
def PrintStatus_Header():
1414
print('**************************************************')
15-
print('Launching PinAPL-Py v2.7.4..')
16-
print('P. Spahn et al., UC San Diego (07/2017)')
15+
print('Launching PinAPL-Py v2.7.5..')
16+
print('P. Spahn et al., UC San Diego (08/2017)')
1717
print('**************************************************')
1818

1919
def PrintStatus_SubHeader(msg):

configuration.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ alpha: 0.001 # critical false-discovery rate for sign. sgRNAs/genes
3131
P_0: 0.00005 # maximum p-value for sgRNA to be taken into account for aRRA
3232
Np: 1000 # number of permutations for gene ranking analysis
3333
TopN: 25 # number of top sgRNAs to take into account for clustering
34-
thr_STARS: 20 # threshold percentage for STARS analysis
34+
thr_STARS: 10 # threshold percentage for STARS analysis
3535
CutErrorTol: 0.1 # cutadapt error tolerance
3636
R_min: 20 # minimal required read length after cutadapt trimming
3737
L_bw: 11 # Bowtie2 -L parameter (seed length)
@@ -83,6 +83,7 @@ CutAdaptDir: '/root/.local/bin/'
8383
STARSDir: '/opt/PinAPL-Py/Scripts/STARS_mod/'
8484

8585
# SCRIPT FILENAMES
86+
SanityScript: 'CheckLibrary'
8687
IndexScript: 'BuildLibraryIndex'
8788
LoaderScript: 'LoadDataSheet'
8889
ReadDepthScript: 'PlotNumReads'

0 commit comments

Comments
 (0)