@@ -41,8 +41,17 @@ def RunSanityCheck():
4141 seq = list (LibFile ['seq' ])
4242 GeneNames0 = []
4343 ID0 = []
44+
45+ # --------------------------------------------------------------------
46+ # Define bad characters (library)
47+ # --------------------------------------------------------------------
4448 BadCharacters = [' ' ,'>' ,'<' ,';' ,':' ,',' ,'|' ,'/' ,'\\ ' ,'(' ,')' ,'[' ,']' ,\
4549 '$' ,'%' ,'*' ,'?' ,'{' ,'}' ,'=' ,'+' ,'@' ]
50+
51+ # --------------------------------------------------------------------
52+ # Check library
53+ # --------------------------------------------------------------------
54+ BadLibCharFound = False
4655 for gene in GeneNames :
4756 for bad_char in BadCharacters :
4857 gene = gene .replace (bad_char ,'_' )
@@ -52,6 +61,7 @@ def RunSanityCheck():
5261 sgRNA = sgRNA .replace (bad_char ,'_' )
5362 ID0 .append (sgRNA )
5463 if GeneNames != GeneNames0 or ID != ID0 :
64+ BadLibCharFound = True
5565 LibFile0 = pandas .DataFrame (data = {'gene' : [gene for gene in GeneNames0 ],
5666 'ID' : [sgRNA for sgRNA in ID0 ],
5767 'seq' : [s for s in seq ]},
@@ -67,42 +77,52 @@ def RunSanityCheck():
6777 Filenames = list (DataSheet ['FILENAME' ])
6878 TreatmentList = list (DataSheet ['TREATMENT' ])
6979 F = len (Filenames )
70- BadCharFound = False
80+
81+ # --------------------------------------------------------------------
82+ # Define bad characters (filenames & samples)
83+ # --------------------------------------------------------------------
84+ BadCharacters = [' ' ,'>' ,'<' ,';' ,':' ,',' ,'|' ,'/' ,'\\ ' ,'(' ,')' ,'[' ,']' ,\
85+ '$' ,'%' ,'*' ,'?' ,'{' ,'}' ,'=' ,'+' ,'@' ]
7186
7287 # --------------------------------------------------------------------
73- # Replace non-printable characters from filenames
88+ # Replace special characters in filenames
7489 # --------------------------------------------------------------------
7590 os .chdir (DataDir )
76- BadCharacters = [' ' ,'>' ,'<' ,';' ,':' ,',' ,'|' ,'/' ,'\\ ' ,'(' ,')' ,'[' ,']' ,\
77- '$' ,'%' ,'*' ,'?' ,'{' ,'}' ,'=' ,'+' ,'@' ]
91+ BadFileCharFound = False
7892 for j in range (F ):
7993 Filename = Filenames [j ]
8094 Filename0 = Filename
8195 for bad_char in BadCharacters :
8296 Filename0 = Filename0 .replace (bad_char ,'_' )
8397 if Filename0 != Filename :
84- BadCharFound = True
98+ BadFileCharFound = True
8599 os .system ('mv ' + "'" + Filename + "'" + ' ' + Filename0 )
86- DataSheet ['FILENAME' ][j ] = Filename0
100+ DataSheet ['FILENAME' ][j ] = Filename0
101+ print ("WARNING: Special characters in filenames names replaced by '_'" )
87102
88103 # --------------------------------------------------------------------
89- # Replace non-printable characters from filenames
90- # --------------------------------------------------------------------
104+ # Replace special characters in sample names
105+ # --------------------------------------------------------------------
91106 TreatmentList0 = TreatmentList
107+ BadSampleCharFound = False
92108 for bad_char in BadCharacters :
93109 TreatmentList0 = [str (treatment ).replace (bad_char ,'_' ) for treatment in TreatmentList0 ]
94110 if TreatmentList0 != TreatmentList :
95- BadCharFound = True
111+ BadSampleCharFound = True
96112 DataSheet ['TREATMENT' ] = TreatmentList0
113+ print ("WARNING: Special characters in sample names replaced by '_'" )
97114
98115 # --------------------------------------------------------------------
99116 # Update Data Sheet
100117 # --------------------------------------------------------------------
101- if BadCharFound :
118+ if BadFileCharFound or BadSampleCharFound :
102119 os .chdir (WorkingDir )
103- DataSheet .to_excel ('DataSheet.xlsx' ,columns = ['FILENAME' ,'TREATMENT' ])
104- print ("WARNING: Special characters in sample names replaced by '_'" )
105- else :
120+ DataSheet .to_excel ('DataSheet.xlsx' ,columns = ['FILENAME' ,'TREATMENT' ])
121+
122+ # --------------------------------------------------------------------
123+ # No special characters found
124+ # --------------------------------------------------------------------
125+ if not BadLibCharFound and not BadFileCharFound and not BadSampleCharFound :
106126 print ('No special characters found.' )
107127
108128
0 commit comments