@@ -36,7 +36,10 @@ def parse_args(args):
36
36
help = 'Alignment tool (Diamond / MMseqs2)' ,
37
37
dest = 'mode' ,
38
38
default = None )
39
- cmd_create_db .add_argument ('--quiet' ,'--quiet' ,action = 'store_true' , help = 'Disable alignment console output' )
39
+ cmd_create_db .add_argument ('--quiet' ,
40
+ action = 'store_true' ,
41
+ dest = 'quiet' ,
42
+ help = 'Disable alignment console output' )
40
43
41
44
parser .add_argument ('-i' , '--input' ,
42
45
required = False ,
@@ -60,7 +63,7 @@ def parse_args(args):
60
63
required = False ,
61
64
help = 'Output directory (will be created if non-existent)' ,
62
65
dest = 'output' ,
63
- default = path .join (_ROOT , 'output' ))
66
+ default = path .join (_ROOT , 'output' ))
64
67
65
68
parser .add_argument ('--tool' , '--tool' ,
66
69
required = False ,
@@ -152,7 +155,7 @@ def check_install():
152
155
has_mmseqs = False
153
156
has_diamond = False
154
157
dependencies = ['diamond' , 'mmseqs' ]
155
- print ("Looking for dependencies..." )
158
+ logger . debug ("Looking for dependencies..." )
156
159
157
160
for dep in dependencies :
158
161
p = which (dep )
@@ -166,11 +169,11 @@ def check_install():
166
169
'At least one of them is necessary to run GMSC-mapper.\n ' )
167
170
sys .exit (1 )
168
171
elif has_diamond and not has_mmseqs :
169
- print ('Warning: mmseqs does not appear to be available.You can only use the `--tool diamond` option(default).' )
172
+ logger . warning ('Warning: mmseqs does not appear to be available.You can only use the `--tool diamond` option(default).' )
170
173
elif not has_diamond and has_mmseqs :
171
- print ('Warning: diamond does not appear to be available.You can only use the `--tool mmseqs` option.' )
174
+ logger . warning ('Warning: diamond does not appear to be available.You can only use the `--tool mmseqs` option.' )
172
175
else :
173
- print ('Dependencies installation is OK\n ' )
176
+ logger . info ('Dependencies installation is OK\n ' )
174
177
return has_diamond ,has_mmseqs
175
178
176
179
def validate_args (args ,has_diamond ,has_mmseqs ):
@@ -231,16 +234,16 @@ def create_db(args):
231
234
mmseqs_cmd = ['mmseqs' ,'createdb' ,
232
235
args .target_faa ,
233
236
out_db ]
234
-
237
+
235
238
if args .mode == "diamond" :
236
- print ('Start creating Diamond database...' )
237
- subprocess .check_call (diamond_cmd )
238
- print ( ' \n Diamond database has been created successfully.\n ' )
239
+ logger . info ('Start creating Diamond database...' )
240
+ subprocess .check_call (diamond_cmd )
241
+ logger . info ( 'Diamond database has been created successfully.' )
239
242
240
243
if args .mode == "mmseqs" :
241
- print ('Start creating MMseqs database...' )
242
- subprocess .check_call (mmseqs_cmd )
243
- print ( ' \n MMseqs database has been created successfully.\n ' )
244
+ logger . debug ('Start creating MMseqs database...' )
245
+ subprocess .check_call (mmseqs_cmd )
246
+ logger . info ( 'MMseqs database has been created successfully.' )
244
247
245
248
def predict (args ,tmpdir ):
246
249
from gmsc_mapper .predict import predict_genes ,filter_smorfs
@@ -253,21 +256,21 @@ def predict(args,tmpdir):
253
256
predict_genes (args .genome_fasta ,predicted_smorf )
254
257
if not path .getsize (predicted_smorf ):
255
258
sys .stderr .write ("GMSC-mapper Error:No smORFs have been predicted.Please check your input file.\n " )
256
- sys .exit (1 )
259
+ sys .exit (1 )
257
260
else :
258
261
filter_smorfs (predicted_smorf , filtered_smorf )
259
262
if not path .getsize (filtered_smorf ):
260
263
sys .stderr .write ("GMSC-mapper Error:No smORFs remained after filtering by length(<100aa).\n " )
261
- sys .exit (1 )
264
+ sys .exit (1 )
262
265
else :
263
- print ( ' \n smORF prediction has done. \n ' )
266
+ logger . info ( 'smORF prediction complete ' )
264
267
return filtered_smorf
265
268
266
269
def translate_gene(args, tmpdir):
    """Run gene translation on ``args.nt_input``.

    Delegates to ``gmsc_mapper.translate.translate_gene`` with the
    nucleotide input and ``tmpdir``, logging before and after the call.

    Returns whatever the helper returns (presumably the path of the
    translated file -- confirm against ``gmsc_mapper.translate``).
    """
    # Imported lazily and aliased so the helper is not confused with this
    # wrapper, which carries the same name.
    from gmsc_mapper.translate import translate_gene as _translate

    logger.debug('Starting gene translation...')
    result = _translate(args.nt_input, tmpdir)
    logger.info('Gene translation complete ')
    return result
272
275
273
276
def check_length (queryfile ):
@@ -366,34 +369,33 @@ def generate_fasta(output,queryfile,resultfile):
366
369
for ID ,seq in fasta_iter (queryfile ):
367
370
if ID in smorf_id :
368
371
f .write (f'>{ ID } \n { seq } \n ' )
369
- print ('smORF fasta file generating has done. \n ' )
372
+ logger . debug ('smORF fasta file generation complete ' )
370
373
return fastafile
371
374
372
375
def habitat(args, resultfile):
    """Run habitat annotation for the aligned smORFs in ``resultfile``.

    Delegates to ``gmsc_mapper.map_habitat.smorf_habitat`` with
    ``args.habitatindex``, ``args.output``, ``args.habitat`` and
    ``resultfile``.

    Returns a 4-tuple ``(single_number, single_percentage, multi_number,
    multi_percentage)``. The caller reports the ``*_number`` values as
    counts of single-/multi-habitat smORFs and formats the
    ``*_percentage`` values with ``:.2%``, i.e. treats them as fractions.
    """
    from gmsc_mapper.map_habitat import smorf_habitat

    logger.debug('Starting habitat annotation...')
    # Unpack explicitly so an unexpected result shape fails here rather
    # than later in the summary code.
    single_number, single_percentage, multi_number, multi_percentage = smorf_habitat(
        args.habitatindex, args.output, args.habitat, resultfile)
    # Wording aligned with the other annotation steps ("... complete");
    # the previous message read 'habitat annotation has done.'.
    logger.info('Habitat annotation complete.')
    return single_number, single_percentage, multi_number, multi_percentage
378
381
379
382
def taxonomy(args, resultfile, tmpdirname):
    """Run taxonomy annotation and summarise the result.

    First calls ``deep_lca`` with ``args.taxonomyindex``, ``args.taxonomy``,
    ``args.output``, ``resultfile`` and ``tmpdirname``, then calls
    ``taxa_summary`` on ``args.output``.

    Returns the 3-tuple produced by ``taxa_summary``:
    ``(annotated_number, rank_number, rank_percentage)``.
    NOTE(review): the exact meaning/shape of these values is defined in
    ``gmsc_mapper.map_taxonomy`` -- confirm there.
    """
    from gmsc_mapper.map_taxonomy import deep_lca, taxa_summary

    logger.debug('Start taxonomy annotation...')
    # deep_lca must run before taxa_summary, which is given the same
    # output directory.
    deep_lca(args.taxonomyindex, args.taxonomy, args.output, resultfile, tmpdirname)
    n_annotated, per_rank_count, per_rank_fraction = taxa_summary(args.output)
    logger.info('Taxonomy annotation complete. ')
    return n_annotated, per_rank_count, per_rank_fraction
386
389
387
390
def quality(args, resultfile):
    """Run quality annotation for the aligned smORFs in ``resultfile``.

    Delegates to ``gmsc_mapper.map_quality.smorf_quality`` with
    ``args.output``, ``args.quality`` and ``resultfile``.

    Returns a ``(number, percentage)`` pair. The caller reports ``number``
    as the count of high-quality aligned smORFs and formats ``percentage``
    with ``:.2%``, i.e. treats it as a fraction.
    """
    from gmsc_mapper.map_quality import smorf_quality

    logger.debug('Start quality annotation...')
    hq_count, hq_fraction = smorf_quality(args.output, args.quality, resultfile)
    logger.info('Quality annotation completed. ')
    return hq_count, hq_fraction
393
396
394
397
def predicted_smorf_count(file_name):
    """Return the number of lines in the text file ``file_name``.

    Despite the name, this counts lines, not records: the caller divides
    the result by two to obtain the number of smORFs.

    The file is opened in a ``with`` block so the handle is closed
    deterministically instead of being left to the garbage collector.
    """
    with open(file_name, 'rt') as handle:
        # Stream the file line by line; nothing is kept in memory.
        return sum(1 for _ in handle)
397
399
398
400
def main (args = None ):
399
401
if args is None :
@@ -439,7 +441,7 @@ def main(args=None):
439
441
if args .genome_fasta :
440
442
queryfile = predict (args ,tmpdir )
441
443
smorf_number = int (predicted_smorf_count (queryfile )/ 2 )
442
- summary .append (f'{ str ( smorf_number ) } smORFs are predicted in total.' )
444
+ summary .append (f'{ smorf_number } smORFs are predicted in total.' )
443
445
if args .nt_input :
444
446
if args .filter :
445
447
args .nt_input = filter_length (args .nt_input ,tmpdir ,303 )
@@ -458,18 +460,18 @@ def main(args=None):
458
460
459
461
fastafile = generate_fasta (args .output ,queryfile ,resultfile )
460
462
smorf_number = int (predicted_smorf_count (fastafile )/ 2 )
461
- summary .append (f'{ str ( smorf_number ) } smORFs are aligned against GMSC in total.\n ' )
463
+ summary .append (f'{ smorf_number } smORFs aligned against GMSC in total.\n ' )
462
464
463
465
if not args .noquality :
464
466
summary .append (f'# Quality' )
465
- number ,percentage = quality (args ,resultfile )
466
- summary .append (f'{ str ( number ) } ( { str ( round ( percentage * 100 , 2 )) } % ) aligned smORFs are high quality.\n ' )
467
+ number ,percentage = quality (args ,resultfile )
468
+ summary .append (f'{ number } ( { percentage :.2% } ) aligned smORFs are high quality.\n ' )
467
469
468
470
if not args .nohabitat :
469
471
summary .append (f'# Habitat' )
470
472
single_number ,single_percentage ,multi_number ,multi_percentage = habitat (args ,resultfile )
471
- summary .append (f'{ str ( single_number ) } ( { str ( round ( single_percentage * 100 , 2 )) } % ) aligned smORFs are single-habitat.' )
472
- summary .append (f'{ str ( multi_number ) } ( { str ( round ( multi_percentage * 100 , 2 )) } % ) aligned smORFs are multi-habitat.\n ' )
473
+ summary .append (f'{ single_number } ( { single_percentage :.2% } ) aligned smORFs are single-habitat.' )
474
+ summary .append (f'{ multi_number } ( { multi_percentage :.2% } ) aligned smORFs are multi-habitat.\n ' )
473
475
474
476
if not args .notaxonomy :
475
477
summary .append (f'# Taxonomy' )
@@ -495,4 +497,4 @@ def main(args=None):
495
497
sys .exit (1 )
496
498
497
499
if __name__ == '__main__' :
498
- main (sys .argv )
500
+ main (sys .argv )
0 commit comments