@@ -255,16 +255,16 @@ def predict(args,tmpdir):
255
255
256
256
predict_genes (args .genome_fasta ,predicted_smorf )
257
257
if not path .getsize (predicted_smorf ):
258
- sys . stderr . write ("GMSC-mapper Error :No smORFs have been predicted.Please check your input file.\n " )
259
- sys . exit ( 1 )
258
+ logger . info ("GMSC-mapper Info :No smORFs have been predicted.Please check your input file.\n " )
259
+ return ( "" , False )
260
260
else :
261
261
filter_smorfs (predicted_smorf , filtered_smorf )
262
- if not path .getsize (filtered_smorf ):
263
- sys . stderr . write ("GMSC-mapper Error :No smORFs remained after filtering by length(<100aa).\n " )
264
- sys . exit ( 1 )
265
- else :
266
- logger .info ('smORF prediction complete' )
267
- return filtered_smorf
262
+ if not path .getsize (filtered_smorf ):
263
+ logger . info ("GMSC-mapper Info :No smORFs remained after filtering by length(<100aa).\n " )
264
+ return ( "" , False )
265
+ else :
266
+ logger .info ('smORF prediction complete' )
267
+ return ( filtered_smorf , True )
268
268
269
269
def translate_gene (args ,tmpdir ):
270
270
from gmsc_mapper .translate import translate_gene
@@ -357,20 +357,21 @@ def generate_fasta(output,queryfile,resultfile):
357
357
try :
358
358
result = pd .read_csv (resultfile , sep = '\t ' ,header = None )
359
359
except :
360
- logger .error ('GMSC-mapper error: There is no alignment results between your input sequences and GMSC.\n ' )
361
- sys .exit (1 )
362
-
363
- logger .debug ('Start smORF fasta file generation...' )
364
- fastafile = path .join (output ,"mapped.smorfs.faa" )
360
+ print ('GMSC-mapper info: There is no alignment results between your input sequences and GMSC.\n ' )
361
+ logger .info ('GMSC-mapper info: There is no alignment results between your input sequences and GMSC.\n ' )
362
+ return ("" ,False )
363
+ else :
364
+ logger .debug ('Start smORF fasta file generation...' )
365
+ fastafile = path .join (output ,"mapped.smorfs.faa" )
365
366
366
- smorf_id = set (result .iloc [:, 0 ].tolist ())
367
+ smorf_id = set (result .iloc [:, 0 ].tolist ())
367
368
368
- with open (fastafile ,"wt" ) as f :
369
- for ID ,seq in fasta_iter (queryfile ):
370
- if ID in smorf_id :
371
- f .write (f'>{ ID } \n { seq } \n ' )
372
- logger .debug ('smORF fasta file generation complete' )
373
- return fastafile
369
+ with open (fastafile ,"wt" ) as f :
370
+ for ID ,seq in fasta_iter (queryfile ):
371
+ if ID in smorf_id :
372
+ f .write (f'>{ ID } \n { seq } \n ' )
373
+ logger .debug ('smORF fasta file generation complete' )
374
+ return ( fastafile , True )
374
375
375
376
def habitat (args ,resultfile ):
376
377
from gmsc_mapper .map_habitat import smorf_habitat
@@ -439,9 +440,12 @@ def main(args=None):
439
440
summary = []
440
441
summary .append (f'# Total number' )
441
442
if args .genome_fasta :
442
- queryfile = predict (args ,tmpdir )
443
- smorf_number = int (predicted_smorf_count (queryfile )/ 2 )
444
- summary .append (f'{ smorf_number } smORFs are predicted in total.' )
443
+ (queryfile ,ifpredict ) = predict (args ,tmpdir )
444
+ if ifpredict :
445
+ smorf_number = int (predicted_smorf_count (queryfile )/ 2 )
446
+ summary .append (f'{ smorf_number } smORFs are predicted in total.' )
447
+ else :
448
+ summary .append (f'No smORFs are predicted.' )
445
449
if args .nt_input :
446
450
if args .filter :
447
451
args .nt_input = filter_length (args .nt_input ,tmpdir ,303 )
@@ -452,43 +456,38 @@ def main(args=None):
452
456
if args .filter :
453
457
args .aa_input = filter_length (args .aa_input ,tmpdir ,100 )
454
458
queryfile = args .aa_input
455
-
456
- if args .tool == 'diamond' :
457
- resultfile = mapdb_diamond (args ,queryfile )
458
- if args .tool == 'mmseqs' :
459
- resultfile = mapdb_mmseqs (args ,queryfile ,tmpdir )
460
-
461
- fastafile = generate_fasta (args .output ,queryfile ,resultfile )
462
- smorf_number = int (predicted_smorf_count (fastafile )/ 2 )
463
- summary .append (f'{ smorf_number } smORFs are aligned against GMSC in total.\n ' )
464
-
465
- if not args .noquality :
466
- summary .append (f'# Quality' )
467
- number ,percentage = quality (args ,resultfile )
468
- summary .append (f'{ number } ({ percentage :.2%} ) aligned smORFs are high quality.\n ' )
469
-
470
- if not args .nohabitat :
471
- summary .append (f'# Habitat' )
472
- single_number ,single_percentage ,multi_number ,multi_percentage = habitat (args ,resultfile )
473
- summary .append (f'{ single_number } ({ single_percentage :.2%} ) aligned smORFs are single-habitat.' )
474
- summary .append (f'{ multi_number } ({ multi_percentage :.2%} ) aligned smORFs are multi-habitat.\n ' )
475
-
476
- if not args .notaxonomy :
477
- summary .append (f'# Taxonomy' )
478
- annotated_number ,rank_number ,rank_percentage = taxonomy (args ,resultfile ,tmpdir )
479
- summary .append (
480
- f'{ annotated_number } ({ 1 - rank_percentage ["no rank" ]:.2%} ) aligned smORFs have taxonomy annotation.' )
481
- for rank in [
482
- 'kingdom' ,
483
- 'phylum' ,
484
- 'class' ,
485
- 'order' ,
486
- 'family' ,
487
- 'genus' ,
488
- 'species' ]:
489
- summary .append (
490
- f'{ rank_number [rank ]} ({ rank_percentage [rank ]:.2%} ) aligned smORFs are annotated at level of { rank } .' )
491
-
459
+
460
+ if (args .genome_fasta and ifpredict ) or args .nt_input or args .aa_input :
461
+ if args .tool == 'diamond' :
462
+ resultfile = mapdb_diamond (args ,queryfile )
463
+ if args .tool == 'mmseqs' :
464
+ resultfile = mapdb_mmseqs (args ,queryfile ,tmpdir )
465
+
466
+ (fastafile ,ifsuccess ) = generate_fasta (args .output ,queryfile ,resultfile )
467
+ if ifsuccess :
468
+ smorf_number = int (predicted_smorf_count (fastafile )/ 2 )
469
+ summary .append (f'{ smorf_number } smORFs are aligned against GMSC in total.\n ' )
470
+
471
+ if not args .noquality :
472
+ summary .append (f'# Quality' )
473
+ number ,percentage = quality (args ,resultfile )
474
+ summary .append (f'{ number } ({ percentage :.2%} ) aligned smORFs are high quality.\n ' )
475
+
476
+ if not args .nohabitat :
477
+ summary .append (f'# Habitat' )
478
+ single_number ,single_percentage ,multi_number ,multi_percentage = habitat (args ,resultfile )
479
+ summary .append (f'{ single_number } ({ single_percentage :.2%} ) aligned smORFs are single-habitat.' )
480
+ summary .append (f'{ multi_number } ({ multi_percentage :.2%} ) aligned smORFs are multi-habitat.\n ' )
481
+
482
+ if not args .notaxonomy :
483
+ summary .append (f'# Taxonomy' )
484
+ annotated_number ,rank_number ,rank_percentage = taxonomy (args ,resultfile ,tmpdir )
485
+ summary .append (f'{ annotated_number } ({ 1 - rank_percentage ["no rank" ]:.2%} ) aligned smORFs have taxonomy annotation.' )
486
+ for rank in ['kingdom' ,'phylum' ,'class' ,'order' ,'family' ,'genus' ,'species' ]:
487
+ summary .append (f'{ rank_number [rank ]} ({ rank_percentage [rank ]:.2%} ) aligned smORFs are annotated at level of { rank } .' )
488
+ else :
489
+ summary .append (f'None of sequences are aligned against GMSC.\n ' )
490
+
492
491
with atomic_write (f'{ args .output } /summary.txt' , overwrite = True ) as ofile :
493
492
for s in summary :
494
493
print (s )
0 commit comments