6
6
import pandas as pd
7
7
import tempfile
8
8
from atomicwrites import atomic_write
9
+ import logging
9
10
10
11
_ROOT = path .abspath (path .join (os .getcwd (), ".." ))
11
12
13
+ logger = logging .getLogger ('GMSC-mapper' )
14
+
12
15
def parse_args (args ):
13
16
parser = argparse .ArgumentParser (formatter_class = argparse .ArgumentDefaultsHelpFormatter ,
14
17
description = 'GMSC-mapper' )
@@ -250,7 +253,7 @@ def flatten(items, ignore_types=(str, bytes)):
250
253
def predict (args ,tmpdir ):
251
254
from gmsc_mapper .predict import predict_genes ,filter_smorfs
252
255
253
- print ( 'Start smORF prediction...' )
256
+ logger . debug ( 'Starting smORF prediction...' )
254
257
255
258
predicted_smorf = path .join (tmpdir ,"predicted.smorf.faa" )
256
259
filtered_smorf = path .join (args .output ,"predicted.filterd.smorf.faa" )
@@ -277,30 +280,25 @@ def translate_gene(args,tmpdir):
277
280
278
281
def check_length(queryfile):
    """Warn when no sequence in ``queryfile`` is shorter than 303 nt.

    The check is advisory only: a warning is logged and processing continues.

    Parameters
    ----------
    queryfile : path of the FASTA file, passed unchanged to ``fasta_iter``,
        which is iterated as ``(identifier, sequence)`` pairs.

    Returns
    -------
    None
    """
    from gmsc_mapper.fasta import fasta_iter

    logger.debug('Start length checking...')
    # The warning must fire only when NO sequence is shorter than 303 nt
    # (i.e. every sequence is >= 303), which is what the message describes.
    # Testing `< 303` here would invert the check and warn on valid
    # short-gene input instead. `all()` stops at the first short sequence,
    # so the file is not read further than necessary.
    # NOTE: an input with no sequences also satisfies `all()` and triggers
    # the warning.
    if all(len(seq) >= 303
           for _, seq in fasta_iter(queryfile)):
        logger.warning('GMSC-mapper Warning: Input sequences are all more than 303nt. '
                       'Please check if your input consists of contigs, which should use -i not --nt-genes or --aa-genes as input. '
                       'However, we will regard your input sequences as nucleotide genes and continue to process.\n ')

    logger.info('Length checking has done.\n ')
293
291
294
292
def filter_length(queryfile, tmpdir, N):
    """Run the length filter on ``queryfile`` and return its result.

    Delegates to ``gmsc_mapper.filter_length.filter_length`` with the same
    three arguments, logging before and after the call.

    Parameters
    ----------
    queryfile : input file, forwarded unchanged to the filter.
    tmpdir : working directory, forwarded unchanged to the filter.
    N : forwarded unchanged to the filter
        (presumably the length threshold -- confirm against the helper).

    Returns
    -------
    Whatever the underlying filter returns (bound as the filtered file).
    """
    # Aliased so the helper does not share a name with this wrapper.
    from gmsc_mapper.filter_length import filter_length as _run_length_filter

    logger.debug('Starting length filter...')
    length_filtered = _run_length_filter(queryfile, tmpdir, N)
    logger.info('Length filter complete ')
    return length_filtered
300
298
301
299
def mapdb_diamond (args ,queryfile ):
302
- print ( 'Start smORF mapping...' )
303
-
300
+ logger . debug ( 'Starting smORF mapping...' )
301
+
304
302
resultfile = path .join (args .output ,"alignment.out.smorfs.tsv" )
305
303
outfmt = '6,qseqid,sseqid,full_qseq,full_sseq,qlen,slen,length,qstart,qend,sstart,send,bitscore,pident,evalue,qcovhsp,scovhsp'
306
304
@@ -332,12 +330,12 @@ def mapdb_diamond(args,queryfile):
332
330
333
331
subprocess .check_call ([x for x in flatten (diamond_cmd )])
334
332
335
- print ( ' \n smORF mapping has done. \n ' )
333
+ logger . info ( 'smORF mapping complete ' )
336
334
return resultfile
337
335
338
- def mapdb_mmseqs (args ,queryfile ,tmpdir ):
339
- print ('Start smORF mapping...' )
340
-
336
+ def mapdb_mmseqs (args , queryfile , tmpdir ):
337
+ logger . info ('Start smORF mapping...' )
338
+
341
339
querydb = path .join (tmpdir ,"query.db" )
342
340
resultdb = path .join (tmpdir ,"result.db" )
343
341
tmp = path .join (tmpdir ,"tmp" ,"" )
@@ -399,10 +397,10 @@ def generate_fasta(output,queryfile,resultfile):
399
397
try :
400
398
result = pd .read_csv (resultfile , sep = '\t ' ,header = None )
401
399
except :
402
- print ('GMSC-mapper Warning : There is no alignment results between your input sequences and GMSC.\n ' )
400
+ logger . error ('GMSC-mapper error : There is no alignment results between your input sequences and GMSC.\n ' )
403
401
sys .exit (1 )
404
402
405
- print ('Start smORF fasta file generating ...' )
403
+ logger . debug ('Start smORF fasta file generation ...' )
406
404
fastafile = path .join (output ,"mapped.smorfs.faa" )
407
405
408
406
smorf_id = set (result .iloc [:, 0 ].tolist ())
0 commit comments