@@ -242,14 +242,6 @@ def create_db(args):
242
242
subprocess .check_call (mmseqs_cmd )
243
243
print ('\n MMseqs database has been created successfully.\n ' )
244
244
245
- def flatten (items , ignore_types = (str , bytes )):
246
- from collections .abc import Iterable
247
- for x in items :
248
- if isinstance (x , Iterable ) and not isinstance (x , ignore_types ):
249
- yield from flatten (x )
250
- else :
251
- yield x
252
-
253
245
def predict (args ,tmpdir ):
254
246
from gmsc_mapper .predict import predict_genes ,filter_smorfs
255
247
@@ -301,34 +293,22 @@ def mapdb_diamond(args,queryfile):
301
293
302
294
resultfile = path .join (args .output ,"alignment.out.smorfs.tsv" )
303
295
outfmt = '6,qseqid,sseqid,full_qseq,full_sseq,qlen,slen,length,qstart,qend,sstart,send,bitscore,pident,evalue,qcovhsp,scovhsp'
304
-
296
+
297
+ diamond_cmd = ['diamond' ,'blastp' ,
298
+ '-q' ,queryfile ,
299
+ '-d' ,args .database ,
300
+ '-o' ,resultfile ,
301
+ args .sensitivity ,
302
+ '-e' ,str (args .evalue ),
303
+ '--id' ,str (float (args .identity )* 100 ),
304
+ '--query-cover' ,str (float (args .coverage )* 100 ),
305
+ '--subject-cover' ,str (float (args .coverage )* 100 ),
306
+ '-p' ,str (args .threads ),
307
+ '--outfmt' ] + outfmt .split (',' )
305
308
if args .quiet :
306
- diamond_cmd = ['diamond' ,'blastp' ,
307
- '-q' ,queryfile ,
308
- '-d' ,args .database ,
309
- '-o' ,resultfile ,
310
- args .sensitivity ,
311
- '-e' ,str (args .evalue ),
312
- '--id' ,str (float (args .identity )* 100 ),
313
- '--query-cover' ,str (float (args .coverage )* 100 ),
314
- '--subject-cover' ,str (float (args .coverage )* 100 ),
315
- '-p' ,str (args .threads ),
316
- '--outfmt' ,outfmt .split (',' ),
317
- '--quiet' ]
318
- else :
319
- diamond_cmd = ['diamond' ,'blastp' ,
320
- '-q' ,queryfile ,
321
- '-d' ,args .database ,
322
- '-o' ,resultfile ,
323
- args .sensitivity ,
324
- '-e' ,str (args .evalue ),
325
- '--id' ,str (float (args .identity )* 100 ),
326
- '--query-cover' ,str (float (args .coverage )* 100 ),
327
- '--subject-cover' ,str (float (args .coverage )* 100 ),
328
- '-p' ,str (args .threads ),
329
- '--outfmt' ,outfmt .split (',' )]
309
+ diamond_cmd .append ('--quiet' )
330
310
331
- subprocess .check_call ([ x for x in flatten ( diamond_cmd )])
311
+ subprocess .check_call (diamond_cmd )
332
312
333
313
logger .info ('smORF mapping complete' )
334
314
return resultfile
@@ -341,53 +321,30 @@ def mapdb_mmseqs(args, queryfile, tmpdir):
341
321
tmp = path .join (tmpdir ,"tmp" ,"" )
342
322
resultfile = path .join (args .output ,"alignment.out.smorfs.tsv" )
343
323
outfmt = 'query,target,qseq,tseq,qlen,tlen,alnlen,qstart,qend,tstart,tend,bits,pident,evalue,qcov,tcov'
344
-
345
- if args .quiet :
346
- mmseqs_cmd_db = ['mmseqs' ,'createdb' ,queryfile ,querydb ,'-v' ,'0' ]
347
- mmseqs_cmd_search = ['mmseqs' ,'search' ,
348
- querydb ,
349
- args .database ,
350
- resultdb ,
351
- tmp ,
352
- '-s' ,str (args .sensitivity ),
353
- '-e' ,str (args .evalue ),
354
- '--min-seq-id' ,str (args .identity ),
355
- '-c' ,str (args .coverage ),
356
- '--threads' ,str (args .threads ),
357
- '-v' ,'0' ]
358
- mmseqs_cmd_out = ['mmseqs' ,'convertalis' ,
359
- querydb ,
360
- args .database ,
361
- resultdb ,
362
- resultfile ,
363
- '--format-output' ,outfmt ,
364
- '-v' ,'0' ]
365
- else :
366
- mmseqs_cmd_db = ['mmseqs' ,'createdb' ,queryfile ,querydb ]
367
- mmseqs_cmd_search = ['mmseqs' ,'search' ,
368
- querydb ,
369
- args .database ,
370
- resultdb ,
371
- tmp ,
372
- '-s' ,str (args .sensitivity ),
373
- '-e' ,str (args .evalue ),
374
- '--min-seq-id' ,str (args .identity ),
375
- '-c' ,str (args .coverage ),
376
- '--threads' ,str (args .threads )]
377
- mmseqs_cmd_out = ['mmseqs' ,'convertalis' ,
324
+
325
+ mmseqs_cmd_db = ['mmseqs' , 'createdb' , queryfile , querydb ]
326
+ mmseqs_cmd_search = ['mmseqs' ,'search' ,
378
327
querydb ,
379
328
args .database ,
380
329
resultdb ,
381
- resultfile ,
382
- '--format-output' ,outfmt ]
383
-
384
- subprocess .check_call (mmseqs_cmd_db )
385
-
386
- subprocess .check_call (mmseqs_cmd_search )
387
-
388
- subprocess .check_call (mmseqs_cmd_out )
330
+ tmp ,
331
+ '-s' ,str (args .sensitivity ),
332
+ '-e' ,str (args .evalue ),
333
+ '--min-seq-id' ,str (args .identity ),
334
+ '-c' ,str (args .coverage ),
335
+ '--threads' ,str (args .threads )]
336
+ mmseqs_cmd_out = ['mmseqs' ,'convertalis' ,
337
+ querydb ,
338
+ args .database ,
339
+ resultdb ,
340
+ resultfile ,
341
+ '--format-output' , outfmt ]
342
+ for mmseqs_cmd in [mmseqs_cmd_db ,mmseqs_cmd_search ,mmseqs_cmd_out ]:
343
+ if args .quiet :
344
+ mmseqs_cmd .extend (['-v' , '0' ])
345
+ subprocess .check_call (mmseqs_cmd )
389
346
390
- print ( ' \n smORF mapping has done. \n ' )
347
+ logger . info ( 'smORF mapping complete ' )
391
348
return resultfile
392
349
393
350
def generate_fasta (output ,queryfile ,resultfile ):
@@ -456,30 +413,24 @@ def main(args=None):
456
413
if args .tool == 'diamond' :
457
414
if args .database is None :
458
415
args .database = path .join (_ROOT , 'db/targetdb.dmnd' )
459
- if args .sensitivity is None :
460
- args .sensitivity = '--more-sensitive'
461
- if args .sensitivity == '1' :
462
- args .sensitivity = '--fast'
463
- if args .sensitivity == '2' :
464
- args .sensitivity = '--mid-sensitive'
465
- if args .sensitivity == '3' :
466
- args .sensitivity = '--default'
467
- if args .sensitivity == '4' :
468
- args .sensitivity = '--sensitive'
469
- if args .sensitivity == '5' :
470
- args .sensitivity = '--more-sensitive'
471
- if args .sensitivity == '6' :
472
- args .sensitivity = '--very-sensitive'
473
- if args .sensitivity == '7' :
474
- args .sensitivity = '--ultra-sensitive'
416
+ args .sensitivity = {
417
+ None : '--more-sensitive' ,
418
+ '1' : '--fast' ,
419
+ '2' : '--mid-sensitive' ,
420
+ '3' : '--default' ,
421
+ '4' : '--sensitive' ,
422
+ '5' : '--more-sensitive' ,
423
+ '6' : '--very-sensitive' ,
424
+ '7' : '--ultra-sensitive' ,
425
+ }.get (args .sensitivity , args .sensitivity )
426
+
475
427
if args .tool == 'mmseqs' :
476
428
if args .database is None :
477
429
args .database = path .join (_ROOT , 'db/targetdb' )
478
430
if args .sensitivity is None :
479
431
args .sensitivity = 5.7
480
432
481
- if not os .path .exists (args .output ):
482
- os .makedirs (args .output )
433
+ os .makedirs (args .output , exist_ok = True )
483
434
484
435
with tempfile .TemporaryDirectory () as tmpdir :
485
436
try :
0 commit comments