Skip to content

Commit de37d89

Browse files
committed
RFCT Simplify code by collapsing similar regions
1 parent d1daa49 commit de37d89

File tree

1 file changed

+46
-95
lines changed

1 file changed

+46
-95
lines changed

gmsc_mapper/main.py

Lines changed: 46 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -242,14 +242,6 @@ def create_db(args):
242242
subprocess.check_call(mmseqs_cmd)
243243
print('\nMMseqs database has been created successfully.\n')
244244

245-
def flatten(items, ignore_types=(str, bytes)):
246-
from collections.abc import Iterable
247-
for x in items:
248-
if isinstance(x, Iterable) and not isinstance(x, ignore_types):
249-
yield from flatten(x)
250-
else:
251-
yield x
252-
253245
def predict(args,tmpdir):
254246
from gmsc_mapper.predict import predict_genes,filter_smorfs
255247

@@ -301,34 +293,22 @@ def mapdb_diamond(args,queryfile):
301293

302294
resultfile = path.join(args.output,"alignment.out.smorfs.tsv")
303295
outfmt = '6,qseqid,sseqid,full_qseq,full_sseq,qlen,slen,length,qstart,qend,sstart,send,bitscore,pident,evalue,qcovhsp,scovhsp'
304-
296+
297+
diamond_cmd = ['diamond','blastp',
298+
'-q',queryfile,
299+
'-d',args.database,
300+
'-o',resultfile,
301+
args.sensitivity,
302+
'-e',str(args.evalue),
303+
'--id',str(float(args.identity)*100),
304+
'--query-cover',str(float(args.coverage)*100),
305+
'--subject-cover',str(float(args.coverage)*100),
306+
'-p',str(args.threads),
307+
'--outfmt'] + outfmt.split(',')
305308
if args.quiet:
306-
diamond_cmd = ['diamond','blastp',
307-
'-q',queryfile,
308-
'-d',args.database,
309-
'-o',resultfile,
310-
args.sensitivity,
311-
'-e',str(args.evalue),
312-
'--id',str(float(args.identity)*100),
313-
'--query-cover',str(float(args.coverage)*100),
314-
'--subject-cover',str(float(args.coverage)*100),
315-
'-p',str(args.threads),
316-
'--outfmt',outfmt.split(','),
317-
'--quiet']
318-
else:
319-
diamond_cmd = ['diamond','blastp',
320-
'-q',queryfile,
321-
'-d',args.database,
322-
'-o',resultfile,
323-
args.sensitivity,
324-
'-e',str(args.evalue),
325-
'--id',str(float(args.identity)*100),
326-
'--query-cover',str(float(args.coverage)*100),
327-
'--subject-cover',str(float(args.coverage)*100),
328-
'-p',str(args.threads),
329-
'--outfmt',outfmt.split(',')]
309+
diamond_cmd.append('--quiet')
330310

331-
subprocess.check_call([x for x in flatten(diamond_cmd)])
311+
subprocess.check_call(diamond_cmd)
332312

333313
logger.info('smORF mapping complete')
334314
return resultfile
@@ -341,53 +321,30 @@ def mapdb_mmseqs(args, queryfile, tmpdir):
341321
tmp = path.join(tmpdir,"tmp","")
342322
resultfile = path.join(args.output,"alignment.out.smorfs.tsv")
343323
outfmt = 'query,target,qseq,tseq,qlen,tlen,alnlen,qstart,qend,tstart,tend,bits,pident,evalue,qcov,tcov'
344-
345-
if args.quiet:
346-
mmseqs_cmd_db = ['mmseqs','createdb',queryfile,querydb,'-v','0']
347-
mmseqs_cmd_search = ['mmseqs','search',
348-
querydb,
349-
args.database,
350-
resultdb,
351-
tmp,
352-
'-s',str(args.sensitivity),
353-
'-e',str(args.evalue),
354-
'--min-seq-id',str(args.identity),
355-
'-c',str(args.coverage),
356-
'--threads',str(args.threads),
357-
'-v','0']
358-
mmseqs_cmd_out = ['mmseqs','convertalis',
359-
querydb,
360-
args.database,
361-
resultdb,
362-
resultfile,
363-
'--format-output',outfmt,
364-
'-v','0']
365-
else:
366-
mmseqs_cmd_db = ['mmseqs','createdb',queryfile,querydb]
367-
mmseqs_cmd_search = ['mmseqs','search',
368-
querydb,
369-
args.database,
370-
resultdb,
371-
tmp,
372-
'-s',str(args.sensitivity),
373-
'-e',str(args.evalue),
374-
'--min-seq-id',str(args.identity),
375-
'-c',str(args.coverage),
376-
'--threads',str(args.threads)]
377-
mmseqs_cmd_out = ['mmseqs','convertalis',
324+
325+
mmseqs_cmd_db = ['mmseqs', 'createdb', queryfile, querydb]
326+
mmseqs_cmd_search = ['mmseqs','search',
378327
querydb,
379328
args.database,
380329
resultdb,
381-
resultfile,
382-
'--format-output',outfmt]
383-
384-
subprocess.check_call(mmseqs_cmd_db)
385-
386-
subprocess.check_call(mmseqs_cmd_search)
387-
388-
subprocess.check_call(mmseqs_cmd_out)
330+
tmp,
331+
'-s',str(args.sensitivity),
332+
'-e',str(args.evalue),
333+
'--min-seq-id',str(args.identity),
334+
'-c',str(args.coverage),
335+
'--threads',str(args.threads)]
336+
mmseqs_cmd_out = ['mmseqs','convertalis',
337+
querydb,
338+
args.database,
339+
resultdb,
340+
resultfile,
341+
'--format-output', outfmt]
342+
for mmseqs_cmd in [mmseqs_cmd_db,mmseqs_cmd_search,mmseqs_cmd_out]:
343+
if args.quiet:
344+
mmseqs_cmd.extend(['-v', '0'])
345+
subprocess.check_call(mmseqs_cmd)
389346

390-
print('\nsmORF mapping has done.\n')
347+
logger.info('smORF mapping complete')
391348
return resultfile
392349

393350
def generate_fasta(output,queryfile,resultfile):
@@ -456,30 +413,24 @@ def main(args=None):
456413
if args.tool == 'diamond':
457414
if args.database is None:
458415
args.database = path.join(_ROOT, 'db/targetdb.dmnd')
459-
if args.sensitivity is None:
460-
args.sensitivity = '--more-sensitive'
461-
if args.sensitivity == '1':
462-
args.sensitivity = '--fast'
463-
if args.sensitivity == '2':
464-
args.sensitivity = '--mid-sensitive'
465-
if args.sensitivity == '3':
466-
args.sensitivity = '--default'
467-
if args.sensitivity == '4':
468-
args.sensitivity = '--sensitive'
469-
if args.sensitivity == '5':
470-
args.sensitivity = '--more-sensitive'
471-
if args.sensitivity == '6':
472-
args.sensitivity = '--very-sensitive'
473-
if args.sensitivity == '7':
474-
args.sensitivity = '--ultra-sensitive'
416+
args.sensitivity = {
417+
None: '--more-sensitive',
418+
'1': '--fast',
419+
'2': '--mid-sensitive',
420+
'3': '--default',
421+
'4': '--sensitive',
422+
'5': '--more-sensitive',
423+
'6': '--very-sensitive',
424+
'7': '--ultra-sensitive',
425+
}.get(args.sensitivity, args.sensitivity)
426+
475427
if args.tool == 'mmseqs':
476428
if args.database is None:
477429
args.database = path.join(_ROOT, 'db/targetdb')
478430
if args.sensitivity is None:
479431
args.sensitivity = 5.7
480432

481-
if not os.path.exists(args.output):
482-
os.makedirs(args.output)
433+
os.makedirs(args.output, exist_ok=True)
483434

484435
with tempfile.TemporaryDirectory() as tmpdir:
485436
try:

0 commit comments

Comments (0)