Skip to content

Commit d9ba509

Browse files
committed
ENH add --quiet for alignment tools
1 parent 1fcad2a commit d9ba509

File tree

6 files changed

+137
-85
lines changed

6 files changed

+137
-85
lines changed

gmsc_mapper/main.py

Lines changed: 102 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ def parse_args(args):
3434
help='Alignment tool (Diamond / MMseqs2)',
3535
dest='mode',
3636
default = None)
37+
cmd_create_db.add_argument('--quiet','--quiet',action='store_true', help='Disable alignment console output')
3738

3839
parser.add_argument('-i', '--input',
3940
required=False,
@@ -102,6 +103,8 @@ def parse_args(args):
102103
parser.add_argument('--notaxonomy', '--notaxonomy',action='store_true', help='Use this if no need to annotate taxonomy')
103104

104105
parser.add_argument('--noquality', '--noquality',action='store_true', help='Use this if no need to annotate quality')
106+
107+
parser.add_argument('--quiet','--quiet',action='store_true', help='Disable alignment console output')
105108

106109
parser.add_argument('--db', '--db',
107110
required=False,
@@ -193,24 +196,36 @@ def expect_file(f):
193196
if not args.noquality and args.quality:
194197
expect_file(args.quality)
195198

196-
def create_db(arguments):
197-
if not os.path.exists(arguments.output):
198-
os.makedirs(arguments.output)
199-
out_db = path.join(arguments.output,"targetdb")
200-
201-
if arguments.mode == "diamond":
199+
def create_db(args):
200+
if not os.path.exists(args.output):
201+
os.makedirs(args.output)
202+
out_db = path.join(args.output,"targetdb")
203+
204+
if args.quiet:
205+
diamond_cmd = ['diamond','makedb',
206+
'--in',args.target_faa,
207+
'-d',out_db,
208+
'--quiet']
209+
mmseqs_cmd = ['mmseqs','createdb',
210+
args.target_faa,
211+
out_db,
212+
'-v','0']
213+
else:
214+
diamond_cmd = ['diamond','makedb',
215+
'--in',args.target_faa,
216+
'-d',out_db]
217+
mmseqs_cmd = ['mmseqs','createdb',
218+
args.target_faa,
219+
out_db]
220+
221+
if args.mode == "diamond":
202222
print('Start creating Diamond database...')
203-
subprocess.check_call([
204-
'diamond','makedb',
205-
'--in',arguments.target_faa,
206-
'-d',out_db])
223+
subprocess.check_call(diamond_cmd)
207224
print('\nDiamond database has been created successfully.\n')
208-
if arguments.mode == "mmseqs":
225+
226+
if args.mode == "mmseqs":
209227
print('Start creating MMseqs database...')
210-
subprocess.check_call([
211-
'mmseqs','createdb',
212-
arguments.target_faa,
213-
out_db])
228+
subprocess.check_call(mmseqs_cmd)
214229
print('\nMMseqs database has been created successfully.\n')
215230

216231
def flatten(items, ignore_types=(str, bytes)):
@@ -278,18 +293,33 @@ def mapdb_diamond(args,queryfile):
278293
resultfile = path.join(args.output,"alignment.out.smorfs.tsv")
279294
outfmt = '6,qseqid,sseqid,full_qseq,full_sseq,qlen,slen,length,qstart,qend,sstart,send,bitscore,pident,evalue,qcovhsp,scovhsp'
280295

281-
subprocess.check_call([x for x in flatten([
282-
'diamond','blastp',
283-
'-q',queryfile,
284-
'-d',args.database,
285-
'-o',resultfile,
286-
args.sensitivity,
287-
'-e',str(args.evalue),
288-
'--id',str(float(args.identity)*100),
289-
'--query-cover',str(float(args.coverage)*100),
290-
'--subject-cover',str(float(args.coverage)*100),
291-
'-p',str(args.threads),
292-
'--outfmt',outfmt.split(',')])])
296+
if args.quiet:
297+
diamond_cmd = ['diamond','blastp',
298+
'-q',queryfile,
299+
'-d',args.database,
300+
'-o',resultfile,
301+
args.sensitivity,
302+
'-e',str(args.evalue),
303+
'--id',str(float(args.identity)*100),
304+
'--query-cover',str(float(args.coverage)*100),
305+
'--subject-cover',str(float(args.coverage)*100),
306+
'-p',str(args.threads),
307+
'--outfmt',outfmt.split(','),
308+
'--quiet']
309+
else:
310+
diamond_cmd = ['diamond','blastp',
311+
'-q',queryfile,
312+
'-d',args.database,
313+
'-o',resultfile,
314+
args.sensitivity,
315+
'-e',str(args.evalue),
316+
'--id',str(float(args.identity)*100),
317+
'--query-cover',str(float(args.coverage)*100),
318+
'--subject-cover',str(float(args.coverage)*100),
319+
'-p',str(args.threads),
320+
'--outfmt',outfmt.split(',')]
321+
322+
subprocess.check_call([x for x in flatten(diamond_cmd)])
293323

294324
print('\nsmORF mapping has done.\n')
295325
return resultfile
@@ -302,29 +332,51 @@ def mapdb_mmseqs(args,queryfile,tmpdir):
302332
tmp = path.join(tmpdir,"tmp","")
303333
resultfile = path.join(args.output,"alignment.out.smorfs.tsv")
304334
outfmt = 'query,target,qseq,tseq,qlen,tlen,alnlen,qstart,qend,tstart,tend,bits,pident,evalue,qcov,tcov'
305-
306-
subprocess.check_call([
307-
'mmseqs','createdb',queryfile,querydb])
308-
309-
subprocess.check_call([
310-
'mmseqs','search',
311-
querydb,
312-
args.database,
313-
resultdb,
314-
tmp,
315-
'-s',str(args.sensitivity),
316-
'-e',str(args.evalue),
317-
'--min-seq-id',str(args.identity),
318-
'-c',str(args.coverage),
319-
'--threads',str(args.threads)])
320-
321-
subprocess.check_call([
322-
'mmseqs','convertalis',
323-
querydb,
324-
args.database,
325-
resultdb,
326-
resultfile,
327-
'--format-output',outfmt])
335+
336+
if args.quiet:
337+
mmseqs_cmd_db = ['mmseqs','createdb',queryfile,querydb,'-v','0']
338+
mmseqs_cmd_search = ['mmseqs','search',
339+
querydb,
340+
args.database,
341+
resultdb,
342+
tmp,
343+
'-s',str(args.sensitivity),
344+
'-e',str(args.evalue),
345+
'--min-seq-id',str(args.identity),
346+
'-c',str(args.coverage),
347+
'--threads',str(args.threads),
348+
'-v','0']
349+
mmseqs_cmd_out = ['mmseqs','convertalis',
350+
querydb,
351+
args.database,
352+
resultdb,
353+
resultfile,
354+
'--format-output',outfmt,
355+
'-v','0']
356+
else:
357+
mmseqs_cmd_db = ['mmseqs','createdb',queryfile,querydb]
358+
mmseqs_cmd_search = ['mmseqs','search',
359+
querydb,
360+
args.database,
361+
resultdb,
362+
tmp,
363+
'-s',str(args.sensitivity),
364+
'-e',str(args.evalue),
365+
'--min-seq-id',str(args.identity),
366+
'-c',str(args.coverage),
367+
'--threads',str(args.threads)]
368+
mmseqs_cmd_out = ['mmseqs','convertalis',
369+
querydb,
370+
args.database,
371+
resultdb,
372+
resultfile,
373+
'--format-output',outfmt]
374+
375+
subprocess.check_call(mmseqs_cmd_db)
376+
377+
subprocess.check_call(mmseqs_cmd_search)
378+
379+
subprocess.check_call(mmseqs_cmd_out)
328380

329381
print('\nsmORF mapping has done.\n')
330382
return resultfile

tests.sh

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4,17 +4,17 @@ echo "# GMSC-Mapper
44
AUTHORS: Yiqian Duan, Celio Dias Santos Junior, Luis Pedro Coelho"
55

66
echo "Creating mocking databases"
7-
gmsc-mapper createdb -i examples/target.faa -o examples/ -m diamond
8-
gmsc-mapper createdb -i examples/target.faa -o examples/ -m mmseqs
7+
gmsc-mapper createdb -i examples/target.faa -o examples/ -m diamond --quiet
8+
gmsc-mapper createdb -i examples/target.faa -o examples/ -m mmseqs --quiet
99

1010
echo "Testing basic usage"
11-
gmsc-mapper -i ./examples/example.fa -o ./examples_output/ --db ./examples/targetdb.dmnd --habitat ./examples/ref_habitat.txt --quality ./examples/ref_quality.txt --taxonomy ./examples/ref_taxonomy.txt
11+
gmsc-mapper -i ./examples/example.fa -o ./examples_output/ --db ./examples/targetdb.dmnd --habitat ./examples/ref_habitat.txt --quality ./examples/ref_quality.txt --taxonomy ./examples/ref_taxonomy.txt --quiet
1212
python tests/diamond_contig.py
13-
gmsc-mapper --aa-genes examples/example.faa -o examples_output/ --db examples/targetdb.dmnd --habitat examples/ref_habitat.txt --quality examples/ref_quality.txt --taxonomy examples/ref_taxonomy.txt
13+
gmsc-mapper --aa-genes examples/example.faa -o examples_output/ --db examples/targetdb.dmnd --habitat examples/ref_habitat.txt --quality examples/ref_quality.txt --taxonomy examples/ref_taxonomy.txt --quiet
1414
python tests/diamond_protein.py
15-
gmsc-mapper --nt-genes examples/example.fna -o examples_output/ --db examples/targetdb.dmnd --habitat examples/ref_habitat.txt --quality examples/ref_quality.txt --taxonomy examples/ref_taxonomy.txt
15+
gmsc-mapper --nt-genes examples/example.fna -o examples_output/ --db examples/targetdb.dmnd --habitat examples/ref_habitat.txt --quality examples/ref_quality.txt --taxonomy examples/ref_taxonomy.txt --quiet
1616
python tests/diamond_gene.py
1717

1818
echo "Testing tool flag - MMSeqs"
19-
gmsc-mapper -i examples/example.fa -o examples_output/ --db examples/targetdb --habitat examples/ref_habitat.txt --quality examples/ref_quality.txt --taxonomy examples/ref_taxonomy.txt --tool mmseqs
19+
gmsc-mapper -i examples/example.fa -o examples_output/ --db examples/targetdb --habitat examples/ref_habitat.txt --quality examples/ref_quality.txt --taxonomy examples/ref_taxonomy.txt --tool mmseqs --quiet
2020
python tests/mmseqs_contig.py

tests/diamond_contig.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,34 +3,34 @@ def diamond_contig_test():
33

44
alignment_flag = filecmp.cmp("./tests/diamond_contig/alignment.out.smorfs.tsv", "./examples_output/alignment.out.smorfs.tsv")
55
if not alignment_flag:
6-
print('Contig input of Diamond mode alignment results have something wrong.')
6+
print('\nContig input of Diamond mode alignment results have something wrong.\n')
77

88
predict_flag = filecmp.cmp("./tests/diamond_contig/predicted.filterd.smorf.faa", "./examples_output/predicted.filterd.smorf.faa")
99
if not predict_flag:
10-
print('Contig input of Diamond mode predicted fasta results have something wrong.')
10+
print('\nContig input of Diamond mode predicted fasta results have something wrong.\n')
1111

1212
fasta_flag = filecmp.cmp("./tests/diamond_contig/mapped.smorfs.faa", "./examples_output/mapped.smorfs.faa")
1313
if not fasta_flag:
14-
print('Contig input of Diamond mode mapped fasta results have something wrong.')
14+
print('\nContig input of Diamond mode mapped fasta results have something wrong.\n')
1515

1616
habitat_flag = filecmp.cmp("./tests/diamond_contig/habitat.out.smorfs.tsv", "./examples_output/habitat.out.smorfs.tsv")
1717
if not habitat_flag:
18-
print('Contig input of Diamond mode habitat results have something wrong.')
18+
print('\nContig input of Diamond mode habitat results have something wrong.\n')
1919

2020
taxonomy_flag = filecmp.cmp("./tests/diamond_contig/taxonomy.out.smorfs.tsv", "./examples_output/taxonomy.out.smorfs.tsv")
2121
if not taxonomy_flag:
22-
print('Contig input of Diamond mode taxonomy results have something wrong.')
22+
print('\nContig input of Diamond mode taxonomy results have something wrong.\n')
2323

2424
quality_flag = filecmp.cmp("./tests/diamond_contig/quality.out.smorfs.tsv", "./examples_output/quality.out.smorfs.tsv")
2525
if not quality_flag:
26-
print('Contig input of Diamond mode quality results have something wrong.')
26+
print('\nContig input of Diamond mode quality results have something wrong.\n')
2727

2828
summary_flag = filecmp.cmp("./tests/diamond_contig/summary.txt", "./examples_output/summary.txt")
2929
if not summary_flag:
30-
print('Contig input of Diamond mode summary results have something wrong.')
30+
print('\nContig input of Diamond mode summary results have something wrong.\n')
3131

3232
if alignment_flag and predict_flag and fasta_flag and habitat_flag and taxonomy_flag and quality_flag and summary_flag:
33-
print('Contig input of Diamond mode checking has passed')
33+
print('\nContig input of Diamond mode checking has passed.\n')
3434

3535
return(alignment_flag,predict_flag,fasta_flag,habitat_flag,taxonomy_flag,quality_flag,summary_flag)
3636

tests/diamond_gene.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,30 +3,30 @@ def diamond_gene_test():
33

44
alignment_flag = filecmp.cmp("./tests/diamond_gene/alignment.out.smorfs.tsv", "./examples_output/alignment.out.smorfs.tsv")
55
if not alignment_flag:
6-
print('Gene input of Diamond mode alignment results have something wrong.')
6+
print('\nGene input of Diamond mode alignment results have something wrong.\n')
77

88
fasta_flag = filecmp.cmp("./tests/diamond_gene/mapped.smorfs.faa", "./examples_output/mapped.smorfs.faa")
99
if not fasta_flag:
10-
print('Gene input of Diamond mode mapped fasta results have something wrong.')
10+
print('\nGene input of Diamond mode mapped fasta results have something wrong.\n')
1111

1212
habitat_flag = filecmp.cmp("./tests/diamond_gene/habitat.out.smorfs.tsv", "./examples_output/habitat.out.smorfs.tsv")
1313
if not habitat_flag:
14-
print('Gene input of Diamond mode habitat results have something wrong.')
14+
print('\nGene input of Diamond mode habitat results have something wrong.\n')
1515

1616
taxonomy_flag = filecmp.cmp("./tests/diamond_gene/taxonomy.out.smorfs.tsv", "./examples_output/taxonomy.out.smorfs.tsv")
1717
if not taxonomy_flag:
18-
print('Gene input of Diamond mode taxonomy results have something wrong.')
18+
print('\nGene input of Diamond mode taxonomy results have something wrong.\n')
1919

2020
quality_flag = filecmp.cmp("./tests/diamond_gene/quality.out.smorfs.tsv", "./examples_output/quality.out.smorfs.tsv")
2121
if not quality_flag:
22-
print('Gene input of Diamond mode quality results have something wrong.')
22+
print('\nGene input of Diamond mode quality results have something wrong.\n')
2323

2424
summary_flag = filecmp.cmp("./tests/diamond_gene/summary.txt", "./examples_output/summary.txt")
2525
if not summary_flag:
26-
print('Gene input of Diamond mode summary results have something wrong.')
26+
print('\nGene input of Diamond mode summary results have something wrong.\n')
2727

2828
if alignment_flag and fasta_flag and habitat_flag and taxonomy_flag and quality_flag and summary_flag:
29-
print('Gene input of Diamond mode checking has passed')
29+
print('\nGene input of Diamond mode checking has passed.\n')
3030

3131
return(alignment_flag,fasta_flag,habitat_flag,taxonomy_flag,quality_flag,summary_flag)
3232

tests/diamond_protein.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,30 +3,30 @@ def diamond_protein_test():
33

44
alignment_flag = filecmp.cmp("./tests/diamond_protein/alignment.out.smorfs.tsv", "./examples_output/alignment.out.smorfs.tsv")
55
if not alignment_flag:
6-
print('Protein input of Diamond mode alignment results have something wrong.')
6+
print('\nProtein input of Diamond mode alignment results have something wrong.\n')
77

88
fasta_flag = filecmp.cmp("./tests/diamond_protein/mapped.smorfs.faa", "./examples_output/mapped.smorfs.faa")
99
if not fasta_flag:
10-
print('Protein input of Diamond mode mapped fasta results have something wrong.')
10+
print('\nProtein input of Diamond mode mapped fasta results have something wrong.\n')
1111

1212
habitat_flag = filecmp.cmp("./tests/diamond_protein/habitat.out.smorfs.tsv", "./examples_output/habitat.out.smorfs.tsv")
1313
if not habitat_flag:
14-
print('Protein input of Diamond mode habitat results have something wrong.')
14+
print('\nProtein input of Diamond mode habitat results have something wrong.\n')
1515

1616
taxonomy_flag = filecmp.cmp("./tests/diamond_protein/taxonomy.out.smorfs.tsv", "./examples_output/taxonomy.out.smorfs.tsv")
1717
if not taxonomy_flag:
18-
print('Protein input of Diamond mode taxonomy results have something wrong.')
18+
print('\nProtein input of Diamond mode taxonomy results have something wrong.\n')
1919

2020
quality_flag = filecmp.cmp("./tests/diamond_protein/quality.out.smorfs.tsv", "./examples_output/quality.out.smorfs.tsv")
2121
if not quality_flag:
22-
print('Protein input of Diamond mode quality results have something wrong.')
22+
print('\nProtein input of Diamond mode quality results have something wrong.\n')
2323

2424
summary_flag = filecmp.cmp("./tests/diamond_protein/summary.txt", "./examples_output/summary.txt")
2525
if not summary_flag:
26-
print('Protein input of Diamond mode summary results have something wrong.')
26+
print('\nProtein input of Diamond mode summary results have something wrong.\n')
2727

2828
if alignment_flag and fasta_flag and habitat_flag and taxonomy_flag and quality_flag and summary_flag:
29-
print('Protein input of Diamond mode checking has passed')
29+
print('\nProtein input of Diamond mode checking has passed.\n')
3030

3131
return(alignment_flag,fasta_flag,habitat_flag,taxonomy_flag,quality_flag,summary_flag)
3232

tests/mmseqs_contig.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,30 +3,30 @@ def mmseqs_contig_test():
33

44
predict_flag = filecmp.cmp("./tests/mmseqs_contig/predicted.filterd.smorf.faa", "./examples_output/predicted.filterd.smorf.faa")
55
if not predict_flag:
6-
print('Contig input of MMseqs2 mode predicted fasta results have something wrong.')
6+
print('\nContig input of MMseqs2 mode predicted fasta results have something wrong.\n')
77

88
fasta_flag = filecmp.cmp("./tests/mmseqs_contig/mapped.smorfs.faa", "./examples_output/mapped.smorfs.faa")
99
if not fasta_flag:
10-
print('Contig input of MMseqs2 mode mapped fasta results have something wrong.')
10+
print('\nContig input of MMseqs2 mode mapped fasta results have something wrong.\n')
1111

1212
habitat_flag = filecmp.cmp("./tests/mmseqs_contig/habitat.out.smorfs.tsv", "./examples_output/habitat.out.smorfs.tsv")
1313
if not habitat_flag:
14-
print('Contig input of MMseqs2 mode habitat results have something wrong.')
14+
print('\nContig input of MMseqs2 mode habitat results have something wrong.\n')
1515

1616
taxonomy_flag = filecmp.cmp("./tests/mmseqs_contig/taxonomy.out.smorfs.tsv", "./examples_output/taxonomy.out.smorfs.tsv")
1717
if not taxonomy_flag:
18-
print('Contig input of MMseqs2 mode taxonomy results have something wrong.')
18+
print('\nContig input of MMseqs2 mode taxonomy results have something wrong.\n')
1919

2020
quality_flag = filecmp.cmp("./tests/mmseqs_contig/quality.out.smorfs.tsv", "./examples_output/quality.out.smorfs.tsv")
2121
if not quality_flag:
22-
print('Contig input of MMseqs2 mode quality results have something wrong.')
22+
print('\nContig input of MMseqs2 mode quality results have something wrong.\n')
2323

2424
summary_flag = filecmp.cmp("./tests/mmseqs_contig/summary.txt", "./examples_output/summary.txt")
2525
if not summary_flag:
26-
print('Contig input of MMseqs2 mode summary results have something wrong.')
26+
print('\nContig input of MMseqs2 mode summary results have something wrong.\n')
2727

2828
if predict_flag and fasta_flag and habitat_flag and taxonomy_flag and quality_flag and summary_flag:
29-
print('Contig input of MMseqs2 mode checking has passed')
29+
print('\nContig input of MMseqs2 mode checking has passed.\n')
3030

3131
return(predict_flag,fasta_flag,habitat_flag,taxonomy_flag,quality_flag,summary_flag)
3232

0 commit comments

Comments
 (0)