Skip to content

Commit 9ee6f1e

Browse files
committed
RFCT More readable code
1 parent de37d89 commit 9ee6f1e

File tree

1 file changed

+36
-34
lines changed

1 file changed

+36
-34
lines changed

gmsc_mapper/main.py

Lines changed: 36 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,10 @@ def parse_args(args):
3636
help='Alignment tool (Diamond / MMseqs2)',
3737
dest='mode',
3838
default = None)
39-
cmd_create_db.add_argument('--quiet','--quiet',action='store_true', help='Disable alignment console output')
39+
cmd_create_db.add_argument('--quiet',
40+
action='store_true',
41+
dest='quiet',
42+
help='Disable alignment console output')
4043

4144
parser.add_argument('-i', '--input',
4245
required=False,
@@ -60,7 +63,7 @@ def parse_args(args):
6063
required=False,
6164
help='Output directory (will be created if non-existent)',
6265
dest='output',
63-
default=path.join(_ROOT, 'output'))
66+
default=path.join(_ROOT, 'output'))
6467

6568
parser.add_argument('--tool', '--tool',
6669
required=False,
@@ -152,7 +155,7 @@ def check_install():
152155
has_mmseqs = False
153156
has_diamond = False
154157
dependencies = ['diamond', 'mmseqs']
155-
print("Looking for dependencies...")
158+
logger.debug("Looking for dependencies...")
156159

157160
for dep in dependencies:
158161
p = which(dep)
@@ -166,11 +169,11 @@ def check_install():
166169
'At least one of them is necessary to run GMSC-mapper.\n')
167170
sys.exit(1)
168171
elif has_diamond and not has_mmseqs:
169-
print('Warning: mmseqs does not appear to be available.You can only use the `--tool diamond` option(default).')
172+
logger.warning('Warning: mmseqs does not appear to be available.You can only use the `--tool diamond` option(default).')
170173
elif not has_diamond and has_mmseqs:
171-
print('Warning: diamond does not appear to be available.You can only use the `--tool mmseqs` option.')
174+
logger.warning('Warning: diamond does not appear to be available.You can only use the `--tool mmseqs` option.')
172175
else:
173-
print('Dependencies installation is OK\n')
176+
logger.info('Dependencies installation is OK\n')
174177
return has_diamond,has_mmseqs
175178

176179
def validate_args(args,has_diamond,has_mmseqs):
@@ -231,16 +234,16 @@ def create_db(args):
231234
mmseqs_cmd = ['mmseqs','createdb',
232235
args.target_faa,
233236
out_db]
234-
237+
235238
if args.mode == "diamond":
236-
print('Start creating Diamond database...')
237-
subprocess.check_call(diamond_cmd)
238-
print('\nDiamond database has been created successfully.\n')
239+
logger.info('Start creating Diamond database...')
240+
subprocess.check_call(diamond_cmd)
241+
logger.info('Diamond database has been created successfully.')
239242

240243
if args.mode == "mmseqs":
241-
print('Start creating MMseqs database...')
242-
subprocess.check_call(mmseqs_cmd)
243-
print('\nMMseqs database has been created successfully.\n')
244+
logger.debug('Start creating MMseqs database...')
245+
subprocess.check_call(mmseqs_cmd)
246+
logger.info('MMseqs database has been created successfully.')
244247

245248
def predict(args,tmpdir):
246249
from gmsc_mapper.predict import predict_genes,filter_smorfs
@@ -253,21 +256,21 @@ def predict(args,tmpdir):
253256
predict_genes(args.genome_fasta,predicted_smorf)
254257
if not path.getsize(predicted_smorf):
255258
sys.stderr.write("GMSC-mapper Error:No smORFs have been predicted.Please check your input file.\n")
256-
sys.exit(1)
259+
sys.exit(1)
257260
else:
258261
filter_smorfs(predicted_smorf, filtered_smorf)
259262
if not path.getsize(filtered_smorf):
260263
sys.stderr.write("GMSC-mapper Error:No smORFs remained after filtering by length(<100aa).\n")
261-
sys.exit(1)
264+
sys.exit(1)
262265
else:
263-
print('\nsmORF prediction has done.\n')
266+
logger.info('smORF prediction complete')
264267
return filtered_smorf
265268

266269
def translate_gene(args,tmpdir):
267270
from gmsc_mapper.translate import translate_gene
268-
print('Start gene translation...')
271+
logger.debug('Starting gene translation...')
269272
translated_file = translate_gene(args.nt_input,tmpdir)
270-
print('Gene translation has done.\n')
273+
logger.info('Gene translation complete')
271274
return translated_file
272275

273276
def check_length(queryfile):
@@ -366,34 +369,33 @@ def generate_fasta(output,queryfile,resultfile):
366369
for ID,seq in fasta_iter(queryfile):
367370
if ID in smorf_id:
368371
f.write(f'>{ID}\n{seq}\n')
369-
print('smORF fasta file generating has done.\n')
372+
logger.debug('smORF fasta file generation complete')
370373
return fastafile
371374

372375
def habitat(args,resultfile):
373376
from gmsc_mapper.map_habitat import smorf_habitat
374-
print('Start habitat annotation...')
377+
logger.debug('Starting habitat annotation...')
375378
single_number,single_percentage,multi_number,multi_percentage = smorf_habitat(args.habitatindex,args.output,args.habitat,resultfile)
376-
print('habitat annotation has done.\n')
379+
logger.info('habitat annotation has done.')
377380
return single_number,single_percentage,multi_number,multi_percentage
378381

379382
def taxonomy(args,resultfile,tmpdirname):
380383
from gmsc_mapper.map_taxonomy import deep_lca,taxa_summary
381-
print('Start taxonomy annotation...')
384+
logger.debug('Start taxonomy annotation...')
382385
deep_lca(args.taxonomyindex,args.taxonomy,args.output,resultfile,tmpdirname)
383386
annotated_number,rank_number,rank_percentage = taxa_summary(args.output)
384-
print('taxonomy annotation has done.\n')
387+
logger.info('Taxonomy annotation complete.')
385388
return annotated_number,rank_number,rank_percentage
386389

387390
def quality(args,resultfile):
388391
from gmsc_mapper.map_quality import smorf_quality
389-
print('Start quality annotation...')
392+
logger.debug('Start quality annotation...')
390393
number,percentage = smorf_quality(args.output,args.quality,resultfile)
391-
print('quality annotation has done.\n')
394+
logger.info('Quality annotation completed.')
392395
return number,percentage
393396

394397
def predicted_smorf_count(file_name):
395-
out = subprocess.getoutput("wc -l %s" % file_name)
396-
return int(out.split()[0])
398+
return sum(1 for _ in open(file_name, 'rt'))
397399

398400
def main(args=None):
399401
if args is None:
@@ -439,7 +441,7 @@ def main(args=None):
439441
if args.genome_fasta:
440442
queryfile = predict(args,tmpdir)
441443
smorf_number = int(predicted_smorf_count(queryfile)/2)
442-
summary.append(f'{str(smorf_number)} smORFs are predicted in total.')
444+
summary.append(f'{smorf_number} smORFs are predicted in total.')
443445
if args.nt_input:
444446
if args.filter:
445447
args.nt_input = filter_length(args.nt_input,tmpdir,303)
@@ -458,18 +460,18 @@ def main(args=None):
458460

459461
fastafile = generate_fasta(args.output,queryfile,resultfile)
460462
smorf_number = int(predicted_smorf_count(fastafile)/2)
461-
summary.append(f'{str(smorf_number)} smORFs are aligned against GMSC in total.\n')
463+
summary.append(f'{smorf_number} smORFs aligned against GMSC in total.\n')
462464

463465
if not args.noquality:
464466
summary.append(f'# Quality')
465-
number,percentage = quality(args,resultfile)
466-
summary.append(f'{str(number)}({str(round(percentage*100,2))}%) aligned smORFs are high quality.\n')
467+
number,percentage = quality(args,resultfile)
468+
summary.append(f'{number} ({percentage:.2%}) aligned smORFs are high quality.\n')
467469

468470
if not args.nohabitat:
469471
summary.append(f'# Habitat')
470472
single_number,single_percentage,multi_number,multi_percentage = habitat(args,resultfile)
471-
summary.append(f'{str(single_number)}({str(round(single_percentage*100,2))}%) aligned smORFs are single-habitat.')
472-
summary.append(f'{str(multi_number)}({str(round(multi_percentage*100,2))}%) aligned smORFs are multi-habitat.\n')
473+
summary.append(f'{single_number} ({single_percentage:.2%}) aligned smORFs are single-habitat.')
474+
summary.append(f'{multi_number} ({multi_percentage:.2%}) aligned smORFs are multi-habitat.\n')
473475

474476
if not args.notaxonomy:
475477
summary.append(f'# Taxonomy')
@@ -495,4 +497,4 @@ def main(args=None):
495497
sys.exit(1)
496498

497499
if __name__ == '__main__':
498-
main(sys.argv)
500+
main(sys.argv)

0 commit comments

Comments
 (0)