Skip to content

Commit a462799

Browse files
committed
update to v1.2.7
1 parent 4a3cfee commit a462799

File tree

3 files changed

+45
-1
lines changed

3 files changed

+45
-1
lines changed

bin/cidder

Lines changed: 28 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -277,6 +277,22 @@ def cidder_main():
277277
gf_listing_handle.write(renamed_gfile + '\n')
278278
gf_listing_handle.close()
279279

280+
if os.path.isfile(all_genomes_listing_file):
281+
total_gtdb_genome_count = 0
282+
with open(genbank_accession_listing_file) as ogalf:
283+
for line in ogalf:
284+
line = line.strip()
285+
total_gtdb_genome_count += 1
286+
287+
genome_count = 0
288+
with open(all_genomes_listing_file) as oaglf:
289+
for line in oaglf:
290+
line = line.strip()
291+
genome_count += 1
292+
293+
msg = 'Was able to download %s of %s genomes belonging to taxa "%s" in GTDB %s.' % (str(genome_count), str(total_gtdb_genome_count), taxa_name, gtdb_release)
294+
sys.stderr.write(msg + '\n')
295+
logObject.info(msg)
280296

281297
if genomes:
282298
gf_listing_handle = open(all_genomes_listing_file, 'a+')
@@ -446,6 +462,9 @@ def cidder_main():
446462
rep_genomes = set([])
447463
rep_genomes_protein_clusters = set([])
448464
rep_genomes_multigenome_protein_clusters = set([])
465+
466+
rep_appending_order_file = outdir + 'CiDDER_Results.txt'
467+
raof_handle = open(rep_appending_order_file, 'w')
449468

450469
# First, select (one of) the genome(s) with the most distinct protein clusters.
451470
for i, gc in sorted(genome_cluster_counts.items(), key=itemgetter(1), reverse=True):
@@ -454,6 +473,7 @@ def cidder_main():
454473
msg = 'Starting genome: %s - %d distinct protein clusters' % (gc[0], gc[1])
455474
sys.stdout.write(msg + '\n')
456475
logObject.info(msg)
476+
raof_handle.write(gc[0] + '\t0\n')
457477
genome_path = genome_name_to_path[gc[0]]
458478
if genome_path in mge_proc_to_unproc_mapping:
459479
genome_path = mge_proc_to_unproc_mapping[genome_path]
@@ -469,6 +489,7 @@ def cidder_main():
469489
curr_saturation = (len(rep_genomes_protein_clusters)/c_count)*100.0
470490
curr_multigenome_saturation = (len(rep_genomes_multigenome_protein_clusters)/mgc_count)*100.0
471491

492+
rep_index = 1
472493
if curr_saturation >= saturation_cutoff or curr_multigenome_saturation >= multigenome_saturation_cutoff:
473494
msg = 'Requirements met! Protein cluster saturation of representative genomes is: %0.2f%%\nMulti-genome protein cluster saturation of representative genomes is %0.2f%%' % (curr_saturation, curr_multigenome_saturation)
474495
sys.stdout.write(msg + '\n')
@@ -503,8 +524,13 @@ def cidder_main():
503524
else:
504525
shutil.copy2(genome_path, cidder_drep_dir)
505526
rep_genomes.add(new_rep)
527+
raof_handle.write(new_rep + '\t' + str(rep_index) + '\n')
528+
rep_index += 1
506529
rep_genomes_protein_clusters = rep_genomes_protein_clusters.union(genome_protein_clusters[new_rep])
507530
rep_genomes_multigenome_protein_clusters = rep_genomes_multigenome_protein_clusters.union(genome_protein_clusters[new_rep].intersection(multi_genome_clusters))
531+
msg = 'Adding genome %s' % new_rep
532+
sys.stdout.write(msg + '\n')
533+
logObject.info(msg)
508534

509535
curr_saturation = (len(rep_genomes_protein_clusters)/c_count)*100.0
510536
curr_multigenome_saturation = (len(rep_genomes_multigenome_protein_clusters)/mgc_count)*100.0
@@ -514,7 +540,8 @@ def cidder_main():
514540
sys.stdout.write(msg + '\n')
515541
logObject.info(msg)
516542
limits_hit = True
517-
543+
raof_handle.close()
544+
518545
msg = 'There were %d representative genomes selected from %d considered!' % (len(rep_genomes), number_of_genomes)
519546
sys.stdout.write(msg + '\n')
520547
logObject.info(msg)

bin/skder

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -287,6 +287,23 @@ def skder_main():
287287
gf_listing_handle.write(renamed_gfile + '\n')
288288
gf_listing_handle.close()
289289

290+
if os.path.isfile(all_genomes_listing_file):
291+
total_gtdb_genome_count = 0
292+
with open(genbank_accession_listing_file) as ogalf:
293+
for line in ogalf:
294+
line = line.strip()
295+
total_gtdb_genome_count += 1
296+
297+
genome_count = 0
298+
with open(all_genomes_listing_file) as oaglf:
299+
for line in oaglf:
300+
line = line.strip()
301+
genome_count += 1
302+
303+
msg = 'Was able to download %s of %s genomes belonging to taxa "%s" in GTDB %s.' % (str(genome_count), str(total_gtdb_genome_count), taxa_name, gtdb_release)
304+
sys.stderr.write(msg + '\n')
305+
logObject.info(msg)
306+
290307
if genomes:
291308
gf_listing_handle = open(all_genomes_listing_file, 'a+')
292309
for gf in genomes:

test_case.tar.gz

110 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)