Skip to content

Commit b358c90

Browse files
committed
Release v3.1.1: bug fixes and numpy version restriction
1 parent 4268642 commit b358c90

File tree

8 files changed

+47
-76
lines changed

8 files changed

+47
-76
lines changed

mob_suite/constants.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -171,8 +171,4 @@
171171

172172
default_database_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'databases')
173173

174-
ETE3DBTAXAFILE = os.path.abspath(default_database_dir + "/taxa.sqlite")
175-
176-
ETE3_LOCK_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "databases/ETE3_DB.lock")
177-
178174
LOG_FORMAT = '%(asctime)s %(name)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]'

mob_suite/mob_cluster.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -487,11 +487,11 @@ def main():
487487

488488
if not os.path.isdir(out_dir):
489489
logging.info('Creating directory {}'.format(args.outdir))
490-
os.mkdir(out_dir, 0o755)
490+
os.makedirs(out_dir, 0o755)
491491
tmp_dir = os.path.join(out_dir, '__tmp')
492492
if not os.path.isdir(tmp_dir):
493493
logging.info('Creating directory {}'.format(args.outdir))
494-
os.mkdir(tmp_dir, 0o755)
494+
os.makedirs(tmp_dir, 0o755)
495495

496496
taxonomy_file = args.taxonomy
497497

mob_suite/mob_init.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,7 @@ def main():
126126

127127

128128
if os.path.exists(database_directory) == False:
129-
os.mkdir(database_directory)
129+
os.makedirs(database_directory)
130130
else:
131131
logger.info("Database directory folder already exists at {}".format(database_directory))
132132

@@ -164,8 +164,15 @@ def main():
164164

165165
logger.info('Initializing databases...this will take some time')
166166
# Find available threads and use the maximum number available for mash sketch but cap it at 32
167-
num_threads = min(multiprocessing.cpu_count(), 32)
168-
167+
try:
168+
num_threads = len(os.sched_getaffinity(0))
169+
except AttributeError:
170+
num_threads = multiprocessing.cpu_count()
171+
172+
if num_threads > 32:
173+
num_threads = 32
174+
if num_threads < 1:
175+
num_threads = 1
169176

170177
if not os.path.exists(database_directory):
171178
os.makedirs(database_directory)

mob_suite/mob_recon.py

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414
from mob_suite.constants import \
1515
MOB_CLUSTER_INFO_HEADER, \
1616
MOB_RECON_INFO_HEADER, \
17-
ETE3DBTAXAFILE, \
1817
default_database_dir, \
1918
LOG_FORMAT, \
2019
LIT_PLASMID_TAXONOMY_HEADER
@@ -29,7 +28,7 @@
2928
read_sequence_info, \
3029
fixStart, \
3130
calc_md5, \
32-
GC, \
31+
gc_fraction, \
3332
ETE3_db_status_check, \
3433
writeReport, \
3534
dict_from_alt_key_list, \
@@ -998,7 +997,6 @@ def main():
998997
orit_blast_results = os.path.join(tmp_dir, 'orit_blast_results.txt')
999998
repetitive_blast_results = os.path.join(tmp_dir, 'repetitive_blast_results.txt')
1000999
contig_blast_results = os.path.join(tmp_dir, 'contig_blast_results.txt')
1001-
contig_blast_results = os.path.join(tmp_dir, 'contig_blast_results.txt')
10021000
prefix = None
10031001
if args.prefix is not None:
10041002
prefix = args.prefix
@@ -1010,9 +1008,7 @@ def main():
10101008
logger.info('Analysis directory {}'.format(args.outdir))
10111009

10121010
database_dir = os.path.abspath(args.database_directory)
1013-
1014-
1015-
1011+
print
10161012

10171013
if database_dir == default_database_dir:
10181014
plasmid_ref_db = args.plasmid_db
@@ -1043,27 +1039,26 @@ def main():
10431039
else:
10441040
sample_id = args.sample_id
10451041

1046-
verify_init(logger, database_dir)
10471042

10481043
run_overhang = args.run_overhang
10491044
unicycler_contigs = args.unicycler_contigs
10501045

10511046
# initialize analysis directory
10521047
if not os.path.isdir(args.outdir):
1053-
os.mkdir(args.outdir, 0o755)
1048+
os.makedirs(args.outdir, 0o755)
10541049

10551050
elif not args.force:
10561051
logger.error("Error output directory exists, please specify a new directory or use --force to overwrite")
10571052
sys.exit(-1)
10581053
else:
10591054
shutil.rmtree(args.outdir)
1060-
os.mkdir(args.outdir, 0o755)
1055+
os.makedirs(args.outdir, 0o755)
10611056

10621057
if not os.path.isdir(tmp_dir):
1063-
os.mkdir(tmp_dir, 0o755)
1058+
os.makedirs(tmp_dir, 0o755)
10641059
else:
10651060
shutil.rmtree(tmp_dir)
1066-
os.mkdir(tmp_dir, 0o755)
1061+
os.makedirs(tmp_dir, 0o755)
10671062

10681063
# Initialize clustering distance thresholds
10691064
if not (args.primary_cluster_dist >= 0 and args.primary_cluster_dist <= 1):
@@ -1126,7 +1121,7 @@ def main():
11261121
for feature in MOB_RECON_INFO_HEADER:
11271122
contig_info[id][feature] = ''
11281123
contig_info[id]['md5'] = calc_md5(seq)
1129-
contig_info[id]['gc'] = GC(seq)
1124+
contig_info[id]['gc'] = gc_fraction(seq)
11301125
contig_info[id]['size'] = len(seq)
11311126
contig_info[id]['contig_id'] = id
11321127
contig_info[id]['sample_id'] = sample_id
@@ -1398,7 +1393,7 @@ def main():
13981393
mobtyper_report = os.path.join(out_dir, "mobtyper_results.txt")
13991394
if prefix is not None:
14001395
mobtyper_report = os.path.join(out_dir, "{}.mobtyper_results.txt".format(prefix))
1401-
build_mobtyper_report(contig_memberships['plasmid'], out_dir, mobtyper_report,contig_seqs, ncbi, lit)
1396+
build_mobtyper_report(contig_memberships['plasmid'], out_dir, mobtyper_report,contig_seqs, ncbi, lit,ETE3DBTAXAFILE)
14021397

14031398
writeReport(results, MOB_RECON_INFO_HEADER, contig_report)
14041399

mob_suite/mob_typer.py

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,8 @@
1818
read_sequence_info, \
1919
writeReport, \
2020
sort_biomarkers, \
21-
ETE3_db_status_check, \
2221
calc_md5, \
23-
GC, \
22+
gc_fraction, \
2423
read_fasta_dict, \
2524
identify_biomarkers, \
2625
parseMash, \
@@ -31,11 +30,9 @@
3130
blast_mge, \
3231
writeMGEresults
3332

34-
from mob_suite.constants import ETE3DBTAXAFILE, \
35-
MOB_TYPER_REPORT_HEADER, \
33+
from mob_suite.constants import MOB_TYPER_REPORT_HEADER, \
3634
MOB_CLUSTER_INFO_HEADER, \
3735
default_database_dir, \
38-
ETE3_LOCK_FILE, \
3936
LIT_PLASMID_TAXONOMY_HEADER
4037

4138

@@ -187,7 +184,7 @@ def main():
187184
tmp_dir = args.analysis_dir
188185

189186
if not os.path.isdir(tmp_dir):
190-
os.mkdir(tmp_dir, 0o755)
187+
os.makedirs(tmp_dir, 0o755)
191188

192189
if not isinstance(args.num_threads, int):
193190
logger.info('Error number of threads must be an integer, you specified "{}"'.format(args.num_threads))
@@ -218,6 +215,7 @@ def main():
218215
primary_distance = float(args.primary_cluster_dist)
219216

220217
min_length = int(args.min_length)
218+
ETE3DBTAXAFILE = os.path.abspath(database_dir + "/taxa.sqlite")
221219

222220
if database_dir == default_database_dir:
223221
mob_ref = args.plasmid_mob
@@ -236,7 +234,7 @@ def main():
236234
mpf_ref = os.path.join(database_dir, 'mpf.proteins.faa')
237235
plasmid_orit = os.path.join(database_dir, 'orit.fas')
238236
repetitive_mask_file = os.path.join(database_dir, 'repetitive.dna.fas')
239-
ETE3DBTAXAFILE = os.path.abspath(database_dir + "/taxa.sqlite")
237+
240238

241239
LIT_PLASMID_TAXONOMY_FILE = os.path.join(database_dir, "host_range_literature_plasmidDB.txt")
242240
NCBI_PLASMID_TAXONOMY_FILE = plasmid_meta
@@ -340,7 +338,7 @@ def main():
340338
contig_info[id][feature] = ''
341339
seq = str(record.seq)
342340
contig_info[id]['md5'] = calc_md5(seq)
343-
contig_info[id]['gc'] = GC(seq)
341+
contig_info[id]['gc'] = gc_fraction(seq)
344342
contig_info[id]['size'] = len(seq)
345343
contig_info[id]['contig_id'] = id
346344
contig_info[id]['sample_id'] = sample_id
@@ -510,7 +508,7 @@ def main():
510508
record['relaxase_type_accession(s)'] = ",".join(record['relaxase_type_accession(s)'])
511509

512510
host_range = hostrange(record['rep_type(s)'].split(','), record['relaxase_type_accession(s)'].split(','),
513-
mob_cluster_id, ncbi, lit)
511+
mob_cluster_id, ncbi, lit,ETE3DBTAXAFILE)
514512

515513
for field in host_range:
516514
record[field] = host_range[field]

mob_suite/utils.py

Lines changed: 17 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from Bio import SeqIO
2-
from Bio.SeqUtils import GC
2+
from Bio.SeqUtils import gc_fraction
33
from mob_suite.blast import BlastRunner
44
from mob_suite.blast import BlastReader
55
import os, re, time
@@ -11,8 +11,7 @@
1111
from ete3 import NCBITaxa
1212
from mob_suite.constants import \
1313
MOB_TYPER_REPORT_HEADER, \
14-
MGE_INFO_HEADER, \
15-
ETE3DBTAXAFILE
14+
MGE_INFO_HEADER
1615

1716

1817
def getAssocValues(query_list_values, look_up_key, value_key, data):
@@ -65,30 +64,6 @@ def parseMash(mash_results):
6564
return hits
6665

6766

68-
def getHeirarchy(taxid):
69-
if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
70-
logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE))
71-
initETE3Database()
72-
73-
ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE)
74-
if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
75-
logging.error(
76-
"Tried ete3 init, but still was not able to find taxa.sqlite file for ete3 lib in {}. Aborting".format(
77-
ETE3DBTAXAFILE))
78-
return ['-', '-']
79-
80-
if not isinstance(taxid, int):
81-
return {'names': [], 'ranks': []}
82-
83-
lineage = ncbi.get_lineage(taxid)
84-
names = ncbi.get_taxid_translator(lineage)
85-
ranks = []
86-
for id in lineage:
87-
ranks.append(ncbi.get_rank(id))
88-
89-
return {'names': names, 'ranks': names}
90-
91-
9267
def filter_invalid_taxids(taxids):
9368
filtered = []
9469
for i in taxids:
@@ -104,7 +79,7 @@ def filter_invalid_taxids(taxids):
10479
return filtered
10580

10681

107-
def getHeirarchy(taxid):
82+
def getHeirarchy(taxid,ETE3DBTAXAFILE):
10883
if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
10984
logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE))
11085
initETE3Database()
@@ -128,7 +103,7 @@ def getHeirarchy(taxid):
128103
return {'names': names, 'ranks': names}
129104

130105

131-
def getTaxid(taxon):
106+
def getTaxid(taxon,ETE3DBTAXAFILE):
132107
if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
133108
logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE))
134109
initETE3Database()
@@ -146,7 +121,7 @@ def getTaxid(taxon):
146121

147122

148123

149-
def NamesToTaxIDs(names):
124+
def NamesToTaxIDs(names,ETE3DBTAXAFILE):
150125
if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
151126
logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE))
152127
initETE3Database(ETE3DBTAXAFILE)
@@ -163,7 +138,7 @@ def NamesToTaxIDs(names):
163138

164139

165140

166-
def getTaxonConvergence(taxids):
141+
def getTaxonConvergence(taxids,ETE3DBTAXAFILE):
167142
if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE):
168143
logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE))
169144
initETE3Database(ETE3DBTAXAFILE)
@@ -231,7 +206,7 @@ def getTaxonConvergence(taxids):
231206
return (['-', '-'])
232207

233208

234-
def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit):
209+
def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit,ETE3DBTAXAFILE):
235210
host_range_predictions = {
236211
'observed_host_range_ncbi_name': '',
237212
'observed_host_range_ncbi_rank': '',
@@ -276,25 +251,25 @@ def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit):
276251
ncbi_unique_taxids = filter_invalid_taxids(
277252
list(set(ncbi_replicon_taxids + ncbi_cluster_taxids + ncbi_relaxase_taxids)))
278253
host_range_predictions['observed_host_range_ncbi_rank'], host_range_predictions[
279-
'observed_host_range_ncbi_name'] = getTaxonConvergence(ncbi_unique_taxids)
254+
'observed_host_range_ncbi_name'] = getTaxonConvergence(ncbi_unique_taxids,ETE3DBTAXAFILE)
280255

281256
# Determine taxids associated with literature
282257

283258
lit_unique_taxids = filter_invalid_taxids(list(set(lit_replicon_taxids)))
284259

285260
host_range_predictions['reported_host_range_lit_rank'], host_range_predictions[
286-
'reported_host_range_lit_name'] = getTaxonConvergence(lit_unique_taxids)
261+
'reported_host_range_lit_name'] = getTaxonConvergence(lit_unique_taxids,ETE3DBTAXAFILE)
287262

288263
# determine overall host range
289264
overall_taxids = filter_invalid_taxids(list(set(ncbi_unique_taxids + lit_unique_taxids)))
290265
host_range_predictions['predicted_host_range_overall_rank'], host_range_predictions[
291-
'predicted_host_range_overall_name'] = getTaxonConvergence(overall_taxids)
266+
'predicted_host_range_overall_name'] = getTaxonConvergence(overall_taxids,ETE3DBTAXAFILE)
292267

293268
# move host-range prediction up to family when it is at genus or species level
294269
if host_range_predictions['predicted_host_range_overall_rank'] == 'genus' or host_range_predictions[
295270
'predicted_host_range_overall_rank'] == 'species':
296-
taxid = getTaxid(host_range_predictions['predicted_host_range_overall_name'])
297-
heir = getHeirarchy(taxid)
271+
taxid = getTaxid(host_range_predictions['predicted_host_range_overall_name'],ETE3DBTAXAFILE)
272+
heir = getHeirarchy(taxid,ETE3DBTAXAFILE)
298273
names = heir['names']
299274
ranks = heir['ranks']
300275

@@ -869,7 +844,7 @@ def calcFastaStatsIndividual(fasta):
869844
id = record.id
870845
seq = record.seq
871846
genome_size = len(seq)
872-
gc = GC(seq)
847+
gc = gc_fraction(seq)
873848
md5 = calc_md5(seq)
874849
stats[id] = {
875850
'total_length': genome_size,
@@ -892,7 +867,7 @@ def calcFastaStats(fasta):
892867
num_seqs += 1
893868
seq = seq + record.seq
894869
genome_size = len(seq)
895-
gc = GC(seq)
870+
gc = gc_fraction(seq)
896871
md5 = calc_md5(seq)
897872

898873
return {
@@ -974,7 +949,7 @@ def determine_mpf_type(hits):
974949
return max(types, key=lambda i: types[i])
975950

976951

977-
def build_mobtyper_report(plasmid_contig_info, out_dir, outfile, seq_dict, ncbi, lit):
952+
def build_mobtyper_report(plasmid_contig_info, out_dir, outfile, seq_dict, ncbi, lit,ETE3DBTAXAFILE):
978953
mob_typer_results = {}
979954
for clust_id in plasmid_contig_info:
980955

@@ -1008,7 +983,7 @@ def build_mobtyper_report(plasmid_contig_info, out_dir, outfile, seq_dict, ncbi,
1008983
cluster_seq = sorted(cluster_seq,key=len)
1009984
seq = "".join(cluster_seq)
1010985
mob_typer_results[clust_id]['md5'] = [calc_md5(seq)]
1011-
mob_typer_results[clust_id]['gc'] = [GC(seq)]
986+
mob_typer_results[clust_id]['gc'] = [gc_fraction(seq)]
1012987
mob_typer_results[clust_id]['size'] = [len(seq)]
1013988
mob_typer_results[clust_id]['num_contigs'] = len(cluster_seq)
1014989

@@ -1089,7 +1064,7 @@ def build_mobtyper_report(plasmid_contig_info, out_dir, outfile, seq_dict, ncbi,
10891064
else:
10901065
mob_cluster_id = '-'
10911066

1092-
host_range = hostrange(rep_types, relaxase_types, mob_cluster_id, ncbi, lit)
1067+
host_range = hostrange(rep_types, relaxase_types, mob_cluster_id, ncbi, lit,ETE3DBTAXAFILE)
10931068

10941069
for field in host_range:
10951070
mob_typer_results[clust_id][field] = host_range[field]

mob_suite/version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1-
__version__ = '3.1.0'
1+
__version__ = '3.1.1'
22

setup.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def read(fname):
2929
setup(
3030
name='mob_suite',
3131
include_package_data=True,
32-
version='3.1.0',
32+
version='3.1.1',
3333
python_requires='>=3.7.0,<4',
3434
setup_requires=['pytest-runner'],
3535
tests_require=['pytest'],
@@ -46,7 +46,7 @@ def read(fname):
4646
package_data={'mob_suite': ['config.json']},
4747

4848
install_requires=[
49-
'numpy>=1.11.1,<2',
49+
'numpy>=1.11.1,<1.23.5',
5050
'tables>=3.3.0,<4',
5151
'pandas>=0.22.0,<=1.0.5',
5252
'biopython>=1.70,<2',

0 commit comments

Comments
 (0)