Skip to content

Commit 40543b5

Browse files
authored
update to v1.2.5
1 parent dd784b7 commit 40543b5

File tree

3 files changed

+50
-24
lines changed

3 files changed

+50
-24
lines changed

bin/cidder

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def create_parser():
8888
Dereplicated_Representative_Genomes/ folder will be the original unprocessed genomes.
8989
""", formatter_class=argparse.RawTextHelpFormatter)
9090

91-
parser.add_argument('-g', '--genomes', nargs='+', help='Genome assembly files in (gzipped) FASTA format\n(accepted suffices are: *.fasta,\n*.fa, *.fas, or *.fna) [Optional].', required=False, default=[])
91+
parser.add_argument('-g', '--genomes', nargs='+', help='Genome assembly file paths or paths to containing\ndirectories. Files should be in FASTA format and can be gzipped\n(accepted suffixes are: *.fasta,\n*.fa, *.fas, or *.fna) [Optional].', required=False, default=[])
9292
parser.add_argument('-t', '--taxa-name', help='Genus or species identifier from GTDB for which to\ndownload genomes for and include in\ndereplication analysis [Optional].', required=False, default=None)
9393
parser.add_argument('-r', '--gtdb-release', help='Which GTDB release to use if -t argument issued [Default is R220].', default="R220")
9494
parser.add_argument('-o', '--output-directory', help='Output directory.', required=True)
@@ -267,19 +267,30 @@ def cidder_main():
267267
gf_listing_handle.write(renamed_gfile + '\n')
268268
gf_listing_handle.close()
269269

270+
270271
if genomes:
271-
symlink_genomes_directory = outdir + 'local_genomes/'
272-
util.setupDirectories([symlink_genomes_directory])
273272
gf_listing_handle = open(all_genomes_listing_file, 'a+')
274273
for gf in genomes:
275-
gf = os.path.abspath(gf)
276-
suffix = gf.split('.')[-1].lower()
277-
if gf.endswith('.gz'):
278-
suffix = '.gz'.join(gf.split('.gz')[:-1]).split('.')[-1].lower()
279-
if not suffix in ACCEPTED_SUFFICES: continue
280-
if sanity_check:
281-
assert(util.is_fasta(gf))
282-
gf_listing_handle.write(gf + '\n')
274+
if os.path.isfile(gf):
275+
gf = os.path.abspath(gf)
276+
suffix = gf.split('.')[-1].lower()
277+
if gf.endswith('.gz'):
278+
suffix = '.gz'.join(gf.split('.gz')[:-1]).split('.')[-1].lower()
279+
if not suffix in ACCEPTED_SUFFICES: continue
280+
if sanity_check:
281+
assert(util.is_fasta(gf))
282+
gf_listing_handle.write(gf + '\n')
283+
else:
284+
gf_dir = os.path.abspath(gf) + '/'
285+
for gdf in os.listdir(gf_dir):
286+
gdf = os.path.abspath(gf_dir + gdf)
287+
suffix = gdf.split('.')[-1].lower()
288+
if gdf.endswith('.gz'):
289+
suffix = '.gz'.join(gdf.split('.gz')[:-1]).split('.')[-1].lower()
290+
if not suffix in ACCEPTED_SUFFICES: continue
291+
if sanity_check:
292+
assert(util.is_fasta(gdf))
293+
gf_listing_handle.write(gdf + '\n')
283294
gf_listing_handle.close()
284295

285296
mge_proc_to_unproc_mapping = {}

bin/skder

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def create_parser():
8585
MGEs and enable them to still be selected as representatives.
8686
""", formatter_class=argparse.RawTextHelpFormatter)
8787

88-
parser.add_argument('-g', '--genomes', nargs='+', help='Genome assembly files in (gzipped) FASTA format\n(accepted suffices are: *.fasta,\n*.fa, *.fas, or *.fna) [Optional].', required=False, default=[])
88+
parser.add_argument('-g', '--genomes', nargs='+', help='Genome assembly file paths or paths to containing\ndirectories. Files should be in FASTA format and can be gzipped\n(accepted suffixes are: *.fasta,\n*.fa, *.fas, or *.fna) [Optional].', required=False, default=[])
8989
parser.add_argument('-t', '--taxa-name', help='Genus or species identifier from GTDB for which to\ndownload genomes for and include in\ndereplication analysis [Optional].', required=False, default=None)
9090
parser.add_argument('-r', '--gtdb-release', help='Which GTDB release to use if -t argument issued [Default is R220].', default="R220")
9191
parser.add_argument('-o', '--output-directory', help='Output directory.', required=True)
@@ -94,7 +94,7 @@ def create_parser():
9494
parser.add_argument('-tc', '--test-cutoffs', action='store_true', help="Assess clustering using various pre-selected cutoffs.", required=False, default=False)
9595
parser.add_argument('-f', '--aligned-fraction-cutoff', type=float, help="Aligned cutoff threshold for dereplication - only needed by\none genome [Default is 90.0].", required=False, default=90.0)
9696
parser.add_argument('-a', '--max-af-distance-cutoff', type=float, help="Maximum difference for aligned fraction between a pair to\nautomatically disqualify the genome with a higher\nAF from being a representative.", required=False, default=10.0)
97-
parser.add_argument('-p', '--skani-triangle-parameters', help="Options for skani triangle. Note ANI and AF cutoffs\nare specified separately and the -E parameter is always\nrequested. [Default is \"\"].", default="", required=False)
97+
parser.add_argument('-p', '--skani-triangle-parameters', help="Options for skani triangle. Note ANI and AF cutoffs\nare specified separately and the -E parameter is always\nrequested. [Default is \"-s 90.0\"].", default="-s 90.0", required=False)
9898
parser.add_argument('-s', '--sanity-check', action='store_true', help="Confirm each FASTA file provided or downloaded is actually\na FASTA file. Makes it slower, but generally\ngood practice.", required=False, default=False)
9999
parser.add_argument('-fm', '--filter-mge', action='store_true', help="Filter predicted MGE coordinates along genomes before\ndereplication assessment but after N50\ncomputation.", required=False, default=False)
100100
parser.add_argument('-gd', '--genomad-database', help="If filter-mge is specified, it will by default use PhiSpy;\nhowever, if a database directory for\ngeNomad is provided - it will use that instead\nto predict MGEs.", default=None, required=False)
@@ -157,6 +157,11 @@ def skder_main():
157157
except:
158158
sys.stderr.write('GTDB release requested is not valid. Valid options include: %s\n' % ' '.join(VALID_GTDB_RELEASES))
159159
sys.exit(1)
160+
161+
if percent_identity_cutoff < 90.0 and skani_triangle_parameters=="-s 90.0":
162+
screen_cutoff = max(percent_identity_cutoff - 10.0, 0.0)
163+
skani_triangle_parameters = "-s " + str(screen_cutoff)
164+
sys.stderr.write("Warning: ANI threshold requested is lower than 90.0 but the -p\nargument was not changed from the default where skani's screen\nparameter is set to 90.0 - therefore changing to set skani\ntriangle's -s parameter to %f\n" % screen_cutoff)
160165

161166
if os.path.isdir(outdir):
162167
sys.stderr.write("Output directory already exists! Overwriting in 5 seconds...\n")
@@ -273,18 +278,28 @@ def skder_main():
273278
gf_listing_handle.close()
274279

275280
if genomes:
276-
symlink_genomes_directory = outdir + 'local_genomes/'
277-
util.setupDirectories([symlink_genomes_directory])
278281
gf_listing_handle = open(all_genomes_listing_file, 'a+')
279282
for gf in genomes:
280-
gf = os.path.abspath(gf)
281-
suffix = gf.split('.')[-1].lower()
282-
if gf.endswith('.gz'):
283-
suffix = '.gz'.join(gf.split('.gz')[:-1]).split('.')[-1].lower()
284-
if not suffix in ACCEPTED_SUFFICES: continue
285-
if sanity_check:
286-
assert(util.is_fasta(gf))
287-
gf_listing_handle.write(gf + '\n')
283+
if os.path.isfile(gf):
284+
gf = os.path.abspath(gf)
285+
suffix = gf.split('.')[-1].lower()
286+
if gf.endswith('.gz'):
287+
suffix = '.gz'.join(gf.split('.gz')[:-1]).split('.')[-1].lower()
288+
if not suffix in ACCEPTED_SUFFICES: continue
289+
if sanity_check:
290+
assert(util.is_fasta(gf))
291+
gf_listing_handle.write(gf + '\n')
292+
else:
293+
gf_dir = os.path.abspath(gf) + '/'
294+
for gdf in os.listdir(gf_dir):
295+
gdf = os.path.abspath(gf_dir + gdf)
296+
suffix = gdf.split('.')[-1].lower()
297+
if gdf.endswith('.gz'):
298+
suffix = '.gz'.join(gdf.split('.gz')[:-1]).split('.')[-1].lower()
299+
if not suffix in ACCEPTED_SUFFICES: continue
300+
if sanity_check:
301+
assert(util.is_fasta(gdf))
302+
gf_listing_handle.write(gdf + '\n')
288303
gf_listing_handle.close()
289304

290305
number_of_genomes = None

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "skDER"
33
authors = [{name="Rauf Salamzade", email="salamzader@gmail.com"}]
4-
version = "1.2.4"
4+
version = "1.2.5"
55
description = "Program to select distinct representatives from an input set of microbial genomes."
66

77
[build-system]

0 commit comments

Comments
 (0)