Skip to content

Commit d53ce36

Browse files
committed
lint
1 parent 48d003d commit d53ce36

File tree

6 files changed

+30
-33
lines changed

6 files changed

+30
-33
lines changed

src/votuderep/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""votuderep - A CLI tool for dereplicating and filtering viral contigs."""
22

3-
__version__ = "0.1.1"
3+
__version__ = "0.2.0"
44
__author__ = "Andrea Telatin"
55
__license__ = "MIT"
66

src/votuderep/cli.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def cli(ctx, verbose: bool):
3131
3232
• [bold]derep[/bold]: Dereplicate vOTUs using BLAST and ANI clustering
3333
• [bold]filter[/bold]: Filter FASTA files using CheckV quality metrics
34+
• [bold]trainingdata[/bold]: Download training dataset from the internet
3435
"""
3536
# Ensure context object exists
3637
ctx.ensure_object(dict)
@@ -46,10 +47,12 @@ def main():
4647
# Import commands here to avoid circular imports
4748
from .commands.derep import derep
4849
from .commands.filter import filter_cmd
50+
from .commands.trainingdata import trainingdata
4951

5052
# Register commands
5153
cli.add_command(derep)
5254
cli.add_command(filter_cmd)
55+
cli.add_command(trainingdata)
5356

5457
# Run CLI
5558
cli(obj={})
-2 Bytes
Binary file not shown.
-2 Bytes
Binary file not shown.
-2 Bytes
Binary file not shown.

src/votuderep/commands/trainingdata.py

Lines changed: 26 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""Trainingdata command for downloading training datasets."""
22

3-
import logging
43
import os
54
import subprocess
65
import urllib.request
@@ -27,17 +26,17 @@ def download_file(url: str, output_path: str, description: str = "Downloading"):
2726
console=console,
2827
) as progress:
2928
task = progress.add_task(f"{description} {os.path.basename(output_path)}")
30-
29+
3130
def reporthook(block_num, block_size, total_size):
3231
if total_size > 0:
3332
percent = min(100, (block_num * block_size * 100) / total_size)
3433
progress.update(task, completed=percent)
35-
34+
3635
urllib.request.urlretrieve(url, output_path, reporthook)
3736
progress.update(task, completed=100)
38-
37+
3938
logger.info(f"Downloaded: {output_path}")
40-
39+
4140
except Exception as e:
4241
raise VotuDerepError(f"Failed to download {url}: {e}")
4342

@@ -52,19 +51,14 @@ def run_curl(url: str, output_path: str, description: str = "Downloading"):
5251
console=console,
5352
) as progress:
5453
task = progress.add_task(f"{description} {os.path.basename(output_path)}")
55-
54+
5655
cmd = ["curl", "-L", url, "-o", output_path]
57-
process = subprocess.run(
58-
cmd,
59-
capture_output=True,
60-
text=True,
61-
check=True
62-
)
63-
56+
subprocess.run(cmd, capture_output=True, text=True, check=True)
57+
6458
progress.update(task, completed=100)
65-
59+
6660
logger.info(f"Downloaded: {output_path}")
67-
61+
6862
except subprocess.CalledProcessError as e:
6963
raise VotuDerepError(f"Failed to download {url}: {e.stderr}")
7064
except FileNotFoundError:
@@ -83,61 +77,61 @@ def run_curl(url: str, output_path: str, description: str = "Downloading"):
8377
def trainingdata(ctx, outdir: str):
8478
"""
8579
Download training dataset from the internet.
86-
80+
8781
Downloads viral assembly and sequencing reads for training purposes.
8882
"""
8983
verbose = ctx.obj.get("verbose", False)
90-
84+
9185
if verbose:
9286
console.print(f"[blue]Output directory:[/blue] {outdir}")
93-
87+
9488
# Create output directory structure
9589
outdir_path = Path(outdir)
9690
reads_dir = outdir_path / "reads"
97-
91+
9892
try:
9993
reads_dir.mkdir(parents=True, exist_ok=True)
10094
logger.info(f"Created directory structure: {reads_dir}")
101-
95+
10296
console.print("[bold green]Downloading training dataset...[/bold green]")
103-
97+
10498
# Download assembly
10599
assembly_url = "https://zenodo.org/api/records/10650983/files/illumina_sample_pool_megahit.fa.gz/content"
106100
assembly_path = outdir_path / "human_gut_assembly.fa.gz"
107-
101+
108102
console.print("\n[blue]Downloading assembly...[/blue]")
109103
download_file(assembly_url, str(assembly_path), "Downloading assembly")
110-
104+
111105
# Download reads
112106
console.print("\n[blue]Downloading sequencing reads...[/blue]")
113107
ebi_base = "ftp://ftp.sra.ebi.ac.uk/vol1/fastq"
114-
108+
115109
reads_to_download = [
116110
("ERR6797445", "ERR679/005/ERR6797445"),
117-
("ERR6797444", "ERR679/004/ERR6797444"),
111+
("ERR6797444", "ERR679/004/ERR6797444"),
118112
("ERR6797443", "ERR679/003/ERR6797443"),
119113
]
120-
114+
121115
for sample_id, path_suffix in reads_to_download:
122116
for read_num in ["1", "2"]:
123117
url = f"{ebi_base}/{path_suffix}/{sample_id}_{read_num}.fastq.gz"
124118
output_file = reads_dir / f"{sample_id}_R{read_num}.fastq.gz"
125-
119+
126120
run_curl(url, str(output_file), f"Downloading {sample_id}_R{read_num}")
127-
128-
console.print(f"\n[bold green]✓ Training dataset downloaded successfully![/bold green]")
121+
122+
console.print("\n[bold green]✓ Training dataset downloaded successfully![/bold green]")
129123
console.print(f"[blue]Files saved to:[/blue] {outdir_path.absolute()}")
130-
124+
131125
# Summary of downloaded files
132126
if verbose:
133127
console.print("\n[bold]Downloaded files:[/bold]")
134128
for file_path in sorted(outdir_path.rglob("*")):
135129
if file_path.is_file():
136130
size = file_path.stat().st_size / (1024 * 1024) # MB
137131
console.print(f" • {file_path.relative_to(outdir_path)} ({size:.1f} MB)")
138-
132+
139133
except Exception as e:
140134
if isinstance(e, VotuDerepError):
141135
raise
142136
else:
143-
raise VotuDerepError(f"Failed to download training dataset: {e}")
137+
raise VotuDerepError(f"Failed to download training dataset: {e}")

0 commit comments

Comments
 (0)