Skip to content

Commit 9781ea8

Browse files
committed
fix linting; fix tests
1 parent 7f6173f commit 9781ea8

File tree

18 files changed

+139
-337
lines changed

18 files changed

+139
-337
lines changed

src/votuderep.egg-info/PKG-INFO

Lines changed: 7 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,14 @@ Dynamic: license-file
3434

3535
# votuderep
3636

37-
A Python CLI tool for dereplicating and filtering viral contigs (vOTUs - viral Operational Taxonomic Units).
37+
A Python CLI tool for dereplicating and filtering viral contigs (vOTUs - viral Operational Taxonomic Units)
38+
using the CheckV method.
3839

3940
## Features
4041

4142
- **Dereplicate vOTUs**: Remove redundant viral sequences using BLAST-based ANI clustering
4243
- **Filter by CheckV metrics**: Filter viral contigs based on quality, completeness, and other metrics
43-
- **Rich CLI interface**: Beautiful, user-friendly command-line interface powered by rich-click
44-
- **Modular design**: Well-structured codebase with separation of concerns
45-
- **Type-safe**: Written with type hints for better code quality
44+
- ...
4645

4746
## Requirements
4847

@@ -80,7 +79,6 @@ sudo apt-get install ncbi-blast+
8079
brew install blast
8180
```
8281

83-
Alternatively, download from [NCBI](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download).
8482

8583
## Usage
8684

@@ -210,86 +208,18 @@ Note: "Not-determined" sequences are included by default unless `--exclude-undet
210208
- `--version`: Show version and exit
211209
- `--help`: Show help message
212210

213-
## Project Structure
214-
215-
```
216-
votuderep/
217-
├── pyproject.toml # Modern Python packaging configuration
218-
├── README.md # This file
219-
├── src/
220-
│ └── votuderep/
221-
│ ├── __init__.py # Package initialization
222-
│ ├── __main__.py # Entry point for python -m votuderep
223-
│ ├── cli.py # Main CLI setup
224-
│ ├── commands/ # Subcommands
225-
│ │ ├── __init__.py
226-
│ │ ├── derep.py # Dereplication command
227-
│ │ └── filter.py # Filtering command
228-
│ ├── core/ # Business logic
229-
│ │ ├── __init__.py
230-
│ │ ├── blast.py # BLAST operations
231-
│ │ ├── dereplication.py # ANI calculation and clustering
232-
│ │ └── filtering.py # CheckV filtering logic
233-
│ └── utils/ # Utilities
234-
│ ├── __init__.py
235-
│ ├── validators.py # Input validation
236-
│ ├── io.py # File I/O helpers
237-
│ └── logging.py # Logging setup
238-
└── tests/ # Test suite
239-
├── test_derep.py
240-
└── test_filter.py
241-
```
242-
243-
## Development
244-
245-
### Install development dependencies
246-
247-
```bash
248-
pip install -e ".[dev]"
249-
```
250-
251-
### Run tests
252-
253-
```bash
254-
pytest
255-
```
256-
257-
### Code formatting
258-
259-
```bash
260-
# Format code
261-
black src/
262-
263-
# Lint code
264-
ruff check src/
265-
```
266-
267-
## Environment Variables
268-
269-
- `VOTUDEREP_BLASTN_PATH`: Custom path to blastn executable
270-
271211
## License
272212

273213
MIT License - See LICENSE file for details
274214

275-
## Citation
276-
277-
If you use votuderep in your research, please cite:
278-
279-
```
280-
[Citation information to be added]
281-
```
282-
215+
283216
## Contributing
284217

285218
Contributions are welcome! Please feel free to submit a Pull Request.
286219

287220
## Authors
288221

289-
- Your Name (your.email@example.com)
290-
291-
## Acknowledgments
222+
Andrea Telatin & QIB Core Bioinformatics
292223

293-
- BLAST+ for sequence comparison
294-
- CheckV for viral genome quality assessment
295-
- Rich and rich-click for beautiful CLI output
224+
©️ Quadram Institute Bioscience 2025
225+

src/votuderep/cli.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,7 @@
2222

2323
@click.group()
2424
@click.version_option(version=__version__, prog_name="votuderep")
25-
@click.option(
26-
"-v", "--verbose",
27-
is_flag=True,
28-
help="Enable verbose logging"
29-
)
25+
@click.option("-v", "--verbose", is_flag=True, help="Enable verbose logging")
3026
@click.pass_context
3127
def cli(ctx, verbose: bool):
3228
"""
@@ -39,7 +35,7 @@ def cli(ctx, verbose: bool):
3935
"""
4036
# Ensure context object exists
4137
ctx.ensure_object(dict)
42-
ctx.obj['verbose'] = verbose
38+
ctx.obj["verbose"] = verbose
4339

4440
# Setup logger
4541
setup_logger(verbose=verbose)

src/votuderep/commands/derep.py

Lines changed: 19 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
check_blastn,
1717
validate_file_exists,
1818
validate_percentage,
19-
VotuDerepError
19+
VotuDerepError,
2020
)
2121
from ..utils.io import filter_sequences
2222

@@ -42,7 +42,7 @@ def get_temp_directory(tmp_arg: str) -> str:
4242
return tmp_arg
4343

4444
# Try common temp directories
45-
for temp_dir in [os.environ.get('TEMP'), os.environ.get('TMP'), '/tmp', '.']:
45+
for temp_dir in [os.environ.get("TEMP"), os.environ.get("TMP"), "/tmp", "."]:
4646
if temp_dir and os.path.isdir(temp_dir):
4747
return temp_dir
4848

@@ -51,46 +51,36 @@ def get_temp_directory(tmp_arg: str) -> str:
5151

5252
@click.command(name="derep")
5353
@click.option(
54-
"-i", "--input",
54+
"-i",
55+
"--input",
5556
required=True,
5657
type=click.Path(exists=True, dir_okay=False),
57-
help="Input FASTA file containing vOTUs"
58+
help="Input FASTA file containing vOTUs",
5859
)
5960
@click.option(
60-
"-o", "--output",
61+
"-o",
62+
"--output",
6163
default="dereplicated_vOTUs.fasta",
6264
type=click.Path(dir_okay=False),
63-
help="Output FASTA file with dereplicated vOTUs"
64-
)
65-
@click.option(
66-
"-t", "--threads",
67-
default=2,
68-
type=int,
69-
help="Number of threads for BLAST"
65+
help="Output FASTA file with dereplicated vOTUs",
7066
)
67+
@click.option("-t", "--threads", default=2, type=int, help="Number of threads for BLAST")
7168
@click.option(
7269
"--tmp",
7370
default="",
7471
type=str,
75-
help="Directory for temporary files (default: $TEMP or /tmp or ./)"
72+
help="Directory for temporary files (default: $TEMP or /tmp or ./)",
7673
)
7774
@click.option(
78-
"--min-ani",
79-
default=95.0,
80-
type=float,
81-
help="Minimum ANI to consider two vOTUs as the same"
75+
"--min-ani", default=95.0, type=float, help="Minimum ANI to consider two vOTUs as the same"
8276
)
8377
@click.option(
8478
"--min-tcov",
8579
default=85.0,
8680
type=float,
87-
help="Minimum target coverage to consider two vOTUs as the same"
88-
)
89-
@click.option(
90-
"--keep",
91-
is_flag=True,
92-
help="Keep the temporary directory after completion"
81+
help="Minimum target coverage to consider two vOTUs as the same",
9382
)
83+
@click.option("--keep", is_flag=True, help="Keep the temporary directory after completion")
9484
@click.pass_context
9585
def derep(
9686
ctx,
@@ -100,7 +90,7 @@ def derep(
10090
tmp: str,
10191
min_ani: float,
10292
min_tcov: float,
103-
keep: bool
93+
keep: bool,
10494
):
10595
"""
10696
Dereplicate vOTUs using BLAST and ANI clustering.
@@ -154,7 +144,7 @@ def derep(
154144
TextColumn("[progress.description]{task.description}"),
155145
BarColumn(),
156146
TimeElapsedColumn(),
157-
console=console
147+
console=console,
158148
) as progress:
159149

160150
# Step 1: Create BLAST database
@@ -166,12 +156,7 @@ def derep(
166156
# Step 2: Run BLAST
167157
task2 = progress.add_task(f"[cyan]Running BLASTN ({threads} threads)...", total=None)
168158
blast_output = os.path.join(temp_dir, "blast.tsv")
169-
run_blastn(
170-
query=input,
171-
database=db_path,
172-
output=blast_output,
173-
threads=threads
174-
)
159+
run_blastn(query=input, database=db_path, output=blast_output, threads=threads)
175160
progress.update(task2, completed=True)
176161

177162
# Step 3: Dereplicate sequences
@@ -182,16 +167,14 @@ def derep(
182167
blast_file=blast_output,
183168
output_ani=ani_output,
184169
min_ani=min_ani,
185-
min_tcov=min_tcov
170+
min_tcov=min_tcov,
186171
)
187172
progress.update(task3, completed=True)
188173

189174
# Step 4: Write output
190175
task4 = progress.add_task("[cyan]Writing dereplicated sequences...", total=None)
191176
num_written = filter_sequences(
192-
file_path=input,
193-
sequence_ids=representative_ids,
194-
output_path=output
177+
file_path=input, sequence_ids=representative_ids, output_path=output
195178
)
196179
progress.update(task4, completed=True)
197180

@@ -206,7 +189,7 @@ def derep(
206189

207190
except Exception as e:
208191
console.print(f"\n[bold red]Error during dereplication:[/bold red] {e}")
209-
if ctx.obj.get('verbose'):
192+
if ctx.obj.get("verbose"):
210193
console.print_exception()
211194
raise click.Abort()
212195

0 commit comments

Comments
 (0)