Skip to content

Commit 19a2b92

Browse files
Merge pull request #103 from jeromekelleher/cli-refinements
Cli refinements
2 parents c054caa + c8b00c0 commit 19a2b92

File tree

4 files changed

+376
-124
lines changed

4 files changed

+376
-124
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# 0.0.3 2024-03-28
2+
3+
- Various refinements to the CLI
4+
15
# 0.0.2 2024-03-27
26

37
- Merged 1D and 2D encode steps into one, and change rate reporting to bytes

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ and will always work.
3131

3232
## vcf2zarr
3333

34+
3435
Convert a VCF to zarr format:
3536

3637
```
@@ -72,6 +73,21 @@ vcf2zarr encode tmp/sample.exploded tmp/sample.zarr -s sample.schema.json
7273
Use the ``-p, --worker-processes`` argument to control the number of workers used
7374
in the ``explode`` and ``encode`` phases.
7475

76+
### Shell completion
77+
78+
To enable shell completion for a particular session in Bash do:
79+
80+
```
81+
eval "$(_VCF2ZARR_COMPLETE=bash_source vcf2zarr)"
82+
```
83+
84+
If you add this to your ``.bashrc`` vcf2zarr shell completion should be available
85+
in all new shell sessions.
86+
87+
See the [Click documentation](https://click.palletsprojects.com/en/8.1.x/shell-completion/#enabling-completion)
88+
for instructions on how to enable completion in other shells.
89+

90+
7591
## plink2zarr
7692

7793
Convert a plink ``.bed`` file to zarr format. **This is incomplete**

bio2zarr/cli.py

Lines changed: 79 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
import logging
2+
import os
3+
import pathlib
4+
import shutil
5+
16
import click
27
import tabulate
38
import coloredlogs
@@ -8,6 +13,9 @@
813
from . import provenance
914

1015

16+
logger = logging.getLogger(__name__)
17+
18+
1119
class NaturalOrderGroup(click.Group):
1220
"""
1321
List commands in the order they are provided in the help text.
@@ -18,8 +26,32 @@ def list_commands(self, ctx):
1826

1927

2028
# Common arguments/options
29+
vcfs = click.argument(
30+
"vcfs", nargs=-1, required=True, type=click.Path(exists=True, dir_okay=False)
31+
)
32+
33+
new_icf_path = click.argument(
34+
"icf_path", type=click.Path(file_okay=False, dir_okay=True)
35+
)
36+
37+
icf_path = click.argument(
38+
"icf_path", type=click.Path(exists=True, file_okay=False, dir_okay=True)
39+
)
40+
41+
new_zarr_path = click.argument(
42+
"zarr_path", type=click.Path(file_okay=False, dir_okay=True)
43+
)
44+
2145
verbose = click.option("-v", "--verbose", count=True, help="Increase verbosity")
2246

47+
force = click.option(
48+
"-f",
49+
"--force",
50+
is_flag=True,
51+
flag_value=True,
52+
help="Force overwriting of existing directories",
53+
)
54+
2355
version = click.version_option(version=f"{provenance.__version__}")
2456

2557
worker_processes = click.option(
@@ -64,41 +96,62 @@ def setup_logging(verbosity):
6496
coloredlogs.install(level=level)
6597

6698

99+
def check_overwrite_dir(path, force):
    """
    Clear the way for a new output directory at ``path``.

    Does nothing if ``path`` does not exist. Otherwise, unless ``force`` is
    true, the user is asked to confirm the overwrite; because the prompt is
    issued with ``abort=True``, declining aborts the command. The existing
    tree is then renamed to a sibling ``<name>.<pid>.DELETING`` path and
    removed from there.
    """
    path = pathlib.Path(path)
    if not path.exists():
        return
    if not force:
        click.confirm(
            f"Do you want to overwrite {path}? (use --force to skip this check)",
            abort=True,
        )
    # These trees can be mondo-big and on slow file systems, so it's entirely
    # feasible that the delete would fail or be killed. This makes it less likely
    # that partially deleted paths are mistaken for good paths.
    # Appending to the full name keeps any existing suffix (e.g. ".zarr") intact
    # and tags the directory with this process's pid.
    tmp_delete_path = path.with_name(f"{path.name}.{os.getpid()}.DELETING")
    logger.info(
        "Deleting %s (renamed to %s while in progress)", path, tmp_delete_path
    )
    os.rename(path, tmp_delete_path)
    shutil.rmtree(tmp_delete_path)
114+
115+
67116
@click.command
68-
@click.argument("vcfs", nargs=-1, required=True)
69-
@click.argument("zarr_path", type=click.Path())
117+
@vcfs
118+
@new_icf_path
119+
@force
70120
@verbose
71121
@worker_processes
72122
@column_chunk_size
73-
def explode(vcfs, zarr_path, verbose, worker_processes, column_chunk_size):
123+
def explode(vcfs, icf_path, force, verbose, worker_processes, column_chunk_size):
74124
"""
75125
Convert VCF(s) to intermediate columnar format
76126
"""
77127
setup_logging(verbose)
128+
check_overwrite_dir(icf_path, force)
78129
vcf.explode(
79130
vcfs,
80-
zarr_path,
131+
icf_path,
81132
worker_processes=worker_processes,
82133
column_chunk_size=column_chunk_size,
83134
show_progress=True,
84135
)
85136

86137

87138
@click.command
88-
@click.argument("vcfs", nargs=-1, required=True)
89-
@click.argument("icf_path", type=click.Path())
90-
@click.argument("num_partitions", type=int)
139+
@vcfs
140+
@new_icf_path
141+
@click.argument("num_partitions", type=click.IntRange(min=1))
142+
@force
91143
@column_chunk_size
92144
@verbose
93145
@worker_processes
94146
def dexplode_init(
95-
vcfs, icf_path, num_partitions, column_chunk_size, verbose, worker_processes
147+
vcfs, icf_path, num_partitions, force, column_chunk_size, verbose, worker_processes
96148
):
97149
"""
98-
Initial step for parallel conversion of VCF(s) to intermediate columnar format
150+
Initial step for distributed conversion of VCF(s) to intermediate columnar format
99151
over the requested number of partitions.
100152
"""
101153
setup_logging(verbose)
154+
check_overwrite_dir(icf_path, force)
102155
num_partitions = vcf.explode_init(
103156
icf_path,
104157
vcfs,
@@ -111,12 +164,12 @@ def dexplode_init(
111164

112165

113166
@click.command
114-
@click.argument("icf_path", type=click.Path())
115-
@click.argument("partition", type=int)
167+
@icf_path
168+
@click.argument("partition", type=click.IntRange(min=0))
116169
@verbose
117170
def dexplode_partition(icf_path, partition, verbose):
118171
"""
119-
Convert a VCF partition into intermediate columnar format. Must be called *after*
172+
Convert a VCF partition to intermediate columnar format. Must be called *after*
120173
the ICF path has been initialised with dexplode_init. Partition indexes must be
121174
from 0 (inclusive) to the number of partitions returned by dexplode_init (exclusive).
122175
"""
@@ -129,26 +182,26 @@ def dexplode_partition(icf_path, partition, verbose):
129182
@verbose
130183
def dexplode_finalise(path, verbose):
131184
"""
132-
Final step for parallel conversion of VCF(s) to intermediate columnar format
185+
Final step for distributed conversion of VCF(s) to intermediate columnar format.
133186
"""
134187
setup_logging(verbose)
135188
vcf.explode_finalise(path)
136189

137190

138191
@click.command
139-
@click.argument("icf_path", type=click.Path())
192+
@click.argument("path", type=click.Path())
140193
@verbose
141-
def inspect(icf_path, verbose):
194+
def inspect(path, verbose):
142195
"""
143-
Inspect an intermediate format or Zarr path.
196+
Inspect an intermediate columnar format or Zarr path.
144197
"""
145198
setup_logging(verbose)
146-
data = vcf.inspect(icf_path)
199+
data = vcf.inspect(path)
147200
click.echo(tabulate.tabulate(data, headers="keys"))
148201

149202

150203
@click.command
151-
@click.argument("icf_path", type=click.Path())
204+
@icf_path
152205
def mkschema(icf_path):
153206
"""
154207
Generate a schema for zarr encoding
@@ -158,8 +211,9 @@ def mkschema(icf_path):
158211

159212

160213
@click.command
161-
@click.argument("icf_path", type=click.Path())
162-
@click.argument("zarr_path", type=click.Path())
214+
@icf_path
215+
@new_zarr_path
216+
@force
163217
@verbose
164218
@click.option("-s", "--schema", default=None, type=click.Path(exists=True))
165219
@variants_chunk_size
@@ -186,6 +240,7 @@ def mkschema(icf_path):
186240
def encode(
187241
icf_path,
188242
zarr_path,
243+
force,
189244
verbose,
190245
schema,
191246
variants_chunk_size,
@@ -198,10 +253,11 @@ def encode(
198253
Encode intermediate columnar format (see explode) to vcfzarr.
199254
"""
200255
setup_logging(verbose)
256+
check_overwrite_dir(zarr_path, force)
201257
vcf.encode(
202258
icf_path,
203259
zarr_path,
204-
schema,
260+
schema_path=schema,
205261
variants_chunk_size=variants_chunk_size,
206262
samples_chunk_size=samples_chunk_size,
207263
max_v_chunks=max_variant_chunks,
@@ -212,8 +268,8 @@ def encode(
212268

213269

214270
@click.command(name="convert")
215-
@click.argument("vcfs", nargs=-1, required=True)
216-
@click.argument("zarr_path", type=click.Path())
271+
@vcfs
272+
@new_zarr_path
217273
@variants_chunk_size
218274
@samples_chunk_size
219275
@verbose
@@ -235,17 +291,6 @@ def convert_vcf(
235291
)
236292

237293

238-
@click.command
239-
@click.argument("vcfs", nargs=-1, required=True)
240-
@click.argument("zarr_path", type=click.Path())
241-
def validate(vcfs, zarr_path):
242-
"""
243-
Development only, do not use. Will be removed before release.
244-
"""
245-
# FIXME! Will silently not look at remaining VCFs
246-
vcf.validate(vcfs[0], zarr_path, show_progress=True)
247-
248-
249294
@version
250295
@click.group(cls=NaturalOrderGroup)
251296
def vcf2zarr():
@@ -309,7 +354,6 @@ def vcf2zarr():
309354
vcf2zarr.add_command(dexplode_init)
310355
vcf2zarr.add_command(dexplode_partition)
311356
vcf2zarr.add_command(dexplode_finalise)
312-
vcf2zarr.add_command(validate)
313357

314358

315359
@click.command(name="convert")

0 commit comments

Comments
 (0)