Skip to content

Commit 3cd1744

Browse files
Add single argument definition for VCFs, and declare path type
1 parent c054caa commit 3cd1744

File tree

2 files changed

+37
-58
lines changed

2 files changed

+37
-58
lines changed

bio2zarr/cli.py

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ def list_commands(self, ctx):
1818

1919

2020
# Common arguments/options
21+
vcfs = click.argument(
22+
"vcfs", nargs=-1, required=True, type=click.Path(exists=True, dir_okay=False)
23+
)
24+
25+
icf_path = click.argument("icf_path", type=click.Path())
26+
2127
verbose = click.option("-v", "--verbose", count=True, help="Increase verbosity")
2228

2329
version = click.version_option(version=f"{provenance.__version__}")
@@ -65,7 +71,7 @@ def setup_logging(verbosity):
6571

6672

6773
@click.command
68-
@click.argument("vcfs", nargs=-1, required=True)
74+
@vcfs
6975
@click.argument("zarr_path", type=click.Path())
7076
@verbose
7177
@worker_processes
@@ -85,8 +91,8 @@ def explode(vcfs, zarr_path, verbose, worker_processes, column_chunk_size):
8591

8692

8793
@click.command
88-
@click.argument("vcfs", nargs=-1, required=True)
89-
@click.argument("icf_path", type=click.Path())
94+
@vcfs
95+
@icf_path
9096
@click.argument("num_partitions", type=int)
9197
@column_chunk_size
9298
@verbose
@@ -95,7 +101,7 @@ def dexplode_init(
95101
vcfs, icf_path, num_partitions, column_chunk_size, verbose, worker_processes
96102
):
97103
"""
98-
Initial step for parallel conversion of VCF(s) to intermediate columnar format
104+
Initial step for distributed conversion of VCF(s) to intermediate columnar format
99105
over the requested number of partitions.
100106
"""
101107
setup_logging(verbose)
@@ -111,12 +117,12 @@ def dexplode_init(
111117

112118

113119
@click.command
114-
@click.argument("icf_path", type=click.Path())
120+
@icf_path
115121
@click.argument("partition", type=int)
116122
@verbose
117123
def dexplode_partition(icf_path, partition, verbose):
118124
"""
119-
Convert a VCF partition into intermediate columnar format. Must be called *after*
125+
Convert a VCF partition to intermediate columnar format. Must be called *after*
120126
the ICF path has been initialised with dexplode_init. Partition indexes must be
121127
from 0 (inclusive) to the number of partitions returned by dexplode_init (exclusive).
122128
"""
@@ -129,26 +135,26 @@ def dexplode_partition(icf_path, partition, verbose):
129135
@verbose
130136
def dexplode_finalise(path, verbose):
131137
"""
132-
Final step for parallel conversion of VCF(s) to intermediate columnar format
138+
Final step for distributed conversion of VCF(s) to intermediate columnar format.
133139
"""
134140
setup_logging(verbose)
135141
vcf.explode_finalise(path)
136142

137143

138144
@click.command
139-
@click.argument("icf_path", type=click.Path())
145+
@click.argument("path", type=click.Path())
140146
@verbose
141-
def inspect(icf_path, verbose):
147+
def inspect(path, verbose):
142148
"""
143-
Inspect an intermediate format or Zarr path.
149+
Inspect an intermediate columnar format or Zarr path.
144150
"""
145151
setup_logging(verbose)
146-
data = vcf.inspect(icf_path)
152+
data = vcf.inspect(path)
147153
click.echo(tabulate.tabulate(data, headers="keys"))
148154

149155

150156
@click.command
151-
@click.argument("icf_path", type=click.Path())
157+
@icf_path
152158
def mkschema(icf_path):
153159
"""
154160
Generate a schema for zarr encoding
@@ -158,7 +164,7 @@ def mkschema(icf_path):
158164

159165

160166
@click.command
161-
@click.argument("icf_path", type=click.Path())
167+
@icf_path
162168
@click.argument("zarr_path", type=click.Path())
163169
@verbose
164170
@click.option("-s", "--schema", default=None, type=click.Path(exists=True))
@@ -212,7 +218,7 @@ def encode(
212218

213219

214220
@click.command(name="convert")
215-
@click.argument("vcfs", nargs=-1, required=True)
221+
@vcfs
216222
@click.argument("zarr_path", type=click.Path())
217223
@variants_chunk_size
218224
@samples_chunk_size
@@ -235,17 +241,6 @@ def convert_vcf(
235241
)
236242

237243

238-
@click.command
239-
@click.argument("vcfs", nargs=-1, required=True)
240-
@click.argument("zarr_path", type=click.Path())
241-
def validate(vcfs, zarr_path):
242-
"""
243-
Development only, do not use. Will be removed before release.
244-
"""
245-
# FIXME! Will silently not look at remaining VCFs
246-
vcf.validate(vcfs[0], zarr_path, show_progress=True)
247-
248-
249244
@version
250245
@click.group(cls=NaturalOrderGroup)
251246
def vcf2zarr():
@@ -309,7 +304,6 @@ def vcf2zarr():
309304
vcf2zarr.add_command(dexplode_init)
310305
vcf2zarr.add_command(dexplode_partition)
311306
vcf2zarr.add_command(dexplode_finalise)
312-
vcf2zarr.add_command(validate)
313307

314308

315309
@click.command(name="convert")

tests/test_cli.py

Lines changed: 17 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -9,37 +9,39 @@
99

1010

1111
class TestWithMocks:
12+
vcf_path = "tests/data/vcf/sample.vcf.gz"
13+
1214
@mock.patch("bio2zarr.vcf.explode")
13-
def test_vcf_explode(self, mocked):
15+
def test_vcf_explode(self, mocked, tmp_path):
1416
runner = ct.CliRunner(mix_stderr=False)
1517
result = runner.invoke(
16-
cli.vcf2zarr, ["explode", "source", "dest"], catch_exceptions=False
18+
cli.vcf2zarr, f"explode {self.vcf_path} {tmp_path}", catch_exceptions=False
1719
)
1820
assert result.exit_code == 0
1921
assert len(result.stdout) == 0
2022
assert len(result.stderr) == 0
2123
mocked.assert_called_once_with(
22-
("source",),
23-
"dest",
24+
(self.vcf_path,),
25+
str(tmp_path),
2426
column_chunk_size=64,
2527
worker_processes=1,
2628
show_progress=True,
2729
)
2830

29-
def test_vcf_dexplode_init(self):
31+
def test_vcf_dexplode_init(self, tmp_path):
3032
runner = ct.CliRunner(mix_stderr=False)
3133
with mock.patch("bio2zarr.vcf.explode_init", return_value=5) as mocked:
3234
result = runner.invoke(
3335
cli.vcf2zarr,
34-
["dexplode-init", "source", "dest", "5"],
36+
f"dexplode-init {self.vcf_path} {tmp_path} 5",
3537
catch_exceptions=False,
3638
)
3739
assert result.exit_code == 0
3840
assert len(result.stderr) == 0
3941
assert result.stdout == "5\n"
4042
mocked.assert_called_once_with(
41-
"dest",
42-
("source",),
43+
str(tmp_path),
44+
(self.vcf_path,),
4345
target_num_partitions=5,
4446
worker_processes=1,
4547
column_chunk_size=64,
@@ -125,38 +127,21 @@ def test_convert_vcf(self, mocked):
125127
runner = ct.CliRunner(mix_stderr=False)
126128
result = runner.invoke(
127129
cli.vcf2zarr,
128-
["convert", "vcf_path", "zarr_path"],
130+
f"convert {self.vcf_path} zarr_path",
129131
catch_exceptions=False,
130132
)
131133
assert result.exit_code == 0
132134
assert len(result.stdout) == 0
133135
assert len(result.stderr) == 0
134136
mocked.assert_called_once_with(
135-
("vcf_path",),
137+
(self.vcf_path,),
136138
"zarr_path",
137139
variants_chunk_size=None,
138140
samples_chunk_size=None,
139141
worker_processes=1,
140142
show_progress=True,
141143
)
142144

143-
@mock.patch("bio2zarr.vcf.validate")
144-
def test_validate(self, mocked):
145-
runner = ct.CliRunner(mix_stderr=False)
146-
result = runner.invoke(
147-
cli.vcf2zarr,
148-
["validate", "vcf_path", "zarr_path"],
149-
catch_exceptions=False,
150-
)
151-
assert result.exit_code == 0
152-
assert len(result.stdout) == 0
153-
assert len(result.stderr) == 0
154-
mocked.assert_called_once_with(
155-
"vcf_path",
156-
"zarr_path",
157-
show_progress=True,
158-
)
159-
160145
@mock.patch("bio2zarr.plink.convert")
161146
def test_convert_plink(self, mocked):
162147
runner = ct.CliRunner(mix_stderr=False)
@@ -177,14 +162,14 @@ def test_convert_plink(self, mocked):
177162

178163

179164
class TestVcfEndToEnd:
180-
data_path = "tests/data/vcf/sample.vcf.gz"
165+
vcf_path = "tests/data/vcf/sample.vcf.gz"
181166

182167
def test_dexplode(self, tmp_path):
183168
icf_path = tmp_path / "icf"
184169
runner = ct.CliRunner(mix_stderr=False)
185170
result = runner.invoke(
186171
cli.vcf2zarr,
187-
f"dexplode-init {self.data_path} {icf_path} 5",
172+
f"dexplode-init {self.vcf_path} {icf_path} 5",
188173
catch_exceptions=False,
189174
)
190175
assert result.exit_code == 0
@@ -213,7 +198,7 @@ def test_explode(self, tmp_path):
213198
icf_path = tmp_path / "icf"
214199
runner = ct.CliRunner(mix_stderr=False)
215200
result = runner.invoke(
216-
cli.vcf2zarr, f"explode {self.data_path} {icf_path}", catch_exceptions=False
201+
cli.vcf2zarr, f"explode {self.vcf_path} {icf_path}", catch_exceptions=False
217202
)
218203
assert result.exit_code == 0
219204
result = runner.invoke(
@@ -228,7 +213,7 @@ def test_encode(self, tmp_path):
228213
zarr_path = tmp_path / "zarr"
229214
runner = ct.CliRunner(mix_stderr=False)
230215
result = runner.invoke(
231-
cli.vcf2zarr, f"explode {self.data_path} {icf_path}", catch_exceptions=False
216+
cli.vcf2zarr, f"explode {self.vcf_path} {icf_path}", catch_exceptions=False
232217
)
233218
assert result.exit_code == 0
234219
result = runner.invoke(
@@ -247,7 +232,7 @@ def test_convert(self, tmp_path):
247232
runner = ct.CliRunner(mix_stderr=False)
248233
result = runner.invoke(
249234
cli.vcf2zarr,
250-
f"convert {self.data_path} {zarr_path}",
235+
f"convert {self.vcf_path} {zarr_path}",
251236
catch_exceptions=False,
252237
)
253238
assert result.exit_code == 0

0 commit comments

Comments
 (0)