Skip to content

Commit 7760c57

Browse files
Merge pull request #198 from jeromekelleher/refactor-more
Refactor to use vcf2zarr module
2 parents e42cc38 + 51d71e8 commit 7760c57

File tree

13 files changed

+295
-245
lines changed

13 files changed

+295
-245
lines changed

bio2zarr/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ def bio2zarr():
1414
# install individual commands as console scripts. However, this
1515
# is handy for development and for those whose PATHs aren't set
1616
# up in the right way.
17-
bio2zarr.add_command(cli.vcf2zarr)
17+
bio2zarr.add_command(cli.vcf2zarr_main)
1818
bio2zarr.add_command(cli.plink2zarr)
1919
bio2zarr.add_command(cli.vcfpartition)
2020

bio2zarr/cli.py

Lines changed: 27 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,8 @@
88
import numcodecs
99
import tabulate
1010

11-
from . import icf, plink, provenance, vcf, vcf_utils
11+
from . import plink, provenance, vcf2zarr, vcf_utils
12+
from .vcf2zarr import icf as icf_mod
1213

1314
logger = logging.getLogger(__name__)
1415

@@ -167,7 +168,7 @@ def check_overwrite_dir(path, force):
167168
def get_compressor(cname):
168169
if cname is None:
169170
return None
170-
config = icf.ICF_DEFAULT_COMPRESSOR.get_config()
171+
config = icf_mod.ICF_DEFAULT_COMPRESSOR.get_config()
171172
config["cname"] = cname
172173
return numcodecs.get_codec(config)
173174

@@ -198,7 +199,7 @@ def explode(
198199
"""
199200
setup_logging(verbose)
200201
check_overwrite_dir(icf_path, force)
201-
icf.explode(
202+
vcf2zarr.explode(
202203
icf_path,
203204
vcfs,
204205
worker_processes=worker_processes,
@@ -235,7 +236,7 @@ def dexplode_init(
235236
"""
236237
setup_logging(verbose)
237238
check_overwrite_dir(icf_path, force)
238-
work_summary = icf.explode_init(
239+
work_summary = vcf2zarr.explode_init(
239240
icf_path,
240241
vcfs,
241242
target_num_partitions=num_partitions,
@@ -263,7 +264,7 @@ def dexplode_partition(icf_path, partition, verbose, one_based):
263264
setup_logging(verbose)
264265
if one_based:
265266
partition -= 1
266-
icf.explode_partition(icf_path, partition)
267+
vcf2zarr.explode_partition(icf_path, partition)
267268

268269

269270
@click.command
@@ -274,7 +275,7 @@ def dexplode_finalise(icf_path, verbose):
274275
Final step for distributed conversion of VCF(s) to intermediate columnar format.
275276
"""
276277
setup_logging(verbose)
277-
icf.explode_finalise(icf_path)
278+
vcf2zarr.explode_finalise(icf_path)
278279

279280

280281
@click.command
@@ -285,7 +286,7 @@ def inspect(path, verbose):
285286
Inspect an intermediate columnar format or Zarr path.
286287
"""
287288
setup_logging(verbose)
288-
data = vcf.inspect(path)
289+
data = vcf2zarr.inspect(path)
289290
click.echo(tabulate.tabulate(data, headers="keys"))
290291

291292

@@ -296,7 +297,7 @@ def mkschema(icf_path):
296297
Generate a schema for zarr encoding
297298
"""
298299
stream = click.get_text_stream("stdout")
299-
vcf.mkschema(icf_path, stream)
300+
vcf2zarr.mkschema(icf_path, stream)
300301

301302

302303
@click.command
@@ -327,7 +328,7 @@ def encode(
327328
"""
328329
setup_logging(verbose)
329330
check_overwrite_dir(zarr_path, force)
330-
vcf.encode(
331+
vcf2zarr.encode(
331332
icf_path,
332333
zarr_path,
333334
schema_path=schema,
@@ -378,7 +379,7 @@ def dencode_init(
378379
"""
379380
setup_logging(verbose)
380381
check_overwrite_dir(zarr_path, force)
381-
work_summary = vcf.encode_init(
382+
work_summary = vcf2zarr.encode_init(
382383
icf_path,
383384
zarr_path,
384385
target_num_partitions=num_partitions,
@@ -406,7 +407,7 @@ def dencode_partition(zarr_path, partition, verbose, one_based):
406407
setup_logging(verbose)
407408
if one_based:
408409
partition -= 1
409-
vcf.encode_partition(zarr_path, partition)
410+
vcf2zarr.encode_partition(zarr_path, partition)
410411

411412

412413
@click.command
@@ -417,7 +418,7 @@ def dencode_finalise(zarr_path, verbose):
417418
Final step for distributed conversion of ICF to VCF Zarr.
418419
"""
419420
setup_logging(verbose)
420-
vcf.encode_finalise(zarr_path, show_progress=True)
421+
vcf2zarr.encode_finalise(zarr_path, show_progress=True)
421422

422423

423424
@click.command(name="convert")
@@ -442,7 +443,7 @@ def convert_vcf(
442443
"""
443444
setup_logging(verbose)
444445
check_overwrite_dir(zarr_path, force)
445-
vcf.convert(
446+
vcf2zarr.convert(
446447
vcfs,
447448
zarr_path,
448449
variants_chunk_size=variants_chunk_size,
@@ -453,8 +454,8 @@ def convert_vcf(
453454

454455

455456
@version
456-
@click.group(cls=NaturalOrderGroup)
457-
def vcf2zarr():
457+
@click.group(cls=NaturalOrderGroup, name="vcf2zarr")
458+
def vcf2zarr_main():
458459
"""
459460
Convert VCF file(s) to the vcfzarr format.
460461
@@ -506,18 +507,17 @@ def vcf2zarr():
506507
"""
507508

508509

509-
# TODO figure out how to get click to list these in the given order.
510-
vcf2zarr.add_command(convert_vcf)
511-
vcf2zarr.add_command(inspect)
512-
vcf2zarr.add_command(explode)
513-
vcf2zarr.add_command(mkschema)
514-
vcf2zarr.add_command(encode)
515-
vcf2zarr.add_command(dexplode_init)
516-
vcf2zarr.add_command(dexplode_partition)
517-
vcf2zarr.add_command(dexplode_finalise)
518-
vcf2zarr.add_command(dencode_init)
519-
vcf2zarr.add_command(dencode_partition)
520-
vcf2zarr.add_command(dencode_finalise)
510+
vcf2zarr_main.add_command(convert_vcf)
511+
vcf2zarr_main.add_command(inspect)
512+
vcf2zarr_main.add_command(explode)
513+
vcf2zarr_main.add_command(mkschema)
514+
vcf2zarr_main.add_command(encode)
515+
vcf2zarr_main.add_command(dexplode_init)
516+
vcf2zarr_main.add_command(dexplode_partition)
517+
vcf2zarr_main.add_command(dexplode_finalise)
518+
vcf2zarr_main.add_command(dencode_init)
519+
vcf2zarr_main.add_command(dencode_partition)
520+
vcf2zarr_main.add_command(dencode_finalise)
521521

522522

523523
@click.command(name="convert")

bio2zarr/vcf2zarr/__init__.py

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
from .icf import (
2+
IntermediateColumnarFormat,
3+
explode,
4+
explode_finalise,
5+
explode_init,
6+
explode_partition,
7+
)
8+
from .vcz import (
9+
VcfZarrSchema,
10+
convert,
11+
encode,
12+
encode_finalise,
13+
encode_init,
14+
encode_partition,
15+
inspect,
16+
mkschema,
17+
)
18+
from .verification import verify
19+
20+
# NOTE some of these aren't intended to be part of the external
21+
# interface (like IntermediateColumnarFormat), but putting
22+
# them into the list to keep the lint nagging under control
23+
__all__ = [
24+
"IntermediateColumnarFormat",
25+
"explode",
26+
"explode_finalise",
27+
"explode_init",
28+
"explode_partition",
29+
"VcfZarrSchema",
30+
"convert",
31+
"encode",
32+
"encode_finalise",
33+
"encode_init",
34+
"encode_partition",
35+
"inspect",
36+
"mkschema",
37+
"verify",
38+
]

bio2zarr/icf.py renamed to bio2zarr/vcf2zarr/icf.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
import numcodecs
1414
import numpy as np
1515

16-
from . import constants, core, provenance, vcf_utils
16+
from .. import constants, core, provenance, vcf_utils
1717

1818
logger = logging.getLogger(__name__)
1919

bio2zarr/vcf.py renamed to bio2zarr/vcf2zarr/vcz.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,8 @@
1212
import numpy as np
1313
import zarr
1414

15-
from . import constants, core, icf, provenance
15+
from .. import constants, core, provenance
16+
from . import icf
1617

1718
logger = logging.getLogger(__name__)
1819

bio2zarr/verification.py renamed to bio2zarr/vcf2zarr/verification.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import tqdm
55
import zarr
66

7-
from . import constants
7+
from .. import constants
88

99

1010
def assert_all_missing_float(a):
@@ -146,8 +146,7 @@ def assert_format_val_equal(vcf_val, zarr_val, vcf_type):
146146
nt.assert_equal(vcf_val, zarr_val)
147147

148148

149-
# TODO rename to "verify"
150-
def validate(vcf_path, zarr_path, show_progress=False):
149+
def verify(vcf_path, zarr_path, show_progress=False):
151150
store = zarr.DirectoryStore(zarr_path)
152151

153152
root = zarr.group(store=store)

docs/cli.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
# Command Line Interface
22

3-
% A note on cross references... There's some weird long-standing problem with
3+
% A note on cross references... There's some weird long-standing problem with
44
% cross referencing program values in Sphinx, which means that we can't use
55
% the built-in labels generated by sphinx-click. We can make our own explicit
66
% targets, but these have to have slightly weird names to avoid conflicting
@@ -9,8 +9,8 @@
99

1010
```{eval-rst}
1111
.. _cmd-vcf2zarr:
12-
.. click:: bio2zarr.cli:vcf2zarr
13-
:prog: vcf2zarr
12+
.. click:: bio2zarr.cli:vcf2zarr_main
13+
:prog: vcf2zarr
1414
:nested: short
1515
1616
.. _cmd-vcf2zarr-convert:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ repository = "https://github.com/sgkit-dev/bio2zarr"
4545
documentation = "https://sgkit-dev.github.io/bio2zarr/"
4646

4747
[project.scripts]
48-
vcf2zarr = "bio2zarr.cli:vcf2zarr"
48+
vcf2zarr = "bio2zarr.cli:vcf2zarr_main"
4949
vcfpartition = "bio2zarr.cli:vcfpartition"
5050

5151
[project.optional-dependencies]

0 commit comments

Comments
 (0)