Skip to content

Commit 63085b6

Browse files
Improve inspect error handling
Closes #303
1 parent 488150b commit 63085b6

File tree

4 files changed

+117
-3
lines changed

4 files changed

+117
-3
lines changed

bio2zarr/cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -319,7 +319,7 @@ def dexplode_finalise(icf_path, verbose):
319319

320320

321321
@click.command
322-
@click.argument("path", type=click.Path())
322+
@click.argument("path", type=click.Path(exists=True))
323323
@verbose
324324
def inspect(path, verbose):
325325
"""

bio2zarr/vcf2zarr/vcz.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,15 @@
2323

2424
def inspect(path):
2525
path = pathlib.Path(path)
26-
# TODO add support for the Zarr format also
26+
if not path.exists():
27+
raise ValueError(f"Path not found: {path}")
2728
if (path / "metadata.json").exists():
2829
obj = icf.IntermediateColumnarFormat(path)
30+
# NOTE: this is too strict, we should support more general Zarrs, see #276
2931
elif (path / ".zmetadata").exists():
3032
obj = VcfZarr(path)
3133
else:
32-
raise ValueError("Format not recognised") # NEEDS TEST
34+
raise ValueError(f"{path} not in ICF or VCF Zarr format")
3335
return obj.summary_table()
3436

3537

tests/test_cli.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -807,6 +807,19 @@ def test_convert(self, tmp_path):
807807
assert "variant_position" in result.stdout
808808

809809

810+
class TestBadPaths:
811+
@pytest.mark.parametrize("bad_path", ["PPP", "/dev/no_such_thing"])
812+
def test_inspect(self, bad_path):
813+
runner = ct.CliRunner(mix_stderr=False)
814+
result = runner.invoke(
815+
cli.vcf2zarr_main,
816+
f"inspect {bad_path}",
817+
catch_exceptions=False,
818+
)
819+
assert result.exit_code == 2
820+
assert "Invalid value for" in result.stderr
821+
822+
810823
class TestVcfPartition:
811824
path = "tests/data/vcf/NA12878.prod.chr20snippet.g.vcf.gz"
812825
paths = (path, "tests/data/vcf/1kg_2020_chrM.vcf.gz")

tests/test_vcz.py

Lines changed: 99 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
import json
22

3+
import numpy.testing as nt
4+
import pandas as pd
35
import pysam
46
import pytest
57
import sgkit as sg
@@ -662,3 +664,100 @@ def test_removed_samples(self, tmp_path, schema, icf_path, samples):
662664
json.dump(d, f)
663665
with pytest.raises(ValueError, match="Subsetting or reordering samples"):
664666
vcf2zarr.encode(icf_path, tmp_path / "z", schema_path=schema_path)
667+
668+
669+
class TestInspect:
670+
def test_icf(self, icf_path):
671+
df = pd.DataFrame(vcz_mod.inspect(icf_path))
672+
assert list(df) == [
673+
"name",
674+
"type",
675+
"chunks",
676+
"size",
677+
"compressed",
678+
"max_n",
679+
"min_val",
680+
"max_val",
681+
]
682+
nt.assert_array_equal(
683+
df["name"].values,
684+
[
685+
"CHROM",
686+
"POS",
687+
"QUAL",
688+
"ID",
689+
"FILTERS",
690+
"REF",
691+
"ALT",
692+
"rlen",
693+
"INFO/NS",
694+
"INFO/AN",
695+
"INFO/AC",
696+
"INFO/DP",
697+
"INFO/AF",
698+
"INFO/AA",
699+
"INFO/DB",
700+
"INFO/H2",
701+
"FORMAT/GT",
702+
"FORMAT/GQ",
703+
"FORMAT/DP",
704+
"FORMAT/HQ",
705+
],
706+
)
707+
708+
def test_vcz(self, zarr_path):
709+
df = pd.DataFrame(vcz_mod.inspect(zarr_path))
710+
cols = [
711+
"name",
712+
"dtype",
713+
"stored",
714+
"size",
715+
"ratio",
716+
"nchunks",
717+
"chunk_size",
718+
"avg_chunk_stored",
719+
"shape",
720+
"chunk_shape",
721+
"compressor",
722+
"filters",
723+
]
724+
assert list(df) == cols
725+
fields = [
726+
"/call_genotype",
727+
"/call_HQ",
728+
"/call_genotype_mask",
729+
"/call_GQ",
730+
"/call_DP",
731+
"/call_genotype_phased",
732+
"/variant_allele",
733+
"/variant_AC",
734+
"/variant_AF",
735+
"/region_index",
736+
"/variant_filter",
737+
"/variant_id",
738+
"/variant_contig",
739+
"/variant_AA",
740+
"/variant_quality",
741+
"/variant_position",
742+
"/variant_AN",
743+
"/variant_length",
744+
"/variant_NS",
745+
"/variant_DB",
746+
"/variant_DP",
747+
"/variant_H2",
748+
"/sample_id",
749+
"/variant_id_mask",
750+
"/filter_id",
751+
"/contig_id",
752+
]
753+
nt.assert_array_equal(df["name"], fields)
754+
755+
@pytest.mark.parametrize("bad_path", ["/NO_WAY", "TTTTTT"])
756+
def test_no_such_path(self, bad_path):
757+
with pytest.raises(ValueError, match=f"Path not found: {bad_path}"):
758+
vcz_mod.inspect(bad_path)
759+
760+
@pytest.mark.parametrize("path", ["./", "tests/data/vcf/sample.vcf.gz"])
761+
def test_unknown_format(self, path):
762+
with pytest.raises(ValueError, match="not in ICF or VCF Zarr format"):
763+
vcz_mod.inspect(path)

0 commit comments

Comments
 (0)