Skip to content

Commit 44c142e

Browse files
Improve inspect error handling
Closes #303
1 parent 488150b commit 44c142e

File tree

4 files changed: 121 additions (+121) and 3 deletions (-3)

bio2zarr/cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -319,7 +319,7 @@ def dexplode_finalise(icf_path, verbose):
319319

320320

321321
@click.command
322-
@click.argument("path", type=click.Path())
322+
@click.argument("path", type=click.Path(exists=True))
323323
@verbose
324324
def inspect(path, verbose):
325325
"""

bio2zarr/vcf2zarr/vcz.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,15 @@
2323

2424
def inspect(path):
2525
path = pathlib.Path(path)
26-
# TODO add support for the Zarr format also
26+
if not path.exists():
27+
raise ValueError(f"Path not found: {path}")
2728
if (path / "metadata.json").exists():
2829
obj = icf.IntermediateColumnarFormat(path)
30+
# NOTE: this is too strict, we should support more general Zarrs, see #276
2931
elif (path / ".zmetadata").exists():
3032
obj = VcfZarr(path)
3133
else:
32-
raise ValueError("Format not recognised") # NEEDS TEST
34+
raise ValueError(f"{path} not in ICF or VCF Zarr format")
3335
return obj.summary_table()
3436

3537

tests/test_cli.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -807,6 +807,19 @@ def test_convert(self, tmp_path):
807807
assert "variant_position" in result.stdout
808808

809809

810+
class TestBadPaths:
811+
@pytest.mark.parametrize("bad_path", ["PPP", "/dev/no_such_thing"])
812+
def test_inspect(self, bad_path):
813+
runner = ct.CliRunner(mix_stderr=False)
814+
result = runner.invoke(
815+
cli.vcf2zarr_main,
816+
f"inspect {bad_path}",
817+
catch_exceptions=False,
818+
)
819+
assert result.exit_code == 2
820+
assert "Invalid value for" in result.stderr
821+
822+
810823
class TestVcfPartition:
811824
path = "tests/data/vcf/NA12878.prod.chr20snippet.g.vcf.gz"
812825
paths = (path, "tests/data/vcf/1kg_2020_chrM.vcf.gz")

tests/test_vcz.py

Lines changed: 103 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
import json
22

3+
import numpy.testing as nt
4+
import pandas as pd
35
import pysam
46
import pytest
57
import sgkit as sg
@@ -662,3 +664,104 @@ def test_removed_samples(self, tmp_path, schema, icf_path, samples):
662664
json.dump(d, f)
663665
with pytest.raises(ValueError, match="Subsetting or reordering samples"):
664666
vcf2zarr.encode(icf_path, tmp_path / "z", schema_path=schema_path)
667+
668+
669+
class TestInspect:
670+
def test_icf(self, icf_path):
671+
df = pd.DataFrame(vcz_mod.inspect(icf_path))
672+
assert sorted(list(df)) == sorted(
673+
[
674+
"name",
675+
"type",
676+
"chunks",
677+
"size",
678+
"compressed",
679+
"max_n",
680+
"min_val",
681+
"max_val",
682+
]
683+
)
684+
nt.assert_array_equal(
685+
sorted(df["name"].values),
686+
sorted(
687+
[
688+
"CHROM",
689+
"POS",
690+
"QUAL",
691+
"ID",
692+
"FILTERS",
693+
"REF",
694+
"ALT",
695+
"rlen",
696+
"INFO/NS",
697+
"INFO/AN",
698+
"INFO/AC",
699+
"INFO/DP",
700+
"INFO/AF",
701+
"INFO/AA",
702+
"INFO/DB",
703+
"INFO/H2",
704+
"FORMAT/GT",
705+
"FORMAT/GQ",
706+
"FORMAT/DP",
707+
"FORMAT/HQ",
708+
]
709+
),
710+
)
711+
712+
def test_vcz(self, zarr_path):
713+
df = pd.DataFrame(vcz_mod.inspect(zarr_path))
714+
cols = [
715+
"name",
716+
"dtype",
717+
"stored",
718+
"size",
719+
"ratio",
720+
"nchunks",
721+
"chunk_size",
722+
"avg_chunk_stored",
723+
"shape",
724+
"chunk_shape",
725+
"compressor",
726+
"filters",
727+
]
728+
assert sorted(list(df)) == sorted(cols)
729+
fields = [
730+
"/call_genotype",
731+
"/call_HQ",
732+
"/call_genotype_mask",
733+
"/call_GQ",
734+
"/call_DP",
735+
"/call_genotype_phased",
736+
"/variant_allele",
737+
"/variant_AC",
738+
"/variant_AF",
739+
"/region_index",
740+
"/variant_filter",
741+
"/variant_id",
742+
"/variant_contig",
743+
"/variant_AA",
744+
"/variant_quality",
745+
"/variant_position",
746+
"/variant_AN",
747+
"/variant_length",
748+
"/variant_NS",
749+
"/variant_DB",
750+
"/variant_DP",
751+
"/variant_H2",
752+
"/sample_id",
753+
"/variant_id_mask",
754+
"/filter_id",
755+
"/contig_id",
756+
]
757+
nt.assert_array_equal(sorted(df["name"]), sorted(fields))
758+
759+
@pytest.mark.parametrize("bad_path", ["/NO_WAY", "TTTTTT"])
760+
def test_no_such_path(self, bad_path):
761+
with pytest.raises(ValueError, match=f"Path not found: {bad_path}"):
762+
vcz_mod.inspect(bad_path)
763+
764+
@pytest.mark.parametrize("path", ["./", "tests/data/vcf/sample.vcf.gz"])
765+
def test_unknown_format(self, path):
766+
with pytest.raises(ValueError, match="not in ICF or VCF Zarr format"):
767+
vcz_mod.inspect(path)

0 commit comments

Comments
 (0)