Skip to content

Commit d4b9686

Browse files
Missing test files
1 parent e35d473 commit d4b9686

File tree

2 files changed

+150
-0
lines changed

2 files changed

+150
-0
lines changed

tests/test_cli.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import click.testing as ct
2+
3+
from bio2zarr import cli
4+
5+
# NOTE just putting things together here to see what works.
6+
# Probably want to mock the module functions here to
7+
# avoid testing any real functionality.
8+
def test_vcf_summarise():
9+
runner = ct.CliRunner()
10+
result = runner.invoke(cli.vcf2zarr, "summarise", "filename")
11+
# FIXME not testing anything!

tests/test_pcvcf.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
import pytest
2+
import numpy as np
3+
import numpy.testing as nt
4+
5+
from bio2zarr import vcf
6+
7+
8+
class TestSmallExample:
9+
data_path = "tests/data/vcf/sample.vcf.gz"
10+
11+
# fmt: off
12+
columns = [
13+
'ALT', 'CHROM', 'FILTERS', 'FORMAT/DP', 'FORMAT/GQ',
14+
'FORMAT/GT', 'FORMAT/HQ', 'ID', 'INFO/AA', 'INFO/AC',
15+
'INFO/AF', 'INFO/AN', 'INFO/DB', 'INFO/DP', 'INFO/H2',
16+
'INFO/NS', 'POS', 'QUAL', 'REF'
17+
]
18+
# fmt: on
19+
20+
@pytest.fixture(scope="class")
21+
def pcvcf(self, tmp_path_factory):
22+
out = tmp_path_factory.mktemp("data") / "example.exploded"
23+
return vcf.explode([self.data_path], out)
24+
25+
def test_summary_table(self, pcvcf):
26+
data = pcvcf.summary_table()
27+
cols = [d["name"] for d in data]
28+
assert sorted(cols) == self.columns
29+
30+
def test_mapping_methods(self, pcvcf):
31+
assert len(pcvcf) == len(self.columns)
32+
assert pcvcf["ALT"] is pcvcf.columns["ALT"]
33+
assert list(iter(pcvcf)) == list(iter(pcvcf))
34+
35+
def test_num_partitions(self, pcvcf):
36+
assert pcvcf.num_partitions == 1
37+
38+
def test_num_records(self, pcvcf):
39+
assert pcvcf.num_records == 9
40+
41+
def test_POS(self, pcvcf):
42+
nt.assert_array_equal(
43+
[v[0] for v in pcvcf["POS"].values],
44+
[111, 112, 14370, 17330, 1110696, 1230237, 1234567, 1235237, 10],
45+
)
46+
47+
def test_REF(self, pcvcf):
48+
ref = ["A", "A", "G", "T", "A", "T", "G", "T", "AC"]
49+
assert pcvcf["REF"].values == ref
50+
51+
def test_ALT(self, pcvcf):
52+
alt = [
53+
["C"],
54+
["G"],
55+
["A"],
56+
["A"],
57+
["G", "T"],
58+
[],
59+
["GA", "GAC"],
60+
[],
61+
["A", "ATG", "C"],
62+
]
63+
assert [list(v) for v in pcvcf["ALT"].values] == alt
64+
65+
def test_INFO_NS(self, pcvcf):
66+
assert pcvcf["INFO/NS"].values == [None, None, 3, 3, 2, 3, 3, None, None]
67+
68+
69+
class TestGeneratedFieldsExample:
70+
data_path = "tests/data/vcf/field_type_combos.vcf.gz"
71+
72+
@pytest.fixture(scope="class")
73+
def pcvcf(self, tmp_path_factory):
74+
out = tmp_path_factory.mktemp("data") / "example.exploded"
75+
# import sgkit
76+
# from sgkit.io.vcf import vcf_to_zarr
77+
# vcf_to_zarr(self.data_path, "tmp/fields.vcf.sg", fields=
78+
# ["INFO/IS1", "INFO/IC2", "INFO/IS2", "INFO/ISR", "FORMAT/FS2"])
79+
# df = sgkit.load_dataset("tmp/fields.vcf.sg")
80+
# print(df["variant_IC2"])
81+
# print(df["variant_IC2"].values)
82+
return vcf.explode([self.data_path], out)
83+
84+
@pytest.fixture(scope="class")
85+
def schema(self, pcvcf):
86+
return vcf.ZarrConversionSpec.generate(pcvcf)
87+
88+
@pytest.mark.parametrize(
89+
("name", "dtype", "shape"),
90+
[
91+
("variant_II1", "i1", (208,)),
92+
("variant_II2", "i2", (208, 2)),
93+
("variant_IIA", "i2", (208, 2)),
94+
("variant_IIR", "i2", (208, 3)),
95+
("variant_IID", "i2", (208, 7)),
96+
("variant_IF1", "f4", (208,)),
97+
("variant_IF2", "f4", (208, 2)),
98+
("variant_IFA", "f4", (208, 2)),
99+
("variant_IFR", "f4", (208, 3)),
100+
("variant_IFD", "f4", (208, 9)),
101+
("variant_IC1", "U1", (208,)),
102+
("variant_IC2", "U1", (208, 2)),
103+
("variant_IS1", "O", (208,)),
104+
("variant_IS2", "O", (208, 2)),
105+
("call_FS2", "O", (208, 2, 2)),
106+
("call_FC2", "U1", (208, 2, 2)),
107+
],
108+
)
109+
def test_info_schemas(self, schema, name, dtype, shape):
110+
variables = [v for v in schema.variables if v.name == name]
111+
v = variables[0]
112+
assert v.dtype == dtype
113+
assert tuple(v.shape) == shape
114+
115+
def test_info_string1(self, pcvcf):
116+
non_missing = [v for v in pcvcf["INFO/IS1"].values if v is not None]
117+
assert non_missing[0] == "bc"
118+
assert non_missing[1] == "."
119+
120+
def test_info_char1(self, pcvcf):
121+
non_missing = [v for v in pcvcf["INFO/IC1"].values if v is not None]
122+
assert non_missing[0] == "f"
123+
assert non_missing[1] == "."
124+
125+
def test_info_string2(self, pcvcf):
126+
non_missing = [v for v in pcvcf["INFO/IS2"].values if v is not None]
127+
nt.assert_array_equal(non_missing[0], ["hij", "d"])
128+
nt.assert_array_equal(non_missing[1], [".", "d"])
129+
nt.assert_array_equal(non_missing[2], ["hij", "."])
130+
nt.assert_array_equal(non_missing[3], [".", "."])
131+
132+
def test_format_string1(self, pcvcf):
133+
non_missing = [v for v in pcvcf["FORMAT/FS1"].values if v is not None]
134+
nt.assert_array_equal(non_missing[0], [["bc"], ["."]])
135+
136+
def test_format_string2(self, pcvcf):
137+
non_missing = [v for v in pcvcf["FORMAT/FS2"].values if v is not None]
138+
nt.assert_array_equal(non_missing[0], [["bc", "op"], [".", "op"]])
139+
nt.assert_array_equal(non_missing[1], [["bc", "."], [".", "."]])

0 commit comments

Comments
 (0)