Skip to content

Commit 0152662

Browse files
Refactored index tests into vcf_utils class
1 parent 8bcffbb commit 0152662

File tree

3 files changed

+69
-81
lines changed

3 files changed

+69
-81
lines changed

tests/test_csi.py

Lines changed: 0 additions & 41 deletions
This file was deleted.

tests/test_tbi.py

Lines changed: 0 additions & 40 deletions
This file was deleted.

tests/test_vcf_utils.py

Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,69 @@
1+
import pytest
2+
from cyvcf2 import VCF
3+
4+
from bio2zarr.csi import read_csi
5+
from bio2zarr.tbi import read_tabix
6+
from bio2zarr.vcf_partition import get_csi_path
7+
from bio2zarr.vcf_partition import get_tabix_path
8+
9+
from .utils import count_variants, path_for_test
10+
11+
12+
class TestCsiIndex:
    """Tests for reading CSI index files via ``read_csi``."""

    @pytest.mark.parametrize(
        "vcf_file",
        [
            "CEUTrio.20.21.gatk3.4.csi.g.vcf.bgz",
        ],
    )
    def test_record_counts(self, shared_datadir, vcf_file):
        """Check per-contig record counts in the CSI index against the VCF."""
        vcf_path = path_for_test(shared_datadir, vcf_file, True)
        csi_path = get_csi_path(vcf_path)
        assert csi_path is not None
        csi = read_csi(csi_path)

        # record_counts is looked up by each contig's position in the VCF's
        # seqnames list.
        for i, contig in enumerate(VCF(vcf_path).seqnames):
            assert csi.record_counts[i] == count_variants(vcf_path, contig)

    # This test used to be defined twice, verbatim, in this class; the second
    # definition silently replaced the first. Only one copy is kept.
    @pytest.mark.parametrize(
        "file",
        ["CEUTrio.20.21.gatk3.4.g.vcf.bgz", "CEUTrio.20.21.gatk3.4.g.vcf.bgz.tbi"],
    )
    def test_invalid_csi(self, shared_datadir, file):
        """Check that ``read_csi`` raises ``ValueError`` for non-CSI inputs."""
        with pytest.raises(ValueError, match=r"File not in CSI format."):
            read_csi(path_for_test(shared_datadir, file, True))
44+
45+
46+
class TestTabixIndex:
    """Tests for reading Tabix index files via ``read_tabix``."""

    @pytest.mark.parametrize("vcf_file", ["CEUTrio.20.21.gatk3.4.g.vcf.bgz"])
    def test_record_counts(self, shared_datadir, vcf_file):
        """Compare each sequence's record count in the index with the VCF."""
        vcf = path_for_test(shared_datadir, vcf_file, True)
        index_path = get_tabix_path(vcf)
        assert index_path is not None

        index = read_tabix(index_path)
        for position, name in enumerate(index.sequence_names):
            indexed_count = index.record_counts[position]
            assert indexed_count == count_variants(vcf, name)

    @pytest.mark.parametrize(
        "file",
        [
            "CEUTrio.20.21.gatk3.4.g.vcf.bgz",
            "CEUTrio.20.21.gatk3.4.csi.g.vcf.bgz.csi",
        ],
    )
    def test_read_tabix__invalid_tbi(self, shared_datadir, file):
        """Expect ``read_tabix`` to raise ``ValueError`` for non-Tabix inputs."""
        with pytest.raises(ValueError, match=r"File not in Tabix format."):
            read_tabix(path_for_test(shared_datadir, file, True))

0 commit comments

Comments
 (0)