Skip to content

Commit 203f771

Browse files
committed
Loosen test for checking if VCF headers are the same
1 parent 8b78e08 commit 203f771

File tree

2 files changed

+46
-13
lines changed

2 files changed

+46
-13
lines changed

tests/test_vcf_writer.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import numpy as np
77
import pytest
88
import zarr
9-
from bio2zarr import icf
109
from cyvcf2 import VCF
1110
from numpy.testing import assert_array_equal
1211

@@ -301,15 +300,9 @@ def test_write_vcf__header_flags(tmp_path):
301300
assert_vcfs_close(original, output)
302301

303302

304-
def test_write_vcf__generate_header(tmp_path):
303+
def test_write_vcf__generate_header():
305304
original = pathlib.Path("tests/data/vcf") / "sample.vcf.gz"
306-
# don't use cache here since we mutate the vcz
307-
vcz = tmp_path.joinpath("intermediate.vcz")
308-
icf.convert([original], vcz, worker_processes=0, local_alleles=False)
309-
310-
# remove vcf_header
311-
root = zarr.open(vcz, mode="r+")
312-
del root.attrs["vcf_header"]
305+
vcz = vcz_path_cache(original)
313306

314307
output_header = StringIO()
315308
write_vcf(vcz, output_header, header_only=True, no_version=True)
@@ -324,9 +317,9 @@ def test_write_vcf__generate_header(tmp_path):
324317
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
325318
##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
326319
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
327-
##FILTER=<ID=PASS,Description="">
328-
##FILTER=<ID=s50,Description="">
329-
##FILTER=<ID=q10,Description="">
320+
##FILTER=<ID=PASS,Description="All filters passed">
321+
##FILTER=<ID=s50,Description="Less than 50% of samples have data">
322+
##FILTER=<ID=q10,Description="Quality below 10">
330323
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
331324
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
332325
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
@@ -338,6 +331,7 @@ def test_write_vcf__generate_header(tmp_path):
338331
""" # noqa: E501
339332

340333
# substitute value of source
334+
root = zarr.open(vcz, mode="r+")
341335
expected_vcf_header = expected_vcf_header.format(root.attrs["source"])
342336

343337
assert output_header.getvalue() == expected_vcf_header

tests/utils.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,45 @@ def normalise_info_missingness(info_dict, key):
2929
return value
3030

3131

32+
def _get_headers(vcf, header_type):
    """Collect the header records of one type from a VCF, keyed by their ID.

    Parameters
    ----------
    vcf
        Object exposing ``header_iter()``. Each yielded record must support
        ``record["ID"]``, ``record["HeaderType"]`` and
        ``record.info(extra=True)``.
    header_type
        Value of ``HeaderType`` to select, e.g. ``"INFO"`` or ``"CONTIG"``.

    Returns
    -------
    dict
        Maps the ID of every matching record to its cleaned-up info dict.
    """

    def to_dict(header_field):
        d = header_field.info(extra=True)
        # remove IDX since we don't care about ordering; use pop so a record
        # without an IDX entry does not raise KeyError
        d.pop(b"IDX", None)

        # cyvcf2 duplicates some keys as strings and bytes, so remove the bytes one
        # (iterate over a snapshot of the keys because we delete while looping)
        for k in list(d):
            if isinstance(k, bytes) and k.decode("utf-8") in d:
                del d[k]
        return d

    return {
        field["ID"]: to_dict(field)
        for field in vcf.header_iter()
        if field["HeaderType"] == header_type
    }
48+
49+
50+
def _assert_vcf_headers_equivalent(vcf1, vcf2):
    """Assert that two VCFs declare equivalent header records.

    Only INFO, FORMAT, FILTER and CONTIG records are compared. They are
    matched by ID, so their order within the header does not matter. All
    other header lines are ignored.

    Raises
    ------
    AssertionError
        If the records of any compared type differ; the message names the
        header type and shows both sides.
    """
    for header_type in ("INFO", "FORMAT", "FILTER", "CONTIG"):
        headers1 = _get_headers(vcf1, header_type)
        headers2 = _get_headers(vcf2, header_type)
        # Name the header type in the message so a mismatch is easy to locate.
        assert headers1 == headers2, (
            f"{header_type} header records differ: {headers1} != {headers2}"
        )
69+
70+
3271
def assert_vcfs_close(f1, f2, *, rtol=1e-05, atol=1e-03, allow_zero_variants=False):
3372
"""Like :py:func:`numpy.testing.assert_allclose()`, but for VCF files.
3473
@@ -48,7 +87,7 @@ def assert_vcfs_close(f1, f2, *, rtol=1e-05, atol=1e-03, allow_zero_variants=Fal
4887
Absolute tolerance.
4988
"""
5089
with open_vcf(f1) as vcf1, open_vcf(f2) as vcf2:
51-
assert vcf1.raw_header == vcf2.raw_header
90+
_assert_vcf_headers_equivalent(vcf1, vcf2)
5291
assert vcf1.samples == vcf2.samples
5392

5493
count = 0

0 commit comments

Comments
 (0)