|
| 1 | +import pathlib |
| 2 | + |
| 3 | +import numpy.testing as nt |
| 4 | +import zarr |
| 5 | + |
| 6 | +from vcztools.retrieval import variant_chunk_iter |
| 7 | +from vcztools.samples import parse_samples |
| 8 | + |
| 9 | +from .utils import vcz_path_cache |
| 10 | + |
| 11 | + |
def test_variant_chunk_iter():
    """Check the first chunk produced by ``variant_chunk_iter`` on sample.vcf.gz.

    The iterator is invoked with a region string, an include expression and a
    two-sample selection; only the first yielded chunk is inspected.
    """
    vcf_path = pathlib.Path("tests/data/vcf") / "sample.vcf.gz"
    store = zarr.open(vcz_path_cache(vcf_path), mode="r")

    all_sample_ids = store["sample_id"][:]
    _, samples_selection = parse_samples("NA00002,NA00003", all_sample_ids)

    query = {
        "fields": ["variant_contig", "variant_position", "call_DP", "call_GQ"],
        "variant_regions": "20:1230236-",
        "include": "FMT/DP>3",
        "samples_selection": samples_selection,
    }
    first_chunk = next(variant_chunk_iter(store, **query))

    # Expected contents of the first chunk, checked in this order.
    expected = {
        "variant_contig": [1, 1],
        "variant_position": [1230237, 1234567],
        "call_DP": [[4, 2], [2, 3]],
        "call_GQ": [[48, 61], [17, 40]],
        # note second site (at pos 1234567) is included even though both
        # samples in mask are False (NA00002 and NA00003), since sample
        # NA00001 matched filter criteria, but was then removed by
        # samples_selection
        "call_mask": [[True, False], [False, False]],
    }
    for field_name, want in expected.items():
        nt.assert_array_equal(first_chunk[field_name], want)
0 commit comments