Skip to content

Commit 4c437a2

Browse files
Some basic testing of local alleles
1 parent b92ec7c commit 4c437a2

File tree

3 files changed

+48
-3
lines changed

3 files changed

+48
-3
lines changed

bio2zarr/vcf2zarr/vcz.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,14 +204,15 @@ def convert_local_allele_field_types(fields):
204204

205205
shape = gt.shape[:-1]
206206
chunks = gt.chunks[:-1]
207+
dimensions = gt.dimensions[:-1]
207208

208209
la = ZarrArraySpec.new(
209210
vcf_field=None,
210211
name="call_LA",
211212
dtype="i1",
212213
shape=gt.shape,
213214
chunks=gt.chunks,
214-
dimensions=gt.dimensions, # FIXME
215+
dimensions=(*dimensions, "local_alleles"),
215216
description=(
216217
"0-based indices into REF+ALT, indicating which alleles"
217218
" are relevant (local) for the current sample"
@@ -224,8 +225,8 @@ def convert_local_allele_field_types(fields):
224225
ad.vcf_field = None
225226
ad.shape = (*shape, 2)
226227
ad.chunks = (*chunks, 2)
228+
ad.dimensions = (*dimensions, "local_alleles")
227229
ad.description += " (local-alleles)"
228-
# TODO fix dimensions
229230

230231
pl = fields_by_name.get("call_PL", None)
231232
if pl is not None:
@@ -235,7 +236,7 @@ def convert_local_allele_field_types(fields):
235236
pl.shape = (*shape, 3)
236237
pl.chunks = (*chunks, 3)
237238
pl.description += " (local-alleles)"
238-
# TODO fix dimensions
239+
pl.dimensions = (*dimensions, "local_" + pl.dimensions[-1])
239240
return [*fields, la]
240241

241242

tests/test_vcf_examples.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,16 +716,19 @@ def test_call_LAD(self, ds):
716716
[[446, -2], [393, -2], [486, -2]],
717717
]
718718
nt.assert_array_equal(ds.call_LAD.values, call_LAD)
719+
assert ds.call_LAD.dims == ("variants", "samples", "local_alleles")
719720

720721
def test_call_LA(self, ds):
721722
# All the genotypes are 0/0
722723
call_LA = np.full((23, 3, 2), -2)
723724
call_LA[:, :, 0] = 0
724725
nt.assert_array_equal(ds.call_LA.values, call_LA)
726+
assert ds.call_LA.dims == ("variants", "samples", "local_alleles")
725727

726728
def test_call_LPL(self, ds):
727729
call_LPL = np.tile([0, -2, -2], (23, 3, 1))
728730
nt.assert_array_equal(ds.call_LPL.values, call_LPL)
731+
assert ds.call_LPL.dims == ("variants", "samples", "local_genotypes")
729732

730733

731734
class Test1000G2020AnnotationsExample:

tests/test_vcz.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,17 @@ def schema(schema_path):
4040
return vcf2zarr.VcfZarrSchema.fromjson(f.read())
4141

4242

43+
@pytest.fixture(scope="module")
44+
def local_alleles_schema(icf_path, tmp_path_factory):
45+
# FIXME: this is a stupid way of getting a test fixture, should
46+
# be much easier.
47+
out = tmp_path_factory.mktemp("data") / "example.schema.json"
48+
with open(out, "w") as f:
49+
vcf2zarr.mkschema(icf_path, f, local_alleles=True)
50+
with open(out) as f:
51+
return vcf2zarr.VcfZarrSchema.fromjson(f.read())
52+
53+
4354
@pytest.fixture(scope="module")
4455
def zarr_path(icf_path, tmp_path_factory):
4556
out = tmp_path_factory.mktemp("data") / "example.zarr"
@@ -445,6 +456,36 @@ def test_call_GQ(self, schema):
445456
}
446457

447458

459+
class TestLocalAllelesDefaultSchema:
460+
def test_differences(self, schema, local_alleles_schema):
461+
assert len(schema.fields) == len(local_alleles_schema.fields) - 1
462+
non_local = [f for f in local_alleles_schema.fields if f.name != "call_LA"]
463+
assert schema.fields == non_local
464+
465+
def test_call_LA(self, local_alleles_schema):
466+
d = get_field_dict(local_alleles_schema, "call_LA")
467+
assert d == {
468+
"name": "call_LA",
469+
"dtype": "i1",
470+
"shape": (9, 3, 2),
471+
"chunks": (1000, 10000, 2),
472+
"dimensions": ("variants", "samples", "local_alleles"),
473+
"description": (
474+
"0-based indices into REF+ALT, indicating which alleles"
475+
" are relevant (local) for the current sample"
476+
),
477+
"vcf_field": None,
478+
"compressor": {
479+
"id": "blosc",
480+
"cname": "zstd",
481+
"clevel": 7,
482+
"shuffle": 0,
483+
"blocksize": 0,
484+
},
485+
"filters": tuple(),
486+
}
487+
488+
448489
class TestVcfDescriptions:
449490
@pytest.mark.parametrize(
450491
("field", "description"),

0 commit comments

Comments
 (0)