Skip to content

Commit edfae18

Browse files
Force haploids to always be phased
Closes #399
1 parent 14d121b commit edfae18

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

bio2zarr/vcf.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1051,6 +1051,10 @@ def iter_genotypes(self, shape, start, stop):
10511051
phased = value[:, -1] if value is not None else None
10521052
sanitised_genotypes = sanitise_value_int_2d(shape, genotypes)
10531053
sanitised_phased = sanitise_value_int_1d(shape[:-1], phased)
1054+
# Force haploids to always be phased
1055+
# https://github.com/sgkit-dev/bio2zarr/issues/399
1056+
if sanitised_genotypes.shape[1] == 1:
1057+
sanitised_phased[:] = True
10541058
yield sanitised_genotypes, sanitised_phased
10551059

10561060
def iter_alleles_and_genotypes(self, start, stop, shape, num_alleles):

tests/test_simulated_data.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import pysam
44
import pytest
55
import sgkit as sg
6+
import numpy as np
67

78
from bio2zarr import vcf as vcf_mod
89

@@ -32,6 +33,9 @@ def assert_ts_ds_equal(ts, ds, ploidy=1):
3233
ts.genotype_matrix().reshape((ts.num_sites, ts.num_individuals, ploidy)),
3334
ds.call_genotype.values,
3435
)
36+
nt.assert_array_equal(
37+
ds.call_genotype_phased.values,
38+
np.ones((ts.num_sites, ts.num_individuals), dtype=bool))
3539
nt.assert_equal(ds.variant_allele[:, 0].values, "A")
3640
nt.assert_equal(ds.variant_allele[:, 1].values, "T")
3741
nt.assert_equal(ds.variant_position, ts.sites_position)

0 commit comments

Comments
 (0)