Skip to content

Commit 1f3d86b

Browse files
Switch tskit haploid phasing back to True
1 parent 3d4c8f2 commit 1f3d86b

File tree

3 files changed

+16
-11
lines changed

3 files changed

+16
-11
lines changed

bio2zarr/tskit.py

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -41,12 +41,6 @@ def __init__(
4141
individuals_nodes = model_mapping.individuals_nodes
4242
sample_ids = model_mapping.individuals_name
4343

44-
self.is_phased = True
45-
if individuals_nodes.shape[1] == 1:
46-
# For simplicity we defined haploids as unphased to do the same thing as the
47-
# VCF conversion code. We should just omit the array for haploids anyway.
48-
self.is_phased = False
49-
5044
self._num_samples = individuals_nodes.shape[0]
5145
if self._num_samples < 1:
5246
raise ValueError("individuals_nodes must have at least one sample")
@@ -107,7 +101,7 @@ def iter_field(self, field_name, shape, start, stop):
107101

108102
def iter_alleles_and_genotypes(self, start, stop, shape, num_alleles):
109103
# All genotypes in tskit are considered phased
110-
phased = np.full(shape[:-1], self.is_phased, dtype=bool)
104+
phased = np.ones(shape[:-1], dtype=bool)
111105

112106
for variant in self.ts.variants(
113107
isolated_as_missing=self.isolated_as_missing,

tests/test_simulated_data.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
import msprime
2+
import numpy as np
23
import numpy.testing as nt
34
import pysam
45
import pytest
56
import sgkit as sg
6-
import numpy as np
77

88
from bio2zarr import vcf as vcf_mod
99

@@ -35,7 +35,8 @@ def assert_ts_ds_equal(ts, ds, ploidy=1):
3535
)
3636
nt.assert_array_equal(
3737
ds.call_genotype_phased.values,
38-
np.ones((ts.num_sites, ts.num_individuals), dtype=bool))
38+
np.ones((ts.num_sites, ts.num_individuals), dtype=bool),
39+
)
3940
nt.assert_equal(ds.variant_allele[:, 0].values, "A")
4041
nt.assert_equal(ds.variant_allele[:, 1].values, "T")
4142
nt.assert_equal(ds.variant_position, ts.sites_position)

tests/test_tskit.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,7 @@ def test_phased(self, conversion):
143143
phased = zroot["call_genotype_phased"][:]
144144
assert phased.shape == (3, 4)
145145
assert phased.dtype == "bool"
146-
assert np.all(~phased)
146+
assert np.all(phased)
147147

148148
def test_contig_id(self, conversion):
149149
ts, zroot = conversion
@@ -470,7 +470,17 @@ def test_genotype_dtype_i4(self, tmp_path):
470470

471471
@pytest.mark.parametrize(
472472
"ts",
473-
[add_mutations(msprime.sim_ancestry(2, sequence_length=10, random_seed=42))],
473+
[
474+
add_mutations(
475+
msprime.sim_ancestry(4, ploidy=1, sequence_length=10, random_seed=42)
476+
),
477+
add_mutations(
478+
msprime.sim_ancestry(2, ploidy=2, sequence_length=10, random_seed=42)
479+
),
480+
add_mutations(
481+
msprime.sim_ancestry(3, ploidy=12, sequence_length=10, random_seed=142)
482+
),
483+
],
474484
)
475485
def test_against_tskit_vcf_output(ts, tmp_path):
476486
vcf_path = tmp_path / "ts.vcf"

0 commit comments

Comments
 (0)