Skip to content

Commit 6eb88ed

Browse files
Test on generated bed files written by bed_reader
1 parent 2b3b40f commit 6eb88ed

File tree

2 files changed

+49
-5
lines changed

2 files changed

+49
-5
lines changed

bio2zarr/plink.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ def __init__(self, path, num_variants, num_samples):
6262
# bytes per variant: 1 byte per 4 samples, rounded up
6363
self.bytes_per_variant = (self.num_samples + 3) // 4
6464

65+
# TODO open this as a persistent file and support reading from a
66+
# stream
6567
with open(self.path, "rb") as f:
6668
magic = f.read(3)
6769
if magic != b"\x6c\x1b\x01":
@@ -132,10 +134,8 @@ def __init__(self, prefix):
132134
self.prefix + ".bim",
133135
self.prefix + ".fam",
134136
)
135-
136137
self.bim = read_bim(self.paths.bim_path)
137138
self.fam = read_fam(self.paths.fam_path)
138-
139139
self._num_records = self.bim.shape[0]
140140
self._num_samples = self.fam.shape[0]
141141
self.bed_reader = BedReader(
@@ -177,11 +177,8 @@ def iter_id(self, start, stop):
177177
def iter_alleles_and_genotypes(self, start, stop, shape, num_alleles):
178178
alt_field = self.bim.allele_1.values
179179
ref_field = self.bim.allele_2.values
180-
181180
gt = self.bed_reader.decode(start, stop)
182-
183181
phased = np.zeros(gt.shape[:2], dtype=bool)
184-
185182
for i, (ref, alt) in enumerate(
186183
zip(ref_field[start:stop], alt_field[start:stop])
187184
):

tests/test_plink.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import os.path
2+
13
import bed_reader
24
import numpy as np
35
import numpy.testing as nt
@@ -61,6 +63,51 @@ def test_bad_file_type(self, path):
6163
with pytest.raises(ValueError, match="Invalid BED file magic bytes"):
6264
plink.BedReader(path, 1, 1)
6365

66+
@pytest.mark.parametrize(
67+
("num_variants", "num_samples"),
68+
[
69+
(1, 1),
70+
(1, 2),
71+
(1, 3),
72+
(1, 4),
73+
(1, 5),
74+
(1, 6),
75+
(1, 7),
76+
(1, 8),
77+
(1, 9),
78+
(2, 1),
79+
(3, 1),
80+
(10, 1),
81+
(100, 1),
82+
(10, 2),
83+
(10, 3),
84+
(10, 4),
85+
(10, 5),
86+
(20, 20),
87+
(30, 3),
88+
],
89+
)
90+
def test_generated_bed_files(self, tmp_path, num_variants, num_samples):
91+
bed_file = tmp_path / "a_file.bed"
92+
# Generate a regular pattern of all possible values
93+
data = np.arange(num_variants * num_samples, dtype=int) % 4
94+
data[data == 3] = -127
95+
data = data.reshape((num_variants, num_samples))
96+
97+
bed_reader.to_bed(bed_file, data.T, num_threads=1)
98+
99+
bytes_per_variant = (num_samples + 3) // 4
100+
expected_size = 3 + bytes_per_variant * num_variants
101+
assert os.path.getsize(bed_file) == expected_size
102+
103+
br_map = {0: (0, 0), 1: (0, 1), 2: (1, 1), -127: (-1, -1)}
104+
reader = plink.BedReader(bed_file, num_variants, num_samples)
105+
g = reader.decode(0, num_variants)
106+
assert g.shape == (num_variants, num_samples, 2)
107+
for j in range(num_variants):
108+
for k in range(num_samples):
109+
assert br_map[data[j, k]] == tuple(g[j, k])
110+
64111

65112
class TestSmallExample:
66113
@pytest.fixture(scope="class")

0 commit comments

Comments
 (0)