Skip to content

Commit 872bb8e

Browse files
Close LAA loophole
1 parent 49fc182 commit 872bb8e

File tree

2 files changed

+13
-9
lines changed

2 files changed

+13
-9
lines changed

bio2zarr/vcf2zarr/vcz.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -525,14 +525,17 @@ def compute_laa_field(genotypes) -> np.ndarray:
525525
if np.any(genotypes >= v):
526526
raise ValueError("Extreme allele value not supported")
527527
G = genotypes.astype(np.int32)
528-
# Anything <=0 gets mapped to -2 (pad) in the output, which comes last.
529-
# So, to get this sorting correctly, we remap to the largest value for
530-
# sorting, then map back. We promote the genotypes up to 32 bit for convenience
531-
# here, assuming that we'll never have an allele of 2**31 - 1.
532-
assert np.all(G != v)
533-
G[G <= 0] = v
534-
G.sort(axis=1)
535-
G[G == v] = -2
528+
if len(G) > 0:
529+
# Anything <=0 gets mapped to -2 (pad) in the output, which comes last.
530+
# So, to get this sorting correctly, we remap to the largest value for
531+
# sorting, then map back. We promote the genotypes up to 32 bit for convenience
532+
# here, assuming that we'll never have an allele of 2**31 - 1.
533+
assert np.all(G != v)
534+
G[G <= 0] = v
535+
G.sort(axis=1)
536+
# Equal non-zero values result in padding also
537+
G[G[:, 0] == G[:, 1], 1] = -2
538+
G[G == v] = -2
536539
return G.astype(genotypes.dtype)
537540

538541

tests/test_local_alleles.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ class TestComputeLAA:
99
@pytest.mark.parametrize(
1010
("genotypes", "expected"),
1111
[
12-
([[]], [[]]),
12+
([], []),
1313
([[0, 0]], [[-2, -2]]),
1414
([[0, 0], [0, 0]], [[-2, -2], [-2, -2]]),
15+
([[1, 1], [0, 0]], [[1, -2], [-2, -2]]),
1516
([[0, 1], [3, 2], [3, 0]], [[1, -2], [2, 3], [3, -2]]),
1617
([[0, 0], [2, 3]], [[-2, -2], [2, 3]]),
1718
([[2, 3], [0, 0]], [[2, 3], [-2, -2]]),

0 commit comments

Comments
 (0)