
Commit 03daf6b

Numpy 2 testing (#1237)
* Fix type
* Remove the case '0.0005' as it was previously passing by accident
* Fix test_ld on NumPy 2
* Fix test_hash_array
* Add GitHub Actions workflow to run using NumPy 2
* Restrict to numpy<2.1 for numba compatibility
* Don't run NumPy 2 on Python 3.9 due to scikit-allel incompatibility
1 parent ee90b6e commit 03daf6b

5 files changed (+53 additions, -49 deletions)
Lines changed: 38 additions & 0 deletions

@@ -0,0 +1,38 @@
+name: Build NumPy 2
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  build:
+    # Scheduled runs only on the origin org
+    if: (github.event_name == 'schedule' && github.repository_owner == 'sgkit-dev') || (github.event_name != 'schedule')
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11"]
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt -r requirements-dev.txt
+          pip install -U 'numpy<2.1'
+      - name: Run pre-commit
+        uses: pre-commit/[email protected]
+      - name: Test with pytest (numba jit disabled)
+        env:
+          NUMBA_DISABLE_JIT: 1
+        run: |
+          # avoid guvectorized functions #1194
+          pytest -v sgkit/tests/test_pedigree.py
+          pytest -v sgkit/tests/io/vcf/test_vcf_writer_utils.py
+      - name: Test with pytest and coverage
+        run: |
+          pytest -v --cov=sgkit --cov-report=term-missing
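The `pip install -U 'numpy<2.1'` step in the new workflow above pins NumPy below 2.1 for numba compatibility (per the commit message). Purely as an illustrative sketch, not part of the commit, a local environment can be checked against the same pin, assuming the `packaging` library is installed:

```python
# Illustrative sketch (not part of this commit): confirm the locally installed
# NumPy is a 2.x release that still satisfies the workflow's numpy<2.1 pin,
# which was chosen for numba compatibility. Assumes 'packaging' is installed.
import numpy as np
from packaging.version import Version

v = Version(np.__version__)
assert Version("2.0") <= v < Version("2.1"), (
    f"expected a NumPy 2.0.x release for this workflow, found {np.__version__}"
)
```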

sgkit/tests/io/vcf/test_vcf_writer_utils.py

Lines changed: 0 additions & 1 deletion
@@ -66,7 +66,6 @@ def test_itoa_out_of_range():
     [
         (0.0, "0"),
         (0.0001, "0"),
-        (0.0005, "0.001"),
         (0.3, "0.3"),
         (0.32, "0.32"),
         (0.329, "0.329"),
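The dropped `(0.0005, "0.001")` case was, per the commit message, only passing by accident. A plausible reason such a case is fragile: 0.0005 has no exact binary floating-point representation, so rounding it to three decimal places hinges on which side of the midpoint the stored value falls rather than on intended behaviour. A small illustration (not part of the commit):

```python
# Illustration (not part of this commit): the double actually stored for the
# literal 0.0005 is not exactly 0.0005, so any near-midpoint rounding of it to
# three decimal places depends on that tiny representation error.
from decimal import Decimal

print(Decimal(0.0005))   # exact value of the stored double, slightly off 0.0005
print(round(0.0005, 3))  # result determined by the representation error above
```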

sgkit/tests/test_ld.py

Lines changed: 10 additions & 48 deletions
@@ -1,12 +1,9 @@
 from typing import Optional
 
-import allel
-import dask.array as da
 import numpy as np
 import numpy.testing as npt
 import pytest
 from dask.dataframe import DataFrame
-from hypothesis import Phase, example, given, settings
 from hypothesis import strategies as st
 from hypothesis.extra.numpy import arrays
 
@@ -27,40 +24,27 @@ def test_rogers_huff_r_between():
     gnb = np.array([[0, 1, 2]])
     npt.assert_allclose(rogers_huff_r_between(gna[0], gnb[0]), 1.0, rtol=1e-06)
     npt.assert_allclose(rogers_huff_r2_between(gna[0], gnb[0]), 1.0, rtol=1e-06)
-    npt.assert_allclose(
-        allel.rogers_huff_r_between(gna, gnb),
-        rogers_huff_r_between(gna[0], gnb[0]),
-        rtol=1e-06,
-    )
 
     gna = np.array([[0, 1, 2]])
     gnb = np.array([[2, 1, 0]])
     npt.assert_allclose(rogers_huff_r_between(gna[0], gnb[0]), -1.0, rtol=1e-06)
     npt.assert_allclose(rogers_huff_r2_between(gna[0], gnb[0]), 1.0, rtol=1e-06)
-    npt.assert_allclose(
-        allel.rogers_huff_r_between(gna, gnb),
-        rogers_huff_r_between(gna[0], gnb[0]),
-        rtol=1e-06,
-    )
 
     gna = np.array([[0, 0, 0]])
     gnb = np.array([[1, 1, 1]])
     assert np.isnan(rogers_huff_r_between(gna[0], gnb[0]))
     assert np.isnan(rogers_huff_r2_between(gna[0], gnb[0]))
-    assert np.isnan(allel.rogers_huff_r_between(gna, gnb))
 
     gna = np.array([[1, 1, 1]])
     gnb = np.array([[1, 1, 1]])
     assert np.isnan(rogers_huff_r_between(gna[0], gnb[0]))
     assert np.isnan(rogers_huff_r2_between(gna[0], gnb[0]))
-    assert np.isnan(allel.rogers_huff_r_between(gna, gnb))
 
     # a case which fails if fastmath=True is enabled for rogers_huff_r_between
     gna = np.full((1, 49), 2)
     gnb = np.full((1, 49), 2)
     assert np.isnan(rogers_huff_r_between(gna[0], gnb[0]))
     assert np.isnan(rogers_huff_r2_between(gna[0], gnb[0]))
-    assert np.isnan(allel.rogers_huff_r_between(gna, gnb))
 
 
 def ldm_df(
@@ -115,7 +99,16 @@ def test_threshold():
 
 @pytest.mark.parametrize(
     "dtype",
-    [dtype for k, v in np.sctypes.items() for dtype in v if k in ["int", "uint"]],  # type: ignore
+    [
+        np.int8,
+        np.int16,
+        np.int32,
+        np.int64,
+        np.uint8,
+        np.uint16,
+        np.uint32,
+        np.uint64,
+    ],
 )
 def test_dtypes(dtype):
     # Input matrices should work regardless of integer type
@@ -148,37 +141,6 @@ def ld_prune_args(draw):
     return x, window, step, threshold, chunks
 
 
-# Phases setting without shrinking for complex, conditional draws in
-# which shrinking wastes time and adds little information
-# (see https://hypothesis.readthedocs.io/en/latest/settings.html#hypothesis.settings.phases)
-PHASES_NO_SHRINK = (Phase.explicit, Phase.reuse, Phase.generate, Phase.target)
-
-
-@given(args=ld_prune_args())  # pylint: disable=no-value-for-parameter
-@settings(max_examples=50, deadline=None, phases=PHASES_NO_SHRINK)
-@example(args=(np.array([[1, 1], [1, 1]], dtype="uint8"), 1, 1, 0.0, -1))
-@pytest.mark.skip(
-    reason="Hypothesis generates failures that need investigation: https://github.com/sgkit-dev/sgkit/issues/864"
-)
-def test_vs_skallel(args):
-    x, size, step, threshold, chunks = args
-
-    ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
-    ds["call_dosage"] = (["variants", "samples"], da.asarray(x).rechunk({0: chunks}))
-    ds = window_by_variant(ds, size=size, step=step)
-
-    ldm = ld_matrix(ds, threshold=threshold)
-    has_duplicates = ldm.compute().duplicated(subset=["i", "j"]).any()
-    assert not has_duplicates
-    idx_drop_ds = maximal_independent_set(ldm)
-
-    idx_drop = np.sort(idx_drop_ds.ld_prune_index_to_drop.data)
-    m = allel.locate_unlinked(x, size=size, step=step, threshold=threshold)
-    idx_drop_ska = np.sort(np.argwhere(~m).squeeze(axis=1))
-
-    npt.assert_equal(idx_drop_ska, idx_drop)
-
-
 def test_scores():
     # Create zero row vectors except for 1st and 11th
     # (make them have non-zero variance)
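One of the hunks above replaces the `np.sctypes`-based dtype list in the `test_dtypes` parametrization with an explicit list, since NumPy 2.0 removed `np.sctypes`. Purely as an illustrative alternative, not what the commit does, the same list could be built from dtype names:

```python
# Illustrative alternative (not what the commit uses): build the same list of
# signed and unsigned integer dtypes from their names instead of np.sctypes,
# which NumPy 2.0 removed.
import numpy as np

INT_DTYPES = [
    np.dtype(f"{kind}{bits}")
    for kind in ("int", "uint")
    for bits in (8, 16, 32, 64)
]
print(INT_DTYPES)  # eight dtype objects, from int8 through uint64
```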

sgkit/tests/test_popgen.py

Lines changed: 3 additions & 0 deletions
@@ -712,6 +712,9 @@ def test_hash_array(n_rows, n_cols):
     _, expected_inverse, expected_counts = np.unique(
         x, axis=0, return_inverse=True, return_counts=True
     )
+    # following is needed due to https://github.com/numpy/numpy/issues/26738
+    # (workaround from https://github.com/lmcinnes/umap/issues/1138)
+    expected_inverse = expected_inverse.reshape(-1)
 
     # hash columns, then find unique column counts using the hash values
     h = hash_array(x)
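The `reshape(-1)` added above works around numpy/numpy#26738, where `np.unique(..., axis=0, return_inverse=True)` on some NumPy 2.0 releases returns the inverse indices with a different shape than the flat array earlier versions produced. A minimal sketch of the normalization (illustrative, not from the commit):

```python
# Minimal sketch (not from the commit): on some NumPy 2.0 releases the inverse
# returned by np.unique(..., axis=0, return_inverse=True) carries an extra
# dimension (see numpy issue #26738); reshape(-1) yields a flat index array
# either way, so the workaround is also harmless on older NumPy.
import numpy as np

x = np.array([[0, 1], [0, 1], [2, 3]])
_, inverse = np.unique(x, axis=0, return_inverse=True)
inverse = inverse.reshape(-1)  # shape (n,) regardless of which shape came back
assert inverse.shape == (3,)
assert (x == np.unique(x, axis=0)[inverse]).all()  # inverse reconstructs x
```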

sgkit/utils.py

Lines changed: 2 additions & 0 deletions
@@ -362,6 +362,8 @@ def split_array_chunks(n: int, blocks: int) -> Tuple[int, ...]:
     if blocks <= 0:
         raise ValueError(f"Number of blocks ({blocks}) must be >= 0")
     n_div, n_mod = np.divmod(n, blocks)
+    n_div = int(n_div)
+    n_mod = int(n_mod)
     chunks = n_mod * (n_div + 1,) + (blocks - n_mod) * (n_div,)
     return chunks  # type: ignore[no-any-return]
 
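The two `int()` casts added in `split_array_chunks` address the fact that `np.divmod` returns NumPy scalar types rather than plain Python integers; casting keeps the tuple arithmetic and the resulting chunk sizes in plain `int`, matching the declared `Tuple[int, ...]` return type. A hedged sketch of the behaviour (illustrative, not from the commit):

```python
# Hedged sketch (not from the commit): np.divmod returns NumPy scalar types, so
# an explicit int() cast keeps the chunk sizes as plain Python ints, matching
# the Tuple[int, ...] return type of split_array_chunks.
import numpy as np

n, blocks = 10, 3
n_div, n_mod = np.divmod(n, blocks)
print(type(n_div).__name__)           # a NumPy integer scalar type, not 'int'
n_div, n_mod = int(n_div), int(n_mod)
chunks = n_mod * (n_div + 1,) + (blocks - n_mod) * (n_div,)
print(chunks)                         # (4, 3, 3) -- sizes that sum back to n
```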
