From 4f913250e25ac54ac48f2be3a8f16bb4e2655e19 Mon Sep 17 00:00:00 2001 From: Tom White Date: Mon, 24 Jun 2024 15:04:00 +0100 Subject: [PATCH 1/7] Fix type --- sgkit/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sgkit/utils.py b/sgkit/utils.py index 7a770e5c0..ee9bbfd3f 100644 --- a/sgkit/utils.py +++ b/sgkit/utils.py @@ -362,6 +362,8 @@ def split_array_chunks(n: int, blocks: int) -> Tuple[int, ...]: if blocks <= 0: raise ValueError(f"Number of blocks ({blocks}) must be >= 0") n_div, n_mod = np.divmod(n, blocks) + n_div = int(n_div) + n_mod = int(n_mod) chunks = n_mod * (n_div + 1,) + (blocks - n_mod) * (n_div,) return chunks # type: ignore[no-any-return] From 283f3b092d5b883b862e4f8f23fd68ae78cfbe05 Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 25 Jun 2024 15:02:39 +0100 Subject: [PATCH 2/7] Removing the case '0.0005' as it was previously passing by accident. --- sgkit/tests/io/vcf/test_vcf_writer_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sgkit/tests/io/vcf/test_vcf_writer_utils.py b/sgkit/tests/io/vcf/test_vcf_writer_utils.py index 0155cbcdf..f9459ebe3 100644 --- a/sgkit/tests/io/vcf/test_vcf_writer_utils.py +++ b/sgkit/tests/io/vcf/test_vcf_writer_utils.py @@ -66,7 +66,6 @@ def test_itoa_out_of_range(): [ (0.0, "0"), (0.0001, "0"), - (0.0005, "0.001"), (0.3, "0.3"), (0.32, "0.32"), (0.329, "0.329"), From 1ed2c69dc3dd5be70b1bde23b1fb1f65ae3abbf0 Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 25 Jun 2024 15:48:59 +0100 Subject: [PATCH 3/7] Fix test_ld on numpy 2 --- sgkit/tests/test_ld.py | 58 ++++++++---------------------------------- 1 file changed, 10 insertions(+), 48 deletions(-) diff --git a/sgkit/tests/test_ld.py b/sgkit/tests/test_ld.py index 3fb08b01a..bdf0f3390 100644 --- a/sgkit/tests/test_ld.py +++ b/sgkit/tests/test_ld.py @@ -1,12 +1,9 @@ from typing import Optional -import allel -import dask.array as da import numpy as np import numpy.testing as npt import pytest from dask.dataframe import DataFrame -from hypothesis import Phase, example, given, settings from hypothesis import strategies as st from hypothesis.extra.numpy import arrays @@ -27,40 +24,27 @@ def test_rogers_huff_r_between(): gnb = np.array([[0, 1, 2]]) npt.assert_allclose(rogers_huff_r_between(gna[0], gnb[0]), 1.0, rtol=1e-06) npt.assert_allclose(rogers_huff_r2_between(gna[0], gnb[0]), 1.0, rtol=1e-06) - npt.assert_allclose( - allel.rogers_huff_r_between(gna, gnb), - rogers_huff_r_between(gna[0], gnb[0]), - rtol=1e-06, - ) gna = np.array([[0, 1, 2]]) gnb = np.array([[2, 1, 0]]) npt.assert_allclose(rogers_huff_r_between(gna[0], gnb[0]), -1.0, rtol=1e-06) npt.assert_allclose(rogers_huff_r2_between(gna[0], gnb[0]), 1.0, rtol=1e-06) - npt.assert_allclose( - allel.rogers_huff_r_between(gna, gnb), - rogers_huff_r_between(gna[0], gnb[0]), - rtol=1e-06, - ) gna = np.array([[0, 0, 0]]) gnb = np.array([[1, 1, 1]]) assert np.isnan(rogers_huff_r_between(gna[0], gnb[0])) assert np.isnan(rogers_huff_r2_between(gna[0], gnb[0])) - assert np.isnan(allel.rogers_huff_r_between(gna, gnb)) gna = np.array([[1, 1, 1]]) gnb = np.array([[1, 1, 1]]) assert np.isnan(rogers_huff_r_between(gna[0], gnb[0])) assert np.isnan(rogers_huff_r2_between(gna[0], gnb[0])) - assert np.isnan(allel.rogers_huff_r_between(gna, gnb)) # a case which fails if fastmath=True is enabled for rogers_huff_r_between gna = np.full((1, 49), 2) gnb = np.full((1, 49), 2) assert np.isnan(rogers_huff_r_between(gna[0], gnb[0])) assert np.isnan(rogers_huff_r2_between(gna[0], gnb[0])) - assert np.isnan(allel.rogers_huff_r_between(gna, gnb)) def ldm_df( @@ -115,7 +99,16 @@ def test_threshold(): @pytest.mark.parametrize( "dtype", - [dtype for k, v in np.sctypes.items() for dtype in v if k in ["int", "uint"]], # type: ignore + [ + np.int8, + np.int16, + np.int32, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + ], ) def test_dtypes(dtype): # Input matrices should work regardless of integer type @@ -148,37 +141,6 @@ def ld_prune_args(draw): return x, window, step, threshold, chunks -# Phases setting without shrinking for complex, conditional draws in -# which shrinking wastes time and adds little information -# (see https://hypothesis.readthedocs.io/en/latest/settings.html#hypothesis.settings.phases) -PHASES_NO_SHRINK = (Phase.explicit, Phase.reuse, Phase.generate, Phase.target) - - -@given(args=ld_prune_args()) # pylint: disable=no-value-for-parameter -@settings(max_examples=50, deadline=None, phases=PHASES_NO_SHRINK) -@example(args=(np.array([[1, 1], [1, 1]], dtype="uint8"), 1, 1, 0.0, -1)) -@pytest.mark.skip( - reason="Hypothesis generates failures that need investigation: https://github.com/sgkit-dev/sgkit/issues/864" -) -def test_vs_skallel(args): - x, size, step, threshold, chunks = args - - ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1]) - ds["call_dosage"] = (["variants", "samples"], da.asarray(x).rechunk({0: chunks})) - ds = window_by_variant(ds, size=size, step=step) - - ldm = ld_matrix(ds, threshold=threshold) - has_duplicates = ldm.compute().duplicated(subset=["i", "j"]).any() - assert not has_duplicates - idx_drop_ds = maximal_independent_set(ldm) - - idx_drop = np.sort(idx_drop_ds.ld_prune_index_to_drop.data) - m = allel.locate_unlinked(x, size=size, step=step, threshold=threshold) - idx_drop_ska = np.sort(np.argwhere(~m).squeeze(axis=1)) - - npt.assert_equal(idx_drop_ska, idx_drop) - - def test_scores(): # Create zero row vectors except for 1st and 11th # (make them have non-zero variance) From 287bb7810c10397c9bc9dce878aff1cbafa9da39 Mon Sep 17 00:00:00 2001 From: Tom White Date: Sat, 13 Jul 2024 14:27:59 +0100 Subject: [PATCH 4/7] Fix test_hash_array --- sgkit/tests/test_popgen.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sgkit/tests/test_popgen.py b/sgkit/tests/test_popgen.py index 50fc9bb4c..6bc06acd8 100644 --- a/sgkit/tests/test_popgen.py +++ b/sgkit/tests/test_popgen.py @@ -712,6 +712,9 @@ def test_hash_array(n_rows, n_cols): _, expected_inverse, expected_counts = np.unique( x, axis=0, return_inverse=True, return_counts=True ) + # following is needed due to https://github.com/numpy/numpy/issues/26738 + # (workaround from https://github.com/lmcinnes/umap/issues/1138) + expected_inverse = expected_inverse.reshape(-1) # hash columns, then find unique column counts using the hash values h = hash_array(x) From a176b9cebc6e0762c112791854fc679bf4f0893f Mon Sep 17 00:00:00 2001 From: Tom White Date: Mon, 2 Sep 2024 10:16:59 +0100 Subject: [PATCH 5/7] Add GitHub Actions workflow to run using NumPy 2 --- .github/workflows/build-numpy-2.yml | 38 +++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/workflows/build-numpy-2.yml diff --git a/.github/workflows/build-numpy-2.yml b/.github/workflows/build-numpy-2.yml new file mode 100644 index 000000000..600ee8a06 --- /dev/null +++ b/.github/workflows/build-numpy-2.yml @@ -0,0 +1,38 @@ +name: Build NumPy 2 + +on: + push: + pull_request: + +jobs: + build: + # Scheduled runs only on the origin org + if: (github.event_name == 'schedule' && github.repository_owner == 'sgkit-dev') || (github.event_name != 'schedule') + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt -r requirements-dev.txt + pip install -U numpy + - name: Run pre-commit + uses: pre-commit/action@v2.0.0 + - name: Test with pytest (numba jit disabled) + env: + NUMBA_DISABLE_JIT: 1 + run: | + # avoid guvectorized functions #1194 + pytest -v sgkit/tests/test_pedigree.py + pytest -v sgkit/tests/io/vcf/test_vcf_writer_utils.py + - name: Test with pytest and coverage + run: | + pytest -v --cov=sgkit --cov-report=term-missing From 1ae928d508386aa04d12679a55d78954506e414c Mon Sep 17 00:00:00 2001 From: Tom White Date: Mon, 2 Sep 2024 10:21:01 +0100 Subject: [PATCH 6/7] Restrict to numpy<2.1 for numba compatibility --- .github/workflows/build-numpy-2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-numpy-2.yml b/.github/workflows/build-numpy-2.yml index 600ee8a06..4eb4191bc 100644 --- a/.github/workflows/build-numpy-2.yml +++ b/.github/workflows/build-numpy-2.yml @@ -23,7 +23,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt -r requirements-dev.txt - pip install -U numpy + pip install -U 'numpy<2.1' - name: Run pre-commit uses: pre-commit/action@v2.0.0 - name: Test with pytest (numba jit disabled) From a47aa018198bcdf4e2e213dc813129c253772de2 Mon Sep 17 00:00:00 2001 From: Tom White Date: Mon, 2 Sep 2024 10:29:58 +0100 Subject: [PATCH 7/7] Don't run NumPy 2 on Python 3.9 due to scikit-allel incompatibility --- .github/workflows/build-numpy-2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-numpy-2.yml b/.github/workflows/build-numpy-2.yml index 4eb4191bc..f7a2e49e8 100644 --- a/.github/workflows/build-numpy-2.yml +++ b/.github/workflows/build-numpy-2.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11"] steps: - uses: actions/checkout@v2