malariagen · jonbrenas · Apr 15, 2025 · Apr 15, 2025 · Apr 30, 2025 · Apr 30, 2025
diff --git a/malariagen_data/anoph/snp_frq.py b/malariagen_data/anoph/snp_frq.py
@@ -8,7 +8,6 @@
 from numpydoc_decorator import doc  # type: ignore
 import xarray as xr
 import numba  # type: ignore
-
 from .. import veff
 from ..util import (
     check_types,
@@ -577,8 +576,8 @@ def snp_allele_frequencies_advanced(
                 raise ValueError("No SNPs remaining after dropping invariant SNPs.")
 
             df_variants = df_variants.loc[loc_variant].reset_index(drop=True)
-            count = np.compress(loc_variant, count, axis=0)
-            nobs = np.compress(loc_variant, nobs, axis=0)
+            count = np.compress(loc_variant, count, axis=0).reshape(-1, count.shape[1])
+            nobs = np.compress(loc_variant, nobs, axis=0).reshape(-1, nobs.shape[1])
             frequency = np.compress(loc_variant, frequency, axis=0)
 
         # Set up variant effect annotator.

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,7 @@ license = "MIT"
 
 [tool.poetry.dependencies]
 python = ">=3.10,<3.13"
-numpy = "<2.2"
+numpy = "*"
 numba = ">=0.60.0"
 llvmlite = "*"
 scipy = "*"

diff --git a/tests/anoph/conftest.py b/tests/anoph/conftest.py
diff --git a/tests/anoph/test_aim_data.py b/tests/anoph/test_aim_data.py
@@ -1,14 +1,14 @@
 import itertools
-import random
-
 import plotly.graph_objects as go
 import pytest
 import xarray as xr
 from numpy.testing import assert_array_equal
-
+import numpy as np
 from malariagen_data import ag3 as _ag3
 from malariagen_data.anoph.aim_data import AnophelesAimData
 
+rng = np.random.default_rng(seed=42)
+
 
 @pytest.fixture
 def ag3_sim_api(ag3_sim_fixture):
@@ -88,9 +88,9 @@ def test_aim_calls(aims, ag3_sim_api):
     all_releases = api.releases
     parametrize_sample_sets = [
         None,
-        random.choice(all_sample_sets),
-        random.sample(all_sample_sets, 2),
-        random.choice(all_releases),
+        rng.choice(all_sample_sets),
+        rng.choice(all_sample_sets, 2, replace=False).tolist(),
+        rng.choice(all_releases),
     ]
 
     # Parametrize sample_query.
@@ -179,9 +179,9 @@ def test_plot_aim_heatmap(aims, ag3_sim_api):
     all_releases = api.releases
     parametrize_sample_sets = [
         None,
-        random.choice(all_sample_sets),
-        random.sample(all_sample_sets, 2),
-        random.choice(all_releases),
+        rng.choice(all_sample_sets),
+        rng.choice(all_sample_sets, 2, replace=False).tolist(),
+        rng.choice(all_releases),
     ]
 
     # Parametrize sample_query.

diff --git a/tests/anoph/test_base.py b/tests/anoph/test_base.py
@@ -8,6 +8,9 @@
 from malariagen_data import ag3 as _ag3
 from malariagen_data.anoph.base import AnophelesBase
 
+# Global RNG for test file; functions may override with local RNG for reproducibility
+rng = np.random.default_rng(seed=42)
+
 
 @pytest.fixture
 def ag3_sim_api(ag3_sim_fixture):
@@ -210,7 +213,7 @@ def test_lookup_study(fixture, api):
     # Set up test.
     df_sample_sets = api.sample_sets()
     all_sample_sets = df_sample_sets["sample_set"].values
-    sample_set = np.random.choice(all_sample_sets)
+    sample_set = rng.choice(all_sample_sets)
 
     study_rec_by_sample_set = api.lookup_study(sample_set)
     df_sample_set = df_sample_sets.set_index("sample_set").loc[sample_set]

diff --git a/tests/anoph/test_cnv_data.py b/tests/anoph/test_cnv_data.py
@@ -1,5 +1,3 @@
-import random
-
 import bokeh.models
 import dask.array as da
 import numpy as np
@@ -13,6 +11,9 @@
 from malariagen_data import ag3 as _ag3
 from malariagen_data.anoph.cnv_data import AnophelesCnvData
 
+# Global RNG for test file; functions may override with local RNG for reproducibility
+rng = np.random.default_rng(seed=42)
+
 
 @pytest.fixture
 def ag3_sim_api(ag3_sim_fixture):
@@ -136,14 +137,14 @@ def test_open_cnv_coverage_calls(fixture, api: AnophelesCnvData):
     # Check with a sample set that should not exist
     with pytest.raises(ValueError):
         root = api.open_cnv_coverage_calls(
-            sample_set="foobar", analysis=random.choice(api.coverage_calls_analysis_ids)
+            sample_set="foobar", analysis=rng.choice(api.coverage_calls_analysis_ids)
         )
 
     # Check with an analysis that should not exist
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
     with pytest.raises(ValueError):
         root = api.open_cnv_coverage_calls(
-            sample_set=random.choice(all_sample_sets), analysis="foobar"
+            sample_set=rng.choice(all_sample_sets), analysis="foobar"
         )
 
     # Check with a sample set and analysis that should not exist
@@ -343,15 +344,15 @@ def test_cnv_hmm(fixture, api: AnophelesCnvData):
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
     parametrize_sample_sets = [
         None,
-        random.choice(all_sample_sets),
-        random.sample(all_sample_sets, 2),
-        random.choice(all_releases),
+        rng.choice(all_sample_sets),
+        rng.choice(all_sample_sets, 2, replace=False).tolist(),
+        rng.choice(all_releases),
     ]
 
     # Parametrize region.
     parametrize_region = [
         fixture.random_contig(),
-        random.sample(api.contigs, 2),
+        rng.choice(api.contigs, 2, replace=False).tolist(),
         fixture.random_region_str(),
     ]
 
@@ -421,11 +422,11 @@ def test_cnv_hmm(fixture, api: AnophelesCnvData):
 def test_cnv_hmm__max_coverage_variance(fixture, api: AnophelesCnvData):
     # Set up test.
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
-    sample_set = random.choice(all_sample_sets)
+    sample_set = rng.choice(all_sample_sets)
     region = fixture.random_contig()
 
     # Parametrize max_coverage_variance.
-    parametrize_max_coverage_variance = np.random.uniform(low=0, high=1, size=4)
+    parametrize_max_coverage_variance = rng.uniform(low=0, high=1, size=4)
 
     for max_coverage_variance in parametrize_max_coverage_variance:
         ds = api.cnv_hmm(
@@ -465,15 +466,15 @@ def test_cnv_hmm__max_coverage_variance(fixture, api: AnophelesCnvData):
 def test_cnv_coverage_calls(fixture, api: AnophelesCnvData):
     # Parametrize sample_sets.
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
-    parametrize_sample_sets = random.sample(all_sample_sets, 3)
+    parametrize_sample_sets = rng.choice(all_sample_sets, 3, replace=False).tolist()
 
     # Parametrize analysis.
     parametrize_analysis = api.coverage_calls_analysis_ids
 
     # Parametrize region.
     parametrize_region = [
         fixture.random_contig(),
-        random.sample(api.contigs, 2),
+        rng.choice(api.contigs, 2, replace=False).tolist(),
         fixture.random_region_str(),
     ]
 
@@ -551,15 +552,15 @@ def test_cnv_discordant_read_calls(fixture, api: AnophelesCnvData):
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
     parametrize_sample_sets = [
         None,
-        random.choice(all_sample_sets),
-        random.sample(all_sample_sets, 2),
-        random.choice(all_releases),
+        rng.choice(all_sample_sets),
+        rng.choice(all_sample_sets, 2, replace=False).tolist(),
+        rng.choice(all_releases),
     ]
 
     # Parametrize contig.
     parametrize_contig = [
-        random.choice(api.contigs),
-        random.sample(api.contigs, 2),
+        rng.choice(api.contigs),
+        rng.choice(api.contigs, 2, replace=False).tolist(),
     ]
 
     for sample_sets in parametrize_sample_sets:
@@ -628,13 +629,13 @@ def test_cnv_discordant_read_calls(fixture, api: AnophelesCnvData):
     # Check with a contig that should not exist
     with pytest.raises(ValueError):
         api.cnv_discordant_read_calls(
-            contig="foobar", sample_sets=random.choice(all_sample_sets)
+            contig="foobar", sample_sets=rng.choice(all_sample_sets)
         )
 
     # Check with a sample set that should not exist
     with pytest.raises(ValueError):
         api.cnv_discordant_read_calls(
-            contig=random.choice(api.contigs), sample_sets="foobar"
+            contig=rng.choice(api.contigs), sample_sets="foobar"
         )
 
     # Check with a contig and sample set that should not exist
@@ -806,11 +807,11 @@ def test_cnv_discordant_read_calls__sample_query_options(
 def test_plot_cnv_hmm_coverage_track(fixture, api: AnophelesCnvData):
     # Set up test.
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
-    sample_set = random.choice(all_sample_sets)
+    sample_set = rng.choice(all_sample_sets)
     region = fixture.random_contig()
     df_samples = api.sample_metadata(sample_sets=sample_set)
     all_sample_ids = df_samples["sample_id"].values
-    sample_id = np.random.choice(all_sample_ids)
+    sample_id = rng.choice(all_sample_ids)
 
     fig = api.plot_cnv_hmm_coverage_track(
         sample=sample_id,
@@ -859,11 +860,11 @@ def test_plot_cnv_hmm_coverage_track(fixture, api: AnophelesCnvData):
 def test_plot_cnv_hmm_coverage(fixture, api: AnophelesCnvData):
     # Set up test.
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
-    sample_set = random.choice(all_sample_sets)
+    sample_set = rng.choice(all_sample_sets)
     region = fixture.random_contig()
     df_samples = api.sample_metadata(sample_sets=sample_set)
     all_sample_ids = df_samples["sample_id"].values
-    sample_id = np.random.choice(all_sample_ids)
+    sample_id = rng.choice(all_sample_ids)
 
     fig = api.plot_cnv_hmm_coverage(
         sample=sample_id,
@@ -913,9 +914,9 @@ def test_plot_cnv_hmm_heatmap_track(fixture, api: AnophelesCnvData):
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
     parametrize_sample_sets = [
         None,
-        random.choice(all_sample_sets),
-        random.sample(all_sample_sets, 2),
-        random.choice(all_releases),
+        rng.choice(all_sample_sets),
+        rng.choice(all_sample_sets, 2, replace=False).tolist(),
+        rng.choice(all_releases),
     ]
 
     for region in parametrize_region: