Skip to content
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
a1cdee3
Modernize NumPy random functions, fix mypy errors for issue#756
mohamed-laarej Apr 15, 2025
118d25e
Merge branch 'malariagen:master' into fix-numpy-random-tests-756-clean
mohamed-laarej Apr 15, 2025
8e9d1a5
Merge branch 'master' into fix-numpy-random-tests-756-clean
jonbrenas Apr 30, 2025
717136d
Some slight updates
jonbrenas Apr 30, 2025
e7ef120
Updates tests to consistently use the seeded NumPy random number
mohamed-laarej May 1, 2025
fa9fe3a
Merge branch 'fix-numpy-random-tests-756-clean' of github.com:mohamed…
jonbrenas May 12, 2025
4d41538
Merge branch 'master' into GH756-mohamed-laarej-shadow
jonbrenas May 12, 2025
3feb26e
Fix random number generation and type casting issues
mohamed-laarej May 16, 2025
504a73d
Fix test_frq.py to handle single-row dataframes in CI environment
mohamed-laarej May 16, 2025
78e8a31
Lowering n_snps from 50_000 to 10_000 in notebooks/karyotype.ipynb to…
mohamed-laarej May 17, 2025
3475ee6
Merge branch 'master' into GH756-mohamed-laarej-shadow
jonbrenas May 20, 2025
188e0a6
Merge branch 'master' into GH756-mohamed-laarej-shadow
jonbrenas May 30, 2025
194f2c1
Solving linting issue
jonbrenas May 30, 2025
6a23218
Update sample_metadata.py
jonbrenas May 30, 2025
a84edcf
Update sample_metadata.py
jonbrenas May 30, 2025
0ddf4ca
Update karyotype.ipynb
jonbrenas May 30, 2025
ee3f144
refactor: use private column name '_partition' in plot_haplotype_network
mohamed-laarej Aug 4, 2025
260a0d3
Merge branch 'GH756-mohamed-laarej-shadow' of https://github.com/mala…
mohamed-laarej Aug 4, 2025
0464808
Resolve merge conflicts
mohamed-laarej Aug 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions malariagen_data/anoph/snp_frq.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from numpydoc_decorator import doc # type: ignore
import xarray as xr
import numba # type: ignore

from .. import veff
from ..util import (
check_types,
Expand Down Expand Up @@ -577,8 +576,8 @@ def snp_allele_frequencies_advanced(
raise ValueError("No SNPs remaining after dropping invariant SNPs.")

df_variants = df_variants.loc[loc_variant].reset_index(drop=True)
count = np.compress(loc_variant, count, axis=0)
nobs = np.compress(loc_variant, nobs, axis=0)
count = np.compress(loc_variant, count, axis=0).reshape(-1, count.shape[1])
nobs = np.compress(loc_variant, nobs, axis=0).reshape(-1, nobs.shape[1])
frequency = np.compress(loc_variant, frequency, axis=0)

# Set up variant effect annotator.
Expand Down
1,296 changes: 673 additions & 623 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ license = "MIT"

[tool.poetry.dependencies]
python = ">=3.10,<3.13"
numpy = "<2.2"
numpy = "*"
numba = ">=0.60.0"
llvmlite = "*"
scipy = "*"
Expand Down
157 changes: 80 additions & 77 deletions tests/anoph/conftest.py

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions tests/anoph/test_aim_data.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import itertools
import random

import plotly.graph_objects as go
import pytest
import xarray as xr
from numpy.testing import assert_array_equal

import numpy as np
from malariagen_data import ag3 as _ag3
from malariagen_data.anoph.aim_data import AnophelesAimData

rng = np.random.default_rng(seed=42)


@pytest.fixture
def ag3_sim_api(ag3_sim_fixture):
Expand Down Expand Up @@ -88,9 +88,9 @@ def test_aim_calls(aims, ag3_sim_api):
all_releases = api.releases
parametrize_sample_sets = [
None,
random.choice(all_sample_sets),
random.sample(all_sample_sets, 2),
random.choice(all_releases),
rng.choice(all_sample_sets),
rng.choice(all_sample_sets, 2, replace=False).tolist(),
rng.choice(all_releases),
]

# Parametrize sample_query.
Expand Down Expand Up @@ -179,9 +179,9 @@ def test_plot_aim_heatmap(aims, ag3_sim_api):
all_releases = api.releases
parametrize_sample_sets = [
None,
random.choice(all_sample_sets),
random.sample(all_sample_sets, 2),
random.choice(all_releases),
rng.choice(all_sample_sets),
rng.choice(all_sample_sets, 2, replace=False).tolist(),
rng.choice(all_releases),
]

# Parametrize sample_query.
Expand Down
5 changes: 4 additions & 1 deletion tests/anoph/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from malariagen_data import ag3 as _ag3
from malariagen_data.anoph.base import AnophelesBase

# Global RNG for test file; functions may override with local RNG for reproducibility
rng = np.random.default_rng(seed=42)


@pytest.fixture
def ag3_sim_api(ag3_sim_fixture):
Expand Down Expand Up @@ -210,7 +213,7 @@ def test_lookup_study(fixture, api):
# Set up test.
df_sample_sets = api.sample_sets()
all_sample_sets = df_sample_sets["sample_set"].values
sample_set = np.random.choice(all_sample_sets)
sample_set = rng.choice(all_sample_sets)

study_rec_by_sample_set = api.lookup_study(sample_set)
df_sample_set = df_sample_sets.set_index("sample_set").loc[sample_set]
Expand Down
53 changes: 27 additions & 26 deletions tests/anoph/test_cnv_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import random

import bokeh.models
import dask.array as da
import numpy as np
Expand All @@ -13,6 +11,9 @@
from malariagen_data import ag3 as _ag3
from malariagen_data.anoph.cnv_data import AnophelesCnvData

# Global RNG for test file; functions may override with local RNG for reproducibility
rng = np.random.default_rng(seed=42)


@pytest.fixture
def ag3_sim_api(ag3_sim_fixture):
Expand Down Expand Up @@ -136,14 +137,14 @@ def test_open_cnv_coverage_calls(fixture, api: AnophelesCnvData):
# Check with a sample set that should not exist
with pytest.raises(ValueError):
root = api.open_cnv_coverage_calls(
sample_set="foobar", analysis=random.choice(api.coverage_calls_analysis_ids)
sample_set="foobar", analysis=rng.choice(api.coverage_calls_analysis_ids)
)

# Check with an analysis that should not exist
all_sample_sets = api.sample_sets()["sample_set"].to_list()
with pytest.raises(ValueError):
root = api.open_cnv_coverage_calls(
sample_set=random.choice(all_sample_sets), analysis="foobar"
sample_set=rng.choice(all_sample_sets), analysis="foobar"
)

# Check with a sample set and analysis that should not exist
Expand Down Expand Up @@ -343,15 +344,15 @@ def test_cnv_hmm(fixture, api: AnophelesCnvData):
all_sample_sets = api.sample_sets()["sample_set"].to_list()
parametrize_sample_sets = [
None,
random.choice(all_sample_sets),
random.sample(all_sample_sets, 2),
random.choice(all_releases),
rng.choice(all_sample_sets),
rng.choice(all_sample_sets, 2, replace=False).tolist(),
rng.choice(all_releases),
]

# Parametrize region.
parametrize_region = [
fixture.random_contig(),
random.sample(api.contigs, 2),
rng.choice(api.contigs, 2, replace=False).tolist(),
fixture.random_region_str(),
]

Expand Down Expand Up @@ -421,11 +422,11 @@ def test_cnv_hmm(fixture, api: AnophelesCnvData):
def test_cnv_hmm__max_coverage_variance(fixture, api: AnophelesCnvData):
# Set up test.
all_sample_sets = api.sample_sets()["sample_set"].to_list()
sample_set = random.choice(all_sample_sets)
sample_set = rng.choice(all_sample_sets)
region = fixture.random_contig()

# Parametrize max_coverage_variance.
parametrize_max_coverage_variance = np.random.uniform(low=0, high=1, size=4)
parametrize_max_coverage_variance = rng.uniform(low=0, high=1, size=4)

for max_coverage_variance in parametrize_max_coverage_variance:
ds = api.cnv_hmm(
Expand Down Expand Up @@ -465,15 +466,15 @@ def test_cnv_hmm__max_coverage_variance(fixture, api: AnophelesCnvData):
def test_cnv_coverage_calls(fixture, api: AnophelesCnvData):
# Parametrize sample_sets.
all_sample_sets = api.sample_sets()["sample_set"].to_list()
parametrize_sample_sets = random.sample(all_sample_sets, 3)
parametrize_sample_sets = rng.choice(all_sample_sets, 3, replace=False).tolist()

# Parametrize analysis.
parametrize_analysis = api.coverage_calls_analysis_ids

# Parametrize region.
parametrize_region = [
fixture.random_contig(),
random.sample(api.contigs, 2),
rng.choice(api.contigs, 2, replace=False).tolist(),
fixture.random_region_str(),
]

Expand Down Expand Up @@ -551,15 +552,15 @@ def test_cnv_discordant_read_calls(fixture, api: AnophelesCnvData):
all_sample_sets = api.sample_sets()["sample_set"].to_list()
parametrize_sample_sets = [
None,
random.choice(all_sample_sets),
random.sample(all_sample_sets, 2),
random.choice(all_releases),
rng.choice(all_sample_sets),
rng.choice(all_sample_sets, 2, replace=False).tolist(),
rng.choice(all_releases),
]

# Parametrize contig.
parametrize_contig = [
random.choice(api.contigs),
random.sample(api.contigs, 2),
rng.choice(api.contigs),
rng.choice(api.contigs, 2, replace=False).tolist(),
]

for sample_sets in parametrize_sample_sets:
Expand Down Expand Up @@ -628,13 +629,13 @@ def test_cnv_discordant_read_calls(fixture, api: AnophelesCnvData):
# Check with a contig that should not exist
with pytest.raises(ValueError):
api.cnv_discordant_read_calls(
contig="foobar", sample_sets=random.choice(all_sample_sets)
contig="foobar", sample_sets=rng.choice(all_sample_sets)
)

# Check with a sample set that should not exist
with pytest.raises(ValueError):
api.cnv_discordant_read_calls(
contig=random.choice(api.contigs), sample_sets="foobar"
contig=rng.choice(api.contigs), sample_sets="foobar"
)

# Check with a contig and sample set that should not exist
Expand Down Expand Up @@ -806,11 +807,11 @@ def test_cnv_discordant_read_calls__sample_query_options(
def test_plot_cnv_hmm_coverage_track(fixture, api: AnophelesCnvData):
# Set up test.
all_sample_sets = api.sample_sets()["sample_set"].to_list()
sample_set = random.choice(all_sample_sets)
sample_set = rng.choice(all_sample_sets)
region = fixture.random_contig()
df_samples = api.sample_metadata(sample_sets=sample_set)
all_sample_ids = df_samples["sample_id"].values
sample_id = np.random.choice(all_sample_ids)
sample_id = rng.choice(all_sample_ids)

fig = api.plot_cnv_hmm_coverage_track(
sample=sample_id,
Expand Down Expand Up @@ -859,11 +860,11 @@ def test_plot_cnv_hmm_coverage_track(fixture, api: AnophelesCnvData):
def test_plot_cnv_hmm_coverage(fixture, api: AnophelesCnvData):
# Set up test.
all_sample_sets = api.sample_sets()["sample_set"].to_list()
sample_set = random.choice(all_sample_sets)
sample_set = rng.choice(all_sample_sets)
region = fixture.random_contig()
df_samples = api.sample_metadata(sample_sets=sample_set)
all_sample_ids = df_samples["sample_id"].values
sample_id = np.random.choice(all_sample_ids)
sample_id = rng.choice(all_sample_ids)

fig = api.plot_cnv_hmm_coverage(
sample=sample_id,
Expand Down Expand Up @@ -913,9 +914,9 @@ def test_plot_cnv_hmm_heatmap_track(fixture, api: AnophelesCnvData):
all_sample_sets = api.sample_sets()["sample_set"].to_list()
parametrize_sample_sets = [
None,
random.choice(all_sample_sets),
random.sample(all_sample_sets, 2),
random.choice(all_releases),
rng.choice(all_sample_sets),
rng.choice(all_sample_sets, 2, replace=False).tolist(),
rng.choice(all_releases),
]

for region in parametrize_region:
Expand Down
Loading
Loading