malariagen
diff --git a/tests/anoph/conftest.py b/tests/anoph/conftest.py
Lines changed: 27 additions & 22 deletions
diff --git a/tests/anoph/test_aim_data.py b/tests/anoph/test_aim_data.py
Lines changed: 9 additions & 9 deletions
diff --git a/tests/anoph/test_cnv_data.py b/tests/anoph/test_cnv_data.py
Lines changed: 20 additions & 22 deletions
@@ -2,7 +2,6 @@
 import shutil
 import string
 from pathlib import Path
-from random import choice, choices, randint
 from typing import Any, Dict, Tuple
 
 import numpy as np
@@ -40,7 +39,7 @@ def fixture_dir():
 
 
 def simulate_contig(*, low, high, base_composition):
-    size = rng.integers(low=low, high=high)
+    size = int(rng.integers(low=low, high=high))
     bases = np.array([b"a", b"c", b"g", b"t", b"n", b"A", b"C", b"G", b"T", b"N"])
     p = np.array([base_composition[b] for b in bases])
     seq = rng.choice(bases, size=size, replace=True, p=p)
@@ -151,9 +150,9 @@ def simulate_genes(self, *, contig, contig_size):
         # Simulate genes.
         for gene_ix in range(self.max_genes):
             gene_id = f"gene-{contig}-{gene_ix}"
-            strand = choice(["+", "-"])
-            inter_size = randint(self.inter_size_low, self.inter_size_high)
-            gene_size = randint(self.gene_size_low, self.gene_size_high)
+            strand = rng.choice(["+", "-"])
+            inter_size = int(rng.integers(self.inter_size_low, self.inter_size_high))
+            gene_size = int(rng.integers(self.gene_size_low, self.gene_size_high))
             if strand == "+":
                 gene_start = cur_fwd + inter_size
             else:
@@ -166,7 +165,11 @@ def simulate_genes(self, *, contig, contig_size):
             gene_attrs = f"ID={gene_id}"
             for attr in self.attrs:
                 random_str = "".join(
-                    choices(string.ascii_uppercase + string.digits, k=5)
+                    rng.choice(
+                        list(string.ascii_uppercase + string.digits),
+                        size=5,
+                        replace=True,
+                    )
                 )
                 gene_attrs += f";{attr}={random_str}"
             gene = (
@@ -212,7 +215,7 @@ def simulate_transcripts(
         # accurate in real data.
 
         for transcript_ix in range(
-            randint(self.n_transcripts_low, self.n_transcripts_high)
+            int(rng.integers(self.n_transcripts_low, self.n_transcripts_high))
         ):
             transcript_id = f"transcript-{contig}-{gene_ix}-{transcript_ix}"
             transcript_start = gene_start
@@ -260,13 +263,16 @@ def simulate_exons(
         transcript_size = transcript_end - transcript_start
         exons = []
         exon_end = transcript_start
-        n_exons = randint(self.n_exons_low, self.n_exons_high)
+        n_exons = int(rng.integers(self.n_exons_low, self.n_exons_high))
         for exon_ix in range(n_exons):
             exon_id = f"exon-{contig}-{gene_ix}-{transcript_ix}-{exon_ix}"
             if exon_ix > 0:
                 # Insert an intron between this exon and the previous one.
-                intron_size = randint(
-                    self.intron_size_low, min(transcript_size, self.intron_size_high)
+                intron_size = int(
+                    rng.integers(
+                        self.intron_size_low,
+                        min(transcript_size, self.intron_size_high),
+                    )
                 )
                 exon_start = exon_end + intron_size
                 if exon_start >= transcript_end:
@@ -275,7 +281,7 @@ def simulate_exons(
             else:
                 # First exon, assume exon starts where the transcript starts.
                 exon_start = transcript_start
-            exon_size = randint(self.exon_size_low, self.exon_size_high)
+            exon_size = int(rng.integers(self.exon_size_low, self.exon_size_high))
             exon_end = min(exon_start + exon_size, transcript_end)
             assert exon_end > exon_start
             exon = (
@@ -311,7 +317,7 @@ def simulate_exons(
             else:
                 feature_type = self.cds_type
                 # Cheat a little, random phase.
-                phase = choice([1, 2, 3])
+                phase = rng.choice([1, 2, 3])
             feature = (
                 contig,
                 self.source,
@@ -549,7 +555,7 @@ def simulate_aim_variants(path, contigs, snp_sites, n_sites_low, n_sites_high):
         # Simulate AIM positions variable.
         snp_pos = snp_sites[f"{contig}/variants/POS"][:]
         loc_aim_sites = rng.choice(
-            snp_pos.shape[0], size=rng.integers(n_sites_low, n_sites_high)
+            snp_pos.shape[0], size=int(rng.integers(n_sites_low, n_sites_high))
         )
         loc_aim_sites.sort()
         aim_pos = snp_pos[loc_aim_sites]
@@ -731,11 +737,10 @@ def simulate_cnv_coverage_calls(zarr_path, metadata_path, contigs, contig_sizes)
         contig_length_bp = contig_sizes[contig]
 
         # Get a random number of CNV alleles ("variants") to simulate.
-        n_cnv_alleles = rng.integers(1, 5_000)
+        n_cnv_alleles = int(rng.integers(1, 5_000))
 
         # Produce a set of random start positions for each allele as a sorted list.
         allele_start_pos = sorted(rng.integers(1, contig_length_bp, size=n_cnv_alleles))
-
         # Produce a set of random allele lengths for each allele, according to a range.
         allele_length_bp_min = 100
         allele_length_bp_max = 100_000
@@ -874,7 +879,7 @@ def simulate_cnv_discordant_read_calls(zarr_path, metadata_path, contigs, contig
         contig_length_bp = contig_sizes[contig]
 
         # Get a random number of CNV variants to simulate.
-        n_cnv_variants = rng.integers(1, 100)
+        n_cnv_variants = int(rng.integers(1, 100))
 
         # Produce a set of random start positions for each variant as a sorted list.
         variant_start_pos = sorted(
@@ -1010,28 +1015,28 @@ def contigs(self) -> Tuple[str, ...]:
         return tuple(self.config["CONTIGS"])
 
     def random_contig(self):
-        return choice(self.contigs)
+        # Pick one contig name at random from the module-level NumPy generator.
+        # Generator.choice hands back a NumPy scalar (np.str_); convert it so
+        # callers keep receiving a builtin str, as they did with random.choice.
+        return str(rng.choice(self.contigs))
 
     def random_transcript_id(self):
+        # Pick one transcript ID at random from the "mRNA" rows of the
+        # genome features table.
         df_transcripts = self.genome_features.query("type == 'mRNA'")
         transcript_ids = [
             gff3_parse_attributes(t)["ID"] for t in df_transcripts.loc[:, "attributes"]
         ]
-        transcript_id = choice(transcript_ids)
+        # Generator.choice returns a NumPy scalar; convert it back to a builtin
+        # str (assumes the parsed "ID" attributes are strings -- TODO confirm).
+        transcript_id = str(rng.choice(transcript_ids))
         return transcript_id
 
     def random_region_str(self, region_size=None):
+        # Build a random region string of the form "contig:start-end", with
+        # thousands separators in the coordinates. When region_size is given,
+        # end = start + region_size; otherwise end is drawn at random between
+        # start and the contig size.
         contig = self.random_contig()
         contig_size = self.contig_sizes[contig]
-        region_start = randint(1, contig_size)
+        # Generator.integers excludes `high` by default, whereas random.randint
+        # included it; endpoint=True keeps contig_size itself reachable.
+        region_start = int(rng.integers(1, contig_size, endpoint=True))
         if region_size:
-            # Ensure we the region span doesn't exceed the contig size.
+            # Ensure the region span doesn't exceed the contig size.
             if contig_size - region_start < region_size:
                 region_start = contig_size - region_size
 
             region_end = region_start + region_size
         else:
-            region_end = randint(region_start, contig_size)
+            # Inclusive upper bound again, so low == high (start at the last
+            # position) is valid and yields a single-position region.
+            region_end = int(rng.integers(region_start, contig_size, endpoint=True))
         region = f"{contig}:{region_start:,}-{region_end:,}"
         return region
 
@@ -1133,7 +1138,7 @@ def init_public_release_manifest(self):
         manifest = pd.DataFrame(
             {
                 "sample_set": ["AG1000G-AO", "AG1000G-BF-A"],
-                "sample_count": [randint(10, 50), randint(10, 40)],
+                "sample_count": [int(rng.integers(10, 50)), int(rng.integers(10, 40))],
                 "study_id": ["AG1000G-AO", "AG1000G-BF-1"],
                 "study_url": [
                     "https://www.malariagen.net/network/where-we-work/AG1000G-AO",
@@ -1165,7 +1170,7 @@ def init_pre_release_manifest(self):
                     "1177-VO-ML-LEHMANN-VMF00004",
                 ],
                 # Make sure we have some gambiae, coluzzii and arabiensis.
-                "sample_count": [randint(20, 60)],
+                "sample_count": [int(rng.integers(20, 60))],
                 "study_id": ["1177-VO-ML-LEHMANN"],
                 "study_url": [
                     "https://www.malariagen.net/network/where-we-work/1177-VO-ML-LEHMANN"
 
@@ -1,14 +1,14 @@
 import itertools
-import random
-
 import plotly.graph_objects as go
 import pytest
 import xarray as xr
 from numpy.testing import assert_array_equal
-
+import numpy as np
 from malariagen_data import ag3 as _ag3
 from malariagen_data.anoph.aim_data import AnophelesAimData
 
+rng = np.random.default_rng(seed=42)
+
 
 @pytest.fixture
 def ag3_sim_api(ag3_sim_fixture):
@@ -88,9 +88,9 @@ def test_aim_calls(aims, ag3_sim_api):
     all_releases = api.releases
     parametrize_sample_sets = [
         None,
-        random.choice(all_sample_sets),
-        random.sample(all_sample_sets, 2),
-        random.choice(all_releases),
+        rng.choice(all_sample_sets),
+        rng.choice(all_sample_sets, 2, replace=False).tolist(),
+        rng.choice(all_releases),
     ]
 
     # Parametrize sample_query.
@@ -179,9 +179,9 @@ def test_plot_aim_heatmap(aims, ag3_sim_api):
     all_releases = api.releases
     parametrize_sample_sets = [
         None,
-        random.choice(all_sample_sets),
-        random.sample(all_sample_sets, 2),
-        random.choice(all_releases),
+        rng.choice(all_sample_sets),
+        rng.choice(all_sample_sets, 2, replace=False).tolist(),
+        rng.choice(all_releases),
     ]
 
     # Parametrize sample_query.
 
@@ -1,5 +1,3 @@
-import random
-
 import bokeh.models
 import dask.array as da
 import numpy as np
@@ -139,14 +137,14 @@ def test_open_cnv_coverage_calls(fixture, api: AnophelesCnvData):
     # Check with a sample set that should not exist
     with pytest.raises(ValueError):
         root = api.open_cnv_coverage_calls(
-            sample_set="foobar", analysis=random.choice(api.coverage_calls_analysis_ids)
+            sample_set="foobar", analysis=rng.choice(api.coverage_calls_analysis_ids)
         )
 
     # Check with an analysis that should not exist
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
     with pytest.raises(ValueError):
         root = api.open_cnv_coverage_calls(
-            sample_set=random.choice(all_sample_sets), analysis="foobar"
+            sample_set=rng.choice(all_sample_sets), analysis="foobar"
         )
 
     # Check with a sample set and analysis that should not exist
@@ -346,15 +344,15 @@ def test_cnv_hmm(fixture, api: AnophelesCnvData):
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
     parametrize_sample_sets = [
         None,
-        random.choice(all_sample_sets),
-        random.sample(all_sample_sets, 2),
-        random.choice(all_releases),
+        rng.choice(all_sample_sets),
+        rng.choice(all_sample_sets, 2, replace=False).tolist(),
+        rng.choice(all_releases),
     ]
 
     # Parametrize region.
     parametrize_region = [
         fixture.random_contig(),
-        random.sample(api.contigs, 2),
+        rng.choice(api.contigs, 2, replace=False).tolist(),
         fixture.random_region_str(),
     ]
 
@@ -424,7 +422,7 @@ def test_cnv_hmm(fixture, api: AnophelesCnvData):
 def test_cnv_hmm__max_coverage_variance(fixture, api: AnophelesCnvData):
     # Set up test.
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
-    sample_set = random.choice(all_sample_sets)
+    sample_set = rng.choice(all_sample_sets)
     region = fixture.random_contig()
 
     # Parametrize max_coverage_variance.
@@ -468,15 +466,15 @@ def test_cnv_hmm__max_coverage_variance(fixture, api: AnophelesCnvData):
 def test_cnv_coverage_calls(fixture, api: AnophelesCnvData):
     # Parametrize sample_sets.
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
-    parametrize_sample_sets = random.sample(all_sample_sets, 3)
+    parametrize_sample_sets = rng.choice(all_sample_sets, 3, replace=False).tolist()
 
     # Parametrize analysis.
     parametrize_analysis = api.coverage_calls_analysis_ids
 
     # Parametrize region.
     parametrize_region = [
         fixture.random_contig(),
-        random.sample(api.contigs, 2),
+        rng.choice(api.contigs, 2, replace=False).tolist(),
         fixture.random_region_str(),
     ]
 
@@ -554,15 +552,15 @@ def test_cnv_discordant_read_calls(fixture, api: AnophelesCnvData):
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
     parametrize_sample_sets = [
         None,
-        random.choice(all_sample_sets),
-        random.sample(all_sample_sets, 2),
-        random.choice(all_releases),
+        rng.choice(all_sample_sets),
+        rng.choice(all_sample_sets, 2, replace=False).tolist(),
+        rng.choice(all_releases),
     ]
 
     # Parametrize contig.
     parametrize_contig = [
-        random.choice(api.contigs),
-        random.sample(api.contigs, 2),
+        rng.choice(api.contigs),
+        rng.choice(api.contigs, 2, replace=False).tolist(),
     ]
 
     for sample_sets in parametrize_sample_sets:
@@ -631,13 +629,13 @@ def test_cnv_discordant_read_calls(fixture, api: AnophelesCnvData):
     # Check with a contig that should not exist
     with pytest.raises(ValueError):
         api.cnv_discordant_read_calls(
-            contig="foobar", sample_sets=random.choice(all_sample_sets)
+            contig="foobar", sample_sets=rng.choice(all_sample_sets)
         )
 
     # Check with a sample set that should not exist
     with pytest.raises(ValueError):
         api.cnv_discordant_read_calls(
-            contig=random.choice(api.contigs), sample_sets="foobar"
+            contig=rng.choice(api.contigs), sample_sets="foobar"
         )
 
     # Check with a contig and sample set that should not exist
@@ -809,7 +807,7 @@ def test_cnv_discordant_read_calls__sample_query_options(
 def test_plot_cnv_hmm_coverage_track(fixture, api: AnophelesCnvData):
     # Set up test.
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
-    sample_set = random.choice(all_sample_sets)
+    sample_set = rng.choice(all_sample_sets)
     region = fixture.random_contig()
     df_samples = api.sample_metadata(sample_sets=sample_set)
     all_sample_ids = df_samples["sample_id"].values
@@ -916,9 +914,9 @@ def test_plot_cnv_hmm_heatmap_track(fixture, api: AnophelesCnvData):
     all_sample_sets = api.sample_sets()["sample_set"].to_list()
     parametrize_sample_sets = [
         None,
-        random.choice(all_sample_sets),
-        random.sample(all_sample_sets, 2),
-        random.choice(all_releases),
+        rng.choice(all_sample_sets),
+        rng.choice(all_sample_sets, 2, replace=False).tolist(),
+        rng.choice(all_releases),
     ]
 
     for region in parametrize_region: