Skip tests that use scikit-allel if it is not installed

tomwhite · tomwhite · commit ce4305a98cb9 · 2024-07-11T16:14:51.000+01:00
diff --git a/sgkit/tests/io/vcf/test_vcf_scikit_allel.py b/sgkit/tests/io/vcf/test_vcf_scikit_allel.py
@@ -22,7 +22,6 @@
 from pathlib import Path
 from typing import Any
 
-import allel
 import numpy as np
 import pytest
 import xarray as xr
@@ -32,6 +31,8 @@
 from sgkit.io.utils import INT_FILL, INT_MISSING
 from sgkit.io.vcf import vcf_to_zarr
 
+allel = pytest.importorskip("allel")
+
 
 def assert_identical(ds1: Dataset, ds2: Dataset) -> None:
     """Assert two Datasets are identical, including dtypes for all variables, except strings."""
diff --git a/sgkit/tests/test_pca.py b/sgkit/tests/test_pca.py
@@ -1,6 +1,5 @@
 from typing import Any, Optional
 
-import allel
 import dask.array as da
 import numpy as np
 import pytest
@@ -54,6 +53,7 @@ def simulate_dataset(
 
 
 def allel_pca(gn: ArrayLike, randomized: bool = False, **kwargs: Any) -> Dataset:
+    allel = pytest.importorskip("allel")
     fn = allel.randomized_pca if randomized else allel.pca
     pcs, est = fn(gn, **kwargs)
     return xr.Dataset(
diff --git a/sgkit/tests/test_popgen.py b/sgkit/tests/test_popgen.py
@@ -1,13 +1,11 @@
 import itertools
 
-import allel
 import dask.array as da
 import msprime  # type: ignore
 import numpy as np
 import pytest
 import tskit  # type: ignore
 import xarray as xr
-from allel import hudson_fst
 from hypothesis import given, settings
 from hypothesis import strategies as st
 
@@ -136,6 +134,8 @@ def test_diversity(sample_size, chunks, cohort_allele_count):
 
 @pytest.mark.parametrize("sample_size", [10])
 def test_diversity__windowed(sample_size):
+    allel = pytest.importorskip("allel")
+
     ts = simulate_ts(sample_size, length=200)
     ds = ts_to_dataset(ts)
     ds, subsets = add_cohorts(ds, ts, cohort_key_names=["cohorts"])
@@ -223,6 +223,8 @@ def test_divergence__windowed(sample_size, n_cohorts, chunks):
 @pytest.mark.parametrize("chunks", [(-1, -1), (50, -1)])
 @pytest.mark.xfail()  # combine with test_divergence__windowed when this is passing
 def test_divergence__windowed_scikit_allel_comparison(sample_size, n_cohorts, chunks):
+    allel = pytest.importorskip("allel")
+
     ts = simulate_ts(sample_size, length=200)
     ds = ts_to_dataset(ts, chunks)
     ds, subsets = add_cohorts(ds, ts, n_cohorts)
@@ -272,6 +274,8 @@ def test_divergence__missing_calls():
 
 @pytest.mark.parametrize("sample_size", [2, 3, 10, 100])
 def test_Fst__Hudson(sample_size):
+    allel = pytest.importorskip("allel")
+
     # scikit-allel can only calculate Fst for pairs of cohorts (populations)
     n_cohorts = 2
     ts = simulate_ts(sample_size)
@@ -285,7 +289,7 @@ def test_Fst__Hudson(sample_size):
     # scikit-allel
     ac1 = ds.cohort_allele_count.values[:, 0, :]
     ac2 = ds.cohort_allele_count.values[:, 1, :]
-    num, den = hudson_fst(ac1, ac2)
+    num, den = allel.hudson_fst(ac1, ac2)
     ska_fst = np.sum(num) / np.sum(den)
 
     np.testing.assert_allclose(fst, ska_fst)
@@ -326,6 +330,8 @@ def test_Fst__unknown_estimator():
 )
 @pytest.mark.parametrize("chunks", [(-1, -1), (50, -1)])
 def test_Fst__windowed(sample_size, n_cohorts, chunks):
+    allel = pytest.importorskip("allel")
+
     ts = simulate_ts(sample_size, length=200)
     ds = ts_to_dataset(ts, chunks)
     ds, subsets = add_cohorts(ds, ts, n_cohorts)
@@ -395,6 +401,8 @@ def test_Tajimas_D_per_site(sample_size):
     [(10, 3), (20, 4)],
 )
 def test_pbs(sample_size, n_cohorts):
+    allel = pytest.importorskip("allel")
+
     ts = simulate_ts(sample_size)
     ds = ts_to_dataset(ts)
     ds, subsets = add_cohorts(
@@ -432,6 +440,8 @@ def test_pbs(sample_size, n_cohorts):
 )
 @pytest.mark.parametrize("chunks", [(-1, -1), (50, -1)])
 def test_pbs__windowed(sample_size, n_cohorts, cohorts, cohort_indexes, chunks):
+    allel = pytest.importorskip("allel")
+
     ts = simulate_ts(sample_size, length=200)
     ds = ts_to_dataset(ts, chunks)
     ds, subsets = add_cohorts(
@@ -475,6 +485,8 @@ def test_pbs__windowed(sample_size, n_cohorts, cohorts, cohort_indexes, chunks):
 def test_Garud_h(
     n_variants, n_samples, n_contigs, n_cohorts, cohorts, cohort_indexes, chunks
 ):
+    allel = pytest.importorskip("allel")
+
     ds = simulate_genotype_call_dataset(
         n_variant=n_variants, n_sample=n_samples, n_contig=n_contigs
     )
@@ -674,6 +686,8 @@ def test_observed_heterozygosity__windowed(chunks, cohorts, expectation):
 def test_observed_heterozygosity__scikit_allel_comparison(
     n_variant, n_sample, missing_pct, window_size, seed
 ):
+    allel = pytest.importorskip("allel")
+
     ds = simulate_genotype_call_dataset(
         n_variant=n_variant,
         n_sample=n_sample,
diff --git a/sgkit/tests/test_preprocessing.py b/sgkit/tests/test_preprocessing.py
@@ -1,7 +1,6 @@
 from contextlib import nullcontext
 from typing import Any
 
-import allel.stats.preprocessing
 import dask
 import dask.array as da
 import numpy as np
@@ -30,6 +29,8 @@ def simulate_alternate_allele_counts(
 @pytest.mark.parametrize("shape", [(100, 50), (50, 100), (50, 50)])
 @pytest.mark.parametrize("ploidy", [2, 4])
 def test_patterson_scaler__allel_comparison(shape, ploidy):
+    allel = pytest.importorskip("allel")
+
     ac = simulate_alternate_allele_counts(*shape, ploidy=ploidy)  # type: ignore[misc]
     expected = sgkit.stats.preprocessing.PattersonScaler(ploidy=ploidy).fit_transform(
         ac
diff --git a/sgkit/tests/test_vcfzarr_reader.py b/sgkit/tests/test_vcfzarr_reader.py
@@ -1,4 +1,3 @@
-import allel
 import numpy as np
 import pytest
 import xarray as xr
@@ -8,6 +7,8 @@
 from sgkit import read_scikit_allel_vcfzarr
 from sgkit.io.vcfzarr_reader import _ensure_2d, vcfzarr_to_zarr
 
+allel = pytest.importorskip("allel")
+
 
 def create_vcfzarr(
     shared_datadir, tmpdir, *, fields=None, grouped_by_contig=False, consolidated=False
diff --git a/sgkit/tests/test_window.py b/sgkit/tests/test_window.py
@@ -1,6 +1,5 @@
 import re
 
-import allel
 import dask.array as da
 import numpy as np
 import pandas as pd
@@ -29,6 +28,8 @@
 )
 @pytest.mark.parametrize("dtype", [np.int64, np.float32, np.float64])
 def test_moving_statistic_1d(length, chunks, size, step, dtype):
+    allel = pytest.importorskip("allel")
+
     values = da.from_array(np.arange(length, dtype=dtype), chunks=chunks)
 
     stat = moving_statistic(values, np.sum, size=size, step=step, dtype=values.dtype)
@@ -49,6 +50,8 @@ def test_moving_statistic_1d(length, chunks, size, step, dtype):
 )
 @pytest.mark.parametrize("dtype", [np.int64, np.float32, np.float64])
 def test_moving_statistic_2d(length, chunks, size, step, dtype):
+    allel = pytest.importorskip("allel")
+
     arr = np.arange(length * 3, dtype=dtype).reshape(length, 3)
 
     def sum_cols(x):