Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,46 @@ jobs:
# https://github.com/coverallsapp/github-action
fail-on-error: false

optional_dependencies:
name: Optional dependencies
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Test optional dependencies
run: |
python -m venv env-tskit
source env-tskit/bin/activate
python -m pip install .
python -m bio2zarr tskit2zarr convert tests/data/ts/example.trees ts.vcz > ts.txt 2>&1 || echo $? > ts_exit.txt
test "$(cat ts_exit.txt)" = "1"
grep -q "This process requires the optional tskit module. Install it with: pip install bio2zarr\[tskit\]" ts.txt
python -m pip install '.[tskit]'
python -m bio2zarr tskit2zarr convert tests/data/ts/example.trees ts.vcz
deactivate

python -m venv env-plink
source env-plink/bin/activate
python -m pip install .
python -m bio2zarr plink2zarr convert tests/data/plink/example.bed plink.vcz > plink.txt 2>&1 || echo $? > plink_exit.txt
test "$(cat plink_exit.txt)" = "1"
grep -q "This process requires the optional bed_reader module. Install it with: pip install bio2zarr\[plink\]" plink.txt
python -m pip install '.[plink]'
python -m bio2zarr plink2zarr convert tests/data/plink/example.bed plink.vcz
deactivate

python -m venv env-vcf
source env-vcf/bin/activate
python -m pip install .
python -m bio2zarr vcf2zarr convert tests/data/vcf/sample.vcf.gz sample.vcz > vcf.txt 2>&1 || echo $? > vcf_exit.txt
test "$(cat vcf_exit.txt)" = "1"
grep -q "This process requires the optional cyvcf2 module. Install it with: pip install bio2zarr\[vcf\]" vcf.txt
python -m pip install '.[vcf]'
python -m bio2zarr vcf2zarr convert tests/data/vcf/sample.vcf.gz sample.vcz
deactivate

packaging:
name: Packaging
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:

- name: Install package
run: |
python3 -m pip install .
python3 -m pip install '.[all]'

- name: Build Docs
run: |
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# 0.1.6 2025-0X-XX

- Make format-specific dependencies optional (#385)

- Add contigs to plink output (#344)

Breaking changes
Expand Down
22 changes: 22 additions & 0 deletions bio2zarr/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import concurrent.futures as cf
import contextlib
import dataclasses
import functools
import importlib
import json
import logging
import math
Expand All @@ -21,6 +23,26 @@
numcodecs.blosc.use_threads = False


def requires_optional_dependency(module_name, extras_name):
    """Build a decorator that guards a callable behind an optional dependency.

    Every call of the decorated callable first attempts to import
    ``module_name``. If that import fails, an ``ImportError`` carrying an
    install hint is raised instead of running the callable; otherwise the
    call is forwarded unchanged and its result returned.

    Args:
        module_name: Importable name of the optional module (e.g. ``"tskit"``).
        extras_name: Name of the packaging extra shown in the install hint,
            i.e. ``pip install bio2zarr[<extras_name>]``.

    Returns:
        A decorator that wraps a callable and preserves its metadata via
        ``functools.wraps``.

    Raises:
        ImportError: Raised by the wrapper at call time (not at decoration
            time) when ``module_name`` cannot be imported.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                importlib.import_module(module_name)
            except ImportError as err:
                # Distinguish "the optional module itself is absent" from
                # "the module is present but failed while importing" (e.g. one
                # of its own dependencies is missing or broken).
                missing_name = getattr(err, "name", None)
                not_installed = (
                    isinstance(err, ModuleNotFoundError)
                    and missing_name is not None
                    and (
                        module_name == missing_name
                        or module_name.startswith(f"{missing_name}.")
                    )
                )
                # Keep the short, context-free error for the plain
                # "not installed" case; otherwise chain the real failure so it
                # is not hidden behind the install hint.
                raise ImportError(
                    f"This process requires the optional {module_name} module. "
                    f"Install it with: pip install bio2zarr[{extras_name}]"
                ) from (None if not_installed else err)
            return func(*args, **kwargs)

        return wrapper

    return decorator


def display_number(x):
ret = "n/a"
if math.isfinite(x):
Expand Down
7 changes: 6 additions & 1 deletion bio2zarr/plink.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
import pathlib

import bed_reader
import numpy as np
import zarr

Expand All @@ -11,7 +10,10 @@


class PlinkFormat(vcz.Source):
@core.requires_optional_dependency("bed_reader", "plink")
def __init__(self, path):
import bed_reader

self._path = pathlib.Path(path)
self.bed = bed_reader.open_bed(path, num_threads=1, count_A1=False)

Expand Down Expand Up @@ -175,7 +177,10 @@ def convert(

# FIXME do this more efficiently - currently reading the whole thing
# in for convenience, and also comparing call-by-call
@core.requires_optional_dependency("bed_reader", "plink")
def validate(bed_path, zarr_path):
import bed_reader

root = zarr.open(store=zarr_path, mode="r")
call_genotype = root["call_genotype"][:]

Expand Down
4 changes: 3 additions & 1 deletion bio2zarr/tskit.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
import pathlib

import numpy as np
import tskit

from bio2zarr import constants, core, vcz

logger = logging.getLogger(__name__)


class TskitFormat(vcz.Source):
@core.requires_optional_dependency("tskit", "tskit")
def __init__(
self,
ts_path,
Expand All @@ -18,6 +18,8 @@ def __init__(
contig_id=None,
isolated_as_missing=False,
):
import tskit

self._path = ts_path
self.ts = tskit.load(ts_path)
self.contig_id = contig_id if contig_id is not None else "1"
Expand Down
5 changes: 4 additions & 1 deletion bio2zarr/vcf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
from enum import Enum
from typing import IO, Any

import cyvcf2
import humanfriendly
import numpy as np

from bio2zarr import core
from bio2zarr.typing import PathType

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -395,7 +395,10 @@ class VcfIndexType(Enum):


class VcfFile(contextlib.AbstractContextManager):
@core.requires_optional_dependency("cyvcf2", "vcf")
def __init__(self, vcf_path, index_path=None):
import cyvcf2

self.vcf = None
self.file_type = None
self.index_type = None
Expand Down
5 changes: 4 additions & 1 deletion bio2zarr/vcz_verification.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import cyvcf2
import numpy as np
import numpy.testing as nt
import tqdm
import zarr

from bio2zarr import core
from bio2zarr.zarr_utils import first_dim_iter

from . import constants
Expand Down Expand Up @@ -146,7 +146,10 @@ def assert_format_val_equal(vcf_val, zarr_val, vcf_type, vcf_number):
nt.assert_equal(vcf_val, zarr_val)


@core.requires_optional_dependency("cyvcf2", "vcf")
def verify(vcf_path, zarr_path, show_progress=False):
import cyvcf2

root = zarr.open(store=zarr_path, mode="r")
pos = root["variant_position"][:]
allele = root["variant_allele"][:]
Expand Down
25 changes: 17 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,12 @@ dependencies = [
"zarr >= 2.17,< 3",
# Pinning numcodecs due to https://github.com/zarr-developers/zarr-python/issues/2963
"numcodecs[msgpack]!=0.14.0,!=0.14.1,<0.16",
"click",
"tabulate",
"tqdm",
"humanfriendly",
# cyvcf2 also pulls in coloredlogs and click",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's keep the comments here with the rationale for these dependencies. The only reason we're using coloredlogs is because it comes anyway with cyvcf2

# colouredlogs pulls in humanfriendly",
"cyvcf2",
"bed_reader",
# TODO Using dev version of tskit for CI, FIXME before release
"tskit @ git+https://github.com/tskit-dev/tskit.git@main#subdirectory=python",
# cyvcf2 also pulls in coloredlogs and click
"coloredlogs",
"click",
]
requires-python = ">=3.10"
classifiers = [
Expand Down Expand Up @@ -65,8 +61,21 @@ dev = [
"pytest-coverage",
"pytest-xdist",
"sgkit>=0.8.0",
"tqdm"
"tqdm",
"tskit @ git+https://github.com/tskit-dev/tskit.git@main#subdirectory=python",
"bed_reader",
"cyvcf2"
]
# TODO Using dev version of tskit for CI, FIXME before release
tskit = ["tskit @ git+https://github.com/tskit-dev/tskit.git@main#subdirectory=python"]
plink = ["bed_reader"]
vcf = ["cyvcf2"]
all = [
"tskit @ git+https://github.com/tskit-dev/tskit.git@main#subdirectory=python",
"bed_reader",
"cyvcf2"
]


[tool.setuptools]
packages = ["bio2zarr"]
Expand Down
11 changes: 11 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,3 +244,14 @@ def test_examples(self, chunk_size, size, start, stop):
)
def test_du(path, expected):
assert core.du(path) == expected


def test_decorator_missing_dependency():
    """A callable guarded by an unimportable module raises with the hint."""

    def placeholder():
        return "success"

    # Apply the decorator explicitly rather than with @-syntax.
    guard = core.requires_optional_dependency("non_existent_module", "extras")
    guarded = guard(placeholder)

    with pytest.raises(ImportError) as raised:
        guarded()

    message = str(raised.value)
    assert "pip install bio2zarr[extras]" in message
17 changes: 17 additions & 0 deletions tests/test_plink.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from unittest import mock

import bed_reader
import numpy as np
import numpy.testing as nt
Expand Down Expand Up @@ -55,6 +57,21 @@ def test_genotypes(self, ds):
],
)

def test_missing_dependency(self):
    """plink.convert surfaces the plink install hint when imports fail."""
    expected = (
        "This process requires the optional bed_reader module. "
        "Install it with: pip install bio2zarr[plink]"
    )
    import_failure = ImportError("No module named 'bed_reader'")
    # While the patch is active every importlib.import_module call raises.
    with mock.patch("importlib.import_module", side_effect=import_failure):
        with pytest.raises(ImportError) as raised:
            plink.convert("UNUSED_PATH", "UNUSED_PATH")
        assert expected in str(raised.value)


class TestEqualSgkit:
def test_simulated_example(self, tmp_path):
Expand Down
16 changes: 16 additions & 0 deletions tests/test_ts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import tempfile
from unittest import mock

import numpy as np
import pytest
Expand Down Expand Up @@ -93,6 +94,21 @@ def test_simple_tree_sequence(self, tmp_path):
"sample_id",
}

def test_missing_dependency(self):
    """ts.convert surfaces the tskit install hint when imports fail."""
    expected = (
        "This process requires the optional tskit module. Install "
        "it with: pip install bio2zarr[tskit]"
    )
    import_failure = ImportError("No module named 'tskit'")
    # While the patch is active every importlib.import_module call raises.
    with mock.patch("importlib.import_module", side_effect=import_failure):
        with pytest.raises(ImportError) as raised:
            ts.convert("UNUSED_PATH", "UNUSED_PATH")
        assert expected in str(raised.value)


class TestTskitFormat:
"""Unit tests for TskitFormat without using full conversion."""
Expand Down
17 changes: 17 additions & 0 deletions tests/test_vcf_examples.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import collections
import pathlib
import re
from unittest import mock

import cyvcf2
import numpy as np
Expand Down Expand Up @@ -481,6 +482,22 @@ def test_small_example_all_missing_gts(self, ds, tmp_path_factory):
p1[1] = True
nt.assert_array_equal(p1, ds2["call_genotype_phased"].values)

def test_missing_dependency(self, tmp_path):
    """vcf convert surfaces the vcf install hint when imports fail."""
    expected = (
        "This process requires the optional cyvcf2 module. Install "
        "it with: pip install bio2zarr[vcf]"
    )
    import_failure = ImportError("No module named 'cyvcf2'")
    sources = ["tests/data/vcf/sample.vcf.gz"]
    destination = tmp_path / "example.vcf.zarr"
    # While the patch is active every importlib.import_module call raises.
    with mock.patch("importlib.import_module", side_effect=import_failure):
        with pytest.raises(ImportError) as raised:
            vcf_mod.convert(
                sources,
                destination,
                worker_processes=0,  # Synchronous mode so the mock works
            )
        assert expected in str(raised.value)


class TestSmallExampleLocalAlleles:
data_path = "tests/data/vcf/sample.vcf.gz"
Expand Down