Skip to content

Commit c3355e3

Browse files
committed
Make format dependencies optional
1 parent 587a29e commit c3355e3

File tree

12 files changed

+157
-11
lines changed

12 files changed

+157
-11
lines changed

.github/workflows/ci.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,46 @@ jobs:
8181
# https://github.com/coverallsapp/github-action
8282
fail-on-error: false
8383

84+
optional_dependencies:
85+
name: Optional dependencies
86+
runs-on: ubuntu-latest
87+
steps:
88+
- uses: actions/checkout@v4
89+
- uses: actions/setup-python@v5
90+
with:
91+
python-version: '3.12'
92+
- name: Check optional dependencies
93+
run: |
94+
python -m venv env-tskit
95+
source env-tskit/bin/activate
96+
python -m pip install .
97+
{ python -m bio2zarr tskit2zarr convert tests/data/ts/example.trees ts.vcz 2>&1; echo $? > ts_exit.txt; } | tee ts.txt
98+
test "$(cat ts_exit.txt)" = "1"
99+
grep -q "This process requires the optional tskit module. Install it with: pip install bio2zarr\[tskit\]" ts.txt
100+
python -m pip install '.[tskit]'
101+
python -m bio2zarr tskit2zarr convert tests/data/ts/example.trees ts.vcz
102+
deactivate
103+
104+
python -m venv env-plink
105+
source env-plink/bin/activate
106+
python -m pip install .
107+
{ python -m bio2zarr plink2zarr convert tests/data/plink/example.bed plink.vcz 2>&1; echo $? > plink_exit.txt; } | tee plink.txt
108+
test "$(cat plink_exit.txt)" = "1"
109+
grep -q "This process requires the optional bed_reader module. Install it with: pip install bio2zarr\[plink\]" plink.txt
110+
python -m pip install '.[plink]'
111+
python -m bio2zarr plink2zarr convert tests/data/plink/example.bed plink.vcz
112+
deactivate
113+
114+
python -m venv env-vcf
115+
source env-vcf/bin/activate
116+
python -m pip install .
117+
{ python -m bio2zarr vcf2zarr convert tests/data/vcf/sample.vcf.gz sample.vcz 2>&1; echo $? > vcf_exit.txt; } | tee vcf.txt
118+
test "$(cat vcf_exit.txt)" = "1"
119+
grep -q "This process requires the optional cyvcf2 module. Install it with: pip install bio2zarr\[vcf\]" vcf.txt
120+
python -m pip install '.[vcf]'
121+
python -m bio2zarr vcf2zarr convert tests/data/vcf/sample.vcf.gz sample.vcz
122+
deactivate
123+
84124
packaging:
85125
name: Packaging
86126
runs-on: ubuntu-latest

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# 0.1.6 2025-0X-XX
22

3+
- Make format-specific dependencies optional (#XXX)
4+
35
- Add contigs to plink output (#344)
46

57
Breaking changes

bio2zarr/core.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import concurrent.futures as cf
22
import contextlib
33
import dataclasses
4+
import functools
5+
import importlib
46
import json
57
import logging
68
import math
@@ -21,6 +23,26 @@
2123
numcodecs.blosc.use_threads = False
2224

2325

26+
def requires_optional_dependency(module_name, extras_name):
    """Return a decorator that checks for an optional dependency at call time.

    Every call to the decorated function first imports ``module_name`` with
    ``importlib.import_module``. If the import succeeds, the wrapped function
    is called with the original arguments and its result is returned.

    Args:
        module_name: Importable name of the optional module, e.g. ``"tskit"``.
        extras_name: Name of the ``bio2zarr`` extra that provides the module;
            it is interpolated into the install hint of the error message.

    Raises:
        ImportError: Raised by the decorated function when ``module_name``
            cannot be imported. If the module itself is absent, the original
            exception is suppressed so only the install hint is shown. If the
            import failed for any other reason (for example the module is
            installed but one of its own imports is broken), the original
            exception is kept as ``__cause__`` so the real problem stays
            visible in the traceback.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                importlib.import_module(module_name)
            except ImportError as err:
                # ModuleNotFoundError.name is the module that could not be
                # located; for a dotted module_name it may be a parent package.
                missing = getattr(err, "name", None)
                not_installed = (
                    isinstance(err, ModuleNotFoundError)
                    and missing is not None
                    and (
                        module_name == missing
                        or module_name.startswith(f"{missing}.")
                    )
                )
                raise ImportError(
                    f"This process requires the optional {module_name} module. "
                    f"Install it with: pip install bio2zarr[{extras_name}]"
                ) from (None if not_installed else err)
            return func(*args, **kwargs)

        return wrapper

    return decorator
44+
45+
2446
def display_number(x):
2547
ret = "n/a"
2648
if math.isfinite(x):

bio2zarr/plink.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import logging
22
import pathlib
33

4-
import bed_reader
54
import numpy as np
65
import zarr
76

@@ -11,7 +10,10 @@
1110

1211

1312
class PlinkFormat(vcz.Source):
13+
@core.requires_optional_dependency("bed_reader", "plink")
1414
def __init__(self, path):
15+
import bed_reader
16+
1517
self._path = pathlib.Path(path)
1618
self.bed = bed_reader.open_bed(path, num_threads=1, count_A1=False)
1719

@@ -175,7 +177,10 @@ def convert(
175177

176178
# FIXME do this more efficiently - currently reading the whole thing
177179
# in for convenience, and also comparing call-by-call
180+
@core.requires_optional_dependency("bed_reader", "plink")
178181
def validate(bed_path, zarr_path):
182+
import bed_reader
183+
179184
root = zarr.open(store=zarr_path, mode="r")
180185
call_genotype = root["call_genotype"][:]
181186

bio2zarr/tskit.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
import pathlib
33

44
import numpy as np
5-
import tskit
65

76
from bio2zarr import constants, core, vcz
87

98
logger = logging.getLogger(__name__)
109

1110

1211
class TskitFormat(vcz.Source):
12+
@core.requires_optional_dependency("tskit", "tskit")
1313
def __init__(
1414
self,
1515
ts_path,
@@ -18,6 +18,8 @@ def __init__(
1818
contig_id=None,
1919
isolated_as_missing=False,
2020
):
21+
import tskit
22+
2123
self._path = ts_path
2224
self.ts = tskit.load(ts_path)
2325
self.contig_id = contig_id if contig_id is not None else "1"

bio2zarr/vcf_utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99
from enum import Enum
1010
from typing import IO, Any
1111

12-
import cyvcf2
1312
import humanfriendly
1413
import numpy as np
1514

15+
from bio2zarr import core
1616
from bio2zarr.typing import PathType
1717

1818
logger = logging.getLogger(__name__)
@@ -395,7 +395,10 @@ class VcfIndexType(Enum):
395395

396396

397397
class VcfFile(contextlib.AbstractContextManager):
398+
@core.requires_optional_dependency("cyvcf2", "vcf")
398399
def __init__(self, vcf_path, index_path=None):
400+
import cyvcf2
401+
399402
self.vcf = None
400403
self.file_type = None
401404
self.index_type = None

bio2zarr/vcz_verification.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
import cyvcf2
21
import numpy as np
32
import numpy.testing as nt
43
import tqdm
54
import zarr
65

6+
from bio2zarr import core
77
from bio2zarr.zarr_utils import first_dim_iter
88

99
from . import constants
@@ -146,7 +146,10 @@ def assert_format_val_equal(vcf_val, zarr_val, vcf_type, vcf_number):
146146
nt.assert_equal(vcf_val, zarr_val)
147147

148148

149+
@core.requires_optional_dependency("cyvcf2", "vcf")
149150
def verify(vcf_path, zarr_path, show_progress=False):
151+
import cyvcf2
152+
150153
root = zarr.open(store=zarr_path, mode="r")
151154
pos = root["variant_position"][:]
152155
allele = root["variant_allele"][:]

pyproject.toml

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,7 @@ dependencies = [
2121
"tabulate",
2222
"tqdm",
2323
"humanfriendly",
24-
# cyvcf2 also pulls in coloredlogs and click",
25-
# colouredlogs pulls in humanfriendly",
26-
"cyvcf2",
27-
"bed_reader",
28-
# TODO Using dev version of tskit for CI, FIXME before release
29-
"tskit @ git+https://github.com/tskit-dev/tskit.git@main#subdirectory=python",
24+
"coloredlogs"
3025
]
3126
requires-python = ">=3.10"
3227
classifiers = [
@@ -65,8 +60,21 @@ dev = [
6560
"pytest-coverage",
6661
"pytest-xdist",
6762
"sgkit>=0.8.0",
68-
"tqdm"
63+
"tqdm",
64+
"tskit @ git+https://github.com/tskit-dev/tskit.git@main#subdirectory=python",
65+
"bed_reader",
66+
"cyvcf2"
6967
]
68+
# TODO Using dev version of tskit for CI, FIXME before release
69+
tskit = ["tskit @ git+https://github.com/tskit-dev/tskit.git@main#subdirectory=python"]
70+
plink = ["bed_reader"]
71+
vcf = ["cyvcf2"]
72+
all = [
73+
"tskit @ git+https://github.com/tskit-dev/tskit.git@main#subdirectory=python",
74+
"bed_reader",
75+
"cyvcf2"
76+
]
77+
7078

7179
[tool.setuptools]
7280
packages = ["bio2zarr"]

tests/test_core.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,3 +244,14 @@ def test_examples(self, chunk_size, size, start, stop):
244244
)
245245
def test_du(path, expected):
246246
assert core.du(path) == expected
247+
248+
249+
def test_decorator_missing_dependency():
    """A guarded function must raise ImportError carrying the install hint."""
    # "non_existent_module" is not importable, so the guard is expected to
    # fail before the wrapped body runs.
    guard = core.requires_optional_dependency("non_existent_module", "extras")

    @guard
    def guarded_call():
        return "success"

    with pytest.raises(ImportError) as raised:
        guarded_call()

    message = str(raised.value)
    assert "pip install bio2zarr[extras]" in message

tests/test_plink.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from unittest import mock
2+
13
import bed_reader
24
import numpy as np
35
import numpy.testing as nt
@@ -55,6 +57,21 @@ def test_genotypes(self, ds):
5557
],
5658
)
5759

60+
def test_missing_dependency(self):
    """plink.convert is expected to raise the install hint for bed_reader."""
    # Every importlib.import_module call fails while the patch is active.
    import_failure = ImportError("No module named 'bed_reader'")
    expected_message = (
        "This process requires the optional bed_reader module. "
        "Install it with: pip install bio2zarr[plink]"
    )

    with (
        mock.patch("importlib.import_module", side_effect=import_failure),
        pytest.raises(ImportError) as raised,
    ):
        # The paths are placeholders; the call should fail before using them.
        plink.convert("UNUSED_PATH", "UNUSED_PATH")

    assert expected_message in str(raised.value)
74+
5875

5976
class TestEqualSgkit:
6077
def test_simulated_example(self, tmp_path):

0 commit comments

Comments
 (0)