diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index d40fb9bb..4229f0fd 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -69,12 +69,6 @@ jobs: strategy: matrix: include: - - python-version: '3.8' - conda-env: py38 - spec: conda - - python-version: '3.8' - conda-env: py38 - spec: main - python-version: '3.9' conda-env: dev spec: conda @@ -174,7 +168,7 @@ jobs: path: bioimageio_cache key: ${{matrix.run-expensive-tests && needs.populate-cache.outputs.cache-key || needs.populate-cache.outputs.cache-key-light}} - name: pytest - run: pytest --disable-pytest-warnings + run: pytest --cov bioimageio --cov-report xml --cov-append --capture no --disable-pytest-warnings env: BIOIMAGEIO_CACHE_PATH: bioimageio_cache RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} diff --git a/README.md b/README.md index 589d4666..d12ca689 100644 --- a/README.md +++ b/README.md @@ -364,6 +364,10 @@ may be controlled with the `LOGURU_LEVEL` environment variable. ## Changelog +### 0.9.0 (coming soon) + +- update to [bioimageio.spec 0.5.4.3](https://github.com/bioimage-io/spec-bioimage-io/blob/main/changelog.md#bioimageiospec-0543) + ### 0.8.0 - breaking: removed `decimals` argument from bioimageio CLI and `bioimageio.core.commands.test()` diff --git a/bioimageio/core/__init__.py b/bioimageio/core/__init__.py index c7554372..d37be4d4 100644 --- a/bioimageio/core/__init__.py +++ b/bioimageio/core/__init__.py @@ -3,6 +3,7 @@ """ from bioimageio.spec import ( + ValidationSummary, build_description, dump_description, load_dataset_description, @@ -112,4 +113,5 @@ "test_model", "test_resource", "validate_format", + "ValidationSummary", ] diff --git a/bioimageio/core/_resource_tests.py b/bioimageio/core/_resource_tests.py index 327e540a..3beb3576 100644 --- a/bioimageio/core/_resource_tests.py +++ b/bioimageio/core/_resource_tests.py @@ -21,6 +21,7 @@ overload, ) +import xarray as xr from loguru import logger from typing_extensions import NotRequired, TypedDict, Unpack, assert_never, get_args @@ -55,6 +56,7 @@ InstalledPackage, ValidationDetail, ValidationSummary, + WarningEntry, ) from ._prediction_pipeline import create_prediction_pipeline @@ -510,7 +512,7 @@ def load_description_and_test( enable_determinism(determinism, weight_formats=weight_formats) for w in weight_formats: - _test_model_inference(rd, w, devices, **deprecated) + _test_model_inference(rd, w, devices, stop_early=stop_early, **deprecated) if stop_early and rd.validation_summary.status == "failed": break @@ -587,14 +589,16 @@ def _test_model_inference( model: Union[v0_4.ModelDescr, v0_5.ModelDescr], weight_format: SupportedWeightsFormat, devices: Optional[Sequence[str]], + stop_early: bool, **deprecated: Unpack[DeprecatedKwargs], ) -> None: test_name = f"Reproduce test outputs from test inputs ({weight_format})" logger.debug("starting '{}'", test_name) - errors: List[ErrorEntry] = [] + error_entries: List[ErrorEntry] = [] + warning_entries: List[WarningEntry] = [] def add_error_entry(msg: str, with_traceback: bool = False): - errors.append( + error_entries.append( ErrorEntry( loc=("weights", weight_format), msg=msg, @@ -603,6 +607,15 @@ def add_error_entry(msg: str, with_traceback: bool = False): ) ) + def add_warning_entry(msg: str): + warning_entries.append( + WarningEntry( + loc=("weights", weight_format), + msg=msg, + type="bioimageio.core", + ) + ) + try: inputs = get_test_inputs(model) expected = get_test_outputs(model) @@ -622,34 +635,58 @@ def add_error_entry(msg: str, 
with_traceback: bool = False): actual = results.members.get(m) if actual is None: add_error_entry("Output tensors for test case may not be None") - break + if stop_early: + break + else: + continue rtol, atol, mismatched_tol = _get_tolerance( model, wf=weight_format, m=m, **deprecated ) - mismatched = (abs_diff := abs(actual - expected)) > atol + rtol * abs( - expected - ) + rtol_value = rtol * abs(expected) + abs_diff = abs(actual - expected) + mismatched = abs_diff > atol + rtol_value mismatched_elements = mismatched.sum().item() - if mismatched_elements / expected.size > mismatched_tol / 1e6: - r_max_idx = (r_diff := (abs_diff / (abs(expected) + 1e-6))).argmax() - r_max = r_diff[r_max_idx].item() - r_actual = actual[r_max_idx].item() - r_expected = expected[r_max_idx].item() - a_max_idx = abs_diff.argmax() - a_max = abs_diff[a_max_idx].item() - a_actual = actual[a_max_idx].item() - a_expected = expected[a_max_idx].item() - add_error_entry( - f"Output '{m}' disagrees with {mismatched_elements} of" - + f" {expected.size} expected values." - + f"\n Max relative difference: {r_max:.2e}" - + rf" (= \|{r_actual:.2e} - {r_expected:.2e}\|/\|{r_expected:.2e} + 1e-6\|)" - + f" at {r_max_idx}" - + f"\n Max absolute difference: {a_max:.2e}" - + rf" (= \|{a_actual:.7e} - {a_expected:.7e}\|) at {a_max_idx}" - ) - break + if not mismatched_elements: + continue + + mismatched_ppm = mismatched_elements / expected.size * 1e6 + abs_diff[~mismatched] = 0 # ignore non-mismatched elements + + r_max_idx = (r_diff := (abs_diff / (abs(expected) + 1e-6))).argmax() + r_max = r_diff[r_max_idx].item() + r_actual = actual[r_max_idx].item() + r_expected = expected[r_max_idx].item() + + # Calculate the max absolute difference with the relative tolerance subtracted + abs_diff_wo_rtol: xr.DataArray = xr.ufuncs.maximum( + (abs_diff - rtol_value).data, 0 + ) + a_max_idx = { + AxisId(k): int(v) for k, v in abs_diff_wo_rtol.argmax().items() + } + + a_max = abs_diff[a_max_idx].item() + a_actual = actual[a_max_idx].item() + a_expected = expected[a_max_idx].item() + + msg = ( + f"Output '{m}' disagrees with {mismatched_elements} of" + + f" {expected.size} expected values" + + f" ({mismatched_ppm:.1f} ppm)." 
+ + f"\n Max relative difference: {r_max:.2e}" + + rf" (= \|{r_actual:.2e} - {r_expected:.2e}\|/\|{r_expected:.2e} + 1e-6\|)" + + f" at {r_max_idx}" + + f"\n Max absolute difference not accounted for by relative tolerance: {a_max:.2e}" + + rf" (= \|{a_actual:.7e} - {a_expected:.7e}\|) at {a_max_idx}" + ) + if mismatched_ppm > mismatched_tol: + add_error_entry(msg) + if stop_early: + break + else: + add_warning_entry(msg) + except Exception as e: if get_validation_context().raise_errors: raise e @@ -660,9 +697,10 @@ def add_error_entry(msg: str, with_traceback: bool = False): ValidationDetail( name=test_name, loc=("weights", weight_format), - status="failed" if errors else "passed", + status="failed" if error_entries else "passed", recommended_env=get_conda_env(entry=dict(model.weights)[weight_format]), - errors=errors, + errors=error_entries, + warnings=warning_entries, ) ) diff --git a/bioimageio/core/_settings.py b/bioimageio/core/_settings.py index c95cf55d..96052346 100644 --- a/bioimageio/core/_settings.py +++ b/bioimageio/core/_settings.py @@ -1,13 +1,10 @@ from typing import Literal -from dotenv import load_dotenv from pydantic import Field from typing_extensions import Annotated from bioimageio.spec._internal._settings import Settings as SpecSettings -_ = load_dotenv() - class Settings(SpecSettings): """environment variables for bioimageio.spec and bioimageio.core""" diff --git a/bioimageio/core/backends/keras_backend.py b/bioimageio/core/backends/keras_backend.py index 1c10da7d..b11fb718 100644 --- a/bioimageio/core/backends/keras_backend.py +++ b/bioimageio/core/backends/keras_backend.py @@ -1,20 +1,27 @@ import os +import shutil +from pathlib import Path +from tempfile import TemporaryDirectory from typing import Any, Optional, Sequence, Union +import h5py # pyright: ignore[reportMissingTypeStubs] +from keras.src.legacy.saving import ( # pyright: ignore[reportMissingTypeStubs] + legacy_h5_format, +) from loguru import logger from numpy.typing import NDArray -from bioimageio.spec._internal.io import download -from bioimageio.spec._internal.type_guards import is_list, is_tuple from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.model.v0_5 import Version from .._settings import settings from ..digest_spec import get_axes_infos +from ..utils._type_guards import is_list, is_tuple from ._model_adapter import ModelAdapter os.environ["KERAS_BACKEND"] = settings.keras_backend + # by default, we use the keras integrated with tensorflow # TODO: check if we should prefer keras try: @@ -67,9 +74,18 @@ def __init__( devices, ) - weight_path = download(model_description.weights.keras_hdf5.source).path + weight_reader = model_description.weights.keras_hdf5.get_reader() + if weight_reader.suffix in (".h5", "hdf5"): + h5_file = h5py.File(weight_reader, mode="r") + self._network = legacy_h5_format.load_model_from_hdf5(h5_file) + else: + with TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) / weight_reader.original_file_name + with temp_path.open("wb") as f: + shutil.copyfileobj(weight_reader, f) + + self._network = keras.models.load_model(temp_path) - self._network = keras.models.load_model(weight_path) self._output_axes = [ tuple(a.id for a in get_axes_infos(out)) for out in model_description.outputs diff --git a/bioimageio/core/backends/onnx_backend.py b/bioimageio/core/backends/onnx_backend.py index d5b89152..9d8412e5 100644 --- a/bioimageio/core/backends/onnx_backend.py +++ b/bioimageio/core/backends/onnx_backend.py @@ -5,11 +5,10 @@ import onnxruntime as rt # 
pyright: ignore[reportMissingTypeStubs] from numpy.typing import NDArray -from bioimageio.spec._internal.type_guards import is_list, is_tuple from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download from ..model_adapters import ModelAdapter +from ..utils._type_guards import is_list, is_tuple class ONNXModelAdapter(ModelAdapter): @@ -24,8 +23,8 @@ def __init__( if model_description.weights.onnx is None: raise ValueError("No ONNX weights specified for {model_description.name}") - local_path = download(model_description.weights.onnx.source).path - self._session = rt.InferenceSession(local_path.read_bytes()) + reader = model_description.weights.onnx.get_reader() + self._session = rt.InferenceSession(reader.read()) onnx_inputs = self._session.get_inputs() self._input_names: List[str] = [ipt.name for ipt in onnx_inputs] diff --git a/bioimageio/core/backends/pytorch_backend.py b/bioimageio/core/backends/pytorch_backend.py index af1ea85d..3d4a422b 100644 --- a/bioimageio/core/backends/pytorch_backend.py +++ b/bioimageio/core/backends/pytorch_backend.py @@ -1,7 +1,7 @@ import gc import warnings from contextlib import nullcontext -from io import TextIOWrapper +from io import BytesIO, TextIOWrapper from pathlib import Path from typing import Any, List, Literal, Optional, Sequence, Union @@ -11,12 +11,13 @@ from torch import nn from typing_extensions import assert_never -from bioimageio.spec._internal.type_guards import is_list, is_ndarray, is_tuple -from bioimageio.spec.common import ZipPath +from bioimageio.spec._internal.version_type import Version +from bioimageio.spec.common import BytesReader, ZipPath from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 from bioimageio.spec.utils import download from ..digest_spec import import_callable +from ..utils._type_guards import is_list, is_ndarray, is_tuple from ._model_adapter import ModelAdapter @@ -73,7 +74,9 @@ def _forward_impl( if r is None: result.append(None) elif isinstance(r, torch.Tensor): - r_np: NDArray[Any] = r.detach().cpu().numpy() + r_np: NDArray[Any] = ( # pyright: ignore[reportUnknownVariableType] + r.detach().cpu().numpy() + ) result.append(r_np) elif is_ndarray(r): result.append(r) @@ -129,7 +132,7 @@ def load_torch_model( if load_state: torch_model = load_torch_state_dict( torch_model, - path=download(weight_spec).path, + path=download(weight_spec), devices=use_devices, ) return torch_model @@ -137,26 +140,41 @@ def load_torch_model( def load_torch_state_dict( model: nn.Module, - path: Union[Path, ZipPath], + path: Union[Path, ZipPath, BytesReader], devices: Sequence[torch.device], ) -> nn.Module: model = model.to(devices[0]) - with path.open("rb") as f: + if isinstance(path, (Path, ZipPath)): + ctxt = path.open("rb") + else: + ctxt = nullcontext(BytesIO(path.read())) + + with ctxt as f: assert not isinstance(f, TextIOWrapper) - state = torch.load(f, map_location=devices[0], weights_only=True) + if Version(str(torch.__version__)) < Version("1.13"): + state = torch.load(f, map_location=devices[0]) + else: + state = torch.load(f, map_location=devices[0], weights_only=True) incompatible = model.load_state_dict(state) if ( - incompatible is not None # pyright: ignore[reportUnnecessaryComparison] - and incompatible.missing_keys + isinstance(incompatible, tuple) + and hasattr(incompatible, "missing_keys") + and hasattr(incompatible, "unexpected_keys") ): - logger.warning("Missing state dict keys: {}", incompatible.missing_keys) + if incompatible.missing_keys: + logger.warning("Missing state dict 
keys: {}", incompatible.missing_keys) - if ( - incompatible is not None # pyright: ignore[reportUnnecessaryComparison] - and incompatible.unexpected_keys - ): - logger.warning("Unexpected state dict keys: {}", incompatible.unexpected_keys) + if hasattr(incompatible, "unexpected_keys") and incompatible.unexpected_keys: + logger.warning( + "Unexpected state dict keys: {}", incompatible.unexpected_keys + ) + else: + logger.warning( + "`model.load_state_dict()` unexpectedly returned: {} " + + "(expected named tuple with `missing_keys` and `unexpected_keys` attributes)", + (s[:20] + "..." if len(s := str(incompatible)) > 20 else s), + ) return model diff --git a/bioimageio/core/backends/torchscript_backend.py b/bioimageio/core/backends/torchscript_backend.py index ce3ba131..8c2de21b 100644 --- a/bioimageio/core/backends/torchscript_backend.py +++ b/bioimageio/core/backends/torchscript_backend.py @@ -6,11 +6,10 @@ import torch from numpy.typing import NDArray -from bioimageio.spec._internal.type_guards import is_list, is_tuple from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download from ..model_adapters import ModelAdapter +from ..utils._type_guards import is_list, is_tuple class TorchscriptModelAdapter(ModelAdapter): @@ -26,7 +25,6 @@ def __init__( f"No torchscript weights found for model {model_description.name}" ) - weight_path = download(model_description.weights.torchscript.source).path if devices is None: self.devices = ["cuda" if torch.cuda.is_available() else "cpu"] else: @@ -37,8 +35,8 @@ def __init__( "Multiple devices for single torchscript model not yet implemented" ) - with weight_path.open("rb") as f: - self._model = torch.jit.load(f) + weight_reader = model_description.weights.torchscript.get_reader() + self._model = torch.jit.load(weight_reader) self._model.to(self.devices[0]) self._model = self._model.eval() diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index 8e62239d..9be8f2a6 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -57,13 +57,12 @@ update_hashes, ) from bioimageio.spec._internal.io import is_yaml_value -from bioimageio.spec._internal.io_basics import ZipPath from bioimageio.spec._internal.io_utils import open_bioimageio_yaml -from bioimageio.spec._internal.types import NotEmpty +from bioimageio.spec._internal.types import FormatVersionPlaceholder, NotEmpty from bioimageio.spec.dataset import DatasetDescr from bioimageio.spec.model import ModelDescr, v0_4, v0_5 from bioimageio.spec.notebook import NotebookDescr -from bioimageio.spec.utils import download, ensure_description_is_model, write_yaml +from bioimageio.spec.utils import ensure_description_is_model, get_reader, write_yaml from .commands import WeightFormatArgAll, WeightFormatArgAny, package, test from .common import MemberId, SampleId, SupportedWeightsFormat @@ -205,6 +204,15 @@ class TestCmd(CmdBase, WithSource, WithSummaryLogging): ) """Do not run further subtests after a failed one.""" + format_version: Union[FormatVersionPlaceholder, str] = Field( + "discover", alias="format-version" + ) + """The format version to use for testing. 
+ - 'latest': Use the latest implemented format version for the given resource type (may trigger auto updating) + - 'discover': Use the format version as described in the resource description + - '0.4', '0.5', ...: Use the specified format version (may trigger auto updating) + """ + def run(self): sys.exit( test( @@ -214,6 +222,7 @@ def run(self): summary=self.summary, runtime_env=self.runtime_env, determinism=self.determinism, + format_version=self.format_version, ) ) @@ -487,18 +496,14 @@ def _example(self): example_path.mkdir(exist_ok=True) for t, src in zip(input_ids, example_inputs): - local = download(src).path - dst = Path(f"{example_path}/{t}/001{''.join(local.suffixes)}") + reader = get_reader(src) + dst = Path(f"{example_path}/{t}/001{reader.suffix}") dst.parent.mkdir(parents=True, exist_ok=True) inputs001.append(dst.as_posix()) - if isinstance(local, Path): - shutil.copy(local, dst) - elif isinstance(local, ZipPath): - _ = local.root.extract(local.at, path=dst) - else: - assert_never(local) + with dst.open("wb") as f: + shutil.copyfileobj(reader, f) - inputs = [tuple(inputs001)] + inputs = [inputs001] output_pattern = f"{example_path}/outputs/{{output_id}}/{{sample_id}}.tif" bioimageio_cli_path = example_path / YAML_FILE @@ -510,7 +515,7 @@ def _example(self): stats=stats_file, blockwise=self.blockwise, ) - assert is_yaml_value(cli_example_args) + assert is_yaml_value(cli_example_args), cli_example_args write_yaml( cli_example_args, bioimageio_cli_path, diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index 7184014c..36982854 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -13,6 +13,7 @@ save_bioimageio_package, save_bioimageio_package_as_folder, ) +from bioimageio.spec._internal.types import FormatVersionPlaceholder from ._resource_tests import test_description @@ -32,6 +33,7 @@ def test( Literal["currently-active", "as-described"], Path ] = "currently-active", determinism: Literal["seed_only", "full"] = "seed_only", + format_version: Union[FormatVersionPlaceholder, str] = "discover", ) -> int: """Test a bioimageio resource. 
@@ -42,6 +44,7 @@ def test(
     else:
         test_summary = test_description(
             descr,
+            format_version=format_version,
             weight_format=None if weight_format == "all" else weight_format,
             devices=[devices] if isinstance(devices, str) else devices,
             runtime_env=runtime_env,
diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py
index fb0462f5..6a10b645 100644
--- a/bioimageio/core/digest_spec.py
+++ b/bioimageio/core/digest_spec.py
@@ -1,11 +1,11 @@
 from __future__ import annotations
 
 import collections.abc
-import hashlib
 import importlib.util
 import sys
 from itertools import chain
 from pathlib import Path
+from tempfile import TemporaryDirectory
 from typing import (
     Any,
     Callable,
@@ -19,6 +19,7 @@
     Tuple,
     Union,
 )
+from zipfile import ZipFile, is_zipfile
 
 import numpy as np
 import xarray as xr
@@ -35,9 +36,8 @@
     ArchitectureFromLibraryDescr,
     ParameterizedSize_N,
 )
-from bioimageio.spec.utils import download, load_array
+from bioimageio.spec.utils import load_array
 
-from ._settings import settings
 from .axis import Axis, AxisId, AxisInfo, AxisLike, PerAxis
 from .block_meta import split_multiple_shapes_into_blocks
 from .common import Halo, MemberId, PerMember, SampleId, TotalNumberOfBlocks
@@ -89,54 +89,51 @@ def _import_from_file_impl(
 ):
     src_descr = FileDescr(source=source, **kwargs)
     # ensure sha is valid even if perform_io_checks=False
-    src_descr.validate_sha256()
+    # or the source has changed since last sha computation
+    src_descr.validate_sha256(force_recompute=True)
     assert src_descr.sha256 is not None
+    source_sha = src_descr.sha256
 
-    local_source = src_descr.download()
-
-    source_bytes = local_source.path.read_bytes()
-    assert isinstance(source_bytes, bytes)
-    source_sha = hashlib.sha256(source_bytes).hexdigest()
-
+    reader = src_descr.get_reader()
     # make sure we have unique module name
-    module_name = f"{local_source.path.stem}_{source_sha}"
+    module_name = f"{reader.original_file_name.split('.')[0]}_{source_sha}"
 
-    # make sure we have a valid module name
+    # make sure we have a unique and valid module name
     if not module_name.isidentifier():
         module_name = f"custom_module_{source_sha}"
         assert module_name.isidentifier(), module_name
 
+    source_bytes = reader.read()
+
     module = sys.modules.get(module_name)
     if module is None:
         try:
-            if isinstance(local_source.path, Path):
-                module_path = local_source.path
-            elif isinstance(local_source.path, ZipPath):
-                # save extract source to cache
-                # loading from a file from disk ensure we get readable tracebacks
-                # if any errors occur
-                module_path = (
-                    settings.cache_path / f"{source_sha}-{local_source.path.name}"
-                )
-                _ = module_path.write_bytes(source_bytes)
+            tmp_dir = TemporaryDirectory(ignore_cleanup_errors=True)
+            module_path = Path(tmp_dir.name) / module_name
+            if reader.original_file_name.endswith(".zip") or is_zipfile(reader):
+                module_path.mkdir()
+                ZipFile(reader).extractall(path=module_path)
             else:
-                assert_never(local_source.path)
+                module_path = module_path.with_suffix(".py")
+                _ = module_path.write_bytes(source_bytes)
 
             importlib_spec = importlib.util.spec_from_file_location(
-                module_name, module_path
+                module_name, str(module_path)
             )
             if importlib_spec is None:
                 raise ImportError(f"Failed to import {source}")
 
             module = importlib.util.module_from_spec(importlib_spec)
+
+            sys.modules[module_name] = module  # cache this module
+
             assert importlib_spec.loader is not None
             importlib_spec.loader.exec_module(module)
         except Exception as e:
+            # the module may not have been cached yet if the failure happened early
+            _ = sys.modules.pop(module_name, None)
             raise ImportError(f"Failed to import {source}") from e
-        else:
-            
sys.modules[module_name] = module # cache this module try: callable_attr = getattr(module, callable_name) @@ -378,21 +375,13 @@ def get_tensor( if isinstance(src, Tensor): return src - - if isinstance(src, xr.DataArray): + elif isinstance(src, xr.DataArray): return Tensor.from_xarray(src) - - if isinstance(src, np.ndarray): + elif isinstance(src, np.ndarray): return Tensor.from_numpy(src, dims=get_axes_infos(ipt)) - - if isinstance(src, FileDescr): - src = download(src).path - - if isinstance(src, (ZipPath, Path, str)): + else: return load_tensor(src, axes=get_axes_infos(ipt)) - assert_never(src) - def create_sample_for_model( model: AnyModelDescr, diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index dc5b70db..55a87bda 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -1,7 +1,6 @@ import collections.abc import warnings import zipfile -from io import TextIOWrapper from pathlib import Path, PurePosixPath from shutil import copyfileobj from typing import ( @@ -15,15 +14,17 @@ ) import h5py # pyright: ignore[reportMissingTypeStubs] -import numpy as np from imageio.v3 import imread, imwrite # type: ignore from loguru import logger from numpy.typing import NDArray from pydantic import BaseModel, ConfigDict, TypeAdapter from typing_extensions import assert_never -from bioimageio.spec._internal.io import interprete_file_source +from bioimageio.spec._internal.io import get_reader, interprete_file_source +from bioimageio.spec._internal.type_guards import is_ndarray from bioimageio.spec.common import ( + BytesReader, + FileSource, HttpUrl, PermissiveFileSource, RelativeFilePath, @@ -65,51 +66,51 @@ def load_image( else: src = parsed_source - # FIXME: why is pyright complaining about giving the union to _split_dataset_path? if isinstance(src, Path): - file_source, subpath = _split_dataset_path(src) + file_source, suffix, subpath = _split_dataset_path(src) elif isinstance(src, HttpUrl): - file_source, subpath = _split_dataset_path(src) + file_source, suffix, subpath = _split_dataset_path(src) elif isinstance(src, ZipPath): - file_source, subpath = _split_dataset_path(src) + file_source, suffix, subpath = _split_dataset_path(src) else: assert_never(src) - path = download(file_source).path - - if path.suffix == ".npy": + if suffix == ".npy": if subpath is not None: - raise ValueError(f"Unexpected subpath {subpath} for .npy path {path}") - return load_array(path) - elif path.suffix in SUFFIXES_WITH_DATAPATH: + logger.warning( + "Unexpected subpath {} for .npy source {}", subpath, file_source + ) + + image = load_array(file_source) + elif suffix in SUFFIXES_WITH_DATAPATH: if subpath is None: dataset_path = DEFAULT_H5_DATASET_PATH else: dataset_path = str(subpath) - with h5py.File(path, "r") as f: + reader = download(file_source) + + with h5py.File(reader, "r") as f: h5_dataset = f.get( # pyright: ignore[reportUnknownVariableType] dataset_path ) if not isinstance(h5_dataset, h5py.Dataset): raise ValueError( - f"{path} is not of type {h5py.Dataset}, but has type " + f"{file_source} did not load as {h5py.Dataset}, but has type " + str( type(h5_dataset) # pyright: ignore[reportUnknownArgumentType] ) ) image: NDArray[Any] image = h5_dataset[:] # pyright: ignore[reportUnknownVariableType] - assert isinstance(image, np.ndarray), type( - image # pyright: ignore[reportUnknownArgumentType] - ) - return image # pyright: ignore[reportUnknownVariableType] - elif isinstance(path, ZipPath): - return imread( - path.read_bytes(), extension=path.suffix - ) # pyright: 
ignore[reportUnknownVariableType] else: - return imread(path) # pyright: ignore[reportUnknownVariableType] + reader = download(file_source) + image = imread( # pyright: ignore[reportUnknownVariableType] + reader.read(), extension=suffix + ) + + assert is_ndarray(image) + return image def load_tensor( @@ -123,19 +124,21 @@ def load_tensor( _SourceT = TypeVar("_SourceT", Path, HttpUrl, ZipPath) +Suffix = str + def _split_dataset_path( source: _SourceT, -) -> Tuple[_SourceT, Optional[PurePosixPath]]: +) -> Tuple[_SourceT, Suffix, Optional[PurePosixPath]]: """Split off subpath (e.g. internal h5 dataset path) from a file path following a file extension. Examples: >>> _split_dataset_path(Path("my_file.h5/dataset")) - (...Path('my_file.h5'), PurePosixPath('dataset')) + (...Path('my_file.h5'), '.h5', PurePosixPath('dataset')) >>> _split_dataset_path(Path("my_plain_file")) - (...Path('my_plain_file'), None) + (...Path('my_plain_file'), '', None) """ if isinstance(source, RelativeFilePath): @@ -148,42 +151,47 @@ def _split_dataset_path( def separate_pure_path(path: PurePosixPath): for p in path.parents: if p.suffix in SUFFIXES_WITH_DATAPATH: - return p, PurePosixPath(path.relative_to(p)) + return p, p.suffix, PurePosixPath(path.relative_to(p)) - return path, None + return path, path.suffix, None if isinstance(src, HttpUrl): - file_path, data_path = separate_pure_path(PurePosixPath(src.path or "")) + file_path, suffix, data_path = separate_pure_path(PurePosixPath(src.path or "")) if data_path is None: - return src, None + return src, suffix, None return ( HttpUrl(str(file_path).replace(f"/{data_path}", "")), + suffix, data_path, ) if isinstance(src, ZipPath): - file_path, data_path = separate_pure_path(PurePosixPath(str(src))) + file_path, suffix, data_path = separate_pure_path(PurePosixPath(str(src))) if data_path is None: - return src, None + return src, suffix, None return ( ZipPath(str(file_path).replace(f"/{data_path}", "")), + suffix, data_path, ) - file_path, data_path = separate_pure_path(PurePosixPath(src)) - return Path(file_path), data_path + file_path, suffix, data_path = separate_pure_path(PurePosixPath(src)) + return Path(file_path), suffix, data_path def save_tensor(path: Union[Path, str], tensor: Tensor) -> None: # TODO: save axis meta data - data: NDArray[Any] = tensor.data.to_numpy() - file_path, subpath = _split_dataset_path(Path(path)) - if not file_path.suffix: + data: NDArray[Any] = ( # pyright: ignore[reportUnknownVariableType] + tensor.data.to_numpy() + ) + assert is_ndarray(data) + file_path, suffix, subpath = _split_dataset_path(Path(path)) + if not suffix: raise ValueError(f"No suffix (needed to decide file format) found in {path}") file_path.parent.mkdir(exist_ok=True, parents=True) @@ -191,7 +199,7 @@ def save_tensor(path: Union[Path, str], tensor: Tensor) -> None: if subpath is not None: raise ValueError(f"Unexpected subpath {subpath} found in .npy path {path}") save_array(file_path, data) - elif file_path.suffix in (".h5", ".hdf", ".hdf5"): + elif suffix in (".h5", ".hdf", ".hdf5"): if subpath is None: dataset_path = DEFAULT_H5_DATASET_PATH else: @@ -272,25 +280,48 @@ def load_dataset_stat(path: Path): return {e.measure: e.value for e in seq} -def ensure_unzipped(source: Union[PermissiveFileSource, ZipPath], folder: Path): - """unzip a (downloaded) **source** to a file in **folder** if source is a zip archive. 
-    Always returns the path to the unzipped source (maybe source itself)"""
-    local_weights_file = download(source).path
-    if isinstance(local_weights_file, ZipPath):
-        # source is inside a zip archive
-        out_path = folder / local_weights_file.filename
-        with local_weights_file.open("rb") as src, out_path.open("wb") as dst:
-            assert not isinstance(src, TextIOWrapper)
-            copyfileobj(src, dst)
+def ensure_unzipped(
+    source: Union[PermissiveFileSource, ZipPath, BytesReader], folder: Path
+):
+    """unzip a (downloaded) **source** to a file in **folder** if source is a zip archive,
+    otherwise copy **source** to a file in **folder**."""
+    if isinstance(source, BytesReader):
+        weights_reader = source
+    else:
+        weights_reader = get_reader(source)
 
-        local_weights_file = out_path
+    out_path = folder / (
+        weights_reader.original_file_name or f"file{weights_reader.suffix}"
+    )
 
-    if zipfile.is_zipfile(local_weights_file):
+    if zipfile.is_zipfile(weights_reader):
+        out_path = out_path.with_name(out_path.name + ".unzipped")
+        out_path.parent.mkdir(exist_ok=True, parents=True)
         # source itself is a zipfile
-        out_path = folder / local_weights_file.with_suffix(".unzipped").name
-        with zipfile.ZipFile(local_weights_file, "r") as f:
+        with zipfile.ZipFile(weights_reader, "r") as f:
             f.extractall(out_path)
-        return out_path
     else:
-        return local_weights_file
+        out_path.parent.mkdir(exist_ok=True, parents=True)
+        with out_path.open("wb") as f:
+            copyfileobj(weights_reader, f)
+
+    return out_path
+
+
+def get_suffix(source: Union[ZipPath, FileSource]) -> str:
+    """get the file suffix of **source** ('' for an URL without path)."""
+    if isinstance(source, Path):
+        return source.suffix
+    elif isinstance(source, ZipPath):
+        return source.suffix
+    elif isinstance(source, RelativeFilePath):
+        return source.path.suffix
+    elif isinstance(source, HttpUrl):
+        if source.path is None:
+            return ""
+        else:
+            return PurePosixPath(source.path).suffix
+    else:
+        assert_never(source)
diff --git a/bioimageio/core/tensor.py b/bioimageio/core/tensor.py
index cb3b3da9..9d69e970 100644
--- a/bioimageio/core/tensor.py
+++ b/bioimageio/core/tensor.py
@@ -66,7 +66,15 @@ def __array__(self, dtype: DTypeLike = None):
         return np.asarray(self._data, dtype=dtype)
 
     def __getitem__(
-        self, key: Union[SliceInfo, slice, int, PerAxis[Union[SliceInfo, slice, int]]]
+        self,
+        key: Union[
+            SliceInfo,
+            slice,
+            int,
+            PerAxis[Union[SliceInfo, slice, int]],
+            Tensor,
+            xr.DataArray,
+        ],
     ) -> Self:
         if isinstance(key, SliceInfo):
             key = slice(*key)
@@ -75,11 +83,27 @@ def __getitem__(
                 a: s if isinstance(s, int) else s if isinstance(s, slice) else slice(*s)
                 for a, s in key.items()
             }
+        elif isinstance(key, Tensor):
+            key = key._data
+
         return self.__class__.from_xarray(self._data[key])
 
-    def __setitem__(self, key: PerAxis[Union[SliceInfo, slice]], value: Tensor) -> None:
-        key = {a: s if isinstance(s, slice) else slice(*s) for a, s in key.items()}
-        self._data[key] = value._data
+    def __setitem__(
+        self,
+        key: Union[PerAxis[Union[SliceInfo, slice]], Tensor, xr.DataArray],
+        value: Union[Tensor, xr.DataArray, float, int],
+    ) -> None:
+        if isinstance(key, Tensor):
+            key = key._data
+        elif isinstance(key, xr.DataArray):
+            pass
+        else:
+            key = {a: s if isinstance(s, slice) else slice(*s) for a, s in key.items()}
+
+        if isinstance(value, Tensor):
+            value = value._data
+
+        self._data[key] = value
 
     def __len__(self) -> int:
         return len(self.data)
diff --git a/bioimageio/core/utils/_type_guards.py b/bioimageio/core/utils/_type_guards.py
new file mode 100644
index 00000000..0a33b808
--- /dev/null +++ b/bioimageio/core/utils/_type_guards.py @@ -0,0 +1,8 @@ +"""use these type guards with caution! +They widen the type to T[Any], which is not always correct.""" + +from bioimageio.spec._internal import type_guards + +is_list = type_guards.is_list +is_ndarray = type_guards.is_ndarray +is_tuple = type_guards.is_tuple diff --git a/bioimageio/core/weight_converters/keras_to_tensorflow.py b/bioimageio/core/weight_converters/keras_to_tensorflow.py index ac8886e1..09f54344 100644 --- a/bioimageio/core/weight_converters/keras_to_tensorflow.py +++ b/bioimageio/core/weight_converters/keras_to_tensorflow.py @@ -1,12 +1,12 @@ import os import shutil from pathlib import Path +from tempfile import TemporaryDirectory from typing import Union, no_type_check from zipfile import ZipFile import tensorflow # pyright: ignore[reportMissingTypeStubs] -from bioimageio.spec._internal.io import download from bioimageio.spec._internal.version_type import Version from bioimageio.spec.common import ZipPath from bioimageio.spec.model.v0_5 import ( @@ -70,7 +70,7 @@ def convert( raise ValueError("Missing Keras Hdf5 weights to convert from.") weight_spec = model_descr.weights.keras_hdf5 - weight_path = download(weight_spec.source).path + weight_reader = weight_spec.get_reader() if weight_spec.tensorflow_version: model_tf_major_ver = int(weight_spec.tensorflow_version.major) @@ -79,30 +79,34 @@ def convert( f"Tensorflow major versions of model {model_tf_major_ver} is not {tf_major_ver}" ) - if tf_major_ver == 1: - if len(model_descr.inputs) != 1 or len(model_descr.outputs) != 1: - raise NotImplementedError( - "Weight conversion for models with multiple inputs or outputs is not yet implemented." - ) - - input_name = str( - d.id - if isinstance((d := model_descr.inputs[0]), InputTensorDescr) - else d.name - ) - output_name = str( - d.id - if isinstance((d := model_descr.outputs[0]), OutputTensorDescr) - else d.name - ) - return _convert_tf1( - ensure_unzipped(weight_path, Path("bioimageio_unzipped_tf_weights")), - output_path, - input_name, - output_name, + with TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir: + local_weights = ensure_unzipped( + weight_reader, Path(temp_dir) / "bioimageio_unzipped_tf_weights" ) - else: - return _convert_tf2(weight_path, output_path) + if tf_major_ver == 1: + if len(model_descr.inputs) != 1 or len(model_descr.outputs) != 1: + raise NotImplementedError( + "Weight conversion for models with multiple inputs or outputs is not yet implemented." + ) + + input_name = str( + d.id + if isinstance((d := model_descr.inputs[0]), InputTensorDescr) + else d.name + ) + output_name = str( + d.id + if isinstance((d := model_descr.outputs[0]), OutputTensorDescr) + else d.name + ) + return _convert_tf1( + ensure_unzipped(local_weights, Path("bioimageio_unzipped_tf_weights")), + output_path, + input_name, + output_name, + ) + else: + return _convert_tf2(local_weights, output_path) def _convert_tf2( diff --git a/bioimageio/core/weight_converters/torchscript_to_onnx.py b/bioimageio/core/weight_converters/torchscript_to_onnx.py index d58b47ab..774e2875 100644 --- a/bioimageio/core/weight_converters/torchscript_to_onnx.py +++ b/bioimageio/core/weight_converters/torchscript_to_onnx.py @@ -3,7 +3,6 @@ import torch.jit from bioimageio.spec.model.v0_5 import ModelDescr, OnnxWeightsDescr -from bioimageio.spec.utils import download from .. 
import __version__ from ..digest_spec import get_member_id, get_test_inputs @@ -55,8 +54,8 @@ def convert( ] inputs_torch = [torch.from_numpy(ipt) for ipt in inputs_numpy] - weight_path = download(torchscript_descr).path - model = torch.jit.load(weight_path) # type: ignore + weight_reader = torchscript_descr.get_reader() + model = torch.jit.load(weight_reader) # type: ignore model.to("cpu") model = model.eval() # type: ignore diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 7654f5ab..3f63037b 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -18,10 +18,10 @@ build: requirements: host: - - python >=3.8,<3.13 + - python >=3.9,<3.13 - pip run: - - python >=3.8,<3.13 + - python >=3.9,<3.13 {% for dep in setup_py_data['install_requires'] %} - {{ dep.lower() }} {% endfor %} diff --git a/dev/env-dev.yaml b/dev/env-dev.yaml index 13378376..38cbb289 100644 --- a/dev/env-dev.yaml +++ b/dev/env-dev.yaml @@ -5,10 +5,11 @@ channels: - nodefaults - pytorch dependencies: - - bioimageio.spec==0.5.4.1 + - bioimageio.spec==0.5.4.3 - black # - crick # currently requires python<=3.9 - h5py + - httpx - imagecodecs - imageio>=2.5 - jupyter @@ -16,6 +17,7 @@ dependencies: - keras>=3.0,<4 - loguru - matplotlib + - napari - numpy - onnx - onnxruntime @@ -31,7 +33,7 @@ dependencies: - pytest-cov # - python=3.11 # removed - pytorch>=2.1,<3 - - requests + - respx - rich - ruff - ruyaml diff --git a/dev/env-full.yaml b/dev/env-full.yaml index a9dc0132..993a22c5 100644 --- a/dev/env-full.yaml +++ b/dev/env-full.yaml @@ -4,9 +4,8 @@ channels: - nodefaults - pytorch dependencies: - - bioimageio.spec==0.5.4.1 + - bioimageio.spec==0.5.4.3 - black - # - careamics # TODO: add careamics for model testing (currently pins pydantic to <2.9) - cellpose # for model testing # - crick # currently requires python<=3.9 - h5py @@ -18,6 +17,7 @@ dependencies: - loguru - matplotlib - monai # for model testing + - napari - numpy - onnx - onnxruntime @@ -33,7 +33,8 @@ dependencies: - pytest-cov - python=3.11 # 3.12 not supported by cellpose->fastremap - pytorch>=2.1,<3 - - requests + - httpx + - respx - rich - ruff - ruyaml @@ -45,5 +46,6 @@ dependencies: - typing-extensions - xarray>=2024.01,<2025.3.0 - pip: + # - careamics # for model testing (TODO: install without exact bioimageio.core pin) - git+https://github.com/ChaoningZhang/MobileSAM.git # for model testing - -e .. 
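Note on the reworked output comparison in `_resource_tests.py` (first file above): mismatched elements are now counted in parts per million and only raise an error when they exceed the model's `mismatched_tol`; smaller deviations are recorded as warnings, and the reported absolute difference excludes what the relative tolerance already covers. A minimal sketch of that logic in plain xarray, with hypothetical tolerance values (in the real code `atol`, `rtol`, and `mismatched_tol` come from `_get_tolerance`):

    import numpy as np
    import xarray as xr

    atol, rtol, mismatched_tol = 1e-4, 1e-4, 100  # hypothetical; mismatched_tol in ppm

    expected = xr.DataArray(np.ones((2, 3)), dims=("x", "y"))
    actual = expected + xr.DataArray(
        np.array([[0.0, 3e-4, 0.0], [5e-5, 0.0, 0.0]]), dims=("x", "y")
    )

    rtol_value = rtol * abs(expected)
    abs_diff = abs(actual - expected)
    mismatched = abs_diff > atol + rtol_value
    mismatched_ppm = mismatched.sum().item() / expected.size * 1e6

    # absolute difference not accounted for by the relative tolerance
    abs_diff_wo_rtol = np.maximum(abs_diff - rtol_value, 0)

    # 1 of 6 values exceeds atol + rtol * |expected|:
    # ~166666.7 ppm > 100 ppm, so this counts as an error, not a warning
    status = "error" if mismatched_ppm > mismatched_tol else "warning"
    print(f"{mismatched_ppm:.1f} ppm mismatched -> {status}")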
diff --git a/dev/env-gpu.yaml b/dev/env-gpu.yaml index 7fc2123c..25229679 100644 --- a/dev/env-gpu.yaml +++ b/dev/env-gpu.yaml @@ -4,7 +4,7 @@ channels: - conda-forge - nodefaults dependencies: - - bioimageio.spec==0.5.4.1 + - bioimageio.spec==0.5.4.3 - black - cellpose # for model testing # - crick # currently requires python<=3.9 @@ -30,7 +30,8 @@ dependencies: - pytest - pytest-cov - python=3.11 - - requests + - httpx + - respx - rich - ruff - ruyaml @@ -42,7 +43,7 @@ dependencies: - pip: # - tf2onnx # TODO: add tf2onnx - --extra-index-url https://download.pytorch.org/whl/cu126 - - careamics # TODO: add careamics for model testing (currently pins pydantic to <2.9) + # - careamics # for model testing (TODO: install without exact bioimageio.core pin) - git+https://github.com/ChaoningZhang/MobileSAM.git # for model testing - onnxruntime-gpu - tensorflow diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 6fc6597a..d280bbd5 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -1,3 +1,4 @@ +# DEPRECATED # manipulated copy of env-full.yaml wo dependencies 'for model testing' for python 3.8 name: core-py38 channels: @@ -5,7 +6,7 @@ channels: - nodefaults - pytorch dependencies: - - bioimageio.spec==0.5.4.1 + - bioimageio.spec==0.5.4.3 - black - crick # uncommented - h5py @@ -31,7 +32,8 @@ dependencies: - pytest-cov - python=3.8 # changed - pytorch>=2.1,<3 - - requests + - httpx + - respx - rich - ruff - ruyaml diff --git a/pyproject.toml b/pyproject.toml index 5d58fe72..db264984 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ typeCheckingMode = "strict" useLibraryCodeForTypes = true [tool.pytest.ini_options] -addopts = "--cov bioimageio --cov-report xml --cov-append --capture no --doctest-modules --failed-first --ignore dogfood --ignore bioimageio/core/backends --ignore bioimageio/core/weight_converters" +addopts = "--doctest-modules --failed-first --ignore dogfood --ignore bioimageio/core/backends --ignore bioimageio/core/weight_converters" testpaths = ["bioimageio/core", "tests"] [tool.ruff] diff --git a/setup.py b/setup.py index f4e975a5..563e1954 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,6 @@ classifiers=[ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -30,7 +29,7 @@ ], packages=find_namespace_packages(exclude=["tests"]), install_requires=[ - "bioimageio.spec ==0.5.4.1", + "bioimageio.spec ==0.5.4.3", "h5py", "imagecodecs", "imageio>=2.10", @@ -38,7 +37,6 @@ "numpy", "pydantic-settings>=2.5,<3", "pydantic>=2.7.0,<3", - "requests", "ruyaml", "tqdm", "typing-extensions", @@ -51,13 +49,16 @@ ), "tensorflow": ["tensorflow", "keras>=2.15,<4"], "onnx": ["onnxruntime"], - "tests": (test_deps := ["pytest", "pytest-cov"]), # minimal test requirements + "tests": ( # minimal test requirements + test_deps := ["pytest", "pytest-cov", "python-dotenv"] + ), "dev": ( test_deps + pytorch_deps + [ "black", "cellpose", # for model testing + "httpx", "jupyter-black", "jupyter", "matplotlib", @@ -67,7 +68,7 @@ "packaging>=17.0", "pdoc", "pre-commit", - "pyright==1.1.396", + "pyright==1.1.402", "segment-anything", # for model testing "timm", # for model testing # "crick", # currently requires python<=3.9 diff --git a/tests/conftest.py b/tests/conftest.py index 32880b05..dd3bbec0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,7 @@ from itertools import chain from 
typing import Dict, List +from dotenv import load_dotenv from loguru import logger from pytest import FixtureRequest, fixture @@ -11,6 +12,7 @@ from bioimageio.spec import __version__ as bioimageio_spec_version enable_determinism() +_ = load_dotenv() try: diff --git a/tests/test_bioimageio_collection.py b/tests/test_bioimageio_collection.py index fc81034c..b61299aa 100644 --- a/tests/test_bioimageio_collection.py +++ b/tests/test_bioimageio_collection.py @@ -1,11 +1,13 @@ import os -from typing import Any, Collection, Dict, Iterable, Mapping, Tuple +from itertools import chain +from pathlib import Path +from typing import Any, Dict, Iterable, Mapping, Tuple +import httpx import pytest -import requests from pydantic import HttpUrl -from bioimageio.spec import InvalidDescr +from bioimageio.spec import InvalidDescr, settings from bioimageio.spec.common import Sha256 from tests.utils import ParameterSet, expensive_test @@ -13,12 +15,12 @@ def _get_latest_rdf_sources(): - entries: Any = requests.get(BASE_URL + "all_versions.json").json()["entries"] + entries: Any = httpx.get(BASE_URL + "all_versions.json").json()["entries"] ret: Dict[str, Tuple[HttpUrl, Sha256]] = {} for entry in entries: version = entry["versions"][0] ret[f"{entry['concept']}/{version['v']}"] = ( - HttpUrl(version["source"]), # pyright: ignore[reportCallIssue] + HttpUrl(version["source"]), Sha256(version["sha256"]), ) @@ -39,43 +41,138 @@ def yield_bioimageio_yaml_urls() -> Iterable[ParameterSet]: yield pytest.param(descr_url, sha, key, id=key) -KNOWN_INVALID: Collection[str] = { - "affable-shark/1.1", # onnx weights expect fixed input shape - "affectionate-cow/0.1.0", # custom dependencies - "ambitious-sloth/1.2", # requires inferno - "committed-turkey/1.2", # error deserializing VarianceScaling - "creative-panda/1", # error deserializing Conv2D - "dazzling-spider/0.1.0", # requires careamics - "discreet-rooster/1", # error deserializing VarianceScaling - "discreete-rooster/1", # error deserializing VarianceScaling - "dynamic-t-rex/1", # needs update to 0.5 for scale_linear with axes processing - "easy-going-sauropod/1", # CPU implementation of Conv3D currently only supports the NHWC tensor format. 
- "efficient-chipmunk/1", # needs plantseg - "emotional-cricket/1.1", # sporadic 403 responses from https://elifesciences.org - "famous-fish/0.1.0", # list index out of range `fl[3]` - "greedy-whale/1", # batch size is actually limited to 1 - "happy-elephant/0.1.0", # list index out of range `fl[3]` - "happy-honeybee/0.1.0", # requires biapy - "heroic-otter/0.1.0", # requires biapy - "humorous-crab/1", # batch size is actually limited to 1 - "humorous-fox/0.1.0", # requires careamics - "humorous-owl/1", # error deserializing GlorotUniform - "idealistic-turtle/0.1.0", # requires biapy - "impartial-shark/1", # error deserializing VarianceScaling - "intelligent-lion/0.1.0", # requires biapy - "joyful-deer/1", # needs update to 0.5 for scale_linear with axes processing - "merry-water-buffalo/0.1.0", # requires biapy - "naked-microbe/1", # unknown layer Convolution2D - "noisy-ox/1", # batch size is actually limited to 1 - "non-judgemental-eagle/1", # error deserializing GlorotUniform - "straightforward-crocodile/1", # needs update to 0.5 for scale_linear with axes processing - "stupendous-sheep/1.1", # requires relativ import of attachment - "stupendous-sheep/1.2", - "venomous-swan/0.1.0", # requires biapy - "wild-rhino/0.1.0", # requires careamics +KNOWN_INVALID: Mapping[str, str] = { + "affectionate-cow/0.1.0": "custom dependencies", + "ambitious-sloth/1.2": "requires inferno", + "appealing-popcorn/1": "missing license", + "appetizing-eggplant/1": "missing license", + "appetizing-peach/1": "missing license", + "authoritative-ballet-shoes/1.13.1": "invalid id", + "biapy/biapy/1": "invalid github user arratemunoz and lmescu", + "bitter-hot-dog/1": "missing license", + "bold-shorts/1.13": "invalid id", + "brisk-scarf/1.16.2": "missing license", + "buttery-apple/1": "missing cite", + "buttery-sandwich/1": "missing license", + "cheerful-cap/1.15.3": "missing license", + "chewy-garlic/1": "missing license", + "classy-googles/1": "missing license", + "committed-turkey/1.2": "error deserializing VarianceScaling", + "convenient-purse/1.14.1": "missing license", + "convenient-t-shirt/1.14.1": "missing license", + "cozy-hiking-boot/1.16.2": "missing license", + "creative-panda/1": "error deserializing Conv2D", + "crunchy-cookie/1": "missing license", + "dazzling-spider/0.1.0": "requires careamics", + "delectable-eggplant/1": "missing license", + "delicious-cheese/1": "missing license", + "determined-hedgehog/1": "wrong output shape?", + "discreet-rooster/1": "error deserializing VarianceScaling", + "discreete-rooster/1": "error deserializing VarianceScaling", + "divine-paella/1": "missing license", + "dl4miceverywhere/DL4MicEverywhere/1": "invalid id", + "dynamic-t-rex/1": "needs update to 0.5 for scale_linear with axes processing", + "easy-going-sauropod/1": ( + "CPU implementation of Conv3D currently only supports the NHWC tensor format." 
+    ),
+    "efficient-chipmunk/1": "needs plantseg",
+    "emotional-cricket/1.1": "sporadic 403 responses from https://elifesciences.org",
+    "exciting-backpack/1.19.1": "missing license",
+    "exquisite-curry/1": "missing license",
+    "famous-fish/0.1.0": "list index out of range `fl[3]`",
+    "fiji/Fiji/1": "invalid id",
+    "flattering-bikini/1.13.2": "missing license",
+    "flexible-helmet/1.14.1": "missing license",
+    "fluffy-popcorn/1": "missing license",
+    "fluid-glasses/1.17.2": "missing license",
+    "fruity-sushi/1": "missing license",
+    "fun-high-heels/1.15.2": "missing license",
+    "funny-butterfly/1": "Do not specify an axis for scalar gain and offset values.",
+    "greedy-whale/1": "batch size is actually limited to 1",
+    "happy-elephant/0.1.0": "list index out of range `fl[3]`",
+    "happy-honeybee/0.1.0": "requires biapy",
+    "heroic-otter/0.1.0": "requires biapy",
+    "hpa/HPA-Classification/1": "invalid id",
+    "humorous-crab/1": "batch size is actually limited to 1",
+    "humorous-fox/0.1.0": "requires careamics",
+    "humorous-owl/1": "error deserializing GlorotUniform",
+    "icy/icy/1": "invalid github user 'None'",
+    "idealistic-turtle/0.1.0": "requires biapy",
+    "imjoy/BioImageIO-Packager/1": "invalid id",
+    "imjoy/GenericBioEngineApp/1": "invalid documentation suffix",
+    "imjoy/HPA-Single-Cell/1": "invalid documentation suffix",
+    "imjoy/ImageJ.JS/1": "invalid documentation suffix",
+    "imjoy/ImJoy/1": "invalid documentation suffix",
+    "imjoy/vizarr/1": "invalid documentation suffix",
+    "impartial-shark/1": "error deserializing VarianceScaling",
+    "indulgent-sandwich/1": "missing license",
+    "inspiring-sandal/1.13.3": "missing license",
+    "intelligent-lion/0.1.0": "requires biapy",
+    "irresistible-swimsuit/1.14.1": "missing license",
+    "joyful-deer/1": "needs update to 0.5 for scale_linear with axes processing",
+    "joyful-top-hat/2.2.1": "missing license",
+    "juicy-peanut/1": "missing license",
+    "light-swimsuit/1.13": "missing license",
+    "limited-edition-crown/1.14.1": "missing license",
+    "lively-t-shirt/1.13": "missing license",
+    "lucky-fox/1": (
+        "torchscript runtime error: Given groups=1, weight of size [90, 1, 3, 3], expected input[1, 2, 64, 64] to have 1 channels, but got 2 channels instead"
+    ),
+    "luscious-tomato/1": "missing license",
+    "mellow-broccoli/1": "missing license",
+    "mellow-takeout/1": "missing cite",
+    "merry-water-buffalo/0.1.0": "requires biapy",
+    "mesmerizing-shoe/1.14.1": "missing license",
+    "naked-microbe/1": "unknown layer Convolution2D",
+    "nice-peacock/1": "invalid id",
+    "noisy-ox/1": "batch size is actually limited to 1",
+    "non-judgemental-eagle/1": "error deserializing GlorotUniform",
+    "nutty-burrito/1": "missing license",
+    "nutty-knuckle/1": "missing license",
+    "opalescent-ribbon/1.15.3": "missing license",
+    "palatable-curry/1": "missing license",
+    "polished-t-shirt/1.16.2": "missing license",
+    "powerful-sandal/1": "missing license",
+    "regal-ribbon/1.14.1": "missing license",
+    "resourceful-potato/1": "missing license",
+    "resplendent-ribbon/2.2.1": "missing license",
+    "rich-burrito/1": "missing license",
+    "rich-cheese/1": "missing license",
+    "savory-cheese/1": "missing license",
+    "silky-shorts/1.13": "missing license",
+    "slinky-bikini/1.15.1": "missing license",
+    "smooth-graduation-hat/1.15.0": "missing license",
+    "smooth-hat/1.1.0": "invalid id",
+    "smooth-safety-vest/1.14.1": "missing license, invalid id",
+    "smooth-scarf/1": "invalid id",
+    "sparkling-sari/1.0.0": "missing license, invalid id",
+    "straightforward-crocodile/1": (
+        "needs update to 0.5 for scale_linear with axes processing"
+    ),
+    "striking-necktie/1.14.1": "invalid id",
+    "stupendous-sheep/1.1": "requires relative import of attachment",
+    "tempting-pizza/1": "missing license",
+    "timeless-running-shirt/1.13.2": "invalid id, missing license",
+    "uplifting-backpack/1.14.1": "invalid id, missing license",
+    "venomous-swan/0.1.0": "requires biapy",
+    "whimsical-helmet/2.1.2": "invalid id",
+    "wild-rhino/0.1.0": "requires careamics",
+    "zero/notebook_preview/1": "missing authors",
 }
 
 
+def get_directory_size(path: Path):
+    total_size = 0
+    for dirpath, _, filenames in os.walk(path):
+        for f in filenames:
+            fp = os.path.join(dirpath, f)
+            # skip if it is a symbolic link
+            if not os.path.islink(fp):
+                total_size += os.path.getsize(fp)
+
+    return total_size
+
+
 @pytest.mark.parametrize("descr_url,sha,key", list(yield_bioimageio_yaml_urls()))
 def test_rdf_format_to_populate_cache(
     descr_url: HttpUrl,
@@ -84,10 +181,13 @@ def test_rdf_format_to_populate_cache(
 ):
     """this test is redundant if `test_rdf` runs, but is used in the CI to populate the cache"""
     if os.environ.get("BIOIMAGEIO_POPULATE_CACHE") != "1":
-        pytest.skip("only runs in CI to populate cache")
+        pytest.skip("BIOIMAGEIO_POPULATE_CACHE != 1")
 
     if key in KNOWN_INVALID:
-        pytest.skip("known failure")
+        pytest.skip(KNOWN_INVALID[key])
+
+    if (cache_size := get_directory_size(settings.cache_path)) > 8e9:
+        pytest.skip(f"reached 8GB cache size limit ({cache_size / 1e9:.2f} GB)")
 
     from bioimageio.core import load_description
 
@@ -101,14 +201,62 @@ def test_rdf(
     sha: Sha256,
     key: str,
 ):
+    from bioimageio.spec import get_conda_env
+    from bioimageio.spec.model import ModelDescr
+
     if key in KNOWN_INVALID:
-        pytest.skip("known failure")
+        pytest.skip(KNOWN_INVALID[key])
+
+    from bioimageio.core import load_description, load_description_and_test
+
+    descr = load_description(
+        descr_url, sha256=sha, format_version="latest", perform_io_checks=True
+    )
+    assert not isinstance(descr, InvalidDescr), descr.validation_summary.display() or [
+        e.msg for e in descr.validation_summary.errors
+    ]
+
+    if (
+        isinstance(descr, ModelDescr)
+        and descr.weights.pytorch_state_dict is not None
+        and descr.weights.pytorch_state_dict.dependencies is not None
+    ):
+        conda_env = get_conda_env(entry=descr.weights.pytorch_state_dict)
+
+        def depends_on(dep: str) -> bool:
+            return any(
+                chain(
+                    (d.startswith(dep) for d in conda_env.get_pip_deps()),
+                    (
+                        cd.startswith(dep)
+                        for cd in conda_env.dependencies
+                        if isinstance(cd, str)
+                    ),
+                )
+            )
 
-    from bioimageio.core import load_description_and_test
+        for skip_if_depends_on in (
+            "biapy",
+            "git+https://github.com/CAREamics/careamics.git",
+            "careamics",
+            "inferno",
+            "plantseg",
+        ):
+            if depends_on(skip_if_depends_on):
+                pytest.skip(f"requires {skip_if_depends_on}")
 
-    descr = load_description_and_test(descr_url, sha256=sha, stop_early=True)
+    descr = load_description_and_test(
+        descr,
+        format_version="latest",
+        sha256=sha,
+        stop_early=True,
+    )
 
-    assert not isinstance(descr, InvalidDescr), descr.validation_summary.display()
+    assert not isinstance(descr, InvalidDescr), descr.validation_summary.display() or [
+        e.msg for e in descr.validation_summary.errors
+    ]
     assert (
         descr.validation_summary.status == "passed"
-    ), descr.validation_summary.display()
+    ), descr.validation_summary.display() or [
+        e.msg for e in descr.validation_summary.errors
+    ]
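A general note on the recurring change across the backends and weight converters above: `download(...).path` is replaced by `get_reader(...)` (or a weight entry's `.get_reader()`), which returns a `BytesReader` that can either be consumed in memory or streamed to disk. A minimal sketch of both usages, with a hypothetical source URL:

    from pathlib import Path
    from shutil import copyfileobj

    from bioimageio.spec.utils import get_reader

    src = "https://example.com/weights.onnx"  # hypothetical placeholder

    # consume in memory, as onnx_backend.py does via rt.InferenceSession(reader.read())
    data = get_reader(src).read()

    # or stream to a local file when an on-disk path is required, as in cli.py and io.py
    reader = get_reader(src)
    dst = Path("example") / (reader.original_file_name or f"file{reader.suffix}")
    dst.parent.mkdir(parents=True, exist_ok=True)
    with dst.open("wb") as f:
        copyfileobj(reader, f)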
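Likewise, the extended `Tensor.__getitem__`/`__setitem__` in `tensor.py` accepts tensor-valued (e.g. boolean mask) keys, which the new comparison code relies on for `abs_diff[~mismatched] = 0`. A usage sketch with made-up values, assuming `Tensor` is importable from `bioimageio.core` as in the existing API:

    import numpy as np

    from bioimageio.core import Tensor
    from bioimageio.core.axis import AxisId

    t = Tensor.from_numpy(
        np.arange(6, dtype="float32").reshape(2, 3),
        dims=(AxisId("x"), AxisId("y")),
    )
    t[t > 3.0] = 0.0  # boolean Tensor as key, enabled by the new __setitem__
    row = t[{AxisId("x"): 0}]  # per-axis indexing still works as before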