-
Notifications
You must be signed in to change notification settings - Fork 11
feature/plugin-priority #162
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
e4edd7f
24a55c7
1513fed
df9a7bd
5b60873
970cf58
3e51edc
b441667
ef25ca8
decaeaa
20cb352
c817121
3e8bbb1
2080d3b
2c0e178
847a9a8
aba59f3
3d3f727
b333927
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,7 @@ | |
| import datetime | ||
| import logging | ||
| from pathlib import Path | ||
| from typing import Any, Dict, List, Optional, Tuple, Type, Union, get_args | ||
| from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union, get_args | ||
|
|
||
| import bioio_base as biob | ||
| import dask.array as da | ||
|
|
@@ -15,7 +15,13 @@ | |
| from ome_types import OME | ||
|
|
||
| from .ome_utils import generate_ome_channel_id | ||
| from .plugins import PluginEntry, check_type, get_array_like_plugin, get_plugins | ||
| from .plugins import ( | ||
| PluginEntry, | ||
| check_type, | ||
| get_array_like_plugin, | ||
| get_plugins, | ||
| order_plugins_by_priority, | ||
| ) | ||
|
|
||
| ############################################################################### | ||
|
|
||
|
|
@@ -55,6 +61,15 @@ class BioImage(biob.image_container.ImageContainer): | |
| checking for installed plugins on each `BioImage` instance init. | ||
| If True, will use the cache of installed plugins discovered last `BioImage` | ||
| init. | ||
| plugin_priority: Optional[Sequence[Type[biob.reader.Reader]]] | ||
| An optional ordered list of Reader classes that defines the priority used | ||
| when multiple plugins declare support for the same file type. | ||
|
|
||
| When provided, the ordering in this list overrides BioIO’s default plugin | ||
| ordering for *that specific BioImage instance*. Only readers that actually | ||
| exist in the installed plugin registry may be included. Any readers not | ||
| present in the list will retain their original relative ordering after all | ||
| explicitly-prioritized readers. | ||
| fs_kwargs: Dict[str, Any] | ||
| Any specific keyword arguments to pass down to the fsspec created filesystem. | ||
| Default: {} | ||
|
|
@@ -101,6 +116,17 @@ class BioImage(biob.image_container.ImageContainer): | |
|
|
||
| >>> img = BioImage("malformed_metadata.ome.tiff", reader=readers.TiffReader) | ||
|
|
||
| Initialize an image and override plugin selection order when multiple readers | ||
| support the same extension. | ||
|
|
||
| >>> from bioio_tifffile import Reader as TiffReader | ||
| >>> from bioio_ome_tiff import Reader as OmeTiffReader | ||
| >>> img = BioImage( | ||
| ... "multi_plugin_file.ome.tiff", | ||
| ... plugin_priority=[TiffReader, OmeTiffReader], | ||
| ... ) | ||
| ... # TiffReader will be tried before OmeTiffReader for this instance. | ||
|
|
||
| Data for a mosaic file is returned pre-stitched (if the base reader supports it). | ||
|
|
||
| >>> img = BioImage("big_mosaic.czi") | ||
|
|
@@ -132,6 +158,7 @@ def determine_plugin( | |
| image: biob.types.ImageLike, | ||
| fs_kwargs: Dict[str, Any] = {}, | ||
| use_plugin_cache: bool = False, | ||
| plugin_priority: Optional[Sequence[Type[biob.reader.Reader]]] = None, | ||
| **kwargs: Any, | ||
| ) -> PluginEntry: | ||
| """ | ||
|
|
@@ -151,6 +178,11 @@ def determine_plugin( | |
| Additional keyword arguments to be passed to the file system handler. | ||
| use_plugin_cache : bool, optional | ||
| Whether to use a cached version of the plugin mapping, by default False. | ||
| plugin_priority : Optional[Sequence[Type[biob.reader.Reader]]], optional | ||
| Optional ordered list of Reader classes defining the order in which | ||
| matching plugins should be tried. This overrides the default ordering | ||
| when multiple plugins support the same extension. Only Reader classes | ||
| from installed plugins may be included. | ||
| **kwargs : Any | ||
| Additional keyword arguments for plugin-specific configurations. | ||
|
|
||
|
|
@@ -172,9 +204,11 @@ def determine_plugin( | |
| optionally using a cached version. | ||
| 2. If the `image` is a file path (str or Path), it checks for a matching | ||
| plugin based on the file extension. | ||
| 3. If the `image` is an array-like object, it attempts to use the | ||
| 4. If multiple plugins support the same extension, the optional | ||
| ``plugin_priority`` list determines the order in which they are tried. | ||
| 5. If the `image` is an array-like object, it attempts to use the | ||
| built-in `ArrayLikeReader`. | ||
| 4. If no suitable plugin is found, raises an `UnsupportedFileFormatError`. | ||
| 6. If no suitable plugin is found, raises an `UnsupportedFileFormatError`. | ||
|
|
||
| Examples | ||
| -------- | ||
|
|
@@ -198,6 +232,7 @@ def determine_plugin( | |
| extension against the known plugins. | ||
| - For each matching plugin, it tries to instantiate a reader and checks | ||
| if it supports the image. | ||
| - If multiple plugins match, optional ``plugin_priority`` takes precedence. | ||
|
||
| - If the image is array-like, it uses a built-in reader designed for | ||
| such objects. | ||
| - Detailed logging is provided for troubleshooting purposes. | ||
|
|
@@ -219,7 +254,12 @@ def determine_plugin( | |
| # Check for extension in plugins_by_ext | ||
| for format_ext, plugins in plugins_by_ext.items(): | ||
| if BioImage._path_has_extension(path, format_ext): | ||
| for plugin in plugins: | ||
| # Reorder plugins for this extension according to user priority | ||
| ordered_plugins = order_plugins_by_priority( | ||
| plugins, plugin_priority | ||
| ) | ||
|
|
||
| for plugin in ordered_plugins: | ||
| ReaderClass = plugin.metadata.get_reader() | ||
| try: | ||
| if ReaderClass.is_supported_image( | ||
|
|
@@ -284,6 +324,7 @@ def _get_reader( | |
| reader: biob.reader.Reader, | ||
| use_plugin_cache: bool, | ||
| fs_kwargs: Dict[str, Any], | ||
| plugin_priority: Optional[Sequence[Type[biob.reader.Reader]]] = None, | ||
| **kwargs: Any, | ||
| ) -> Tuple[biob.reader.Reader, Optional[PluginEntry]]: | ||
| """ | ||
|
|
@@ -302,7 +343,11 @@ def _get_reader( | |
|
|
||
| # Determine reader class based on available plugins | ||
| plugin = BioImage.determine_plugin( | ||
| image, fs_kwargs=fs_kwargs, use_plugin_cache=use_plugin_cache, **kwargs | ||
| image, | ||
| fs_kwargs=fs_kwargs, | ||
| use_plugin_cache=use_plugin_cache, | ||
| plugin_priority=plugin_priority, | ||
| **kwargs, | ||
| ) | ||
| ReaderClass = plugin.metadata.get_reader() | ||
| return ReaderClass(image, fs_kwargs=fs_kwargs, **kwargs), plugin | ||
|
|
@@ -314,6 +359,7 @@ def __init__( | |
| reconstruct_mosaic: bool = True, | ||
| use_plugin_cache: bool = False, | ||
| fs_kwargs: Dict[str, Any] = {}, | ||
| plugin_priority: Optional[Sequence[Type[biob.reader.Reader]]] = None, | ||
| **kwargs: Any, | ||
| ): | ||
| try: | ||
|
|
@@ -322,6 +368,7 @@ def __init__( | |
| reader, | ||
| use_plugin_cache, | ||
| fs_kwargs, | ||
| plugin_priority=plugin_priority, | ||
| **kwargs, | ||
| ) | ||
| except biob.exceptions.UnsupportedFileFormatError: | ||
|
|
@@ -336,6 +383,7 @@ def __init__( | |
| reader, | ||
| use_plugin_cache, | ||
| fs_kwargs | {"anon": True}, | ||
| plugin_priority=plugin_priority, | ||
| **kwargs, | ||
| ) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,7 +9,7 @@ | |
| from dataclasses import dataclass | ||
| from datetime import datetime | ||
| from pathlib import Path | ||
| from typing import Any, get_args | ||
| from typing import Any, Sequence, Type, get_args | ||
|
|
||
| import semver | ||
|
|
||
|
|
@@ -117,6 +117,43 @@ def insert_sorted_by_timestamp(list: List[PluginEntry], item: PluginEntry) -> No | |
| list.append(item) | ||
|
|
||
|
|
||
| def order_plugins_by_priority( | ||
| plugins: List[PluginEntry], | ||
| plugin_priority: Optional[Sequence[Type[Reader]]] = None, | ||
| ) -> List[PluginEntry]: | ||
| """ | ||
| Reorder a list of PluginEntry objects according to a user-provided | ||
| list of Reader classes. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| plugins : List[PluginEntry] | ||
| The candidate plugins for a given extension. | ||
| plugin_priority : Sequence[Type[Reader]], optional | ||
| Reader classes that should be preferred first, e.g. | ||
|
|
||
| from bioio_czi import Reader as CziReader | ||
| from bioio_tifffile import Reader as TiffReader | ||
|
|
||
| plugin_priority = [CziReader, TiffReader] | ||
|
|
||
| Returns | ||
| ------- | ||
| List[PluginEntry] | ||
| Prioritized plugin list. | ||
| """ | ||
| if not plugin_priority: | ||
| return plugins | ||
|
||
|
|
||
| priority_index = {cls: i for i, cls in enumerate(plugin_priority)} | ||
|
|
||
| # From the docs: "The built-in [sorted()](https://docs.python.org/3/library/functions.html#sorted) function is guaranteed to be stable. A sort is stable if it guarantees not to change the relative order of elements that compare equal." | ||
| return sorted( | ||
BrianWhitneyAI marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| plugins, | ||
| key=lambda p: priority_index.get(p.metadata.get_reader(), len(priority_index)), | ||
| ) | ||
|
|
||
|
|
||
| def get_dependency_version_range_for_distribution( | ||
| distribution_name: str, dependency_name: str | ||
| ) -> Tuple[Optional[str], Optional[str]]: | ||
|
|
@@ -344,18 +381,49 @@ def plugin_feasibility_report( | |
| """ | ||
| Generate a feasibility report for each plugin, | ||
| determining if it can handle the specified image. | ||
|
|
||
| For debugging purposes, this function checks every installed plugin’s | ||
| `is_supported_image(...)` implementation — even if the plugin would | ||
| *not* normally be selected by BioImage’s extension-based routing. | ||
|
|
||
| A warning is logged if a plugin *can* read the file but the file’s | ||
| extension is NOT listed in that plugin’s advertised supported extensions. | ||
| In such cases, BioImage will NOT auto-select that plugin for the file, | ||
| but the user may explicitly choose it via the `reader=` parameter. | ||
| """ | ||
| plugins_by_ext = get_plugins(use_cache=use_plugin_cache) | ||
| feasibility_report = {} | ||
| feasibility_report: Dict[str, PluginSupport] = {} | ||
|
|
||
| ext = None | ||
| if isinstance(image, (str, Path)): | ||
| # Strip any query parameters, then extract suffix | ||
| clean_path = str(image).split("?")[0] | ||
| ext = Path(clean_path).suffix.lower() | ||
|
|
||
| # Check each plugin for support | ||
| for plugins in plugins_by_ext.values(): | ||
| for plugin in plugins: | ||
| plugin_name = plugin.entrypoint.name | ||
| feasibility_report[plugin_name] = _check_plugin_support( | ||
| plugin, image, fs_kwargs | ||
| ) | ||
| support = _check_plugin_support(plugin, image, fs_kwargs) | ||
| feasibility_report[plugin_name] = support | ||
|
|
||
| if support.supported and ext is not None: | ||
| advertised_exts = [ | ||
| e.lower() if e.startswith(".") else f".{e.lower()}" | ||
| for e in plugin.metadata.get_supported_extensions() | ||
| ] | ||
|
|
||
| if ext not in advertised_exts: | ||
| ReaderClass = plugin.metadata.get_reader() | ||
|
|
||
| log.warning( | ||
| f"Plugin '{plugin_name}' CAN read the file '{image}', " | ||
| f"but the file extension '{ext}' is NOT listed in its " | ||
| f"get_supported_extensions(): {advertised_exts}. BioImage " | ||
| f"will NOT auto-select this reader based on extension.\n" | ||
| f"To use this reader manually, instantiate BioImage with:\n" | ||
| f" BioImage('{image}', reader={ReaderClass.__name__})" | ||
| ) | ||
| # Additional check for ArrayLike support | ||
| try: | ||
| supported = ArrayLikeReader.is_supported_image(image=image, fs_kwargs=fs_kwargs) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,12 +3,17 @@ | |
|
|
||
| import sys | ||
| from io import StringIO | ||
| from typing import Any, Dict, List | ||
|
|
||
| import numpy as np | ||
| import pytest | ||
| from _pytest.monkeypatch import MonkeyPatch | ||
|
|
||
| import bioio | ||
| import bioio.bio_image as bio_image | ||
| from bioio import BioImage | ||
|
|
||
| from ..plugins import dump_plugins | ||
| from ..plugins import dump_plugins, get_plugins | ||
|
|
||
|
|
||
| def test_dump_plugins(dummy_plugin: str) -> None: | ||
|
|
@@ -39,3 +44,100 @@ def test_plugin_feasibility_report(dummy_plugin: str) -> None: | |
| assert actual_output["ArrayLike"].error is None | ||
| assert actual_output[dummy_plugin].supported is False | ||
| assert expected_error_msg in (actual_output[dummy_plugin].error or "") | ||
|
|
||
|
|
||
| @pytest.mark.parametrize( | ||
| "priority_order, expected_first", | ||
| [ | ||
| # Case 1: AcceptingReader has higher priority than DummyReader | ||
| (["accepting", "dummy"], "accepting"), | ||
| # Case 2: DummyReader has higher priority than AcceptingReader | ||
| (["dummy", "accepting"], "dummy"), | ||
| ], | ||
| ) | ||
| def test_bioimage_plugin_priority_modulates_reader( | ||
| monkeypatch: MonkeyPatch, | ||
| dummy_plugin: str, | ||
| accepting_plugin: str, | ||
| priority_order: List[str], | ||
| expected_first: str, | ||
| ) -> None: | ||
| from accepting_plugin import Reader as AcceptingReader | ||
| from dummy_plugin import Reader as DummyReader | ||
|
|
||
| # Arrange | ||
| plugins_by_ext = get_plugins(use_cache=False) | ||
|
|
||
| dummy_entry = None | ||
| accepting_entry = None | ||
| for plugin_list in plugins_by_ext.values(): | ||
| for plugin in plugin_list: | ||
| reader_cls = plugin.metadata.get_reader() | ||
| if reader_cls is DummyReader: | ||
| dummy_entry = plugin | ||
| elif reader_cls is AcceptingReader: | ||
| accepting_entry = plugin | ||
|
|
||
| # Build a controlled plugin mapping where both readers claim the same extension | ||
| # and where the base order is [Dummy, Accepting]. This lets us see exactly how | ||
| # plugin_priority changes which reader BioImage selects. | ||
| test_ext = ".czi" | ||
| fake_plugins_by_ext = {test_ext: [dummy_entry, accepting_entry]} | ||
|
|
||
| # Patch the module-level get_plugins used inside BioImage.determine_plugin so | ||
| # it sees only our controlled mapping. | ||
| monkeypatch.setattr( | ||
| bio_image, | ||
| "get_plugins", | ||
| lambda use_cache: fake_plugins_by_ext, | ||
| ) | ||
|
||
|
|
||
| # Patch is_supported_image on BOTH readers so they do not hit the base-class | ||
| # path handling (which would enforce file existence and raise FileNotFoundError). | ||
| # We want both readers to *claim* they support this test path. | ||
| def always_supported( | ||
| cls: type[Any], | ||
| image: Any, | ||
| fs_kwargs: Dict[str, Any] | None = None, | ||
| **kwargs: Any, | ||
| ) -> bool: | ||
| return True | ||
|
|
||
| monkeypatch.setattr( | ||
| DummyReader, | ||
| "is_supported_image", | ||
| classmethod(always_supported), | ||
| ) | ||
| monkeypatch.setattr( | ||
| AcceptingReader, | ||
| "is_supported_image", | ||
| classmethod(always_supported), | ||
| ) | ||
|
|
||
| # Patch DummyReader.__init__ so using it doesn't raise NotImplementedError. | ||
| def dummy_init( | ||
| self: Any, | ||
| image: Any, | ||
| fs_kwargs: Dict[str, Any] | None = None, | ||
| **kwargs: Any, | ||
| ) -> None: | ||
| self._fs = None | ||
| self._path = str(image) | ||
|
|
||
| monkeypatch.setattr(DummyReader, "__init__", dummy_init) | ||
|
|
||
| # Map the parameterized string names to the actual Reader classes. | ||
| name_to_cls: Dict[str, type[Any]] = { | ||
| "dummy": DummyReader, | ||
| "accepting": AcceptingReader, | ||
| } | ||
|
|
||
| plugin_priority = [name_to_cls[name] for name in priority_order] | ||
| expected_cls = name_to_cls[expected_first] | ||
| test_path = f"test{test_ext}" | ||
|
|
||
| # Act | ||
| img = BioImage(test_path, plugin_priority=plugin_priority) | ||
|
|
||
| # Assert | ||
| assert isinstance(img.reader, expected_cls) | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
just a thought, how difficult would it be to accept the names of the entrypoints here, rather than the
Reader? That way the sequence could be constructed without having to do imports. It's just, simpler perhaps? I suppose the same would be nice if I could pass the entry point name to the reader parameter as well, maybe I should make a new issue? Using the entrypoint name would be much easier for colleagues that are less familiar with finding python objectsThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would be hesitant to allow a string parameter here. Importing the Readers directly allows us to tighten our typing and doesn't require a users knowledge of entrypoint logic. Im also not sure we want to point users towards entrypoint logic from a documentation standpoint as we have pushed so hard for the plugin reader structure.