Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 54 additions & 6 deletions bioio/bio_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import datetime
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Type, Union, get_args
from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union, get_args

import bioio_base as biob
import dask.array as da
Expand All @@ -15,7 +15,13 @@
from ome_types import OME

from .ome_utils import generate_ome_channel_id
from .plugins import PluginEntry, check_type, get_array_like_plugin, get_plugins
from .plugins import (
PluginEntry,
check_type,
get_array_like_plugin,
get_plugins,
order_plugins_by_priority,
)

###############################################################################

Expand Down Expand Up @@ -55,6 +61,15 @@ class BioImage(biob.image_container.ImageContainer):
checking for installed plugins on each `BioImage` instance init.
If True, will use the cache of installed plugins discovered last `BioImage`
init.
plugin_priority: Optional[Sequence[Type[biob.reader.Reader]]]
An optional ordered list of Reader classes that defines the priority used
when multiple plugins declare support for the same file type.

When provided, the ordering in this list overrides BioIO’s default plugin
ordering for *that specific BioImage instance*. Only readers that actually
exist in the installed plugin registry may be included. Any readers not
present in the list will retain their original relative ordering after all
explicitly-prioritized readers.
fs_kwargs: Dict[str, Any]
Any specific keyword arguments to pass down to the fsspec created filesystem.
Default: {}
Expand Down Expand Up @@ -101,6 +116,17 @@ class BioImage(biob.image_container.ImageContainer):

>>> img = BioImage("malformed_metadata.ome.tiff", reader=readers.TiffReader)

Initialize an image and override plugin selection order when multiple readers
support the same extension.

>>> from bioio_tifffile import Reader as TiffReader
>>> from bioio_ome_tiff import Reader as OmeTiffReader
>>> img = BioImage(
... "multi_plugin_file.ome.tiff",
... plugin_priority=[TiffReader, OmeTiffReader],
... )
... # TiffReader will be tried before OmeTiffReader for this instance.

Data for a mosaic file is returned pre-stitched (if the base reader supports it).

>>> img = BioImage("big_mosaic.czi")
Expand Down Expand Up @@ -132,6 +158,7 @@ def determine_plugin(
image: biob.types.ImageLike,
fs_kwargs: Dict[str, Any] = {},
use_plugin_cache: bool = False,
plugin_priority: Optional[Sequence[Type[biob.reader.Reader]]] = None,
Copy link
Contributor

@TimMonko TimMonko Dec 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just a thought, how difficult would it be to accept the names of the entrypoints here, rather than the Reader? That way the sequence could be constructed without having to do imports. It's just, simpler perhaps? I suppose the same would be nice if I could pass the entry point name to the reader parameter as well, maybe I should make a new issue? Using the entrypoint name would be much easier for colleagues that are less familiar with finding python objects

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would be hesitant to allow a string parameter here. Importing the Readers directly allows us to tighten our typing and doesn't require a users knowledge of entrypoint logic. Im also not sure we want to point users towards entrypoint logic from a documentation standpoint as we have pushed so hard for the plugin reader structure.

**kwargs: Any,
) -> PluginEntry:
"""
Expand All @@ -151,6 +178,11 @@ def determine_plugin(
Additional keyword arguments to be passed to the file system handler.
use_plugin_cache : bool, optional
Whether to use a cached version of the plugin mapping, by default False.
plugin_priority : Optional[Sequence[Type[biob.reader.Reader]]], optional
Optional ordered list of Reader classes defining the order in which
matching plugins should be tried. This overrides the default ordering
when multiple plugins support the same extension. Only Reader classes
from installed plugins may be included.
**kwargs : Any
Additional keyword arguments for plugin-specific configurations.

Expand All @@ -172,9 +204,11 @@ def determine_plugin(
optionally using a cached version.
2. If the `image` is a file path (str or Path), it checks for a matching
plugin based on the file extension.
3. If the `image` is an array-like object, it attempts to use the
4. If multiple plugins support the same extension, the optional
``plugin_priority`` list determines the order in which they are tried.
5. If the `image` is an array-like object, it attempts to use the
built-in `ArrayLikeReader`.
4. If no suitable plugin is found, raises an `UnsupportedFileFormatError`.
6. If no suitable plugin is found, raises an `UnsupportedFileFormatError`.

Examples
--------
Expand All @@ -198,6 +232,7 @@ def determine_plugin(
extension against the known plugins.
- For each matching plugin, it tries to instantiate a reader and checks
if it supports the image.
- If multiple plugins match, optional ``plugin_priority`` takes precedence.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there an explicit definition of what happens with multiple matches, but plugin_priority is not set? I think we want that, even if the defined order is the first plug alphabetically.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the idea of an order but alphabetical has some specific issues. one being that we would be reccomending bioio-bioformats as our primary reader, which is not what we want to do. A thought that came out of a conversation between kevin and I was that we sort default plugin priority by the length of the extension list. This would move more specific readers to the top and more general readers to the bottom. Then if a user wants something specific they define a priority list or a single reader. This could have some pitfalls one I can think of is that ome-tiff has length 4 and tifffile has length 3. @toloudis what are your thoughts about a default order?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about this priority order?

  • Check extensions in order of length (e.g., if plugins support .c, .b.c, and .a.b.c, then ingesting file.a.b.c will first look for plugins supporting .a.b.c, then .b.c)
    • Among plugins that support the same extension, choose the one that supports the fewest extensions total.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Im working on an implementation now for bioio-tiff-glob, bioio-ome-tiff, bioio-tifffile,bioio-bioformats our priority list would be

Extension: .tiff
----------------
[0] entrypoint: bioio-tiff-glob
     Reader: bioio_tiff_glob.reader.Reader
     Supported extensions: ['.tif', '.tiff']

[1] entrypoint: bioio-ome-tiff
     Reader: bioio_ome_tiff.reader.Reader
     Supported extensions: ['.ome.tiff', '.tiff', 'ome.tif', '.tif']

[2] entrypoint: bioio-tifffile
     Reader: bioio_tifffile.reader.Reader
     Supported extensions: ['tif', 'tiff', 'lsm']

[3] entrypoint: bioio-bioformats
     Reader: bioio_bioformats.reader.Reader
     Supported extensions: ['.1sc', '.2fl', '.acff', '.afi', '.afm', '.aim', '.al3d', '.am', '.amiramesh', '.apl', '.arf', '.avi', '.bin', '.bip', '.bmp', '.btf', '.c01', '.cfg', '.ch5', '.cif', '.cr2', '.crw', '.csv', '.cxd', '.czi', '.dat', '.dcm', '.dib', '.dm2', '.dm3', '.dm4', '.dti', '.dv', '.eps ', '.exp', '.fdf', '.fff', '.ffr', '.fits', '.flex', '.fli', '.frm', '.gel', '.gif', '.grey', '.hdf', '.hdr', '.hed', '.his', '.htd', '.html', '.hx', '.i2i', '.ics', '.ids', '.im3', '.img', '.ims', '.inr', '.ipl', '.ipm', '.ipw', '.jp2', '.jpg', '.jpk', '.jpx', '.l2d', '.labels', '.lei', '.lif', '.liff', '.lim', '.lms', '.lsm', '.mdb', '.mnc', '.mng', '.mod', '.mov', '.mrc', '.mrw', '.msr', '.mtb', '.mvd2', '.naf', '.nd', '.nd2', '.ndpi', '.ndpis', '.nef', '.nhdr', '.nii', '.nii.gz', '.nrrd', '.obf', '.obsep', '.oib', '.oif', '.oir', '.ome', '.ome.btf', '.ome.tf2', '.ome.tf8', '.ome.tif', '.ome.tiff', '.ome.xml', '.par', '.pbm', '.pcoraw', '.pcx', '.pds', '.pgm', '.pic', '.pict', '.png', '.pnl', '.ppm', '.pr3', '.psd', '.qptiff', '.r3d', '.raw', '.rcpnl', '.rec', '.scn', '.sdt', '.seq', '.sif', '.sld', '.sm2', '.sm3', '.spc', '.spe', '.spi', '.stk', '.stp', '.svs', '.sxm', '.tf2', '.tf8', '.tfr', '.tga', '.tif', '.tiff', '.tnb', '.top', '.txt', '.v', '.vms', '.vsi', '.vws', '.wat', '.wlz', '.xdce', '.xml', '.xqd', '.xqf', '.xv', '.xys', '.zfp', '.zfr', '.zvi']

This would essentially say the more specific a plugin you install the higher in the priority list ill be. ome-tiff-glob is extremely specific and has 2 families of support .tiff and .tif. bioio-ome-tifff also has 2 families but a larger extension list bioio-tifffile has 3 families and bioio-bioformats has a ton.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So is the intention to have a different priority list for every extension? That seems like a lot of effort for little reward (given the number of relevant extensions)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No we determine priority from ext list for each plugin based on (at bioio level):

  • Fewer extension families first — Plugins that support fewer groups of suffix-related extensions (e.g., .ome.tiff + .tiff is one family) are considered more specific and are ranked higher.
  • If tied, fewer raw extensions first — A plugin advertising fewer total extensions in its get_supported_extensions() list is treated as more focused and ranked higher.
  • If still tied, alphabetical by plugin name — A stable, deterministic tie-breaker.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for the above description, the index of the plugin is its priority (0 being highest)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The priority approach of @pgarrison resonates with how I like to think about the various plugins, especially where I want the (presumed) most specific plugin to be prioritized first, and seeing how @BrianWhitneyAI laid it out makes me more certain that its both logical and easily understood with minimal documentation (which, right now is actually quite hard). I've followed y'alls discussions for a while, and while I'm unhappy about the current prioritization (i.e. time-of-install) nothing else has stood out as a compelling alternative until this.

I think this proposed prioritization logic would cover all edge cases that I keep running in to, but it is nice that there still is this proposed API for a priority override. It will make troubleshooting a set of reader behaviors more quickly than just trying to pass the correct reader function.

- If the image is array-like, it uses a built-in reader designed for
such objects.
- Detailed logging is provided for troubleshooting purposes.
Expand All @@ -219,7 +254,12 @@ def determine_plugin(
# Check for extension in plugins_by_ext
for format_ext, plugins in plugins_by_ext.items():
if BioImage._path_has_extension(path, format_ext):
for plugin in plugins:
# Reorder plugins for this extension according to user priority
ordered_plugins = order_plugins_by_priority(
plugins, plugin_priority
)

for plugin in ordered_plugins:
ReaderClass = plugin.metadata.get_reader()
try:
if ReaderClass.is_supported_image(
Expand Down Expand Up @@ -284,6 +324,7 @@ def _get_reader(
reader: biob.reader.Reader,
use_plugin_cache: bool,
fs_kwargs: Dict[str, Any],
plugin_priority: Optional[Sequence[Type[biob.reader.Reader]]] = None,
**kwargs: Any,
) -> Tuple[biob.reader.Reader, Optional[PluginEntry]]:
"""
Expand All @@ -302,7 +343,11 @@ def _get_reader(

# Determine reader class based on available plugins
plugin = BioImage.determine_plugin(
image, fs_kwargs=fs_kwargs, use_plugin_cache=use_plugin_cache, **kwargs
image,
fs_kwargs=fs_kwargs,
use_plugin_cache=use_plugin_cache,
plugin_priority=plugin_priority,
**kwargs,
)
ReaderClass = plugin.metadata.get_reader()
return ReaderClass(image, fs_kwargs=fs_kwargs, **kwargs), plugin
Expand All @@ -314,6 +359,7 @@ def __init__(
reconstruct_mosaic: bool = True,
use_plugin_cache: bool = False,
fs_kwargs: Dict[str, Any] = {},
plugin_priority: Optional[Sequence[Type[biob.reader.Reader]]] = None,
**kwargs: Any,
):
try:
Expand All @@ -322,6 +368,7 @@ def __init__(
reader,
use_plugin_cache,
fs_kwargs,
plugin_priority=plugin_priority,
**kwargs,
)
except biob.exceptions.UnsupportedFileFormatError:
Expand All @@ -336,6 +383,7 @@ def __init__(
reader,
use_plugin_cache,
fs_kwargs | {"anon": True},
plugin_priority=plugin_priority,
**kwargs,
)

Expand Down
78 changes: 73 additions & 5 deletions bioio/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, get_args
from typing import Any, Sequence, Type, get_args

import semver

Expand Down Expand Up @@ -117,6 +117,43 @@ def insert_sorted_by_timestamp(list: List[PluginEntry], item: PluginEntry) -> No
list.append(item)


def order_plugins_by_priority(
plugins: List[PluginEntry],
plugin_priority: Optional[Sequence[Type[Reader]]] = None,
) -> List[PluginEntry]:
"""
Reorder a list of PluginEntry objects according to a user-provided
list of Reader classes.

Parameters
----------
plugins : List[PluginEntry]
The candidate plugins for a given extension.
plugin_priority : Sequence[Type[Reader]], optional
Reader classes that should be preferred first, e.g.

from bioio_czi import Reader as CziReader
from bioio_tifffile import Reader as TiffReader

plugin_priority = [CziReader, TiffReader]

Returns
-------
List[PluginEntry]
Prioritized plugin list.
"""
if not plugin_priority:
return plugins
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could we do a default sort here? just to make it deterministic, and not based on whatever drives it now?


priority_index = {cls: i for i, cls in enumerate(plugin_priority)}

# From the docs: "The built-in [sorted()](https://docs.python.org/3/library/functions.html#sorted) function is guaranteed to be stable. A sort is stable if it guarantees not to change the relative order of elements that compare equal."
return sorted(
plugins,
key=lambda p: priority_index.get(p.metadata.get_reader(), len(priority_index)),
)


def get_dependency_version_range_for_distribution(
distribution_name: str, dependency_name: str
) -> Tuple[Optional[str], Optional[str]]:
Expand Down Expand Up @@ -344,18 +381,49 @@ def plugin_feasibility_report(
"""
Generate a feasibility report for each plugin,
determining if it can handle the specified image.

For debugging purposes, this function checks every installed plugin’s
`is_supported_image(...)` implementation — even if the plugin would
*not* normally be selected by BioImage’s extension-based routing.

A warning is logged if a plugin *can* read the file but the file’s
extension is NOT listed in that plugin’s advertised supported extensions.
In such cases, BioImage will NOT auto-select that plugin for the file,
but the user may explicitly choose it via the `reader=` parameter.
"""
plugins_by_ext = get_plugins(use_cache=use_plugin_cache)
feasibility_report = {}
feasibility_report: Dict[str, PluginSupport] = {}

ext = None
if isinstance(image, (str, Path)):
# Strip any query parameters, then extract suffix
clean_path = str(image).split("?")[0]
ext = Path(clean_path).suffix.lower()

# Check each plugin for support
for plugins in plugins_by_ext.values():
for plugin in plugins:
plugin_name = plugin.entrypoint.name
feasibility_report[plugin_name] = _check_plugin_support(
plugin, image, fs_kwargs
)
support = _check_plugin_support(plugin, image, fs_kwargs)
feasibility_report[plugin_name] = support

if support.supported and ext is not None:
advertised_exts = [
e.lower() if e.startswith(".") else f".{e.lower()}"
for e in plugin.metadata.get_supported_extensions()
]

if ext not in advertised_exts:
ReaderClass = plugin.metadata.get_reader()

log.warning(
f"Plugin '{plugin_name}' CAN read the file '{image}', "
f"but the file extension '{ext}' is NOT listed in its "
f"get_supported_extensions(): {advertised_exts}. BioImage "
f"will NOT auto-select this reader based on extension.\n"
f"To use this reader manually, instantiate BioImage with:\n"
f" BioImage('{image}', reader={ReaderClass.__name__})"
)
# Additional check for ArrayLike support
try:
supported = ArrayLikeReader.is_supported_image(image=image, fs_kwargs=fs_kwargs)
Expand Down
104 changes: 103 additions & 1 deletion bioio/tests/test_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@

import sys
from io import StringIO
from typing import Any, Dict, List

import numpy as np
import pytest
from _pytest.monkeypatch import MonkeyPatch

import bioio
import bioio.bio_image as bio_image
from bioio import BioImage

from ..plugins import dump_plugins
from ..plugins import dump_plugins, get_plugins


def test_dump_plugins(dummy_plugin: str) -> None:
Expand Down Expand Up @@ -39,3 +44,100 @@ def test_plugin_feasibility_report(dummy_plugin: str) -> None:
assert actual_output["ArrayLike"].error is None
assert actual_output[dummy_plugin].supported is False
assert expected_error_msg in (actual_output[dummy_plugin].error or "")


@pytest.mark.parametrize(
"priority_order, expected_first",
[
# Case 1: AcceptingReader has higher priority than DummyReader
(["accepting", "dummy"], "accepting"),
# Case 2: DummyReader has higher priority than AcceptingReader
(["dummy", "accepting"], "dummy"),
],
)
def test_bioimage_plugin_priority_modulates_reader(
monkeypatch: MonkeyPatch,
dummy_plugin: str,
accepting_plugin: str,
priority_order: List[str],
expected_first: str,
) -> None:
from accepting_plugin import Reader as AcceptingReader
from dummy_plugin import Reader as DummyReader

# Arrange
plugins_by_ext = get_plugins(use_cache=False)

dummy_entry = None
accepting_entry = None
for plugin_list in plugins_by_ext.values():
for plugin in plugin_list:
reader_cls = plugin.metadata.get_reader()
if reader_cls is DummyReader:
dummy_entry = plugin
elif reader_cls is AcceptingReader:
accepting_entry = plugin

# Build a controlled plugin mapping where both readers claim the same extension
# and where the base order is [Dummy, Accepting]. This lets us see exactly how
# plugin_priority changes which reader BioImage selects.
test_ext = ".czi"
fake_plugins_by_ext = {test_ext: [dummy_entry, accepting_entry]}

# Patch the module-level get_plugins used inside BioImage.determine_plugin so
# it sees only our controlled mapping.
monkeypatch.setattr(
bio_image,
"get_plugins",
lambda use_cache: fake_plugins_by_ext,
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we define a default plugin order more explicitly, maybe this test can rely on that instead of modifying get_plugins


# Patch is_supported_image on BOTH readers so they do not hit the base-class
# path handling (which would enforce file existence and raise FileNotFoundError).
# We want both readers to *claim* they support this test path.
def always_supported(
cls: type[Any],
image: Any,
fs_kwargs: Dict[str, Any] | None = None,
**kwargs: Any,
) -> bool:
return True

monkeypatch.setattr(
DummyReader,
"is_supported_image",
classmethod(always_supported),
)
monkeypatch.setattr(
AcceptingReader,
"is_supported_image",
classmethod(always_supported),
)

# Patch DummyReader.__init__ so using it doesn't raise NotImplementedError.
def dummy_init(
self: Any,
image: Any,
fs_kwargs: Dict[str, Any] | None = None,
**kwargs: Any,
) -> None:
self._fs = None
self._path = str(image)

monkeypatch.setattr(DummyReader, "__init__", dummy_init)

# Map the parameterized string names to the actual Reader classes.
name_to_cls: Dict[str, type[Any]] = {
"dummy": DummyReader,
"accepting": AcceptingReader,
}

plugin_priority = [name_to_cls[name] for name in priority_order]
expected_cls = name_to_cls[expected_first]
test_path = f"test{test_ext}"

# Act
img = BioImage(test_path, plugin_priority=plugin_priority)

# Assert
assert isinstance(img.reader, expected_cls)
Loading