diff --git a/docs/release/release_v1.7.md b/docs/release/release_v1.7.md index 4e39477e..7f5bef86 100644 --- a/docs/release/release_v1.7.md +++ b/docs/release/release_v1.7.md @@ -4,7 +4,7 @@ ### Highlights -- Better multipage TIF file support, including import to NPY format without Javabridge/Bioformats +- Javabridge/Bioformats has been replaced with multipage TIF file support - Minimum supported Python version is now 3.10 to reduce maintenance burden and improve testing on more recent versions of Python ### Changes @@ -29,8 +29,9 @@ #### I/O -- TIF files can be imported without Javabridge/Bioformats by loading a TIF image directly with the flag, `--savefig npy` (#738, #753) +- TIF files can be imported without Javabridge/Bioformats in the GUI ("Import" tab) or by loading a TIF image directly with the flag, `--savefig npy` (#738, #753, #756) - Read and write `PhysicalSpacingX`-style TIF resolutions (#753) +- Exports to TIF are now multichannel TIF files (#756) - Fixed issues with loading certain TIF files' metadata (#738, #754) - Fixed saving/loading rescaled images using Numpy 2 (#738) diff --git a/magmap/atlas/transformer.py b/magmap/atlas/transformer.py index f236fa52..d0c6833a 100644 --- a/magmap/atlas/transformer.py +++ b/magmap/atlas/transformer.py @@ -12,8 +12,7 @@ from magmap.cv import chunking, cv_nd from magmap.settings import config -from magmap.io import importer -from magmap.io import libmag +from magmap.io import importer, libmag, np_io from magmap.plot import plot_3d _logger = config.logger.getChild(__name__) @@ -279,17 +278,10 @@ def transpose_img( rescaled_shape = np.concatenate(([1], rescaled_shape)) print(f"rescaled_shape: {rescaled_shape}") - # WORKAROUND: fix error in Numpy 2 when shape for open_memmap contains - # np.int64 values instead of primitive int (see: - # https://github.com/numpy/numpy/issues/28334) - def fix_shape(shape): - import operator - return [operator.index(s) for s in rescaled_shape] - # rescale chunks directly into 
memmap-backed array to minimize RAM usage image5d_transposed = np.lib.format.open_memmap( filename_image5d_npz, mode="w+", dtype=sub_rois[0, 0, 0].dtype, - shape=tuple(fix_shape(rescaled_shape))) + shape=np_io.fix_memmap_shape(rescaled_shape)) chunking.merge_split_stack2(sub_rois, None, offset, image5d_transposed) if rescale is not None: diff --git a/magmap/gui/visualizer.py b/magmap/gui/visualizer.py index 6770d1d7..ce522a17 100644 --- a/magmap/gui/visualizer.py +++ b/magmap/gui/visualizer.py @@ -4057,7 +4057,7 @@ def setup_import(md): # extracted values res = md[config.MetaKeys.RESOLUTIONS] if res is not None: - self._import_res = [res[::-1]] + self._import_res = res[:, ::-1] mag = md[config.MetaKeys.MAGNIFICATION] if mag is not None: self._import_mag = mag @@ -4066,6 +4066,10 @@ def setup_import(md): self._import_zoom = zoom shape = md[config.MetaKeys.SHAPE] if shape is not None: + shape = np.array(shape) + if len(shape) < 5: + # expects channel dimension + shape = np.append(shape, 1) self._import_shape = [shape[::-1]] dtype_str = md[config.MetaKeys.DTYPE] @@ -4093,7 +4097,7 @@ def setup_import(md): except TypeError: print("Could not find data type for {}".format(dtype_str)) - if shape and dtype_str: + if shape is not None and dtype_str: # signal ready import self._update_import_feedback( "Ready to import. 
Please check microscope metadata " diff --git a/magmap/io/importer.py b/magmap/io/importer.py index f10bbdb0..40f055ab 100644 --- a/magmap/io/importer.py +++ b/magmap/io/importer.py @@ -19,7 +19,7 @@ import glob import pprint import re -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union from xml import etree as et import numpy as np @@ -41,10 +41,6 @@ # Java cannot be initialized, or a RuntimeError if Java home dir not found jb = None bf = None - _logger.warn( - "%s could not be found, so there will be error when attempting to " - "import images into Numpy format", - e.name if isinstance(e, ImportError) else "Java") # pixel type enumeration based on: # http://downloads.openmicroscopy.org/bio-formats-cpp/5.1.8/api/classome_1_1xml_1_1model_1_1enums_1_1PixelType.html @@ -918,68 +914,96 @@ def _is_raw(path): return os.path.splitext(path)[1].lower() == ".raw" -def setup_import_metadata(chl_paths, channel=None, series=None, z_max=-1): +def _is_tif(path): + """Check if a path is a TIF file based on extension. + + Args: + path (str): Path to check + + Returns: + bool: True if ``path``'s extension is TIF, case insensitive. + + """ + return os.path.splitext(path)[1].lower() in (".tif", ".tiff") + + +def setup_import_metadata( + chl_paths: Dict[Any, List[str]], channel: Optional[List[int]] = None, + series: Optional[int] = None, z_max: int =-1 + ) -> Dict[config.MetaKeys, Any]: """Extract metadata and determine output image shape for importing multipage file(s). Args: - chl_paths (dict[Any, List[str]]): Ordered dictionary of channel + chl_paths: Ordered dictionary of channel numbers to sequences of image file paths to import. - channel (List[int]): Sequence of channel indices to import; defaults + channel: Sequence of channel indices to import; defaults to None to import all channels. - series (int): Series index to load. Defaults to None, which will use 0. 
- z_max (int): Number of z-planes to load; defaults to -1 to load all. + series: Series index to load. Defaults to None, which will use 0. + z_max: Number of z-planes to load; defaults to -1 to load all. Returns: - dict[:obj:`config.MetaKeys`]: Dictionary of metadata. RAW files will + Dictionary of metadata. RAW files will simply return a metadata dictionary populated with None values. """ - print("Extracting metadata for image import, may take awhile...") + _logger.info("Extracting metadata for image import, may take awhile...") if series is None: series = 0 path = tuple(chl_paths.values())[0][0] md = dict.fromkeys(config.MetaKeys) - if _is_raw(path) or not is_javabridge_loaded(): - # RAW files will need to have metadata supplied manually; return - # based on this extension to avoid startup time for Javabridge - return md - - start_jvm() - jb.attach() - shape = None - try: - # get available embedded metadata via Bioformats - names, sizes, md = parse_ome_raw(bf.get_omexml_metadata(path)) - - # unlike config.resolutions, keep only single list for simplicity - res = md[config.MetaKeys.RESOLUTIONS] - if res and len(res) > series: - md[config.MetaKeys.RESOLUTIONS] = res[series] - if sizes and len(sizes) > series: - shape = list(sizes[series]) - except jb.JavaException as err: - print(err) - - if shape is None: + if _is_tif(path): + # extract TIF metadata by loading the first file + img5d = np_io.read_tif(path) + md = img5d.meta + if img5d.img is not None and md is not None: + md[config.MetaKeys.DTYPE] = img5d.img.dtype + + elif _is_raw(path): + # RAW files will need to have metadata supplied manually + _logger.info("RAW file detected, skipping metadata extraction") + + elif is_javabridge_loaded(): + # load metadata via Bioformats; Javabridge startup may take awhile + start_jvm() + jb.attach() + shape = None try: - # fall back to getting a subset of metadata, also through Bioformats - # TODO: see if necessary or improves performance - sizes, dtype = find_sizes(path) - 
if dtype: - md[config.MetaKeys.DTYPE] = dtype.name - shape = list(sizes[0]) - except (jb.JavaException, AttributeError) as err: - # Python-Bioformats (v1.1) attempts to access currently non-existing - # message attribute in JavaException from Javabridge (v1.0.18) + # get available embedded metadata via Bioformats + names, sizes, md = parse_ome_raw(bf.get_omexml_metadata(path)) + + # unlike config.resolutions, keep only single list for simplicity + # TODO: remove now that downstream expects full 2D res list? + res = md[config.MetaKeys.RESOLUTIONS] + if res and len(res) > series: + md[config.MetaKeys.RESOLUTIONS] = res[series] + if sizes and len(sizes) > series: + shape = list(sizes[series]) + except jb.JavaException as err: print(err) + + if shape is None: + try: + # fall back to getting a subset of metadata via Bioformats + # TODO: see if necessary or improves performance + sizes, dtype = find_sizes(path) + if dtype: + md[config.MetaKeys.DTYPE] = dtype.name + shape = list(sizes[0]) + except (jb.JavaException, AttributeError) as err: + # Python-Bioformats (v1.1) gets currently non-existing + # message attribute in JavaException from Javabridge (v1.0.18) + print(err) + + if shape: + shape = _update_shape_for_channels(shape, chl_paths, channel)[1] + if z_max != -1: + shape[1] = z_max + md[config.MetaKeys.SHAPE] = shape + jb.detach() - if shape: - shape = _update_shape_for_channels(shape, chl_paths, channel)[1] - if z_max != -1: - shape[1] = z_max - md[config.MetaKeys.SHAPE] = shape - jb.detach() + else: + _logger.info("Metadata could not be extracted for %s", path) return md @@ -1012,8 +1036,11 @@ def _update_shape_for_channels(shape, chl_paths, channel): return shape_in, shape_out -def import_multiplane_images(chl_paths, prefix, import_md, series=None, - offset=0, channel=None, fn_feedback=None): +def import_multiplane_images( + chl_paths: Dict[Any, List[str]], prefix: str, + import_md: Dict[config.MetaKeys, Any], series: Optional[int] = None, + offset: int = 0, 
channel: Optional[List[int]] = None, + fn_feedback: Optional[Callable] = None) -> "np_io.Image5d": """Imports single or multiplane file(s) into Numpy format. For multichannel images, this import currently supports either a single @@ -1023,27 +1050,23 @@ def import_multiplane_images(chl_paths, prefix, import_md, series=None, files to bypass keeping the full input or output image in RAM. Args: - chl_paths (dict[Any, List[str]]): Ordered dictionary of channel + chl_paths: Ordered dictionary of channel numbers to sequences of image file paths to import. - prefix (str): Ouput base path. - import_md (dict[:obj:`config.MetaKeys`]): Import metadata dictionary, + prefix: Output base path. + import_md: Import metadata dictionary, used to set up the shape, data type (for RAW file import), and output image metadata (resolutions, zoom, magnification). - series (int): Series index to load. Defaults to None, which will use 0. - offset (int): z-plane offset from which to start importing. - Defaults to 0. - channel (List[int]): Sequence of channel indices to import; defaults + series: Series index to load. Defaults to None, which will use 0. + offset: z-plane offset from which to start importing. + channel: Sequence of channel indices to import; defaults to None to import all channels. - fn_feedback (func): Callback function to give feedback strings - during import; defaults to None. + fn_feedback: Callback function to give feedback strings + during import. Returns: - :obj:`np_io.Image5d: The 5D image object. + The 5D image object. 
""" - if not is_javabridge_loaded(): - return None - time_start = time() if series is None: series = 0 @@ -1089,8 +1112,20 @@ def import_multiplane_images(chl_paths, prefix, import_md, series=None, img_raw = None libmag.printcb( "Loading file {} for import".format(img_path), fn_feedback) - if not _is_raw(img_path): - # open non-RAW image with Python-Bioformats + if _is_tif(img_path): + # load TIF file + img5d = np_io.read_tif(img_path) + if img5d.img is not None: + img_raw = img5d.img[0] # assume first time point + + elif _is_raw(img_path): + # open image file as a RAW 3D array + img_raw = np.memmap( + img_path, dtype=import_md[config.MetaKeys.DTYPE], + shape=tuple(shape_in[1:]), mode="r") + + elif is_javabridge_loaded(): + # open with Python-Bioformats try: if not jb_attached: # start JVM and attach to current thread @@ -1100,11 +1135,6 @@ def import_multiplane_images(chl_paths, prefix, import_md, series=None, rdr = bf.ImageReader(img_path, perform_init=True) except (jb.JavaException, AttributeError) as err: print(err) - if rdr is None: - # open image file as a RAW 3D array - img_raw = np.memmap( - img_path, dtype=import_md[config.MetaKeys.DTYPE], - shape=tuple(shape_in[1:]), mode="r") len_shape = len(shape) len_shape_in = len(shape_in) @@ -1119,8 +1149,8 @@ def import_multiplane_images(chl_paths, prefix, import_md, series=None, "loading planes from time {}, z {}, channel {}" .format(t, z, chl_load), fn_feedback) if img_raw is not None: - # access plane from RAW memmapped file - img = (img_raw[z, ..., chl_load] if len_shape_in >= 5 + # access plane from RAW or TIF file + img = (img_raw[z, ..., chl_load] if img_raw.ndim >= 4 else img_raw[z]) else: # read plane with Bioformats reader; chl_load may be @@ -1137,7 +1167,7 @@ def import_multiplane_images(chl_paths, prefix, import_md, series=None, os.path.dirname(filename_image5d), exist_ok=True) image5d = np.lib.format.open_memmap( filename_image5d, mode="w+", dtype=img.dtype, - shape=shape) + 
shape=np_io.fix_memmap_shape(shape)) print("setting image5d array for series {} with shape: " "{}".format(series, image5d.shape)) diff --git a/magmap/io/np_io.py b/magmap/io/np_io.py index daaeeb0d..a48a2549 100644 --- a/magmap/io/np_io.py +++ b/magmap/io/np_io.py @@ -3,6 +3,7 @@ """Import/export for Numpy-based archives such as ``.npy`` and ``.npz`` formats. """ import ast +import operator import os import pathlib import pprint @@ -590,6 +591,21 @@ def add_metadata(): return img5d +def fix_memmap_shape(shape: np.ndarray | Sequence) -> Tuple[int, ...]: + """Fix shape tuple for Numpy 2 memmap if it contains np.int64 values. + + Args: + shape: Shape array. + + Returns: + Shape tuple with primitive int values. + + """ + # WORKAROUND: fix error in Numpy 2 when shape for open_memmap contains + # np.int64 values instead of primitive int (see: + # https://github.com/numpy/numpy/issues/28334) + return tuple([operator.index(s) for s in shape]) + def get_num_channels( img: Optional[np.ndarray] = None, is_3d: bool = False) -> int: @@ -846,7 +862,8 @@ def write_npy( def write_tif( - img5d: "Image5d", path: Union[str, pathlib.Path], **kwargs: Any): + img5d: "Image5d", path: Union[str, pathlib.Path], + sep_chls: bool = False, **kwargs: Any): """Write a NumPy array to TIF files. Each channel will be exported to a separate file. @@ -856,25 +873,20 @@ def write_tif( path: Base output path. If ``image5d`` has multiple channels, they will be exported to files with ``_ch_`` appended just before the extension. + sep_chls: True to export each channel to a separate file. kwargs: Arguments passed to :meth:`tifffile.imwrite`. 
""" - image5d = img5d.img - if image5d is None or img5d.meta is None: + if img5d is None or img5d.img is None or img5d.meta is None: _logger.error("No image5d to write to TIF files") return - nchls = get_num_channels(image5d) - for i in range(nchls): - # export the given channel to a separate file, adding the channel to - # the filename if multiple channels exist - img_chl = image5d if image5d.ndim <= 4 else image5d[..., i] - out_path = pathlib.Path(libmag.make_out_path( - f"{path}{f'_ch_{i}' if nchls > 1 else ''}.tif", - combine_prefix=True)).resolve() + def write(): + # write the current img_chl to out_path pathlib.Path.mkdir(out_path.parent.resolve(), exist_ok=True) libmag.backup_file(out_path) + nonlocal img_chl if "imagej" in kwargs and kwargs["imagej"]: # ImageJ format assumes dimension order of TZCYXS img_chl = img_chl[:, :, np.newaxis] @@ -909,3 +921,22 @@ def write_tif( tifffile.imwrite( out_path, img_chl, resolution=res_tif, metadata=metadata, photometric="minisblack", **kwargs) + _logger.info("Saved '%s'", out_path) + + image5d = img5d.img + if sep_chls: + nchls = get_num_channels(image5d) + for i in range(nchls): + # export the given channel to a separate file, adding the channel to + # the filename if multiple channels exist + img_chl = image5d if image5d.ndim <= 4 else image5d[..., i] + out_path = pathlib.Path(libmag.make_out_path( + f"{path}{f'_ch_{i}' if nchls > 1 else ''}.tif", + combine_prefix=True)).resolve() + write() + else: + # export all channels to the same file + img_chl = image5d + out_path = pathlib.Path(libmag.make_out_path( + f"{path}.tif", combine_prefix=True)).resolve() + write()