1- from itertools import product
1+ import json
22from os import PathLike
33from pathlib import Path
4- from typing import Set , Tuple , Union
4+ from typing import Iterator , NamedTuple , Set , Tuple , Union
55
66import numpy as np
77from pims import FramesSequenceND
1414 ) from e
1515
1616
17+ # This indexing function is adapted from zarr-python to work with tiffile's aszarr function
18+ # See https://github.com/zarr-developers/zarr-python/blob/main/src/zarr/core/indexing.py
19+ class _ChunkProjection (NamedTuple ):
20+ chunk_coords : tuple [int , ...]
21+ chunk_selection : tuple [Union [slice , int ], ...]
22+ out_selection : tuple [Union [slice , None ], ...]
23+
24+
25+ def _chunk_indexing (
26+ selection : tuple [Union [slice , int ], ...],
27+ shape : tuple [int , ...],
28+ chunk_shape : tuple [int , ...],
29+ ) -> Iterator [_ChunkProjection ]:
30+ from itertools import product
31+
32+ class ChunkDimProjection (NamedTuple ):
33+ dim_chunk_ix : int
34+ dim_chunk_sel : Union [slice , int ]
35+ dim_out_sel : Union [slice , None ]
36+
37+ def ceildiv (a : int , b : int ) -> int :
38+ return - (a // - b )
39+
40+ def slice_dim_indexer (
41+ dim_sel : slice , dim_len : int , dim_chunk_len : int
42+ ) -> Iterator [ChunkDimProjection ]:
43+ start , stop , step = dim_sel .indices (dim_len )
44+ assert step == 1
45+
46+ # figure out the range of chunks we need to visit
47+ dim_chunk_ix_from = start // dim_chunk_len
48+ dim_chunk_ix_to = ceildiv (stop , dim_chunk_len )
49+
50+ # iterate over chunks in range
51+ for dim_chunk_ix in range (dim_chunk_ix_from , dim_chunk_ix_to ):
52+ # compute offsets for chunk within overall array
53+ dim_offset = dim_chunk_ix * dim_chunk_len
54+ dim_limit = min (dim_len , (dim_chunk_ix + 1 ) * dim_chunk_len )
55+
56+ # determine chunk length, accounting for trailing chunk
57+ dim_chunk_len = dim_limit - dim_offset
58+
59+ if start < dim_offset :
60+ # selection starts before current chunk
61+ dim_chunk_sel_start = 0
62+ remainder = (dim_offset - start ) % 1
63+ if remainder :
64+ dim_chunk_sel_start += 1 - remainder
65+ # compute number of previous items, provides offset into output array
66+ dim_out_offset = dim_offset - start
67+
68+ else :
69+ # selection starts within current chunk
70+ dim_chunk_sel_start = start - dim_offset
71+ dim_out_offset = 0
72+
73+ if stop > dim_limit :
74+ # selection ends after current chunk
75+ dim_chunk_sel_stop = dim_chunk_len
76+
77+ else :
78+ # selection ends within current chunk
79+ dim_chunk_sel_stop = stop - dim_offset
80+
81+ dim_chunk_sel = slice (dim_chunk_sel_start , dim_chunk_sel_stop , 1 )
82+ dim_chunk_nitems = dim_chunk_sel_stop - dim_chunk_sel_start
83+ dim_out_sel = slice (dim_out_offset , dim_out_offset + dim_chunk_nitems )
84+
85+ yield ChunkDimProjection (dim_chunk_ix , dim_chunk_sel , dim_out_sel )
86+
87+ def int_dim_indexer (
88+ dim_sel : int , dim_chunk_len : int
89+ ) -> Iterator [ChunkDimProjection ]:
90+ dim_chunk_ix = dim_sel // dim_chunk_len
91+ dim_offset = dim_chunk_ix * dim_chunk_len
92+ yield ChunkDimProjection (dim_chunk_ix , dim_sel - dim_offset , None )
93+
94+ # setup per-dimension indexers
95+ dim_indexers = [
96+ slice_dim_indexer (dim_sel , dim_len , dim_chunk_len )
97+ if isinstance (dim_sel , slice )
98+ else int_dim_indexer (dim_sel , dim_chunk_len )
99+ for dim_sel , dim_len , dim_chunk_len in zip (selection , shape , chunk_shape )
100+ ]
101+
102+ for dim_projections in product (* dim_indexers ):
103+ chunk_coords = tuple (p .dim_chunk_ix for p in dim_projections )
104+ chunk_selection = tuple (p .dim_chunk_sel for p in dim_projections )
105+ out_selection = tuple (
106+ p .dim_out_sel for p in dim_projections if p .dim_out_sel is not None
107+ )
108+
109+ yield _ChunkProjection (chunk_coords , chunk_selection , out_selection )
110+
111+
17112class PimsTiffReader (FramesSequenceND ):
18113 @classmethod
19114 def class_exts (cls ) -> Set [str ]:
@@ -22,7 +117,6 @@ def class_exts(cls) -> Set[str]:
22117 # class_priority is used in pims to pick the reader with the highest priority.
23118 # We decided to use a custom reader for tiff files to support images with more than 3 dimensions out of the box.
24119 # Default is 10, and bioformats priority is 2.
25- # Our custom reader for imagej_tiff has priority 20.
26120 # See http://soft-matter.github.io/pims/v0.6.1/custom_readers.html#plugging-into-pims-s-open-function
27121 class_priority = 19
28122
@@ -55,66 +149,59 @@ def __init__(self, path: PathLike) -> None:
55149 else :
56150 self ._register_get_frame (self .get_frame_2D , "yx" )
57151
152+ expected_page_count = int (
153+ np .prod ([self .sizes [axis ] for axis in self ._other_axes ])
154+ )
155+ self ._page_mode = len (_tiff .pages ) == expected_page_count
156+
58157 def get_frame_2D (self , ** ind : int ) -> np .ndarray :
59158 _tiff = tifffile .TiffFile (self .path ).series [0 ]
60159
160+ # We are using aszarr because it provides a chunked interface
161+ # to the tiff file's content. However, we don't want to add
162+ # zarr-python as a dependency. So we just implement the indexing
163+ # ourselves and rely on the fact that tifffile isn't using more
164+ # complex zarr features such as compressors, filters, F-order, fillvalue etc.
165+ zarr_store = _tiff .aszarr ()
166+ zarray = json .loads (zarr_store [".zarray" ])
167+
168+ assert zarray ["zarr_format" ] == 2
169+ assert zarray ["order" ] == "C"
170+ assert np .dtype (zarray ["dtype" ]) == self ._dtype
171+ assert zarray .get ("compressor" ) is None
172+ assert zarray .get ("filters" ) in (None , [])
173+ assert zarray ["fill_value" ] == 0
174+ array_shape = tuple (zarray ["shape" ])
175+ chunk_shape = tuple (zarray ["chunks" ])
176+
177+ # Prepare output array for this frame
61178 out_shape = tuple (self .sizes [axis ] for axis in self .bundle_axes )
62179 out = np .zeros (out_shape , dtype = self ._dtype )
63180
64- # Axes that are present in the tiff page
65- page_axes = tuple (
66- axis for axis in self ._tiff_axes if axis not in self ._other_axes
67- )
68181 # Axes that need to be broadcasted from page to output
69182 broadcast_axes = tuple (
70183 axis
71184 for axis in self ._tiff_axes
72185 if axis in self .bundle_axes and axis not in self ._other_axes
73186 )
74187
75- page_indices = product (
76- * [
77- range (self .sizes [axis ])
78- if axis in self .bundle_axes
79- else range (ind [axis ], ind [axis ] + 1 )
80- for axis in self ._other_axes
81- ]
188+ # Prepare selection of the data to read for this frame
189+ selection : tuple [Union [slice , int ], ...] = tuple (
190+ slice (None ) if axis in broadcast_axes else ind [axis ]
191+ for axis in self ._tiff_axes
82192 )
83193
84- # We iterate over all tiff pages to find the pages that are relevant for this frame
85- for page_ind in page_indices :
86- this_ind = {axis : index for axis , index in zip (self ._other_axes , page_ind )}
87-
88- i = 0
89- for j , axis in enumerate (self ._other_axes ):
90- i += this_ind [axis ] * np .prod (
91- [self .sizes [axis ] for axis in self ._other_axes [j + 1 :]],
92- dtype = int ,
93- )
94-
95- # Prepare selectors
96- page_selector_list : list [Union [slice , int ]] = []
97- for axis in page_axes :
98- if axis in self .bundle_axes :
99- page_selector_list .append (slice (None ))
100- else :
101- page_selector_list .append (ind [axis ])
102- page_selector = tuple (page_selector_list )
103-
104- out_selector_list : list [Union [slice , int ]] = []
105- for axis in self .bundle_axes :
106- if axis in broadcast_axes :
107- out_selector_list .append (slice (None )) # broadcast
108- else :
109- out_selector_list .append (
110- this_ind [axis ]
111- ) # set page in a slice of the output
112- out_selector = tuple (out_selector_list )
113-
114- page = _tiff .asarray (key = i )
115- assert len (out_selector ) == out .ndim
116- assert len (page_selector ) == page .ndim
117- out [out_selector ] = page [page_selector ]
194+ for chunk_projection in _chunk_indexing (selection , array_shape , chunk_shape ):
195+ # read data from zarr store
196+ chunk_data = (
197+ zarr_store ["." .join (map (str , chunk_projection .chunk_coords ))]
198+ .ravel ()
199+ .reshape (chunk_shape )
200+ )
201+ # write in output array
202+ out [chunk_projection .out_selection ] = chunk_data [
203+ chunk_projection .chunk_selection
204+ ]
118205
119206 return out
120207
0 commit comments