Skip to content

Commit ce9c8b0

Browse files
authored
Fixed PimsTiffReader (#1212)
* wip fixed PimsTiffReader * types, lint * fixes? * SequentialExecutor * fixes issues * improved out shape * types for 3.9 * more efficient * changelog
1 parent 28bfb37 commit ce9c8b0

File tree

4 files changed

+172
-95
lines changed

webknossos/Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
2020
### Changed
2121
- Removes vcr-py from developer dependencies for testing and adds proxay for recording and replaying API requests. [#1198](https://github.com/scalableminds/webknossos-libs/pull/1198)
2222
- Removed the CZI installation extra from `pip install webknossos[all]` by default. Users need to manually install it with `pip install --extra-index-url https://pypi.scm.io/simple/ webknossos[czi]`. [#1219](https://github.com/scalableminds/webknossos-libs/pull/1219)
23+
- Refactored the PimsTiffReader to read the data directly from the tiff file without creating a memmap-able copy first. This greatly reduces the time and storage requirements for converting large tiff files. [#1212](https://github.com/scalableminds/webknossos-libs/pull/1212)
2324

2425
### Fixed
2526
- Fixed unpickling of the SSL_Context to allow for a second or third pickling. [#1223](https://github.com/scalableminds/webknossos-libs/pull/1223)

webknossos/tests/dataset/test_add_layer_from_images.py

Lines changed: 90 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import httpx
1010
import numpy as np
1111
import pytest
12+
from cluster_tools import SequentialExecutor
1213
from tifffile import TiffFile
1314

1415
import webknossos as wk
@@ -38,20 +39,22 @@ def test_compare_tifffile(tmp_path: Path) -> None:
3839
for z_index in range(0, data.shape[-1]):
3940
with TiffFile("testdata/tiff/test.0200.tiff") as tif_file:
4041
comparison_slice = tif_file.asarray().T
41-
assert np.array_equal(data[:, :, z_index], comparison_slice)
42+
np.testing.assert_array_equal(data[:, :, z_index], comparison_slice)
4243

4344

4445
def test_compare_nd_tifffile(tmp_path: Path) -> None:
4546
ds = wk.Dataset(tmp_path, (1, 1, 1))
46-
layer = ds.add_layer_from_images(
47-
"testdata/4D/4D_series/4D-series.ome.tif",
48-
layer_name="color",
49-
category="color",
50-
topleft=(2, 55, 100, 100),
51-
data_format="zarr3",
52-
chunk_shape=(8, 8, 8),
53-
chunks_per_shard=(8, 8, 8),
54-
)
47+
with SequentialExecutor() as executor:
48+
layer = ds.add_layer_from_images(
49+
"testdata/4D/4D_series/4D-series.ome.tif",
50+
layer_name="color",
51+
category="color",
52+
topleft=(2, 55, 100, 100),
53+
data_format="zarr3",
54+
chunk_shape=(8, 8, 8),
55+
chunks_per_shard=(8, 8, 8),
56+
executor=executor,
57+
)
5558
assert layer.bounding_box.topleft == wk.VecInt(
5659
2, 55, 100, 100, axes=("t", "z", "y", "x")
5760
)
@@ -62,7 +65,9 @@ def test_compare_nd_tifffile(tmp_path: Path) -> None:
6265
"testdata/4D/4D_series/4D-series.ome.tif"
6366
).asarray()
6467
read_first_channel_from_dataset = layer.get_finest_mag().read()[0]
65-
assert np.array_equal(read_with_tifffile_reader, read_first_channel_from_dataset)
68+
np.testing.assert_array_equal(
69+
read_with_tifffile_reader, read_first_channel_from_dataset
70+
)
6671

6772

6873
REPO_IMAGES_ARGS: List[
@@ -184,19 +189,16 @@ def test_compare_nd_tifffile(tmp_path: Path) -> None:
184189
]
185190

186191

187-
@pytest.mark.parametrize(
188-
"path, kwargs, dtype, num_channels, num_layers, size", REPO_IMAGES_ARGS
189-
)
190-
def test_repo_images(
192+
def _test_repo_images(
191193
tmp_path: Path,
192-
path: str,
194+
path: Union[str, list[Path]],
193195
kwargs: Dict,
194196
dtype: str,
195197
num_channels: int,
196198
num_layers: int,
197199
size: Tuple[int, ...],
198200
) -> wk.Dataset:
199-
with wk.utils.get_executor_for_args(None) as executor:
201+
with SequentialExecutor() as executor:
200202
ds = wk.Dataset(tmp_path, (1, 1, 1))
201203
layer = ds.add_layer_from_images(
202204
path,
@@ -216,6 +218,21 @@ def test_repo_images(
216218
return ds
217219

218220

221+
@pytest.mark.parametrize(
222+
"path, kwargs, dtype, num_channels, num_layers, size", REPO_IMAGES_ARGS
223+
)
224+
def test_repo_images(
225+
tmp_path: Path,
226+
path: str,
227+
kwargs: Dict,
228+
dtype: str,
229+
num_channels: int,
230+
num_layers: int,
231+
size: Tuple[int, ...],
232+
) -> None:
233+
_test_repo_images(tmp_path, path, kwargs, dtype, num_channels, num_layers, size)
234+
235+
219236
def download_and_unpack(
220237
url: Union[str, List[str]], out_path: Path, filename: Union[str, List[str]]
221238
) -> None:
@@ -245,7 +262,7 @@ def download_and_unpack(
245262
copy(download_file.name, out_path / filename_i)
246263

247264

248-
BIOFORMATS_ARGS = [
265+
BIOFORMATS_ARGS: list[tuple[str, str, dict, str, int, tuple[int, int, int], int]] = [
249266
(
250267
"https://samples.scif.io/wtembryo.zip",
251268
"wtembryo.mov",
@@ -294,10 +311,7 @@ def download_and_unpack(
294311
]
295312

296313

297-
@pytest.mark.parametrize(
298-
"url, filename, kwargs, dtype, num_channels, size, num_layers", BIOFORMATS_ARGS
299-
)
300-
def test_bioformats(
314+
def _test_bioformats(
301315
tmp_path: Path,
302316
url: str,
303317
filename: str,
@@ -326,9 +340,36 @@ def test_bioformats(
326340
return ds
327341

328342

343+
@pytest.mark.parametrize(
344+
"url, filename, kwargs, dtype, num_channels, size, num_layers", BIOFORMATS_ARGS
345+
)
346+
def test_bioformats(
347+
tmp_path: Path,
348+
url: str,
349+
filename: str,
350+
kwargs: Dict,
351+
dtype: str,
352+
num_channels: int,
353+
size: Tuple[int, int, int],
354+
num_layers: int,
355+
) -> None:
356+
_test_bioformats(
357+
tmp_path, url, filename, kwargs, dtype, num_channels, size, num_layers
358+
)
359+
360+
329361
# All scif images used here are published with CC0 license,
330362
# see https://scif.io/images.
331-
TEST_IMAGES_ARGS = [
363+
TEST_IMAGES_ARGS: list[
364+
tuple[
365+
Union[str, list[str]],
366+
Union[str, list[str]],
367+
dict,
368+
str,
369+
int,
370+
tuple[int, int, int],
371+
]
372+
] = [
332373
(
333374
"https://static.webknossos.org/data/webknossos-libs/slice_0420.dm4",
334375
"slice_0420.dm4",
@@ -409,10 +450,7 @@ def test_bioformats(
409450
]
410451

411452

412-
@pytest.mark.parametrize(
413-
"url, filename, kwargs, dtype, num_channels, size", TEST_IMAGES_ARGS
414-
)
415-
def test_test_images(
453+
def _test_test_images(
416454
tmp_path: Path,
417455
url: Union[str, List[str]],
418456
filename: Union[str, List[str]],
@@ -461,34 +499,49 @@ def test_test_images(
461499
assert l_normal.num_channels == num_channels
462500
assert l_normal.bounding_box.size.to_tuple() == size
463501
if l_bio is not None:
464-
assert np.array_equal(
502+
np.testing.assert_array_equal(
465503
l_bio.get_finest_mag().read(), l_normal.get_finest_mag().read()
466504
)
467505
return ds
468506

469507

508+
@pytest.mark.parametrize(
509+
"url, filename, kwargs, dtype, num_channels, size", TEST_IMAGES_ARGS
510+
)
511+
def test_test_images(
512+
tmp_path: Path,
513+
url: Union[str, List[str]],
514+
filename: Union[str, List[str]],
515+
kwargs: Dict,
516+
dtype: str,
517+
num_channels: int,
518+
size: Tuple[int, int, int],
519+
) -> None:
520+
_test_test_images(tmp_path, url, filename, kwargs, dtype, num_channels, size)
521+
522+
470523
if __name__ == "__main__":
471524
time = lambda: strftime("%Y-%m-%d_%H-%M-%S", gmtime()) # noqa: E731
472525

473-
for repo_images_args in REPO_IMAGES_ARGS:
526+
for repo_image in REPO_IMAGES_ARGS:
474527
with TemporaryDirectory() as tempdir:
475-
image_path = repo_images_args[0]
528+
image_path = repo_image[0]
476529
if isinstance(image_path, list):
477530
image_path = str(image_path[0])
478531
name = "".join(filter(str.isalnum, image_path))
479-
print(*repo_images_args)
532+
print(repo_image)
480533
print(
481-
test_repo_images(Path(tempdir), *repo_images_args)
534+
_test_repo_images(Path(tempdir), *repo_image)
482535
.upload(f"test_repo_images_{name}_{time()}")
483536
.url
484537
)
485538

486-
for bioformats_args in BIOFORMATS_ARGS:
539+
for bioformat_image in BIOFORMATS_ARGS:
487540
with TemporaryDirectory() as tempdir:
488-
name = "".join(filter(str.isalnum, bioformats_args[1]))
489-
print(*bioformats_args)
541+
name = "".join(filter(str.isalnum, bioformat_image[1]))
542+
print(bioformat_image)
490543
print(
491-
test_bioformats(Path(tempdir), *bioformats_args)
544+
_test_bioformats(Path(tempdir), *bioformat_image)
492545
.upload(f"test_bioformats_{name}_{time()}")
493546
.url
494547
)
@@ -498,7 +551,7 @@ def test_test_images(
498551
name = "".join(filter(str.isalnum, test_images_args[1]))
499552
print(*test_images_args)
500553
print(
501-
test_test_images(Path(tempdir), *test_images_args)
554+
_test_test_images(Path(tempdir), *test_images_args)
502555
.upload(f"test_test_images_{name}_{time()}")
503556
.url
504557
)

webknossos/tests/dataset/test_from_images.py

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
import numpy as np
77
import pytest
8+
from cluster_tools import SequentialExecutor
89
from tifffile import TiffFile
910

1011
import webknossos as wk
@@ -36,7 +37,7 @@ def test_compare_tifffile(tmp_path: Path) -> None:
3637
for z_index in range(0, data.shape[-1]):
3738
with TiffFile(TESTDATA_DIR / "tiff" / "test.0000.tiff") as tif_file:
3839
comparison_slice = tif_file.asarray().T
39-
assert np.array_equal(data[:, :, z_index], comparison_slice)
40+
np.testing.assert_array_equal(data[:, :, z_index], comparison_slice)
4041

4142

4243
def test_multiple_multitiffs(tmp_path: Path) -> None:
@@ -95,11 +96,13 @@ def test_no_slashes_in_layername(tmp_path: Path) -> None:
9596
)
9697

9798
for strategy in Dataset.ConversionLayerMapping:
98-
dataset = wk.Dataset.from_images(
99-
tmp_path / "tiff",
100-
tmp_path / str(strategy),
101-
voxel_size=(10, 10, 10),
102-
map_filepath_to_layer_name=strategy,
103-
)
104-
105-
assert all("/" not in layername for layername in dataset.layers)
99+
with SequentialExecutor() as executor:
100+
dataset = wk.Dataset.from_images(
101+
tmp_path / "tiff",
102+
tmp_path / str(strategy),
103+
voxel_size=(10, 10, 10),
104+
map_filepath_to_layer_name=strategy,
105+
executor=executor,
106+
)
107+
108+
assert all("/" not in layername for layername in dataset.layers)

0 commit comments

Comments
 (0)