Skip to content

Commit 068c2db

Browse files
authored
from_images: add python-native support for dm3 & dm4 (#842)
* from_images: add python-native support for dm3 & dm4
* add changelog entry
* apply PR feedback
* reduce memory usage by allowing z=1 chunks
* more test logging
* less mem usage in test_compare_tifffile
* formatting
* Revert "more test logging" (this reverts commit 5673b21)
* run gc after every test
1 parent 5739a3e commit 068c2db

File tree

13 files changed

+1651
-100
lines changed

13 files changed

+1651
-100
lines changed

webknossos/Changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ For upgrade instructions, please check the respective *Breaking Changes* section
1515
### Breaking Changes
1616

1717
### Added
18+
- `Dataset.from_images` and `dataset.add_layer_from_images` have new features: [#842](https://github.com/scalableminds/webknossos-libs/pull/842)
19+
* `dm3` and `dm4` datasets can be read without bioformats now.
20+
* It's possible to completely disable the bioformats adapter by setting `use_bioformats` to False.
21+
* Lists of images can now be handled with other readers; previously, only images supported by skimage worked in lists.
1822

1923
### Changed
2024

webknossos/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,10 @@ Please see the [respective documentation page](https://docs.webknossos.org/webkn
4444
## License
4545
[AGPLv3](https://www.gnu.org/licenses/agpl-3.0.html)
4646
Copyright [scalable minds](https://scalableminds.com)
47+
48+
## Test Data Credits
49+
Excerpts for testing purposes have been sampled from:
50+
51+
* Dow, Jacobo, Hossain, Siletti, Hudspeth (2018). **Connectomics of the zebrafish's lateral-line neuromast reveals wiring and miswiring in a simple microcircuit.** eLife. [DOI:10.7554/eLife.33988](https://elifesciences.org/articles/33988)
52+
* Zheng, Lauritzen, Perlman, Robinson, Nichols, Milkie, Torrens, Price, Fisher, Sharifi, Calle-Schuler, Kmecova, Ali, Karsh, Trautman, Bogovic, Hanslovsky, Jefferis, Kazhdan, Khairy, Saalfeld, Fetter, Bock (2018). **A Complete Electron Microscopy Volume of the Brain of Adult Drosophila melanogaster.** Cell. [DOI:10.1016/j.cell.2018.06.019](https://www.cell.com/cell/fulltext/S0092-8674(18)30787-6). License: [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/)
53+
* Bosch, Ackels, Pacureanu et al. (2022). **Functional and multiscale 3D structural investigation of brain tissue through correlative in vivo physiology, synchrotron microtomography and volume electron microscopy.** Nature Communications. [DOI:10.1038/s41467-022-30199-6](https://www.nature.com/articles/s41467-022-30199-6)

webknossos/test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ export_vars
1010
# this will ensure that the current directory is added to sys.path
1111
# (which is standard python behavior). This is necessary so that the imports
1212
# refer to the checked out (and potentially modified) code.
13-
PYTEST="poetry run python -m pytest --suppress-no-test-exit-code"
13+
PYTEST="poetry run python -m pytest --suppress-no-test-exit-code -s -vvv"
1414

1515

1616
if [ $# -gt 0 ] && [ "$1" = "--refresh-snapshots" ]; then

webknossos/tests/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import gc
12
import json
23
import re
34
import warnings
@@ -37,6 +38,11 @@ def pytest_make_parametrize_id(config: Any, val: Any, argname: str) -> Any:
3738
return None
3839

3940

41+
@pytest.fixture(autouse=True)
42+
def ensure_gc() -> None:
43+
gc.collect()
44+
45+
4046
### HYPOTHESIS STRATEGIES (library to test many combinations for data class input)
4147

4248

webknossos/tests/dataset/test_add_layer_from_images.py

Lines changed: 97 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from shutil import copy
33
from tempfile import NamedTemporaryFile, TemporaryDirectory
44
from time import gmtime, strftime
5-
from typing import Any, Dict, List, Tuple, Union
5+
from typing import Any, Dict, List, Optional, Tuple, Union
66
from zipfile import BadZipFile, ZipFile
77

88
import httpx
@@ -120,6 +120,7 @@ def test_repo_images(
120120
layer_name=layer_name,
121121
compress=True,
122122
executor=executor,
123+
use_bioformats=False,
123124
**kwargs,
124125
)
125126
assert l.dtype_per_channel == np.dtype(dtype)
@@ -131,24 +132,33 @@ def test_repo_images(
131132
return ds
132133

133134

134-
def download_and_unpack(url: str, out_path: Path, filename: str) -> None:
135-
with NamedTemporaryFile() as download_file:
136-
with httpx.stream("GET", url) as response:
137-
total = int(response.headers["Content-Length"])
135+
def download_and_unpack(
136+
url: Union[str, List[str]], out_path: Path, filename: Union[str, List[str]]
137+
) -> None:
138+
if isinstance(url, str):
139+
assert isinstance(filename, str)
140+
url = [url]
141+
filename = [filename]
142+
for url_i, filename_i in zip(url, filename):
143+
with NamedTemporaryFile() as download_file:
144+
with httpx.stream("GET", url_i) as response:
145+
total = int(response.headers["Content-Length"])
138146

139-
with wk.utils.get_rich_progress() as progress:
140-
download_task = progress.add_task("Download Image Data", total=total)
141-
for chunk in response.iter_bytes():
142-
download_file.write(chunk)
143-
progress.update(
144-
download_task, completed=response.num_bytes_downloaded
147+
with wk.utils.get_rich_progress() as progress:
148+
download_task = progress.add_task(
149+
"Download Image Data", total=total
145150
)
146-
try:
147-
with ZipFile(download_file, "r") as zip_file:
148-
zip_file.extractall(out_path)
149-
except BadZipFile:
150-
out_path.mkdir(parents=True, exist_ok=True)
151-
copy(download_file.name, out_path / filename)
151+
for chunk in response.iter_bytes():
152+
download_file.write(chunk)
153+
progress.update(
154+
download_task, completed=response.num_bytes_downloaded
155+
)
156+
try:
157+
with ZipFile(download_file, "r") as zip_file:
158+
zip_file.extractall(out_path)
159+
except BadZipFile:
160+
out_path.mkdir(parents=True, exist_ok=True)
161+
copy(download_file.name, out_path / filename_i)
152162

153163

154164
BIOFORMATS_ARGS = [
@@ -241,7 +251,44 @@ def test_bioformats(
241251
return ds
242252

243253

254+
# All scif images used here are published with CC0 license,
255+
# see https://scif.io/images.
244256
TEST_IMAGES_ARGS = [
257+
(
258+
"https://static.webknossos.org/data/webknossos-libs/slice_0420.dm4",
259+
"slice_0420.dm4",
260+
{"data_format": "zarr"}, # using zarr to allow z=1 chunking
261+
"uint16",
262+
1,
263+
(8192, 8192, 1),
264+
),
265+
(
266+
"https://static.webknossos.org/data/webknossos-libs/slice_0073.dm3",
267+
"slice_0073.dm3",
268+
{"data_format": "zarr"}, # using zarr to allow z=1 chunking
269+
"uint16",
270+
1,
271+
(4096, 4096, 1),
272+
),
273+
(
274+
[
275+
"https://static.webknossos.org/data/webknossos-libs/slice_0073.dm3",
276+
"https://static.webknossos.org/data/webknossos-libs/slice_0074.dm3",
277+
],
278+
["slice_0073.dm3", "slice_0074.dm3"],
279+
{"data_format": "zarr"}, # using zarr to allow smaller chunking
280+
"uint16",
281+
1,
282+
(4096, 4096, 2),
283+
),
284+
(
285+
"https://samples.scif.io/dnasample1.zip",
286+
"dnasample1.dm3",
287+
{"data_format": "zarr"}, # using zarr to allow z=1 chunking
288+
"int16",
289+
1,
290+
(4096, 4096, 1),
291+
),
245292
(
246293
# published with CC0 license, taken from
247294
# https://doi.org/10.6084/m9.figshare.c.3727411_D391.v1
@@ -292,41 +339,56 @@ def test_bioformats(
292339
)
293340
def test_test_images(
294341
tmp_path: Path,
295-
url: str,
296-
filename: str,
342+
url: Union[str, List[str]],
343+
filename: Union[str, List[str]],
297344
kwargs: Dict,
298345
dtype: str,
299346
num_channels: int,
300347
size: Tuple[int, int, int],
301348
) -> wk.Dataset:
302349
unzip_path = tmp_path / "unzip"
303350
download_and_unpack(url, unzip_path, filename)
351+
path: Union[Path, List[Path]]
352+
if isinstance(filename, list):
353+
layer_name = filename[0] + "..."
354+
path = [unzip_path / i for i in filename]
355+
else:
356+
layer_name = filename
357+
path = unzip_path / filename
304358
ds = wk.Dataset(tmp_path / "ds", (1, 1, 1))
305359
with wk.utils.get_executor_for_args(None) as executor:
306-
l_bio = ds.add_layer_from_images(
307-
str(unzip_path / filename),
308-
layer_name="bioformats_" + filename,
309-
compress=True,
310-
executor=executor,
311-
use_bioformats=True,
312-
**kwargs,
313-
)
314-
assert l_bio.dtype_per_channel == np.dtype(dtype)
315-
assert l_bio.num_channels == num_channels
316-
assert l_bio.bounding_box == wk.BoundingBox(topleft=(0, 0, 0), size=size)
360+
l_bio: Optional[wk.Layer]
361+
try:
362+
l_bio = ds.add_layer_from_images(
363+
path,
364+
layer_name="bioformats_" + layer_name,
365+
compress=True,
366+
executor=executor,
367+
use_bioformats=True,
368+
**kwargs,
369+
)
370+
except Exception as e:
371+
print(e)
372+
l_bio = None
373+
else:
374+
assert l_bio.dtype_per_channel == np.dtype(dtype)
375+
assert l_bio.num_channels == num_channels
376+
assert l_bio.bounding_box == wk.BoundingBox(topleft=(0, 0, 0), size=size)
317377
l_normal = ds.add_layer_from_images(
318-
str(unzip_path / filename),
319-
layer_name="normal_" + filename,
378+
path,
379+
layer_name="normal_" + layer_name,
320380
compress=True,
321381
executor=executor,
382+
use_bioformats=False,
322383
**kwargs,
323384
)
324385
assert l_normal.dtype_per_channel == np.dtype(dtype)
325386
assert l_normal.num_channels == num_channels
326387
assert l_normal.bounding_box == wk.BoundingBox(topleft=(0, 0, 0), size=size)
327-
assert np.array_equal(
328-
l_bio.get_finest_mag().read(), l_normal.get_finest_mag().read()
329-
)
388+
if l_bio is not None:
389+
assert np.array_equal(
390+
l_bio.get_finest_mag().read(), l_normal.get_finest_mag().read()
391+
)
330392
return ds
331393

332394

webknossos/tests/dataset/test_dataset.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2525,7 +2525,7 @@ def test_aligned_downsampling(data_format: DataFormat, output_path: Path) -> Non
25252525
ds_path = copy_simple_dataset(data_format, output_path, "aligned_downsampling")
25262526
dataset = Dataset.open(ds_path)
25272527
input_layer = dataset.get_layer("color")
2528-
input_layer.downsample()
2528+
input_layer.downsample(coarsest_mag=Mag(2))
25292529
test_layer = dataset.add_layer(
25302530
layer_name="color_2",
25312531
category="color",
@@ -2539,20 +2539,17 @@ def test_aligned_downsampling(data_format: DataFormat, output_path: Path) -> Non
25392539
# assuming the layer has 3 channels:
25402540
data=(np.random.rand(3, 24, 24, 24) * 255).astype(np.uint8),
25412541
)
2542-
test_layer.downsample()
2542+
test_layer.downsample(coarsest_mag=Mag(2))
25432543

25442544
assert (ds_path / "color_2" / "1").exists()
25452545
assert (ds_path / "color_2" / "2").exists()
2546-
assert (ds_path / "color_2" / "4").exists()
25472546

25482547
if data_format == DataFormat.Zarr:
25492548
assert (ds_path / "color_2" / "1" / ".zarray").exists()
25502549
assert (ds_path / "color_2" / "2" / ".zarray").exists()
2551-
assert (ds_path / "color_2" / "4" / ".zarray").exists()
25522550
else:
25532551
assert (ds_path / "color_2" / "1" / "header.wkw").exists()
25542552
assert (ds_path / "color_2" / "2" / "header.wkw").exists()
2555-
assert (ds_path / "color_2" / "4" / "header.wkw").exists()
25562553

25572554
assure_exported_properties(dataset)
25582555

@@ -2565,9 +2562,8 @@ def test_guided_downsampling(data_format: DataFormat, output_path: Path) -> None
25652562
input_layer = input_dataset.get_layer("color")
25662563
# Adding additional mags to the input dataset for testing
25672564
input_layer.get_or_add_mag("2-2-1")
2568-
input_layer.get_or_add_mag("4-4-2")
25692565
input_layer.redownsample()
2570-
assert len(input_layer.mags) == 3
2566+
assert len(input_layer.mags) == 2
25712567
# Use the mag with the best resolution
25722568
finest_input_mag = input_layer.get_finest_mag()
25732569

@@ -2589,7 +2585,7 @@ def test_guided_downsampling(data_format: DataFormat, output_path: Path) -> None
25892585
# Downsampling the layer to the magnification used in the input dataset
25902586
output_layer.downsample(
25912587
from_mag=output_mag.mag,
2592-
coarsest_mag=Mag("8-8-4"),
2588+
coarsest_mag=Mag("4-4-2"),
25932589
align_with_other_layers=input_dataset,
25942590
)
25952591
for mag in input_layer.mags:
@@ -2598,18 +2594,15 @@ def test_guided_downsampling(data_format: DataFormat, output_path: Path) -> None
25982594
assert (output_ds_path / "color" / "1").exists()
25992595
assert (output_ds_path / "color" / "2-2-1").exists()
26002596
assert (output_ds_path / "color" / "4-4-2").exists()
2601-
assert (output_ds_path / "color" / "8-8-4").exists()
26022597

26032598
if data_format == DataFormat.Zarr:
26042599
assert (output_ds_path / "color" / "1" / ".zarray").exists()
26052600
assert (output_ds_path / "color" / "2-2-1" / ".zarray").exists()
26062601
assert (output_ds_path / "color" / "4-4-2" / ".zarray").exists()
2607-
assert (output_ds_path / "color" / "8-8-4" / ".zarray").exists()
26082602
else:
26092603
assert (output_ds_path / "color" / "1" / "header.wkw").exists()
26102604
assert (output_ds_path / "color" / "2-2-1" / "header.wkw").exists()
26112605
assert (output_ds_path / "color" / "4-4-2" / "header.wkw").exists()
2612-
assert (output_ds_path / "color" / "8-8-4" / "header.wkw").exists()
26132606

26142607
assure_exported_properties(input_dataset)
26152608

webknossos/tests/dataset/test_from_images.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ def test_compare_tifffile(tmp_path: Path) -> None:
1313
(1, 1, 1),
1414
compress=True,
1515
layer_category="segmentation",
16+
chunks_per_shard=(8, 8, 8),
1617
map_filepath_to_layer_name=wk.Dataset.ConversionLayerMapping.ENFORCE_SINGLE_LAYER,
1718
)
1819
assert len(ds.layers) == 1

webknossos/webknossos/dataset/_utils/pims_czi_reader.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -61,20 +61,20 @@ def __init__(self, path: PathLike, czi_channel: int = 0) -> None:
6161
# not propagating axes of length one
6262
continue
6363
self._init_axis(axis, length)
64-
czi_pixel_type = czi_file.get_channel_pixel_type(self.czi_channel)
65-
if czi_pixel_type.startswith("Bgra"):
64+
self._czi_pixel_type = czi_file.get_channel_pixel_type(self.czi_channel)
65+
if self._czi_pixel_type.startswith("Bgra"):
6666
self._init_axis("c", 4)
67-
elif czi_pixel_type.startswith("Bgr"):
67+
elif self._czi_pixel_type.startswith("Bgr"):
6868
self._init_axis("c", 3)
69-
elif czi_pixel_type.startswith("Gray"):
69+
elif self._czi_pixel_type.startswith("Gray"):
7070
self._init_axis("c", 1)
71-
elif czi_pixel_type == "Invalid":
71+
elif self._czi_pixel_type == "Invalid":
7272
raise ValueError(
7373
f"czi_channel {self.czi_channel} does not exist in {self.path}"
7474
)
7575
else:
7676
raise ValueError(
77-
f"Got unsupported czi pixel-type {czi_pixel_type} in {self.path}"
77+
f"Got unsupported czi pixel-type {self._czi_pixel_type} in {self.path}"
7878
)
7979

8080
self._register_get_frame(self.get_frame_2D, "yxc")
@@ -90,10 +90,7 @@ def available_czi_channels(self) -> List[int]:
9090

9191
@property # potential @cached_property for py3.8+
9292
def pixel_type(self) -> np.dtype:
93-
with self.czi_file() as czi_file:
94-
return np.dtype(
95-
PIXEL_TYPE_TO_DTYPE[czi_file.get_channel_pixel_type(self.czi_channel)]
96-
)
93+
return np.dtype(PIXEL_TYPE_TO_DTYPE[self._czi_pixel_type])
9794

9895
def get_frame_2D(self, **ind: int) -> np.ndarray:
9996
plane = {k.upper(): v for k, v in ind.items()}

0 commit comments

Comments
 (0)