Skip to content

Commit 14021b9

Browse files
normanrzjstriebel
andauthored
Provide direct access to an underlying Zarr array (#792)
* Provide direct access to an underlying Zarr array * fixes + tests + changelog * tests * Update webknossos/webknossos/dataset/mag_view.py Co-authored-by: Jonathan Striebel <[email protected]> * adds zarr+dask example * simpler dask sample * doc * use to_slices * test * exclude from vcr Co-authored-by: Jonathan Striebel <[email protected]>
1 parent 468eed8 commit 14021b9

File tree

9 files changed

+162
-2
lines changed

9 files changed

+162
-2
lines changed

docs/mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ nav:
7676
- webknossos-py/examples/upload_image_data.md
7777
- webknossos-py/examples/download_image_data.md
7878
- webknossos-py/examples/remote_datasets.md
79+
- webknossos-py/examples/zarr_and_dask.md
7980
- Annotation Examples:
8081
- webknossos-py/examples/apply_merger_mode.md
8182
- webknossos-py/examples/learned_segmenter.md
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Zarr and Dask interoperability
2+
3+
This example shows how to access the underlying [Zarr](https://zarr.dev) array of a [remote dataset](../../api/webknossos/dataset/dataset.md#RemoteDataset). Using the Zarr array allows you to use other libraries, such as [Dask](https://www.dask.org/), for parallel processing.
4+
5+
```python
6+
--8<--
7+
webknossos/examples/zarr_and_dask.py
8+
--8<--
9+
```

webknossos/Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ For upgrade instructions, please check the respective *Breaking Changes* section
1515
### Breaking Changes
1616

1717
### Added
18+
- Added direct access to an underlying Zarr array with the `MagView.get_zarr_array()` method. [#792](https://github.com/scalableminds/webknossos-libs/pull/792)
1819

1920
### Changed
2021

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import dask.array as da
2+
3+
import webknossos as wk
4+
5+
6+
def main() -> None:
    """Print the mean of the "color" layer of a remote dataset, computed with Dask.

    The finest magnification of the layer is accessed through its underlying
    Zarr array, wrapped in a Dask array, and reduced with ``mean()``.
    """
    # Remote datasets are read-only, but otherwise behave like normal datasets.
    remote_ds = wk.Dataset.open_remote(
        "https://webknossos.org/datasets/scalable_minds/l4_sample"
    )
    color_layer = remote_ds.get_layer("color")
    finest_mag = color_layer.get_finest_mag()

    # Wrap the Zarr array in Dask; nothing is computed until `.compute()` is called.
    lazy_volume = da.from_array(
        finest_mag.get_zarr_array(), chunks=(1, 256, 256, 256)
    )

    # Take index 0 along the first axis and restrict the remaining axes
    # to the layer's bounding box.
    selection = (0,) + color_layer.bounding_box.to_slices()
    dask_array = lazy_volume[selection]

    # NOTE: the name `mean_value` is read by the example test; keep it stable.
    mean_value = dask_array.mean().compute()
    print("Mean:", mean_value)
22+
23+
24+
if __name__ == "__main__":
25+
main()

webknossos/poetry.lock

Lines changed: 84 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

webknossos/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ all = ["pims","tifffile","imagecodecs","JPype1",]
7070
[tool.poetry.dev-dependencies]
7171
# autoflake
7272
black = "21.7b0"
73+
dask = "2022.02.0"
7374
hypothesis = "^6.35.0"
7475
icecream = "^2.1.1"
7576
inducoapi = "2.0.0"

webknossos/tests/test_dataset.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,26 @@ def test_view_write(data_format: DataFormat, output_path: Path) -> None:
421421
assert np.array_equal(data, write_data)
422422

423423

424+
@pytest.mark.parametrize("output_path", [TESTOUTPUT_DIR, REMOTE_TESTOUTPUT_DIR])
def test_direct_zarr_access(output_path: Path) -> None:
    """Check that the raw Zarr array and the MagView read/write API see the same data.

    Data written through one access path must be readable, unchanged,
    through the other one.
    """
    dataset = Dataset.open(copy_simple_dataset(DataFormat.Zarr, output_path))
    mag = dataset.get_layer("color").get_mag("1")

    # Same region as `[:, 0:10, 0:10, 0:10]`, spelled out once for both directions.
    region = (slice(None), slice(0, 10), slice(0, 10), slice(0, 10))

    # Fixed seed so the random payloads are reproducible between runs.
    np.random.seed(1234)

    # Direction 1: write through Zarr, read back through webknossos.
    expected = (np.random.rand(3, 10, 10, 10) * 255).astype(np.uint8)
    mag.get_zarr_array()[region] = expected
    actual = mag.read(absolute_offset=(0, 0, 0), size=(10, 10, 10))
    assert np.array_equal(actual, expected)

    # Direction 2: write through webknossos, read back through Zarr.
    expected = (np.random.rand(3, 10, 10, 10) * 255).astype(np.uint8)
    mag.write(expected, absolute_offset=(0, 0, 0))
    actual = mag.get_zarr_array()[region]
    assert np.array_equal(actual, expected)
442+
443+
424444
@pytest.mark.parametrize("data_format,output_path", DATA_FORMATS_AND_OUTPUT_PATHS)
425445
def test_view_write_out_of_bounds(data_format: DataFormat, output_path: Path) -> None:
426446
ds_path = copy_simple_dataset(

webknossos/tests/test_examples.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,3 +222,13 @@ def test_remote_datasets() -> None:
222222
assert ds.url == "http://localhost:9000/datasets/Organization_X/e2006_knossos"
223223
ds.tags = ["test"]
224224
assert ds in wk.Dataset.get_remote_datasets(tags=["test"]).values()
225+
226+
227+
@pytest.mark.block_network(allowed_hosts=[".*"])
@pytest.mark.vcr(ignore_hosts=["webknossos.org", "data-humerus.webknossos.org"])
def test_zarr_and_dask() -> None:
    """Run the Zarr/Dask example and sanity-check the mean value it computes.

    The markers allow network access to any host and exclude the
    webknossos.org hosts from VCR handling.
    """
    import examples.zarr_and_dask as example_module

    # The helper returns the requested variables of `main`; exactly one is expected.
    captured = exec_main_and_get_vars(example_module, "mean_value")
    (layer_mean,) = captured

    assert 124 < layer_mean < 125

webknossos/webknossos/dataset/mag_view.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@
77
from uuid import uuid4
88

99
import numpy as np
10+
import zarr
1011
from upath import UPath
1112

1213
from ..geometry import BoundingBox, Mag, Vec3Int, Vec3IntLike
1314
from ..utils import get_executor_for_args, is_fs_path, rmtree, wait_and_ensure_success
14-
from ._array import ArrayInfo, BaseArray
15+
from ._array import ArrayInfo, BaseArray, ZarrArray
1516
from .properties import MagViewProperties
1617

1718
if TYPE_CHECKING:
@@ -121,6 +122,15 @@ def path(self) -> Path:
121122
def name(self) -> str:
122123
return self._mag.to_layer_name()
123124

125+
def get_zarr_array(self) -> zarr.Array:
    """
    Return the Zarr array that backs this mag for direct access.

    Only available for Zarr-based datasets; raises a `ValueError` if the
    underlying array wrapper is not a `ZarrArray`.
    """
    backing_array = self._array
    if isinstance(backing_array, ZarrArray):
        return backing_array._zarray
    raise ValueError("Cannot get the zarr array for wkw datasets.")
133+
124134
def write(
125135
self,
126136
data: np.ndarray,

0 commit comments

Comments
 (0)