Skip to content

Commit eb6810a

Browse files
Make Zarr3 default and purge deprecations (#1242)
* make Zarr3 default DataFormat * compress=True * remove deprecations * test fixes * down to 16 * changelog * test fixes * fix /test_dataset_add_remote_mag_and_layer.py * stuff * ci * ci * error on fork * ci * ci * less alignment checks * allow_unaligned * ci.yml aktualisieren * ci testing * ci * sequential tests * ci * ci * ci * ci * parameterize python for kubernetes dockerfile * test * change defaults * mirrored test images * mp logging * mp debugging * debug * debug * debug * debugging * pyproject.toml * py3.12 * debugging * wip * all python versions * revert debug changes in cluster_tools * fixes * larger ci runner * default ci runner * rm pytest-timeout * test * Revert "rm pytest-timeout" This reverts commit 6bc2185. * Revert "test" This reverts commit 8d57971. * ci * ci * ci * ci * ci * ci * properly implement SequentialExecutor * ci * changelog * allow_unaligned wip * ci * wip * fix tests * fix test * examples * longer sleep in slurm test * format * longer sleep in slurm test * Apply suggestions from code review Co-authored-by: Philipp Otto <[email protected]> * add methods * more robust patch * comment * derive_nd_bounding_box_from_shape * refactor Dataset.open to not require an additional IOop * cassettes * format * lint * docs * MagLike + mag in write_layer * doc * docstring * change default data_format in cli * docs * pr feedback --------- Co-authored-by: Philipp Otto <[email protected]>
1 parent 2285942 commit eb6810a

File tree

71 files changed

+17740
-8320
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+17740
-8320
lines changed

.github/workflows/ci.yml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ jobs:
157157
strategy:
158158
max-parallel: 4
159159
matrix:
160-
python-version: ["3.13", "3.12", "3.11", "3.10", "3.9"]
160+
python-version: ["3.12", "3.13", "3.11", "3.10", "3.9"]
161161
group: [1, 2, 3]
162162
fail-fast: false
163163
defaults:
@@ -193,11 +193,16 @@ jobs:
193193
if: ${{ matrix.group == 1 && matrix.python-version == '3.11' }}
194194
run: ./typecheck.sh
195195

196+
- name: Patch Python standard library to assert that fork is not allowed
197+
run: |
198+
sed -i '/def _launch/a\ \ \ \ \ \ \ \ raise Exception("fork is not allowed.")' /home/runner/.local/share/uv/python/cpython-${{ matrix.python-version }}.*-linux-x86_64-gnu/lib/python${{ matrix.python-version }}/multiprocessing/popen_fork.py
199+
cat /home/runner/.local/share/uv/python/cpython-${{ matrix.python-version }}.*-linux-x86_64-gnu/lib/python${{ matrix.python-version }}/multiprocessing/popen_fork.py
200+
196201
- name: Python tests
197202
timeout-minutes: 30
198203
env:
199-
WK_TOKEN: ${{ secrets.WK_TOKEN }}
200-
run: ./test.sh -vv -p no:faulthandler --splits 3 --group ${{ matrix.group }} --splitting-algorithm least_duration
204+
PYTHON_VERSION: ${{ matrix.python-version }}
205+
run: ./test.sh --splits 3 --group ${{ matrix.group }} --splitting-algorithm least_duration
201206

202207
- name: Check if git is dirty
203208
run: |

cluster_tools/cluster_tools/_utils/multiprocessing_logging_handler.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import functools
22
import logging
33
import multiprocessing
4+
import os
45
import sys
56
import threading
67
import traceback
@@ -33,7 +34,11 @@ def __init__(self, name: str, wrapped_handler: logging.Handler) -> None:
3334
self.setFormatter(self.wrapped_handler.formatter)
3435
self.filters = self.wrapped_handler.filters
3536

36-
self._manager = multiprocessing.Manager()
37+
# Make sure to use a multiprocessing context with
38+
# explicit start method to avoid unwanted forks
39+
self._manager = multiprocessing.get_context(
40+
os.environ.get("MULTIPROCESSING_DEFAULT_START_METHOD", "spawn")
41+
).Manager()
3742
self.queue = self._manager.Queue(-1)
3843
self._is_closed = False
3944
# Use thread to asynchronously receive messages from the queue
@@ -86,14 +91,15 @@ def decrement_usage(self) -> None:
8691
root_logger.addHandler(self.wrapped_handler)
8792

8893
self._is_closed = True
89-
self._queue_thread.join()
94+
self._queue_thread.join(30)
9095
self._manager.shutdown()
96+
self.wrapped_handler.close()
9197
super().close()
9298

9399
def close(self) -> None:
94100
if not self._is_closed:
95101
self._is_closed = True
96-
self._queue_thread.join()
102+
self._queue_thread.join(30)
97103
self._manager.shutdown()
98104
self.wrapped_handler.close()
99105
super().close()

cluster_tools/cluster_tools/executors/multiprocessing_.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,7 @@ def _execute_and_persist_function(
185185
def map_to_futures(
186186
self,
187187
fn: Callable[[_S], _T],
188-
args: Iterable[
189-
_S
190-
], # TODO change: allow more than one arg per call #noqa: FIX002 Line contains TODO
188+
args: Iterable[_S],
191189
output_pickle_path_getter: Optional[Callable[[_S], os.PathLike]] = None,
192190
) -> List[Future[_T]]:
193191
if output_pickle_path_getter is not None:
@@ -217,11 +215,6 @@ def forward_log(self, fut: Future[_T]) -> _T:
217215
return fut.result()
218216

219217
def shutdown(self, wait: bool = True, *, cancel_futures: bool = False) -> None:
220-
if cancel_futures:
221-
# cancel_futures was added in Python 3.9, ignoring it as 3.8 is supported:
222-
logging.warning(
223-
"The provided cancel_futures argument is ignored by MultiprocessingExecutor."
224-
)
225-
super().shutdown(wait=wait)
218+
super().shutdown(wait=wait, cancel_futures=cancel_futures)
226219
if self._mp_logging_handler_pool is not None:
227220
self._mp_logging_handler_pool.close()
Lines changed: 60 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,30 @@
1-
from concurrent.futures import Future
2-
from multiprocessing.context import BaseContext
1+
import warnings
2+
from collections.abc import Iterable, Iterator
3+
from concurrent.futures import Executor, Future, as_completed
4+
from os import PathLike
35
from pathlib import Path
4-
from typing import Any, Callable, Optional, Tuple, TypeVar, cast
6+
from typing import Any, Callable, Optional, TypeVar, cast
57

68
from typing_extensions import ParamSpec
79

810
from cluster_tools._utils.warning import enrich_future_with_uncaught_warning
911
from cluster_tools.executors.multiprocessing_ import CFutDict, MultiprocessingExecutor
1012

1113
_T = TypeVar("_T")
14+
_S = TypeVar("_S")
1215
_P = ParamSpec("_P")
1316

1417

15-
# Strictly speaking, this executor doesn't need to inherit from MultiprocessingExecutor
16-
# but could inherit from futures.Executor instead. However, this would require to duplicate
17-
# quite a few methods to adhere to the executor protocol (as_completed, map_to_futures, map, forward_log, shutdown).
18-
class SequentialExecutor(MultiprocessingExecutor):
18+
class SequentialExecutor(Executor):
1919
"""
2020
The same as MultiprocessingExecutor, but synchronous and uses only one core.
2121
"""
2222

2323
def __init__(
2424
self,
25-
*,
26-
start_method: Optional[str] = None,
27-
mp_context: Optional[BaseContext] = None,
28-
initializer: Optional[Callable] = None,
29-
initargs: Tuple = (),
3025
**__kwargs: Any,
3126
) -> None:
32-
super().__init__(
33-
max_workers=1,
34-
start_method=start_method,
35-
mp_context=mp_context,
36-
initializer=initializer,
37-
initargs=initargs,
38-
)
27+
pass
3928

4029
def submit( # type: ignore[override]
4130
self,
@@ -61,3 +50,55 @@ def submit( # type: ignore[override]
6150
fut.set_result(result)
6251
enrich_future_with_uncaught_warning(fut)
6352
return fut
53+
54+
@classmethod
55+
def as_completed(cls, futures: list[Future[_T]]) -> Iterator[Future[_T]]:
56+
return as_completed(futures)
57+
58+
def map_to_futures(
59+
self,
60+
fn: Callable[[_S], _T],
61+
args: Iterable[_S],
62+
output_pickle_path_getter: Optional[Callable[[_S], PathLike]] = None,
63+
) -> list[Future[_T]]:
64+
if output_pickle_path_getter is not None:
65+
futs = [
66+
self.submit( # type: ignore[call-arg]
67+
fn,
68+
arg,
69+
__cfut_options={
70+
"output_pickle_path": output_pickle_path_getter(arg)
71+
},
72+
)
73+
for arg in args
74+
]
75+
else:
76+
futs = [self.submit(fn, arg) for arg in args]
77+
78+
return futs
79+
80+
def map( # type: ignore[override]
81+
self,
82+
fn: Callable[[_S], _T],
83+
iterables: Iterable[_S],
84+
timeout: Optional[float] = None,
85+
chunksize: Optional[int] = None,
86+
) -> Iterator[_T]:
87+
if timeout is not None:
88+
warnings.warn(
89+
"timeout is not implemented for SequentialExecutor.map",
90+
category=UserWarning,
91+
)
92+
if chunksize is not None:
93+
warnings.warn(
94+
"chunksize is not implemented for SequentialExecutor.map",
95+
category=UserWarning,
96+
)
97+
for item in iterables:
98+
yield fn(item)
99+
100+
def forward_log(self, fut: Future[_T]) -> _T:
101+
return fut.result()
102+
103+
def shutdown(self, wait: bool = True, *, cancel_futures: bool = False) -> None:
104+
pass

cluster_tools/tests/test_all.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99

1010
import pytest
1111

12+
import cluster_tools.executors
13+
import cluster_tools.schedulers
14+
1215
if TYPE_CHECKING:
1316
from distributed import LocalCluster
1417

@@ -253,6 +256,9 @@ def test_unordered_sleep(exc: cluster_tools.Executor) -> None:
253256

254257
with exc:
255258
durations = [5, 0]
259+
# Slurm can be a bit slow to start up, so we need to increase the sleep time
260+
if isinstance(exc, cluster_tools.SlurmExecutor):
261+
durations = [20, 0]
256262
futures = [exc.submit(sleep, n) for n in durations]
257263
# For synchronous executors, the futures should be completed after submit returns.
258264
# .as_completed() would return them in reverse order in that case.

docs/src/cli/index.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@ The WEBKNOSSOS CLI offers many useful commands to work with WEBKNOSSOS datasets:
1111
- `webknossos convert-knossos`: Converts a KNOSSOS dataset to a WEBKNOSSOS dataset
1212
- `webknossos convert-raw`: Converts a RAW image file to a WEBKNOSSOS dataset
1313
- `webknossos convert-zarr`: Converts a Zarr dataset to a WEBKNOSSOS dataset
14-
- `webknossos download`: Download a dataset from a WEBKNOSSOS server as WKW format
14+
- `webknossos download`: Download a dataset from a WEBKNOSSOS server
1515
- `webknossos downsample`: Downsample a WEBKNOSSOS dataset
1616
- `webknossos merge-fallback`: Merge a volume layer of a WEBKNOSSOS dataset with an annotation
1717
- `webknossos upload`: Upload a local WEBKNOSSOS dataset to a remote location
1818
- `webknossos upsample`: Upsample a WEBKNOSSOS dataset
19+
- `webknossos export-as-tiff`: Export a part of a WEBKNOSSOS dataset as a TIFF sequence
1920

2021
## Supported input formats
2122

webknossos/Changelog.md

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,98 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
1313
[Commits](https://github.com/scalableminds/webknossos-libs/compare/v0.16.10...HEAD)
1414

1515
### Breaking Changes
16+
- Changed writing behavior. There is a new argument `allow_resize` for `MagView.write`, which defaults to `False`. If set to `True`, the bounding box of the underlying `Layer` will be resized to fit the to-be-written data. That largely mirrors the previous behavior. However, it is not safe for concurrent operations, so it is disabled by default. It is recommended to set the `Layer.bounding_box` to the desired size before writing. Additionally, by default, writes need to be aligned with the underlying shard grid to guard against concurrency issues and avoid performance footguns. There is a new argument `allow_unaligned`, which defaults to `False`. If set to `True`, the check for shard alignment is skipped.
17+
- Removed deprecated functions, properties and arguments:
18+
- Functions:
19+
- `open_annotation`, use `Annotation.load()` instead
20+
- `Dataset.get_color_layer`, use `Dataset.get_color_layers()` instead
21+
- `Dataset.get_segmentation_layer`, use `Dataset.get_segmentation_layers()` instead
22+
- `Dataset.create`, use `Dataset.__init__` instead
23+
- `Dataset.get_or_create`, use `Dataset.__init__` with `exist_ok=True` instead
24+
- `Layer.get_best_mag`, use `Layer.get_finest_mag` instead
25+
- `View.read_bbox`, use `read` with `relative_bounding_box` or `absolute_bounding_box` instead
26+
- `View.__enter__` and `View.__exit__`, context managers are not needed anymore
27+
- `open_nml`, use `Skeleton.load()` instead
28+
- `Group.add_graph`, use `Group.add_tree` instead
29+
- `Group.get_max_graph_id`, use `Group.get_max_tree_id` instead
30+
- `Group.flattened_graphs`, use `Group.flattened_trees` instead
31+
- `Group.get_graph_by_id`, use `Group.get_tree_by_id` instead
32+
- `Skeleton.from_path`, use `Skeleton.load()` instead
33+
- `Skeleton.write`, use `Skeleton.save()` instead
34+
- Properties:
35+
- `Annotation.username`, use `Annotation.owner_name` instead
36+
- `Annotation.scale`, use `Annotation.voxel_size` instead
37+
- `Annotation.user_id`, use `Annotation.owner_id` instead
38+
- `ArrayInfo.shard_size`, use `ArrayInfo.shard_shape` instead
39+
- `Dataset.scale`, use `Dataset.voxel_size` instead
40+
- `MagView.global_offset`, always `(0, 0, 0, ...)`
41+
- `MagView.size`, use `mag_view.bounding_box.in_mag(mag_view.mag).bottomright`
42+
- `MagViewProperties.resolution`, use `MagViewProperties.mag` instead
43+
- `LayerProperties.resolutions`, use `LayerProperties.mags` instead
44+
- `View.header`, use `View.info` instead
45+
- `View.global_offset`, use `view.bounding_box.in_mag(view.mag).topleft` instead
46+
- `View.size`, use `view.bounding_box.in_mag(view.mag).size` instead
47+
- `Group.graphs`, use `Group.trees`
48+
- `Skeleton.scale`, use `Skeleton.voxel_size` instead
49+
- Arguments:
50+
- `annotation_type` in `Annotation.download`, not needed anymore
51+
- `annotation_type` in `Annotation.open_as_remote_dataset`, not needed anymore
52+
- `size` in `BufferedSliceReader.__init__`, use `relative_bounding_box` or `absolute_bounding_box` instead
53+
- `offset` in `BufferedSliceReader.__init__`, use `relative_bounding_box` or `absolute_bounding_box` instead
54+
- `offset` in `BufferedSliceWriter.__init__`, use `relative_bounding_box` or `absolute_bounding_box` instead
55+
- `json_update_allowed` in `BufferedSliceWriter.__init__`, not supported anymore
56+
- `offset` in `BufferedSliceWriter.reset_offset`, use `relative_offset` or `absolute_offset` instead
57+
- `scale` in `Dataset.__init__`, use `voxel_size` or `voxel_size_with_unit` instead
58+
- `dtype` in `Dataset.add_layer`, use `dtype_per_channel` instead
59+
- `dtype` in `Dataset.get_or_add_layer`, use `dtype_per_channel` instead
60+
- `chunk_size` in `Dataset.add_layer_from_images`, use `chunk_shape` instead
61+
- `chunk_size` in `Dataset.copy_dataset`, use `chunk_shape` instead
62+
- `block_len` in `Dataset.copy_dataset`, use `chunk_shape` instead
63+
- `file_len` in `Dataset.copy_dataset`, use `chunks_per_shard` instead
64+
- `args` in `Dataset.copy_dataset`, use `executor` instead
65+
- `chunk_size` in `Layer.add_mag`, use `chunk_shape` instead
66+
- `block_len` in `Layer.add_mag`, use `chunk_shape` instead
67+
- `file_len` in `Layer.add_mag`, use `chunks_per_shard` instead
68+
- `chunk_size` in `Layer.get_or_add_mag`, use `chunk_shape` instead
69+
- `block_len` in `Layer.get_or_add_mag`, use `chunk_shape` instead
70+
- `file_len` in `Layer.get_or_add_mag`, use `chunks_per_shard` instead
71+
- `args` in `Layer.downsample`, use `executor` instead
72+
- `args` in `Layer.downsample_mag`, use `executor` instead
73+
- `args` in `Layer.redownsample`, use `executor` instead
74+
- `args` in `Layer.downsample_mag_list`, use `executor` instead
75+
- `args` in `Layer.downsample_mag_list`, use `executor` instead
76+
- `buffer_edge_len` in `Layer.upsample`, use `buffer_shape` instead
77+
- `args` in `Layer.upsample`, use `executor` instead
78+
- `min_mag` in `Layer.upsample`, use `finest_mag` instead
79+
- `offset` in `MagView.write`, use `relative_offset`, `absolute_offset`, `relative_bounding_box`, or `absolute_bounding_box` instead
80+
- `json_update_allowed` in `MagView.write`, use `allow_resize` instead
81+
- `args` in `MagView.compress`, use `executor` instead
82+
- `offset` in `View.write`, use `relative_offset`, `absolute_offset`, `relative_bounding_box`, or `absolute_bounding_box` instead
83+
- `json_update_allowed` in `View.write`, not supported anymore
84+
- `offset` in `View.read`, use `relative_offset`, `absolute_offset`, `relative_bounding_box`, or `absolute_bounding_box` instead
85+
- `offset` in `View.get_view`, use `relative_offset`, `absolute_offset`, `relative_bounding_box`, or `absolute_bounding_box` instead
86+
- `offset` in `View.get_buffered_slice_writer`, use `relative_offset`, `absolute_offset`, `relative_bounding_box`, or `absolute_bounding_box` instead
87+
- `offset` in `View.get_buffered_slice_reader`, use `relative_bounding_box`, or `absolute_bounding_box` instead
88+
- `size` in `View.get_buffered_slice_reader`, use `relative_bounding_box`, or `absolute_bounding_box` instead
89+
- `chunk_size` in `View.for_each_chunk`, use `chunk_shape` instead
90+
- `source_chunk_size` in `View.for_zipped_chunks`, use `source_chunk_shape` instead
91+
- `target_chunk_size` in `View.for_zipped_chunks`, use `target_chunk_shape` instead
92+
- `args` in `View.content_is_equal`, use `executor` instead
93+
- Classes:
94+
- `Graph`, use `Tree` instead
95+
- Changed defaults:
96+
- `exist_ok` in `Dataset.__init__` is now `False`
97+
- `compress` in `Dataset.from_images` is now `True`
98+
- `compress` in `Dataset.add_layer_from_images` is now `True`
99+
- `DEFAULT_DATA_FORMAT` is now `Zarr3`
100+
- `compress` in `Layer.add_mag` is now `True`
101+
- `compress` in `Layer.upsample` is now `True`
102+
- `buffer_size` in `View.get_buffered_slice_reader` is now computed from the shard shape
103+
- `buffer_size` in `View.get_buffered_slice_writer` is now computed from the shard shape
104+
- Added arguments:
105+
- `allow_resize` in `MagView.write` with default `False`
106+
- `allow_unaligned` in `MagView.write` with default `False`
107+
16108

17109
### Added
18110

webknossos/examples/apply_merger_mode.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def main() -> None:
5959
dtype_per_layer=in_layer.dtype_per_layer,
6060
largest_segment_id=in_layer.largest_segment_id,
6161
)
62-
out_mag1 = out_layer.add_mag("1", compress=True)
62+
out_mag1 = out_layer.add_mag("1")
6363
out_layer.bounding_box = in_layer.bounding_box
6464

6565
###################

webknossos/examples/convert_4d_tiff.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,12 @@ def main() -> None:
4343
# data.shape -> (1, 2, 5, 100, 400) # first value is the channel dimension
4444

4545
# Write some data to a given position
46-
mag_view.write(data, absolute_bounding_box=read_bbox.offset((2, 0, 0, 0)))
46+
mag_view.write(
47+
data,
48+
absolute_bounding_box=read_bbox.offset((2, 0, 0, 0)),
49+
allow_resize=True,
50+
allow_unaligned=True,
51+
)
4752

4853

4954
if __name__ == "__main__":

webknossos/examples/dataset_usage.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,11 @@ def main() -> None:
2222

2323
dataset = wk.Dataset("testoutput/my_new_dataset", voxel_size=(1, 1, 1))
2424
layer = dataset.add_layer(
25-
layer_name="color", category="color", dtype_per_channel="uint8", num_channels=3
25+
layer_name="color",
26+
category="color",
27+
dtype_per_channel="uint8",
28+
num_channels=3,
29+
bounding_box=wk.BoundingBox((10, 20, 30), (512, 512, 32)),
2630
)
2731
mag1 = layer.add_mag("1")
2832
mag2 = layer.add_mag("2")
@@ -37,11 +41,13 @@ def main() -> None:
3741
absolute_offset=(10, 20, 30),
3842
# assuming the layer has 3 channels:
3943
data=(np.random.rand(3, 512, 512, 32) * 255).astype(np.uint8),
44+
allow_unaligned=True,
4045
)
4146

4247
mag2.write(
4348
absolute_offset=(10, 20, 30),
4449
data=(np.random.rand(3, 256, 256, 16) * 255).astype(np.uint8),
50+
allow_unaligned=True,
4551
)
4652

4753
##########################

0 commit comments

Comments
 (0)