Skip to content

Commit b2d156c

Browse files
authored
make largest_segment_id optional (#925)
* Remove assertions to make largest_segment_id optional. * Adapt tests to accept None for largest_segment_id. * Add update_largest_segment_id method to determine and save id. * Refactor and add test. * Implement requested changes. * Implement requested changes. * Update docstring.
1 parent 7a1acf1 commit b2d156c

File tree

6 files changed

+130
-31
lines changed

6 files changed

+130
-31
lines changed

webknossos/Changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
1515
### Breaking Changes
1616

1717
### Added
18+
- `View` has a `map_chunk` method now to run a function on each chunk and collect the results in a list.
1819

1920
### Changed
21+
- As WEBKNOSSOS does not require the largest segment id, it is no longer mandatory in the WEBKNOSSOS libs either. [#917](https://github.com/scalableminds/webknossos-libs/issues/917) The method `SegmentationLayer.refresh_largest_segment_id` was added to look up the highest value in the segmentation data and set `largest_segment_id` accordingly.
22+
- The `convert` command of the CLI now has a `--category` flag to select the `LayerCategoryType`.
2023

2124
### Fixed
2225

webknossos/tests/dataset/test_dataset.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -787,24 +787,16 @@ def test_open_dataset_without_num_channels_in_properties() -> None:
787787
assure_exported_properties(ds)
788788

789789

790-
def test_largest_segment_id_requirement() -> None:
790+
def test_no_largest_segment_id() -> None:
791791
ds_path = prepare_dataset_path(DataFormat.WKW, TESTOUTPUT_DIR)
792792
ds = Dataset(ds_path, voxel_size=(10, 10, 10))
793793

794-
with pytest.raises(AssertionError):
795-
ds.add_layer("segmentation", SEGMENTATION_CATEGORY)
796-
797-
largest_segment_id = 10
798-
ds.add_layer(
799-
"segmentation",
800-
SEGMENTATION_CATEGORY,
801-
largest_segment_id=largest_segment_id,
802-
).add_mag(Mag(1))
794+
ds.add_layer("segmentation", SEGMENTATION_CATEGORY).add_mag(Mag(1))
803795

804796
ds = Dataset.open(ds_path)
797+
805798
assert (
806-
cast(SegmentationLayer, ds.get_layer("segmentation")).largest_segment_id
807-
== largest_segment_id
799+
cast(SegmentationLayer, ds.get_layer("segmentation")).largest_segment_id is None
808800
)
809801

810802
assure_exported_properties(ds)
@@ -2211,6 +2203,26 @@ def test_get_largest_segment_id() -> None:
22112203
assure_exported_properties(ds)
22122204

22132205

2206+
def test_refresh_largest_segment_id() -> None:
2207+
ds_path = prepare_dataset_path(DataFormat.WKW, TESTOUTPUT_DIR)
2208+
ds = Dataset(ds_path, voxel_size=(1, 1, 1))
2209+
2210+
segmentation_layer = cast(
2211+
SegmentationLayer,
2212+
ds.add_layer("segmentation", SEGMENTATION_CATEGORY),
2213+
)
2214+
mag = segmentation_layer.add_mag(Mag(1))
2215+
2216+
assert segmentation_layer.largest_segment_id is None
2217+
2218+
write_data = (np.random.rand(10, 20, 30) * 255).astype(np.uint8)
2219+
mag.write(data=write_data)
2220+
2221+
segmentation_layer.refresh_largest_segment_id()
2222+
2223+
assert segmentation_layer.largest_segment_id == np.max(write_data, initial=0)
2224+
2225+
22142226
def test_get_or_add_layer_by_type() -> None:
22152227
ds_path = prepare_dataset_path(DataFormat.WKW, TESTOUTPUT_DIR)
22162228
ds = Dataset(ds_path, voxel_size=(1, 1, 1))

webknossos/tests/dataset/test_dataset_deprecated.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -520,25 +520,16 @@ def test_open_dataset_without_num_channels_in_properties() -> None:
520520
assure_exported_properties(ds)
521521

522522

523-
def test_largest_segment_id_requirement() -> None:
523+
def test_no_largest_segment_id() -> None:
524524
path = TESTOUTPUT_DIR / "largest_segment_id"
525525
rmtree(path)
526526
ds = Dataset(path, scale=(10, 10, 10))
527527

528-
with pytest.raises(AssertionError):
529-
ds.add_layer("segmentation", SEGMENTATION_CATEGORY)
530-
531-
largest_segment_id = 10
532-
ds.add_layer(
533-
"segmentation",
534-
SEGMENTATION_CATEGORY,
535-
largest_segment_id=largest_segment_id,
536-
).add_mag(Mag(1))
528+
ds.add_layer("segmentation", SEGMENTATION_CATEGORY).add_mag(Mag(1))
537529

538530
ds = Dataset.open(path)
539531
assert (
540-
cast(SegmentationLayer, ds.get_layer("segmentation")).largest_segment_id
541-
== largest_segment_id
532+
cast(SegmentationLayer, ds.get_layer("segmentation")).largest_segment_id is None
542533
)
543534

544535
assure_exported_properties(ds)

webknossos/webknossos/dataset/dataset.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -769,8 +769,6 @@ def add_layer(
769769
Creates a new layer called `layer_name` and adds it to the dataset.
770770
The dtype can either be specified per layer or per channel.
771771
If neither of them are specified, `uint8` per channel is used as default.
772-
When creating a "Segmentation Layer" (`category="segmentation"`),
773-
the parameter `largest_segment_id` also has to be specified.
774772
775773
Creates the folder `layer_name` in the directory of `self.path`.
776774
@@ -837,10 +835,6 @@ def add_layer(
837835
self._properties.data_layers += [layer_properties]
838836
self._layers[layer_name] = Layer(self, layer_properties)
839837
elif category == SEGMENTATION_CATEGORY:
840-
assert (
841-
"largest_segment_id" in kwargs
842-
), f"Failed to create segmentation layer {layer_name}: the parameter 'largest_segment_id' was not specified, which is necessary for segmentation layers."
843-
844838
segmentation_layer_properties: SegmentationLayerProperties = (
845839
SegmentationLayerProperties(
846840
**(

webknossos/webknossos/dataset/layer.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from upath import UPath
1515

1616
from webknossos.dataset.sampling_modes import SamplingModes
17-
from webknossos.dataset.view import _copy_job
17+
from webknossos.dataset.view import View, _copy_job
1818
from webknossos.geometry import BoundingBox, Mag, Vec3Int, Vec3IntLike
1919

2020
from ._array import ArrayException, BaseArray, DataFormat
@@ -1158,3 +1158,23 @@ def category(self) -> LayerCategoryType:
11581158

11591159
def _get_largest_segment_id_maybe(self) -> Optional[int]:
11601160
return self.largest_segment_id
1161+
1162+
def _get_largest_segment_id(self, view: View) -> int:
1163+
return np.max(view.read(), initial=0)
1164+
1165+
def refresh_largest_segment_id(
1166+
self, chunk_shape: Optional[Vec3Int] = None, executor: Optional[Executor] = None
1167+
) -> None:
1168+
"""Sets the largest segment id to the highest value in the data.
1169+
largest_segment_id is set to `None` if the data is empty."""
1170+
1171+
try:
1172+
chunk_results = self.get_finest_mag().map_chunk(
1173+
self._get_largest_segment_id,
1174+
chunk_shape=chunk_shape,
1175+
executor=executor,
1176+
progress_desc="Searching largest segment id",
1177+
)
1178+
self.largest_segment_id = max(chunk_results)
1179+
except ValueError:
1180+
self.largest_segment_id = None

webknossos/webknossos/dataset/view.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
Dict,
1010
Iterable,
1111
Iterator,
12+
List,
1213
Optional,
1314
Tuple,
1415
Type,
@@ -203,6 +204,26 @@ def write(
203204
This parameter used to be relative for `View` and absolute for `MagView`,
204205
and specified in the mag of the respective view.
205206
207+
Writing data to a segmentation layer manually does not automatically update the largest_segment_id. To set
208+
the largest segment id properly run the `refresh_largest_segment_id` method on your layer or set the
209+
`largest_segment_id` property manually.
210+
211+
Example:
212+
213+
```python
214+
ds = Dataset(DS_PATH, voxel_size=(1, 1, 1))
215+
216+
segmentation_layer = cast(
217+
SegmentationLayer,
218+
ds.add_layer("segmentation", SEGMENTATION_CATEGORY),
219+
)
220+
mag = segmentation_layer.add_mag(Mag(1))
221+
222+
mag.write(data=MY_NP_ARRAY)
223+
224+
segmentation_layer.refresh_largest_segment_id()
225+
```
226+
206227
Note that writing compressed data which is not aligned with the blocks on disk may result in
207228
diminished performance, as full blocks will automatically be read to pad the write actions.
208229
"""
@@ -809,6 +830,64 @@ def some_work(args: Tuple[View, int], some_parameter: int) -> None:
809830
executor.map_to_futures(func_per_chunk, job_args), progress_desc
810831
)
811832

833+
def map_chunk(
834+
self,
835+
func_per_chunk: Callable[["View"], Any],
836+
chunk_shape: Optional[Vec3IntLike] = None, # in Mag(1)
837+
executor: Optional[Executor] = None,
838+
progress_desc: Optional[str] = None,
839+
) -> List[Any]:
840+
"""
841+
The view is chunked into multiple sub-views of size `chunk_shape` (in Mag(1)),
842+
by default one chunk per file.
843+
Then, `func_per_chunk` is performed on each sub-view and the results are collected
844+
in a list.
845+
Additional parameters for `func_per_chunk` can be specified using `functools.partial`.
846+
The computation of each chunk has to be independent of each other.
847+
Therefore, the work can be parallelized with `executor`.
848+
849+
If the `View` is of type `MagView` only the bounding box from the properties is chunked.
850+
851+
Example:
852+
```python
853+
from webknossos.utils import named_partial
854+
855+
def some_work(view: View, some_parameter: int) -> None:
856+
# perform operations on the view
857+
...
858+
859+
# ...
860+
# let 'mag1' be a `MagView`
861+
func = named_partial(some_work, some_parameter=42)
862+
results = mag1.map_chunk(
863+
func,
864+
)
865+
```
866+
"""
867+
868+
if chunk_shape is None:
869+
chunk_shape = self._get_file_dimensions_mag1()
870+
else:
871+
chunk_shape = Vec3Int(chunk_shape)
872+
self._check_chunk_shape(chunk_shape, read_only=self.read_only)
873+
874+
job_args = []
875+
for chunk in self.bounding_box.chunk(chunk_shape, chunk_shape):
876+
chunk_view = self.get_view(
877+
absolute_offset=chunk.topleft,
878+
size=chunk.size,
879+
)
880+
job_args.append(chunk_view)
881+
882+
# execute the work for each chunk
883+
with get_executor_for_args(None, executor) as executor:
884+
results = wait_and_ensure_success(
885+
executor.map_to_futures(func_per_chunk, job_args),
886+
progress_desc=progress_desc,
887+
)
888+
889+
return results
890+
812891
def for_zipped_chunks(
813892
self,
814893
func_per_chunk: Callable[[Tuple["View", "View", int]], None],

0 commit comments

Comments
 (0)