Skip to content

Commit 1426ec1

Browse files
authored
Fix parallel buffered slice writer bug (#922)
* Extend traceback to get exception of future. * Implement pad flag for webknossos cli convert subcommand. * Implement hotfix solution for fitting bbox without pad flag. * Update changelog. * Add update_bbox argument and propagate it from from_images to actual write of view. * Implement requested changes. * Minor changes to default values. * Run formatter. * Add filelock dependency and start to change to SoftFileLock implementation. * Implement filelock (upaths are not supported yet). * Adapted implementation of SoftFileLock. Still does not support filelock for s3 buckets. * Reverted changes with filelock and make some notes for further implementation ideas. * Add json_update_allowed bool. * Add paragraph in docs and implement requested changes.
1 parent de73bac commit 1426ec1

File tree

10 files changed

+45
-13
lines changed

10 files changed

+45
-13
lines changed

docs/src/webknossos-py/examples/dataset_usage.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,13 @@ which themselves can comprise multiple [magnifications represented via `MagView`
1414
webknossos/examples/dataset_usage.py
1515
--8<--
1616
```
17+
18+
## Parallel Access of WEBKNOSSOS Datasets
19+
20+
Please consider these restrictions when accessing a WEBKNOSSOS dataset in a multiprocessing context:
21+
22+
- When writing shards in parallel, `json_update_allowed` should be set to `False` to disable the automatic update of the bounding box metadata. Otherwise, race conditions may happen. The user is responsible for updating the bounding box manually.
23+
- When writing to chunks in shards, one chunk may only be written to by one actor at any time.
24+
- When writing to compressed shards, one shard may only be written to by one actor at any time.
25+
- For Zarr datasets, parallel write access to shards is not allowed at all.
26+
- Reading in parallel without concurrent writes is fine.

webknossos/Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
1919
### Changed
2020

2121
### Fixed
22+
- Fixed a bug where parallel access to the properties json led to a JsonDecodeError in the webknossos CLI [#919](https://github.com/scalableminds/webknossos-libs/issues/919)
2223

2324

2425
## [0.13.4](https://github.com/scalableminds/webknossos-libs/releases/tag/v0.13.4) - 2023-08-14

webknossos/webknossos/cli/convert.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def main(
103103
source,
104104
target,
105105
voxel_size,
106-
name=name,
106+
name,
107107
data_format=data_format,
108108
executor=executor,
109109
compress=compress,

webknossos/webknossos/cli/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
upsample,
1818
)
1919

20-
app = typer.Typer(no_args_is_help=True)
20+
app = typer.Typer(no_args_is_help=True, pretty_exceptions_short=False)
2121

2222
app.command("check-equality")(check_equality.main)
2323
app.command("compress")(compress.main)

webknossos/webknossos/dataset/_utils/buffered_slice_writer.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ def __init__(
3434
self,
3535
view: "View",
3636
offset: Optional[Vec3IntLike] = None,
37+
# json_update_allowed enables the update of the bounding box and rewriting of the properties json.
38+
# It should be False when parallel access is intended.
39+
json_update_allowed: bool = True,
3740
# buffer_size specifies, how many slices should be aggregated until they are flushed.
3841
buffer_size: int = 32,
3942
dimension: int = 2, # z
@@ -48,6 +51,7 @@ def __init__(
4851
self.buffer_size = buffer_size
4952
self.dtype = self.view.get_dtype()
5053
self.use_logging = use_logging
54+
self.json_update_allowed = json_update_allowed
5155
if offset is None and relative_offset is None and absolute_offset is None:
5256
relative_offset = Vec3Int.zeros()
5357
if offset is not None:
@@ -129,6 +133,7 @@ def _write_buffer(self) -> None:
129133
offset=buffer_start.add_or_none(self.offset),
130134
relative_offset=buffer_start_mag1.add_or_none(self.relative_offset),
131135
absolute_offset=buffer_start_mag1.add_or_none(self.absolute_offset),
136+
json_update_allowed=self.json_update_allowed,
132137
)
133138

134139
except Exception as exc:

webknossos/webknossos/dataset/_utils/pims_images.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,8 @@ def copy_to_view(
481481
) -> Tuple[Tuple[int, int], Optional[int]]:
482482
"""Copies the images according to the passed arguments to the given mag_view.
483483
args is expected to be the start and end of the z-range, meant for usage with an executor.
484+
copy_to_view returns an iterable of image shapes and largest segment ids. When using this
485+
method a manual update of the bounding box and the largest segment id might be necessary.
484486
"""
485487
z_start, z_end = args
486488
shapes = []
@@ -496,6 +498,9 @@ def copy_to_view(
496498
with mag_view.get_buffered_slice_writer(
497499
relative_offset=(0, 0, z_start * mag_view.mag.z),
498500
buffer_size=mag_view.info.chunk_shape.z,
501+
# copy_to_view is typically used in a multiprocessing-context. Therefore the
502+
# buffered slice writer should not update the json file to avoid race conditions.
503+
json_update_allowed=False,
499504
) as writer:
500505
for image_slice in images[z_start:z_end]:
501506
image_slice = np.array(image_slice)

webknossos/webknossos/dataset/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1249,7 +1249,7 @@ def add_layer_from_images(
12491249
if pims_images.expected_shape != actual_size:
12501250
warnings.warn(
12511251
"[WARNING] Some images are larger than expected, smaller slices are padded with zeros now. "
1252-
+ f"New size is {actual_size}, expected {pims_images.expected_shape}.",
1252+
+ f"New size is {actual_size}, expected {pims_images.expected_shape}."
12531253
)
12541254
if first_layer is None:
12551255
first_layer = layer

webknossos/webknossos/dataset/mag_view.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ def write(
145145
self,
146146
data: np.ndarray,
147147
offset: Optional[Vec3IntLike] = None, # deprecated, relative, in current mag
148+
json_update_allowed: bool = True,
148149
*,
149150
relative_offset: Optional[Vec3IntLike] = None, # in mag1
150151
absolute_offset: Optional[Vec3IntLike] = None, # in mag1
@@ -177,10 +178,14 @@ def write(
177178

178179
# Only update the layer's bbox if we are actually larger
179180
# than the mag-aligned, rounded up bbox (self.bounding_box):
180-
if not self.bounding_box.contains_bbox(mag1_bbox):
181+
if json_update_allowed and not self.bounding_box.contains_bbox(mag1_bbox):
181182
self.layer.bounding_box = self.layer.bounding_box.extended_by(mag1_bbox)
182183

183-
super().write(data, absolute_offset=mag1_bbox.topleft)
184+
super().write(
185+
data,
186+
absolute_offset=mag1_bbox.topleft,
187+
json_update_allowed=json_update_allowed,
188+
)
184189

185190
def read(
186191
self,

webknossos/webknossos/dataset/view.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ def write(
193193
self,
194194
data: np.ndarray,
195195
offset: Optional[Vec3IntLike] = None, # deprecated, relative, in current mag
196+
json_update_allowed: bool = True,
196197
*,
197198
relative_offset: Optional[Vec3IntLike] = None, # in mag1
198199
absolute_offset: Optional[Vec3IntLike] = None, # in mag1
@@ -264,9 +265,10 @@ def write(
264265
abs_mag1_offset=absolute_offset,
265266
current_mag_size=Vec3Int(data.shape[-3:]),
266267
)
267-
assert self.bounding_box.contains_bbox(
268-
mag1_bbox
269-
), f"The bounding box to write {mag1_bbox} is larger than the view's bounding box {self.bounding_box}"
268+
if json_update_allowed:
269+
assert self.bounding_box.contains_bbox(
270+
mag1_bbox
271+
), f"The bounding box to write {mag1_bbox} is larger than the view's bounding box {self.bounding_box}"
270272

271273
if len(data.shape) == 4 and data.shape[0] == 1:
272274
data = data[0] # remove channel dimension for single-channel data
@@ -654,6 +656,9 @@ def get_buffered_slice_writer(
654656
offset: Optional[Vec3IntLike] = None,
655657
buffer_size: int = 32,
656658
dimension: int = 2, # z
659+
# json_update_allowed enables the update of the bounding box and rewriting of the properties json.
660+
# It should be False when parallel access is intended.
661+
json_update_allowed: bool = True,
657662
*,
658663
relative_offset: Optional[Vec3IntLike] = None, # in mag1
659664
absolute_offset: Optional[Vec3IntLike] = None, # in mag1
@@ -695,6 +700,7 @@ def get_buffered_slice_writer(
695700
return BufferedSliceWriter(
696701
view=self,
697702
offset=offset,
703+
json_update_allowed=json_update_allowed,
698704
buffer_size=buffer_size,
699705
dimension=dimension,
700706
relative_offset=relative_offset,

webknossos/webknossos/utils.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -309,19 +309,19 @@ def __len__(self) -> int:
309309

310310
class NDArrayLike(Protocol):
311311
def __getitem__(self, selection: Tuple[slice, ...]) -> np.ndarray:
312-
pass
312+
...
313313

314314
def __setitem__(self, selection: Tuple[slice, ...], value: np.ndarray) -> None:
315-
pass
315+
...
316316

317317
@property
318318
def shape(self) -> Tuple[int, ...]:
319-
pass
319+
...
320320

321321
@property
322322
def ndim(self) -> int:
323-
pass
323+
...
324324

325325
@property
326326
def dtype(self) -> np.dtype:
327-
pass
327+
...

0 commit comments

Comments
 (0)