Skip to content

Commit 0d318d4

Browse files
authored
Add support for in-memory datasets (e.g. useful for testing) (#1310)
* Add support for in-memory datasets (e.g. useful for testing)
* Better error handling on dataset creation
* Format, write changelog
1 parent ba64a09 commit 0d318d4

File tree

5 files changed

+71
-18
lines changed

5 files changed

+71
-18
lines changed

webknossos/Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
1515
### Breaking Changes
1616

1717
### Added
18+
- Add support for in-memory zarr3 datasets (using the Tensorstore in-memory KvStore driver). [#1310](https://github.com/scalableminds/webknossos-libs/pull/1310)
1819

1920
### Changed
2021
- Added url parsing in webknossos download CLI command, to get webknossos url without environment variable. [#1299](https://github.com/scalableminds/webknossos-libs/pull/1299)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import numpy as np
2+
3+
import webknossos as wk
4+
from tests.utils import TestTemporaryDirectoryNonLocal
5+
6+
7+
def test_create_dataset_remote_storage() -> None:
    """Round-trip a zarr3 color layer through a dataset on a non-local path.

    A dataset is created below a temporary non-local directory, a 16x16x16
    block of ones is written to magnification 1, and the dataset is then
    re-opened from the same path to check shape, dtype and content of the
    data that is read back.
    """
    with TestTemporaryDirectoryNonLocal() as temp_dir:
        dataset_path = temp_dir / "ds"

        # Create the dataset and write a constant block into a zarr3 layer.
        created = wk.Dataset(dataset_path, voxel_size=(10, 10, 10), exist_ok=True)
        color_layer = created.add_layer(
            "color",
            wk.COLOR_CATEGORY,
            data_format="zarr3",
            bounding_box=wk.BoundingBox((0, 0, 0), (16, 16, 16)),
        )
        ones = np.ones((16, 16, 16), dtype="uint8")
        color_layer.add_mag(1).write(ones)

        # Re-open from the path so the data is read from storage again.
        reopened = wk.Dataset.open(dataset_path)
        reopened_mag = reopened.get_layer("color").get_mag(1)
        read_data = reopened_mag.read()

        # The read result carries a leading axis of size 1 in front of x, y, z.
        assert read_data.shape == (1, 16, 16, 16)
        assert read_data.dtype == np.uint8
        assert np.all(read_data == 1)

webknossos/tests/utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import uuid
2+
from collections.abc import Generator
3+
from contextlib import contextmanager
4+
5+
from upath import UPath
6+
7+
8+
@contextmanager
def TestTemporaryDirectoryNonLocal() -> Generator[UPath, None, None]:
    """Yield a fresh temporary directory as a UPath that is not on the local file system.

    Useful for testing functionality that has to work with non-local UPaths.
    Currently implemented with the ``memory://`` protocol, i.e. an in-memory
    file system whose contents do not outlive the current process.

    The directory is removed again when the context exits, including when
    the body raises, so that entries do not accumulate in the in-memory file
    system over the course of a test session.

    Yields:
        The newly created, uniquely named directory.
    """
    # A random UUID gives every invocation its own directory name.
    random_prefix = str(uuid.uuid4())
    temp_dir = UPath(f"memory:///{random_prefix}")
    temp_dir.mkdir(parents=True, exist_ok=True)
    try:
        yield temp_dir
    finally:
        # The body may already have removed the directory itself; only clean
        # up what is still there.
        # NOTE(review): this only clears the file system behind the UPath.
        # Data held by other in-memory backends (e.g. a tensorstore "memory"
        # kvstore) is presumably not affected - confirm if that matters.
        if temp_dir.exists():
            temp_dir.fs.rm(temp_dir.path, recursive=True)

webknossos/webknossos/dataset/_array.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
from ..utils import is_fs_path
2323
from .data_format import DataFormat
2424

25+
# Module-wide tensorstore context; it is passed as `context=` to the
# tensorstore open/create calls visible in this module.
# NOTE(review): presumably required so that arrays using the "memory" kvstore
# driver see the same in-memory data across separate open calls - confirm
# against the tensorstore Context documentation.
TS_CONTEXT = tensorstore.Context()
26+
2527

2628
def _is_power_of_two(num: int) -> bool:
2729
return num & (num - 1) == 0
@@ -442,6 +444,14 @@ def _make_kvstore(path: Path) -> str | dict[str, str | list[str]]:
442444
else:
443445
kvstore_spec["aws_credentials"] = {"type": "default"}
444446
return kvstore_spec
447+
elif isinstance(path, UPath) and path.protocol == "memory":
448+
# use memory driver (in-memory file systems), e.g. useful for testing
449+
# attention: this is not a persistent storage and it does not support
450+
# multiprocessing since memory is not shared between processes
451+
return {
452+
"driver": "memory",
453+
"path": path.path,
454+
}
445455
else:
446456
return {
447457
"driver": "file",
@@ -470,6 +480,7 @@ def _open(cls, path: Path) -> Self:
470480
},
471481
open=True,
472482
create=False,
483+
context=TS_CONTEXT,
473484
).result() # check that everything exists
474485
return cls(path, _array)
475486
except Exception as exc:
@@ -532,7 +543,8 @@ def resize(self, new_bbox: NDBoundingBox) -> None:
532543
{
533544
"driver": str(self.data_format),
534545
"kvstore": self._make_kvstore(self._path),
535-
}
546+
},
547+
context=TS_CONTEXT,
536548
).result()
537549
if array.domain != current_array.domain:
538550
raise RuntimeError(
@@ -632,7 +644,8 @@ def _array(self) -> tensorstore.TensorStore:
632644
{
633645
"driver": str(self.data_format),
634646
"kvstore": self._make_kvstore(self._path),
635-
}
647+
},
648+
context=TS_CONTEXT,
636649
).result()
637650
except Exception as e:
638651
raise ArrayException(
@@ -780,7 +793,8 @@ def create(cls, path: Path, array_info: ArrayInfo) -> "Zarr3Array":
780793
],
781794
},
782795
"create": True,
783-
}
796+
},
797+
context=TS_CONTEXT,
784798
).result()
785799
return cls(path, _array)
786800

@@ -856,7 +870,8 @@ def create(cls, path: Path, array_info: ArrayInfo) -> "Zarr2Array":
856870
"dimension_separator": "/",
857871
},
858872
"create": True,
859-
}
873+
},
874+
context=TS_CONTEXT,
860875
).result()
861876
return cls(path, _array)
862877

webknossos/webknossos/dataset/dataset.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -352,20 +352,16 @@ def __init__(
352352
else:
353353
assert not read_only
354354

355-
dataset_path_exists = False
356-
dataset_path_is_empty = False
357-
try:
358-
dataset_path_is_empty = next(self.path.iterdir(), None) is None
359-
dataset_path_exists = True
360-
except NotADirectoryError:
361-
dataset_path_exists = True
362-
except FileNotFoundError:
363-
dataset_path_exists = False
364-
365-
if dataset_path_exists and not dataset_path_is_empty:
366-
raise RuntimeError(
367-
f"Creation of Dataset at {self.path} failed, because a file or folder already exists at this path."
368-
)
355+
if self.path.exists():
356+
if self.path.is_dir():
357+
if next(self.path.iterdir(), None) is not None:
358+
raise RuntimeError(
359+
f"Creation of Dataset at {self.path} failed, because a non-empty folder already exists at this path."
360+
)
361+
else:
362+
raise NotADirectoryError(
363+
f"Creation of Dataset at {self.path} failed, because the given path already exists but is not a directory."
364+
)
369365
# Create directories on disk and write datasource-properties.json
370366
try:
371367
self.path.mkdir(parents=True, exist_ok=True)

0 commit comments

Comments
 (0)