Skip to content

Commit d79488c

Browse files
MichaelBuessemeyerMichael Büßemeyerdaniel-wermarkbader
authored
Add option to add remote mags and remote layer to an existing dataset (#1187)
* WIP: Add method to add remote mag * Add first version working to add remote layers and mags to an existing dataset * WIP: make remote path support more consistent & fix linting and so on * format & fix linting * further improve add remote mag / layer * add tests for add remote mag / layer * rename is_remote to is_remote_mag for more consistency * maybe fix tests * remove debug print * fix some tests & use proper strip_trailing_slash on upaths and no custom implementation * fix formatting * don't write ome metadata for remote layers * rename remote to foreign in parts of this pr * fix ci and remove prints & format code * format * refactor code * apply feedback * change back to remote naming scheme of newly added methods * fix layer.path property method * remove unused `is_remote_dataset` property from dataset * remove commented out tests from test_dataset_add_remote_mag_and_layer.py which behave inconsistently * define is_remote_path as opposite of is_fs_path * disable flaky test * re-add flaky add remote layer/mag tests * Update webknossos/tests/dataset/test_dataset_add_remote_mag_and_layer.py Co-authored-by: Mark Bader <[email protected]> * Update webknossos/tests/dataset/test_dataset_add_remote_mag_and_layer.py Co-authored-by: Mark Bader <[email protected]> --------- Co-authored-by: Michael Büßemeyer <[email protected]> Co-authored-by: Michael Büßemeyer <[email protected]> Co-authored-by: Daniel <[email protected]> Co-authored-by: Mark Bader <[email protected]>
1 parent 832cfc6 commit d79488c

File tree

9 files changed

+5341
-18
lines changed

9 files changed

+5341
-18
lines changed

webknossos/tests/dataset/cassettes/test_dataset_add_remote_mag_and_layer/test_add_remote_layer_from_object.yaml

Lines changed: 2858 additions & 0 deletions
Large diffs are not rendered by default.

webknossos/tests/dataset/cassettes/test_dataset_add_remote_mag_and_layer/test_add_remote_mags_from_mag_view.yaml

Lines changed: 2144 additions & 0 deletions
Large diffs are not rendered by default.

webknossos/tests/dataset/test_dataset.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,9 @@ def test_create_dataset_with_layer_and_mag(
227227
assure_exported_properties(ds)
228228

229229

230-
@pytest.mark.skip("This test fails currently, maybe due to the issue with vcr-py.")
230+
@pytest.mark.skip(
231+
reason="The test is flaky as sometimes fetching the file https://ngff.openmicroscopy.org/0.4/schemas/image.schema does fail. Disable it for now."
232+
)
231233
@pytest.mark.parametrize("output_path", [TESTOUTPUT_DIR, REMOTE_TESTOUTPUT_DIR])
232234
def test_ome_ngff_metadata(output_path: Path) -> None:
233235
ds_path = prepare_dataset_path(DataFormat.Zarr, output_path)
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
from pathlib import Path
2+
from tempfile import TemporaryDirectory
3+
from typing import Iterator
4+
5+
import pytest
6+
from upath import UPath
7+
8+
import webknossos as wk
9+
from webknossos import Dataset, MagView
10+
from webknossos.utils import is_remote_path
11+
12+
pytestmark = [pytest.mark.with_vcr]
13+
14+
15+
@pytest.fixture(scope="module")
def sample_bbox() -> wk.BoundingBox:
    """Provide the fixed bounding box shared by the fixtures of this module."""
    topleft = (2807, 4352, 1794)
    size = (10, 10, 10)
    return wk.BoundingBox(topleft, size)
18+
19+
20+
@pytest.fixture(scope="module")
def sample_remote_dataset(sample_bbox: wk.BoundingBox) -> Iterator[wk.Dataset]:
    """Yield the sample dataset downloaded into a temporary directory.

    The download is restricted to ``sample_bbox``. The temporary directory
    is cleaned up when the fixture is torn down.
    """
    dataset_url = "https://webknossos.org/datasets/scalable_minds/l4_sample_dev"
    with TemporaryDirectory() as scratch_dir:
        target_path = Path(scratch_dir) / "ds"
        downloaded = wk.Dataset.download(
            dataset_url, path=target_path, bbox=sample_bbox
        )
        yield downloaded
25+
26+
27+
@pytest.fixture(scope="module")
def sample_remote_mags() -> list[wk.MagView]:
    """Return one ``MagView`` per remote mag URL of the sample dataset.

    The URLs cover three mags each of the ``color`` and ``segmentation``
    layers; every URL is converted with ``MagView._ensure_mag_view``.
    """
    mag_urls = [
        "https://data-humerus.webknossos.org/data/zarr/scalable_minds/l4_sample_dev/color/1/",
        "https://data-humerus.webknossos.org/data/zarr/scalable_minds/l4_sample_dev/color/2-2-1/",
        "https://data-humerus.webknossos.org/data/zarr/scalable_minds/l4_sample_dev/color/4-4-2/",
        "https://data-humerus.webknossos.org/data/zarr/scalable_minds/l4_sample_dev/segmentation/1/",
        "https://data-humerus.webknossos.org/data/zarr/scalable_minds/l4_sample_dev/segmentation/2-2-1/",
        "https://data-humerus.webknossos.org/data/zarr/scalable_minds/l4_sample_dev/segmentation/4-4-2/",
    ]
    return [MagView._ensure_mag_view(mag_url) for mag_url in mag_urls]
39+
40+
41+
@pytest.fixture(scope="module")
def sample_remote_layer() -> list[wk.Layer]:
    """Return all layers of the sample dataset opened via ``Dataset.open_remote``."""
    dataset_url = "https://webknossos.org/datasets/scalable_minds/l4_sample_dev"
    opened_dataset = Dataset.open_remote(dataset_url)
    return [*opened_dataset.layers.values()]
46+
47+
48+
def test_add_remote_mags_from_mag_view(
    sample_remote_mags: list[wk.MagView], sample_remote_dataset: wk.Dataset
) -> None:
    """Add each remote mag, passed as a ``MagView`` object, to a new layer.

    For every mag a dedicated layer is created in the downloaded dataset,
    the mag is attached with ``add_remote_mag``, and the path recorded for
    the added mag is compared with the path of the remote mag.
    """
    for source_mag in sample_remote_mags:
        mag_path = source_mag.path
        category = source_mag.layer.category
        assert is_remote_path(mag_path), "Remote mag does not have remote path."

        layer_name = f"test_remote_layer_{mag_path.parent.name}_{mag_path.name}_object"
        target_layer = sample_remote_dataset.add_layer(
            layer_name,
            category,
            data_format=source_mag.info.data_format,
            dtype_per_channel=source_mag.get_dtype(),
        )
        target_layer.add_remote_mag(source_mag)

        added_mag = sample_remote_dataset.layers[layer_name].mags[source_mag.mag]
        # The stored path may or may not carry the trailing slash of the mag
        # URL, so both variants are accepted.
        same_path = added_mag.path == mag_path
        assert (
            same_path or added_mag.path == mag_path.parent
        ), "Added remote mag's path does not match remote path of mag added."
68+
69+
70+
@pytest.mark.skip(
    reason="The test is flaky when trying to fetch the required datasource-properties.json from data-humerus.webknossos.org. Disable it for now."
)
def test_add_remote_mags_from_path(
    sample_remote_mags: list[wk.MagView],
    sample_remote_dataset: wk.Dataset,
) -> None:
    """Add each remote mag, passed as a path string, to a new layer.

    Same procedure as the ``MagView`` variant of this test, except that
    ``add_remote_mag`` receives ``str(remote_mag.path)`` instead of the
    object itself. Currently skipped (see the skip reason).
    """
    for source_mag in sample_remote_mags:
        mag_path = source_mag.path
        category = source_mag.layer.category
        assert is_remote_path(mag_path), "Remote mag does not have remote path."

        # The layer name is built from the last two components of the mag path.
        layer_name = f"test_remote_layer_{mag_path.parent.name}_{mag_path.name}_path"
        target_layer = sample_remote_dataset.add_layer(
            layer_name,
            category,
            data_format=source_mag.info.data_format,
            dtype_per_channel=source_mag.get_dtype(),
        )
        target_layer.add_remote_mag(str(source_mag.path))

        added_mag = sample_remote_dataset.layers[layer_name].mags[source_mag.mag]
        # The stored path may or may not carry the trailing slash of the mag
        # URL, so both variants are accepted.
        same_path = added_mag.path == mag_path
        assert (
            same_path or added_mag.path == mag_path.parent
        ), "Added remote mag's path does not match remote path of mag added."
95+
96+
97+
def test_add_remote_layer_from_object(
    sample_remote_layer: list[wk.Layer], sample_remote_dataset: wk.Dataset
) -> None:
    """Add each remote layer, passed as a ``Layer`` object, to the dataset.

    After ``add_remote_layer`` the newly registered layer must have a remote
    path whose URI equals the URI of the layer that was added.
    """
    for layer in sample_remote_layer:
        # The precondition concerns the layer (not a mag), so the failure
        # message names the layer.
        assert is_remote_path(layer.path), "Remote layer does not have remote path."
        layer_name = f"test_remote_layer_{layer.category}_object"
        sample_remote_dataset.add_remote_layer(layer, layer_name)
        new_layer = sample_remote_dataset.layers[layer_name]
        assert (
            is_remote_path(new_layer.path)
            and layer.path.as_uri() == new_layer.path.as_uri()
        ), "Added layer should have a remote path matching the remote layer added."
109+
110+
111+
@pytest.mark.skip(
    reason="The test is flaky when trying to fetch the required datasource-properties.json from data-humerus.webknossos.org. Disable it for now."
)
def test_add_remote_layer_from_path(
    sample_remote_layer: list[wk.Layer],
    sample_remote_dataset: wk.Dataset,
) -> None:
    """Add each remote layer, passed as a ``UPath``, to the dataset.

    After ``add_remote_layer`` the newly registered layer must have a remote
    path equal to the path of the layer that was added. Currently skipped
    (see the skip reason).
    """
    for layer in sample_remote_layer:
        # The precondition concerns the layer (not a mag), so the failure
        # message names the layer.
        assert is_remote_path(layer.path), "Remote layer does not have remote path."
        layer_name = f"test_remote_layer_{layer.category}_path"
        sample_remote_dataset.add_remote_layer(UPath(layer.path), layer_name)
        new_layer = sample_remote_dataset.layers[layer_name]
        assert (
            is_remote_path(new_layer.path) and new_layer.path == layer.path
        ), "Added layer should have a remote path matching the remote layer added."

webknossos/webknossos/dataset/dataset.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
get_executor_for_args,
7777
infer_metadata_type,
7878
is_fs_path,
79+
is_remote_path,
7980
named_partial,
8081
rmtree,
8182
strip_trailing_slash,
@@ -352,6 +353,15 @@ def __init__(
352353

353354
layer = self._initialize_layer_from_properties(layer_properties)
354355
self._layers[layer_properties.name] = layer
356+
if layer.is_remote_to_dataset:
357+
# The mags of remote layers need to have their path properly set.
358+
for mag in layer.mags:
359+
mag_prop = next(
360+
mag_prop
361+
for mag_prop in layer_properties.mags
362+
if mag_prop.mag == mag
363+
)
364+
mag_prop.path = str(layer.mags[mag].path)
355365

356366
if dataset_existed_already:
357367
if voxel_size_with_unit is None:
@@ -1690,6 +1700,49 @@ def add_symlink_layer(
16901700
self._export_as_json()
16911701
return self.layers[new_layer_name]
16921702

1703+
def add_remote_layer(
    self,
    foreign_layer: Union[str, UPath, Layer],
    new_layer_name: Optional[str] = None,
) -> Layer:
    """
    Adds a layer of another, remote dataset to this dataset.

    The layer's properties are deep-copied from the other dataset into this
    dataset's properties, and each copied mag entry gets the path of the
    corresponding mag of the foreign layer.
    Note: Because the properties are a copy, later changes in the other
    dataset (e.g. to its bounding box) do not affect these properties
    (or vice versa).

    Args:
        foreign_layer: The layer to add, or a str/UPath that
            `Layer._ensure_layer` can turn into a layer.
        new_layer_name: Name of the layer in this dataset. If None, the
            name of the foreign layer is used.

    Returns:
        The layer registered in this dataset under `new_layer_name`.

    Raises:
        IndexError: If this dataset already has a layer with that name.
        AssertionError: If the foreign layer belongs to this dataset or
            its path is not remote.
    """
    self._ensure_writable()
    foreign_layer = Layer._ensure_layer(foreign_layer)

    new_layer_name = (
        foreign_layer.name if new_layer_name is None else new_layer_name
    )

    if new_layer_name in self.layers:
        raise IndexError(
            f"Cannot add foreign layer {foreign_layer}. This dataset already has a layer called {new_layer_name}."
        )
    assert (
        foreign_layer.dataset.path != self.path
    ), "Cannot add layer with the same origin dataset as foreign layer"

    foreign_layer_path = foreign_layer.path
    assert is_remote_path(
        foreign_layer_path
    ), f"Cannot add foreign layer {foreign_layer_path} as it is not remote. Try using dataset.add_copy_layer instead."

    # Work on a copy so the foreign dataset's properties stay untouched.
    copied_properties = copy.deepcopy(foreign_layer._properties)
    copied_properties.name = new_layer_name
    for mag_properties in copied_properties.mags:
        source_mag = foreign_layer.mags[mag_properties.mag]
        mag_properties.path = str(source_mag.path)

    self._properties.data_layers += [copied_properties]
    added_layer = self._initialize_layer_from_properties(copied_properties)
    self._layers[new_layer_name] = added_layer

    self._export_as_json()
    return self.layers[new_layer_name]
16931746
def add_fs_copy_layer(
16941747
self,
16951748
foreign_layer: Union[str, Path, Layer],

0 commit comments

Comments
 (0)