Skip to content

Commit b46a2f2

Browse files
Add shallow_copy_dataset method (#437)
* add shallow_copy_dataset and test * add test for make_relative and fix make_relative bugs * fix typing * correct comments * fix lint * add_layer/mag_for_existing_files * use add_mag_for_existing_files * implement feedback * correct comments * add changelog Co-authored-by: Jonathan Striebel <[email protected]>
1 parent 568b5c8 commit b46a2f2

File tree

6 files changed

+208
-47
lines changed

6 files changed

+208
-47
lines changed

webknossos/Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ For upgrade instructions, please check the respective *Breaking Changes* section
1111

1212
### Breaking Changes
1313
### Added
14+
- The Dataset class now has a new method: `shallow_copy_dataset`. [#437](https://github.com/scalableminds/webknossos-libs/pull/437)
1415
### Changed
1516
- The `Vec3Int` constructor now asserts that its components are whole numbers also in numpy case. [#434](https://github.com/scalableminds/webknossos-libs/pull/434)
1617
- Updated scikit-image dependency to 0.18.3. [#435](https://github.com/scalableminds/webknossos-libs/pull/435)

webknossos/tests/test_dataset.py

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,9 +1232,10 @@ def test_add_symlink_mag(tmp_path: Path) -> None:
12321232
original_layer.add_mag(1).write(
12331233
data=(np.random.rand(10, 20, 30) * 255).astype(np.uint8)
12341234
)
1235-
original_layer.add_mag(2).write(
1236-
data=(np.random.rand(5, 10, 15) * 255).astype(np.uint8)
1237-
)
1235+
original_mag_2 = original_layer.add_mag(2)
1236+
original_mag_2.write(data=(np.random.rand(5, 10, 15) * 255).astype(np.uint8))
1237+
original_mag_4 = original_layer.add_mag(4)
1238+
original_mag_4.write(data=(np.random.rand(2, 5, 7) * 255).astype(np.uint8))
12381239

12391240
ds = Dataset.create(tmp_path / "link", scale=(1, 1, 1))
12401241
layer = ds.add_layer("color", LayerCategories.COLOR_TYPE, dtype_per_channel="uint8")
@@ -1245,10 +1246,11 @@ def test_add_symlink_mag(tmp_path: Path) -> None:
12451246
assert tuple(layer.bounding_box.topleft) == (6, 6, 6)
12461247
assert tuple(layer.bounding_box.size) == (10, 20, 30)
12471248

1248-
symlink_mag = layer.add_symlink_mag(tmp_path / "original" / "color" / "2")
1249+
symlink_mag_2 = layer.add_symlink_mag(original_mag_2)
1250+
symlink_mag_4 = layer.add_symlink_mag(original_mag_4.path)
12491251

12501252
assert (tmp_path / "link" / "color" / "1").exists()
1251-
assert len(layer._properties.wkw_resolutions) == 2
1253+
assert len(layer._properties.wkw_resolutions) == 3
12521254

12531255
assert tuple(layer.bounding_box.topleft) == (0, 0, 0)
12541256
assert tuple(layer.bounding_box.size) == (16, 26, 36)
@@ -1257,9 +1259,9 @@ def test_add_symlink_mag(tmp_path: Path) -> None:
12571259
# Note: The written data is fully inside the bounding box of the original data.
12581260
# This is important because the bounding box of the foreign layer would not be updated if we use the linked dataset to write outside of its original bounds.
12591261
write_data = (np.random.rand(5, 5, 5) * 255).astype(np.uint8)
1260-
symlink_mag.write(offset=(0, 0, 0), data=write_data)
1262+
symlink_mag_2.write(offset=(0, 0, 0), data=write_data)
12611263

1262-
assert np.array_equal(symlink_mag.read(size=(5, 5, 5))[0], write_data)
1264+
assert np.array_equal(symlink_mag_2.read(size=(5, 5, 5))[0], write_data)
12631265
assert np.array_equal(original_layer.get_mag(2).read(size=(5, 5, 5))[0], write_data)
12641266

12651267
assure_exported_properties(ds)
@@ -1275,7 +1277,8 @@ def test_add_copy_mag(tmp_path: Path) -> None:
12751277
data=(np.random.rand(10, 20, 30) * 255).astype(np.uint8)
12761278
)
12771279
original_data = (np.random.rand(5, 10, 15) * 255).astype(np.uint8)
1278-
original_layer.add_mag(2).write(data=original_data)
1280+
original_mag_2 = original_layer.add_mag(2)
1281+
original_mag_2.write(data=original_data)
12791282

12801283
ds = Dataset.create(tmp_path / "link", scale=(1, 1, 1))
12811284
layer = ds.add_layer("color", LayerCategories.COLOR_TYPE, dtype_per_channel="uint8")
@@ -1286,7 +1289,7 @@ def test_add_copy_mag(tmp_path: Path) -> None:
12861289
assert tuple(layer.bounding_box.topleft) == (6, 6, 6)
12871290
assert tuple(layer.bounding_box.size) == (10, 20, 30)
12881291

1289-
copy_mag = layer.add_copy_mag(tmp_path / "original" / "color" / "2")
1292+
copy_mag = layer.add_copy_mag(original_mag_2)
12901293

12911294
assert (tmp_path / "link" / "color" / "1").exists()
12921295
assert len(layer._properties.wkw_resolutions) == 2
@@ -1359,6 +1362,46 @@ def test_outdated_dtype_parameter() -> None:
13591362
)
13601363

13611364

1365+
@pytest.mark.parametrize("make_relative", [True, False])
1366+
def test_dataset_shallow_copy(make_relative: bool) -> None:
1367+
delete_dir(TESTOUTPUT_DIR / "original_dataset")
1368+
delete_dir(TESTOUTPUT_DIR / "copy_dataset")
1369+
ds = Dataset.create(TESTOUTPUT_DIR / "original_dataset", (1, 1, 1))
1370+
original_layer_1 = ds.add_layer(
1371+
"color", LayerCategories.COLOR_TYPE, dtype_per_layer=np.uint8, num_channels=1
1372+
)
1373+
original_layer_1.add_mag(1)
1374+
original_layer_1.add_mag("2-2-1")
1375+
original_layer_2 = ds.add_layer(
1376+
"segmentation",
1377+
LayerCategories.SEGMENTATION_TYPE,
1378+
dtype_per_layer=np.uint32,
1379+
largest_segment_id=0,
1380+
)
1381+
original_layer_2.add_mag(4)
1382+
mappings_path = original_layer_2.path / "mappings"
1383+
os.makedirs(mappings_path)
1384+
open(mappings_path / "agglomerate_view.hdf5", "w").close()
1385+
1386+
shallow_copy_of_ds = ds.shallow_copy_dataset(
1387+
TESTOUTPUT_DIR / "copy_dataset", make_relative=make_relative
1388+
)
1389+
shallow_copy_of_ds.get_layer("color").add_mag(Mag("4-4-1"))
1390+
assert (
1391+
len(Dataset(TESTOUTPUT_DIR / "original_dataset").get_layer("color").mags) == 2
1392+
), "Adding a new mag should not affect the original dataset"
1393+
assert (
1394+
len(Dataset(TESTOUTPUT_DIR / "copy_dataset").get_layer("color").mags) == 3
1395+
), "Expecting all mags from original dataset and new downsampled mag"
1396+
assert os.path.exists(
1397+
TESTOUTPUT_DIR
1398+
/ "copy_dataset"
1399+
/ "segmentation"
1400+
/ "mappings"
1401+
/ "agglomerate_view.hdf5"
1402+
), "Expecting mappings to exist in shallow copy"
1403+
1404+
13621405
def test_dataset_conversion() -> None:
13631406
origin_ds_path = TESTOUTPUT_DIR / "conversion" / "origin_wk"
13641407
converted_ds_path = TESTOUTPUT_DIR / "conversion" / "converted_wk"

webknossos/webknossos/dataset/dataset.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import wkw
1515

1616
from webknossos.geometry import BoundingBox, Vec3Int
17-
from webknossos.utils import get_executor_for_args
17+
from webknossos.utils import copy_directory_with_symlinks, get_executor_for_args
1818

1919
from .layer import (
2020
Layer,
@@ -348,6 +348,27 @@ def add_layer_like(self, other_layer: Layer, layer_name: str) -> Layer:
348348
self._export_as_json()
349349
return self._layers[layer_name]
350350

351+
def add_layer_for_existing_files(
352+
self, layer_name: str, category: str, **kwargs: Any
353+
) -> Layer:
354+
assert layer_name not in self.layers, f"Layer {layer_name} already exists!"
355+
mag_headers = list((self.path / layer_name).glob("*/header.wkw"))
356+
assert (
357+
len(mag_headers) != 0
358+
), f"Could not find any header.wkw files in {self.path / layer_name}, cannot add layer."
359+
with wkw.Dataset.open(str(mag_headers[0].parent)) as wkw_dataset:
360+
header = wkw_dataset.header
361+
layer = self.add_layer(
362+
layer_name,
363+
category=category,
364+
num_channels=header.num_channels,
365+
dtype_per_channel=header.voxel_type,
366+
**kwargs,
367+
)
368+
for mag_dir in layer.path.iterdir():
369+
layer.add_mag_for_existing_files(mag_dir.name)
370+
return layer
371+
351372
def get_segmentation_layer(self) -> SegmentationLayer:
352373
"""
353374
Returns the only segmentation layer.
@@ -535,6 +556,36 @@ def copy_dataset(
535556
new_ds._export_as_json()
536557
return new_ds
537558

559+
def shallow_copy_dataset(
560+
self,
561+
new_dataset_path: Path,
562+
name: Optional[str] = None,
563+
make_relative: bool = False,
564+
) -> "Dataset":
565+
"""
566+
Create a new dataset at the given path. Link all mags of all existing layers.
567+
In addition, link all other directories in all layer directories
568+
to make this method robust against additional files e.g. layer/mappings/agglomerate_view.hdf5.
569+
This method becomes useful when exposing a dataset to webknossos.
570+
"""
571+
new_dataset = Dataset.create(
572+
new_dataset_path, scale=self.scale, name=name or self.name
573+
)
574+
for layer_name, layer in self.layers.items():
575+
new_layer = new_dataset.add_layer_like(layer, layer_name)
576+
for mag_view in layer.mags.values():
577+
new_layer.add_symlink_mag(mag_view, make_relative)
578+
579+
# copy all other directories with a dir scan
580+
copy_directory_with_symlinks(
581+
layer.path,
582+
new_layer.path,
583+
ignore=[str(mag) for mag in layer.mags] + [PROPERTIES_FILE_NAME],
584+
make_relative=make_relative,
585+
)
586+
587+
return new_dataset
588+
538589
def _get_layer_by_category(self, category: str) -> Layer:
539590
assert (
540591
category == LayerCategories.COLOR_TYPE

webknossos/webknossos/dataset/layer.py

Lines changed: 77 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import copy
21
import logging
32
import math
43
import operator
@@ -310,6 +309,29 @@ def add_mag(
310309

311310
return self._mags[mag]
312311

312+
def add_mag_for_existing_files(
313+
self,
314+
mag: Union[int, str, list, tuple, np.ndarray, Mag],
315+
) -> MagView:
316+
"""
317+
Creates a new mag based on already existing files.
318+
319+
Raises an AssertionError if the specified `mag` already exists in this layer.
320+
"""
321+
mag = Mag(mag)
322+
assert (
323+
mag not in self.mags
324+
), f"Cannot add mag {mag} as it already exists for layer {self}"
325+
self._setup_mag(mag)
326+
mag_view = self._mags[mag]
327+
cube_length = mag_view.header.file_len * mag_view.header.block_len
328+
self._properties.wkw_resolutions.append(
329+
MagViewProperties(resolution=mag, cube_length=cube_length)
330+
)
331+
self.dataset._export_as_json()
332+
333+
return mag_view
334+
313335
def get_or_add_mag(
314336
self,
315337
mag: Union[int, str, list, tuple, np.ndarray, Mag],
@@ -372,71 +394,93 @@ def delete_mag(self, mag: Union[int, str, list, tuple, np.ndarray, Mag]) -> None
372394
rmtree(full_path)
373395

374396
def _add_foreign_mag(
375-
self, foreign_mag_path: Path, symlink: bool, make_relative: bool
397+
self,
398+
foreign_mag_view_or_path: Union[os.PathLike, str, MagView],
399+
symlink: bool,
400+
make_relative: bool,
401+
extend_layer_bounding_box: bool = True,
376402
) -> MagView:
377-
mag_name = foreign_mag_path.name
378-
mag = Mag(mag_name)
379-
operation = "symlink" if symlink else "copy"
380-
if mag in self.mags.keys():
381-
raise IndexError(
382-
f"Cannot {operation} {foreign_mag_path}. This dataset already has a mag called {mag_name}."
403+
"""
404+
The foreign mag is (shallow) copied and the existing mag is added to the datasource-properties.json.
405+
If extend_layer_bounding_box is true, the self.bounding_box will be extended
406+
by the bounding box of the layer the foreign mag belongs to.
407+
"""
408+
409+
if isinstance(foreign_mag_view_or_path, MagView):
410+
foreign_mag_view = foreign_mag_view_or_path
411+
else:
412+
# local import to prevent circular dependency
413+
from .dataset import Dataset
414+
415+
foreign_mag_view_path = Path(foreign_mag_view_or_path)
416+
foreign_mag_view = (
417+
Dataset(foreign_mag_view_path.parent.parent)
418+
.get_layer(foreign_mag_view_path.parent.name)
419+
.get_mag(foreign_mag_view_path.name)
383420
)
384421

422+
self._assert_mag_does_not_exist_yet(foreign_mag_view.mag)
423+
385424
foreign_normalized_mag_path = (
386-
Path(os.path.relpath(foreign_mag_path, self.dataset.path))
425+
Path(os.path.relpath(foreign_mag_view.path, self.path))
387426
if make_relative
388-
else foreign_mag_path
427+
else Path(os.path.abspath(foreign_mag_view.path))
389428
)
390429

391430
if symlink:
392431
os.symlink(
393432
foreign_normalized_mag_path,
394-
join(self.dataset.path, self.name, mag_name),
433+
join(self.dataset.path, self.name, str(foreign_mag_view.mag)),
395434
)
396435
else:
397436
shutil.copytree(
398437
foreign_normalized_mag_path,
399-
join(self.dataset.path, self.name, mag_name),
438+
join(self.dataset.path, self.name, str(foreign_mag_view.mag)),
400439
)
401440

402-
# copy the properties of the layer into the properties of this dataset
403-
from .dataset import Dataset # local import to prevent circular dependency
404-
405-
original_layer = Dataset(foreign_mag_path.parent.parent).get_layer(
406-
foreign_mag_path.parent.name
407-
)
408-
original_mag = original_layer.get_mag(foreign_mag_path.name)
409-
mag_properties = copy.deepcopy(original_mag._properties)
410-
411-
self.bounding_box = self.bounding_box.extended_by(original_layer.bounding_box)
412-
self._properties.wkw_resolutions += [mag_properties]
413-
self._setup_mag(mag)
441+
self.add_mag_for_existing_files(foreign_mag_view.mag)
442+
if extend_layer_bounding_box:
443+
self.bounding_box = self.bounding_box.extended_by(
444+
foreign_mag_view.layer.bounding_box
445+
)
414446
self.dataset._export_as_json()
415-
return self.mags[mag]
447+
448+
return self._mags[foreign_mag_view.mag]
416449

417450
def add_symlink_mag(
418-
self, foreign_mag_path: Union[str, Path], make_relative: bool = False
451+
self,
452+
foreign_mag_view_or_path: Union[os.PathLike, str, MagView],
453+
make_relative: bool = False,
454+
extend_layer_bounding_box: bool = True,
419455
) -> MagView:
420456
"""
421-
Creates a symlink to the data at `foreign_mag_path` which belongs to another dataset.
457+
Creates a symlink to the data at `foreign_mag_view_or_path` which belongs to another dataset.
422458
The relevant information from the `datasource-properties.json` of the other dataset is copied to this dataset.
423459
Note: If the other dataset modifies its bounding box afterwards, the change does not affect the properties of this dataset
424460
(or vice versa).
425461
If make_relative is True, the symlink is made relative to the current dataset path.
426462
"""
427-
foreign_mag_path = Path(os.path.abspath(foreign_mag_path))
428463
return self._add_foreign_mag(
429-
foreign_mag_path, symlink=True, make_relative=make_relative
464+
foreign_mag_view_or_path,
465+
symlink=True,
466+
make_relative=make_relative,
467+
extend_layer_bounding_box=extend_layer_bounding_box,
430468
)
431469

432-
def add_copy_mag(self, foreign_mag_path: Union[str, Path]) -> MagView:
470+
def add_copy_mag(
471+
self,
472+
foreign_mag_view_or_path: Union[os.PathLike, str, MagView],
473+
extend_layer_bounding_box: bool = True,
474+
) -> MagView:
433475
"""
434-
Copies the data at `foreign_mag_path` which belongs to another dataset to the current dataset.
476+
Copies the data at `foreign_mag_view_or_path` which belongs to another dataset to the current dataset.
435477
Additionally, the relevant information from the `datasource-properties.json` of the other dataset is copied too.
436478
"""
437-
foreign_mag_path = Path(os.path.abspath(foreign_mag_path))
438479
return self._add_foreign_mag(
439-
foreign_mag_path, symlink=False, make_relative=False
480+
foreign_mag_view_or_path,
481+
symlink=False,
482+
make_relative=False,
483+
extend_layer_bounding_box=extend_layer_bounding_box,
440484
)
441485

442486
def _create_dir_for_mag(

webknossos/webknossos/dataset/mag_view.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import os
33
import shutil
44
from argparse import Namespace
5-
from os.path import join
65
from pathlib import Path
76
from typing import TYPE_CHECKING, Generator, List, Optional, Tuple, Union, cast
87
from uuid import uuid4
@@ -94,14 +93,16 @@ def __init__(
9493
)
9594

9695
if create:
97-
wkw.Dataset.create(
98-
join(layer.dataset.path, layer.name, self.name), self.header
99-
)
96+
wkw.Dataset.create(str(self.path), self.header)
10097

10198
@property
10299
def layer(self) -> "Layer":
103100
return self._layer
104101

102+
@property
103+
def path(self) -> Path:
104+
return self._path
105+
105106
@property
106107
def _properties(self) -> MagViewProperties:
107108
return next(

0 commit comments

Comments
 (0)