Skip to content

Commit 8a550a8

Browse files
valentin-pinkauvalentin-pinkau
andauthored
Introduce transfer modes and implement move+symlink (#1384)
* enforce read_only on remote datasets * reset state to server state when change was tried. * add tests for changing properties on remote datasets. fix test.py * format * introduce transfer modes. * introduce transfer mode. add test for move+symlink * move transfer mode to own file * remove no longer used file * improve comment * fix test * add changelog --------- Co-authored-by: valentin-pinkau <[email protected]>
1 parent 35b039d commit 8a550a8

File tree

7 files changed

+116
-541
lines changed

7 files changed

+116
-541
lines changed

webknossos/Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
4141
- Updated the api version of the webknossos-api to 12. [#1371](https://github.com/scalableminds/webknossos-libs/pull/1371])
4242
- Allowing RemoteDataset to align mags, when down- or upsampling [#1382](https://github.com/scalableminds/webknossos-libs/pull/1382)
4343
- RemoteDatasets that use zarr streaming are no longer read-only. [#1383](https://github.com/scalableminds/webknossos-libs/pull/1383)
44+
- Introduced transfer modes COPY, MOVE_AND_SYMLINK and HTTP for Dataset.upload() [#1384](https://github.com/scalableminds/webknossos-libs/pull/1384)
4445

4546
### Fixed
4647
- Fixed test.py to parse the command line arguments correctly. [#1383](https://github.com/scalableminds/webknossos-libs/pull/1383)

webknossos/tests/dataset/test_dataset_download_upload_remote.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from upath import UPath
77

88
import webknossos as wk
9+
from webknossos import TransferMode
910

1011
SAMPLE_BBOX = wk.BoundingBox((3164, 3212, 1017), (10, 10, 10))
1112

@@ -100,8 +101,7 @@ def test_upload_dataset_with_symlinks(tmp_upath: UPath) -> None:
100101
sample_dataset = get_sample_dataset(tmp_upath)
101102
remote_ds = sample_dataset.upload(
102103
new_dataset_name="test_remote_symlink",
103-
upload_directly_to_common_storage=True,
104-
symlink_data_instead_of_copy=True,
104+
transfer_mode=TransferMode.MOVE_AND_SYMLINK,
105105
)
106106
assert np.array_equal(
107107
remote_ds.get_color_layers()[0].get_finest_mag().read(),
@@ -115,7 +115,7 @@ def test_upload_dataset_with_symlinks(tmp_upath: UPath) -> None:
115115
def test_upload_dataset_copy_to_paths(tmp_upath: UPath) -> None:
116116
sample_dataset = get_sample_dataset(tmp_upath)
117117
remote_ds = sample_dataset.upload(
118-
new_dataset_name="test_remote_copy", upload_directly_to_common_storage=True
118+
new_dataset_name="test_remote_copy", transfer_mode=TransferMode.COPY
119119
)
120120
assert np.array_equal(
121121
remote_ds.get_color_layers()[0].get_finest_mag().read(),
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import tempfile
2+
3+
import pytest
4+
from upath import UPath
5+
6+
from webknossos import TransferMode
7+
8+
9+
def test_transfer_mode_move_and_symlink() -> None:
10+
transfer_mode = TransferMode.MOVE_AND_SYMLINK
11+
12+
with tempfile.TemporaryDirectory() as tmp_dir:
13+
src = UPath(tmp_dir) / "src_test"
14+
dst = UPath(tmp_dir) / "dst_test"
15+
16+
src.mkdir(parents=True, exist_ok=True)
17+
(src / "a.txt").touch()
18+
19+
transfer_mode.transfer(src, dst)
20+
21+
assert (dst / "a.txt").exists()
22+
assert (src / "a.txt").exists()
23+
assert src.is_symlink()
24+
25+
# try to move again to same destination
26+
with pytest.raises(AssertionError):
27+
transfer_mode.transfer(src, dst)
28+
29+
# try to move again to different destination
30+
dst2 = UPath(tmp_dir) / "dst_test2"
31+
transfer_mode.transfer(src, dst2)
32+
33+
assert (dst / "a.txt").exists()
34+
assert not dst.is_symlink()
35+
assert (dst2 / "a.txt").exists()
36+
assert dst2.is_symlink()
37+
assert (src / "a.txt").exists()
38+
assert src.is_symlink()

webknossos/webknossos/dataset/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@
2525
from .remote_dataset import RemoteDataset
2626
from .remote_folder import RemoteFolder
2727
from .sampling_modes import SamplingModes
28+
from .transfer_mode import TransferMode

webknossos/webknossos/dataset/dataset.py

Lines changed: 27 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
from .remote_dataset import RemoteDataset
6060
from .remote_folder import RemoteFolder
6161
from .sampling_modes import SamplingModes
62+
from .transfer_mode import TransferMode
6263

6364
if TYPE_CHECKING:
6465
import pims
@@ -565,10 +566,10 @@ def publish_to_preliminary_dataset(
565566
self,
566567
dataset_id: str,
567568
path_prefix: str | None = None,
568-
symlink_data_instead_of_copy: bool = False,
569+
transfer_mode: TransferMode = TransferMode.COPY,
569570
) -> None:
570571
"""
571-
Copies or symlinks the data to paths returned by WEBKNOSSOS
572+
Copies or moves+symlinks the data to paths returned by WEBKNOSSOS
572573
The dataset needs to be in status "uploading".
573574
The dataset already exists in WEBKNOSSOS but has no dataset_properties.
574575
With the dataset_properties WEBKNOSSOS can reserve the paths.
@@ -579,16 +580,17 @@ def publish_to_preliminary_dataset(
579580
"""
580581
from ..client.context import _get_context
581582

583+
if transfer_mode == TransferMode.HTTP:
584+
raise ValueError("HTTP transfer mode is not supported for this method")
585+
582586
context = _get_context()
583587
response = context.api_client_with_auth.reserve_dataset_upload_to_paths_for_preliminary(
584588
dataset_id,
585589
ApiReserveDatasetUploadToPathsForPreliminaryParameters(
586590
self._properties, path_prefix
587591
),
588592
)
589-
self._copy_or_symlink_dataset_to_paths(
590-
response.data_source, symlink_data_instead_of_copy
591-
)
593+
self._copy_or_symlink_dataset_to_paths(response.data_source, transfer_mode)
592594

593595
context.api_client_with_auth.finish_dataset_upload_to_paths(dataset_id)
594596

@@ -599,10 +601,9 @@ def upload(
599601
folder_id: str | RemoteFolder | None = None,
600602
require_unique_name: bool = False,
601603
layers_to_link: Sequence[LayerToLink | RemoteLayer] | None = None,
602-
upload_directly_to_common_storage: bool = False,
604+
transfer_mode: TransferMode = TransferMode.HTTP,
603605
jobs: int | None = None,
604606
common_storage_path_prefix: str | None = None,
605-
symlink_data_instead_of_copy: bool = False,
606607
) -> "RemoteDataset":
607608
"""Upload this dataset to webknossos.
608609
@@ -615,11 +616,9 @@ def upload(
615616
folder_id: Optional ID of folder where dataset should be placed
616617
require_unique_name: Whether to make request fail in case a dataset with the name already exists
617618
layers_to_link: Optional list of LayerToLink to link already published layers to the dataset.
618-
upload_directly_to_common_storage: Set this to true when the client has access to the same storage system as the WEBKNOSSOS datastore (file system or cloud storage).
619619
jobs: Optional number of jobs to use for uploading the data.
620-
common_storage_path_prefix: Optional path prefix used when upload_directly_to_common_storage is true to select one of the available mount points for the dataset folder.
621-
symlink_data_instead_of_copy: Considered, when upload_directly_to_common_storage is True. Set this to true when the client has access to the same file system as the WEBKNOSSOS datastore.
622-
When set to true, the data symlinked to the new location.
620+
common_storage_path_prefix: Optional path prefix used when transfer_mode is either COPY or MOVE_AND_SYMLINK
621+
to select one of the available WEBKNOSSOS storages.
623622
Returns:
624623
RemoteDataset: Reference to the newly created remote dataset
625624
Note:
@@ -641,11 +640,6 @@ def upload(
641640
```
642641
"""
643642

644-
if symlink_data_instead_of_copy:
645-
assert upload_directly_to_common_storage, (
646-
"Cannot use symlinking with upload_directly_to_common_storage=False"
647-
)
648-
649643
from ..client.context import _get_context
650644

651645
new_dataset_name = new_dataset_name or self.name
@@ -662,7 +656,7 @@ def upload(
662656
]
663657
)
664658

665-
if upload_directly_to_common_storage:
659+
if transfer_mode in (TransferMode.COPY, TransferMode.MOVE_AND_SYMLINK):
666660
context = _get_context()
667661
response = context.api_client_with_auth.reserve_dataset_upload_to_paths(
668662
ApiReserveDatasetUplaodToPathsParameters(
@@ -683,12 +677,16 @@ def upload(
683677
new_dataset_id = response.new_dataset_id
684678
data_source = response.data_source
685679

686-
self._copy_or_symlink_dataset_to_paths(
687-
data_source, symlink_data_instead_of_copy
688-
)
680+
self._copy_or_symlink_dataset_to_paths(data_source, transfer_mode)
689681
# announce finished upload
690682
context.api_client_with_auth.finish_dataset_upload_to_paths(new_dataset_id)
691683
else:
684+
assert transfer_mode == TransferMode.HTTP, "Expected HTTP transfer mode"
685+
if common_storage_path_prefix is not None:
686+
logger.warning(
687+
"common_storage_path_prefix is not None, but uploading via HTTP."
688+
" Ignoring common_storage_path_prefix."
689+
)
692690
for layer in self.get_segmentation_layers():
693691
if not layer.attachments.is_empty:
694692
raise NotImplementedError(
@@ -704,31 +702,23 @@ def upload(
704702
return RemoteDataset.open(dataset_id=new_dataset_id)
705703

706704
def _copy_or_symlink_dataset_to_paths(
707-
self, data_source: DatasetProperties, symlink_data_instead_of_copy: bool
705+
self, data_source: DatasetProperties, transfer_mode: TransferMode
708706
) -> None:
709707
"""
710-
Iterates over the mags and attachments and copies or symlinks them to the target location.
708+
Iterates over the mags and attachments and copies or moves then symlinks them to the target location.
711709
"""
712-
# copy data
710+
assert transfer_mode in (TransferMode.COPY, TransferMode.MOVE_AND_SYMLINK), (
711+
f"transfer mode not supported. found {transfer_mode}"
712+
)
713+
714+
# transfer data
713715
for layer in data_source.data_layers:
714716
src_layer = self.layers[layer.name]
715717
for mag in layer.mags:
716718
src_mag = src_layer.mags[mag.mag]
717719
assert mag.path is not None, "mag.path must be set to copy/move data"
718720
dst_mag_path = enrich_path(mag.path)
719-
if symlink_data_instead_of_copy:
720-
assert is_fs_path(src_mag.path) and is_fs_path(dst_mag_path), (
721-
f"Either src_mag.path or mag.path are not pointing to a local file system {src_mag.path}, {mag.path}"
722-
)
723-
dst_mag_path.parent.mkdir(parents=True, exist_ok=True)
724-
dst_mag_path.unlink(missing_ok=True)
725-
dst_mag_path.symlink_to(src_mag.path)
726-
else:
727-
copytree(
728-
src_mag.path,
729-
dst_mag_path,
730-
progress_desc=f"copying mag {src_mag.path} to {mag.path}",
731-
)
721+
transfer_mode.transfer(src_mag.path, dst_mag_path)
732722
if isinstance(src_layer, SegmentationLayer):
733723
assert isinstance(layer, SegmentationLayerProperties), (
734724
"If src_layer is a SegmentationLayer, then layer must be a SegmentationLayerProperties"
@@ -738,20 +728,7 @@ def _copy_or_symlink_dataset_to_paths(
738728
src_layer.attachments, layer.attachments
739729
):
740730
dst_attachment_path = enrich_path(dst_attachment.path)
741-
if symlink_data_instead_of_copy:
742-
assert is_fs_path(src_attachment.path) and is_fs_path(
743-
dst_attachment_path
744-
), (
745-
f"Either src_attachment.path or dst_attachment.path are not pointing to a local file system {src_attachment.path}, {dst_attachment.path}"
746-
)
747-
dst_attachment_path.unlink()
748-
dst_attachment_path.symlink_to(src_attachment.path)
749-
else:
750-
copytree(
751-
src_attachment.path,
752-
enrich_path(dst_attachment.path),
753-
progress_desc=f"copying attachment {src_attachment.path} to {dst_attachment.path}",
754-
)
731+
transfer_mode.transfer(src_attachment.path, dst_attachment_path)
755732

756733
@staticmethod
757734
def get_remote_datasets(

0 commit comments

Comments
 (0)