Skip to content

Commit 456679a

Browse files
authored
Fix bug in dump_path (#1346)
* add test to repro dump_path bug * fixes bug in dump_path and prefix dataset names * changelog * clean * refactor * tests * more tests * rename resolve_if_fs to cheap_resolve
1 parent a13ddf1 commit 456679a

File tree

7 files changed

+195
-32
lines changed

7 files changed

+195
-32
lines changed

webknossos/Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
1919
### Changed
2020

2121
### Fixed
22+
- Fixed a bug where paths of references mags were wrong, if the target dataset's path was a prefix of the source dataset's path. [#1346](https://github.com/scalableminds/webknossos-libs/pull/1346)
2223

2324

2425
## [2.4.8](https://github.com/scalableminds/webknossos-libs/releases/tag/v2.4.8) - 2025-08-06

webknossos/tests/dataset/test_dataset.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1815,7 +1815,10 @@ def test_writing_subset_of_chunked_compressed_data(
18151815

18161816

18171817
@pytest.mark.parametrize("data_format,output_path", DATA_FORMATS_AND_OUTPUT_PATHS)
1818-
def test_add_layer_as_ref(data_format: DataFormat, output_path: UPath) -> None:
1818+
@pytest.mark.parametrize("as_object", [True, False])
1819+
def test_add_layer_as_ref(
1820+
data_format: DataFormat, output_path: UPath, as_object: bool
1821+
) -> None:
18191822
ds_path = copy_simple_dataset(data_format, output_path, "original")
18201823
new_path = prepare_dataset_path(data_format, output_path, "with_refs")
18211824

@@ -1832,10 +1835,17 @@ def test_add_layer_as_ref(data_format: DataFormat, output_path: UPath) -> None:
18321835

18331836
ds = Dataset(new_path, voxel_size=(1, 1, 1))
18341837
# add color layer
1835-
new_layer = ds.add_layer_as_ref(ds_path / "color")
1838+
new_layer = ds.add_layer_as_ref(
1839+
original_ds.get_layer("color") if as_object else ds_path / "color"
1840+
)
18361841
mag = new_layer.get_mag("1")
18371842
# add segmentation layer
1838-
new_segmentation_layer = ds.add_layer_as_ref(ds_path / "segmentation")
1843+
new_segmentation_layer = ds.add_layer_as_ref(
1844+
original_ds.get_layer("segmentation")
1845+
if as_object
1846+
else ds_path / "segmentation",
1847+
new_layer_name="seg",
1848+
)
18391849

18401850
color_mag_path = original_mag.path.name
18411851
assert ds._properties.data_layers[0].mags[0].path == dump_path(
@@ -1847,6 +1857,8 @@ def test_add_layer_as_ref(data_format: DataFormat, output_path: UPath) -> None:
18471857
)
18481858
assert not (new_path / "segmentation" / "1").exists()
18491859
assert not (new_path / "segmentation").exists()
1860+
assert not (new_path / "seg" / "1").exists()
1861+
assert not (new_path / "seg").exists()
18501862

18511863
assert len(ds.layers) == 2
18521864
assert len(ds.get_layer("color").mags) == 1
@@ -1870,6 +1882,25 @@ def test_add_layer_as_ref(data_format: DataFormat, output_path: UPath) -> None:
18701882
assure_exported_properties(ds)
18711883

18721884

1885+
@pytest.mark.parametrize("output_path", OUTPUT_PATHS)
1886+
def test_add_layer_as_ref_prefix(output_path: UPath) -> None:
1887+
source = Dataset(output_path / "name_with_suffix", (1, 1, 1))
1888+
source.add_layer(
1889+
"consensus", SEGMENTATION_CATEGORY, dtype_per_channel="uint8"
1890+
).add_mag(1)
1891+
1892+
target = Dataset(output_path / "name", (1, 1, 1))
1893+
target.add_layer("raw", COLOR_CATEGORY, dtype_per_channel="uint8").add_mag(1)
1894+
1895+
glom = source.get_layer("consensus")
1896+
target.add_layer_as_ref(foreign_layer=glom, new_layer_name="glomeruli")
1897+
1898+
assert target._properties.data_layers[1].mags[0].path == dump_path(
1899+
source.get_layer("consensus").get_mag(1).path,
1900+
UPath.home() / "random", # an unrelated path
1901+
)
1902+
1903+
18731904
@pytest.mark.parametrize("data_format,output_path", DATA_FORMATS_AND_OUTPUT_PATHS)
18741905
def test_ref_layer_add_mag(data_format: DataFormat, output_path: UPath) -> None:
18751906
ds_path = copy_simple_dataset(data_format, output_path, "original")

webknossos/tests/test_utils.py

Lines changed: 122 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
from pathlib import Path
12
from unittest.mock import Mock, patch
23

34
import pytest
5+
from upath import UPath
46

5-
from webknossos.utils import call_with_retries
7+
from webknossos.utils import call_with_retries, dump_path
68

79

810
def test_call_with_retries_success() -> None:
@@ -68,3 +70,122 @@ def test_call_with_retries_direct_failure(mock_sleep: Mock) -> None:
6870

6971
assert mock_fn.call_count == 1 # Only called once, no retries
7072
assert mock_sleep.call_count == 0 # Sleep not called since it failed immediately
73+
74+
75+
def test_dump_path(tmp_path: Path) -> None:
76+
tmp_path = UPath(tmp_path.resolve())
77+
78+
# relative enclosed
79+
dataset_path = tmp_path / "test_dataset"
80+
path = dataset_path / "test.txt"
81+
assert dump_path(path, dataset_path) == "./test.txt"
82+
83+
# relative not enclosed
84+
dataset_path = tmp_path / "test_dataset"
85+
path = dataset_path / ".." / "test_dataset2" / "test.txt"
86+
assert (
87+
dump_path(path, dataset_path) == f"{tmp_path.as_posix()}/test_dataset2/test.txt"
88+
)
89+
90+
# dataset path with ..
91+
dataset_path = tmp_path / "test_dataset" / ".." / "test_dataset2"
92+
path = tmp_path / "test_dataset" / "test.txt"
93+
assert (
94+
dump_path(path, dataset_path) == f"{tmp_path.as_posix()}/test_dataset/test.txt"
95+
)
96+
97+
dataset_path = tmp_path / "test_dataset" / ".." / "test_dataset2"
98+
path = tmp_path / "test_dataset2" / "test.txt"
99+
assert dump_path(path, dataset_path) == "./test.txt"
100+
101+
# dataset path is a prefix
102+
dataset_path = tmp_path / "test_dataset"
103+
path = tmp_path / "test_dataset_longer" / "test.txt"
104+
assert (
105+
dump_path(path, dataset_path)
106+
== f"{tmp_path.as_posix()}/test_dataset_longer/test.txt"
107+
)
108+
109+
# s3 non relative
110+
dataset_path = tmp_path / "test_dataset"
111+
path = UPath(
112+
"s3://bucket/test.txt",
113+
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
114+
)
115+
assert dump_path(path, dataset_path) == "s3://s3.amazonaws.com/bucket/test.txt"
116+
117+
# s3 relative
118+
dataset_path = UPath(
119+
"s3://bucket/test_dataset",
120+
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
121+
)
122+
path = dataset_path / "test.txt"
123+
assert dump_path(path, dataset_path) == "./test.txt"
124+
125+
# s3 dataset path is a prefix
126+
dataset_path = UPath(
127+
"s3://bucket/test_dataset",
128+
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
129+
)
130+
path = (
131+
UPath(
132+
"s3://bucket/test_dataset_longer",
133+
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
134+
)
135+
/ "test.txt"
136+
)
137+
assert (
138+
dump_path(path, dataset_path)
139+
== "s3://s3.amazonaws.com/bucket/test_dataset_longer/test.txt"
140+
)
141+
142+
# s3 with ..
143+
dataset_path = UPath(
144+
"s3://bucket/test_dataset",
145+
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
146+
)
147+
path = dataset_path / ".." / "test_dataset2" / "test.txt"
148+
assert (
149+
dump_path(path, dataset_path)
150+
== "s3://s3.amazonaws.com/bucket/test_dataset2/test.txt"
151+
)
152+
153+
path = dataset_path / ".." / "test_dataset2" / "test.txt"
154+
assert (
155+
dump_path(path, dataset_path)
156+
== "s3://s3.amazonaws.com/bucket/test_dataset2/test.txt"
157+
)
158+
159+
path = dataset_path / ".." / "test_dataset" / "test.txt"
160+
assert dump_path(path, dataset_path) == "./test.txt"
161+
162+
# s3 dataset with ..
163+
dataset_path = (
164+
UPath(
165+
"s3://bucket/test_dataset",
166+
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
167+
)
168+
/ ".."
169+
/ "test_dataset2"
170+
)
171+
path = dataset_path / "test.txt"
172+
assert dump_path(path, dataset_path) == "./test.txt"
173+
174+
path = UPath(
175+
"s3://bucket/test_dataset/test.txt",
176+
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
177+
)
178+
assert (
179+
dump_path(path, dataset_path)
180+
== "s3://s3.amazonaws.com/bucket/test_dataset/test.txt"
181+
)
182+
183+
path = (
184+
UPath(
185+
"s3://bucket/",
186+
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
187+
)
188+
/ "test_dataset2"
189+
/ "test.txt"
190+
)
191+
assert dump_path(path, dataset_path) == "./test.txt"

webknossos/webknossos/dataset/attachments.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
from upath import UPath
1010

1111
from ..utils import (
12+
cheap_resolve,
1213
copytree,
1314
dump_path,
1415
enrich_path,
1516
is_fs_path,
16-
resolve_if_fs_path,
1717
snake_to_camel_case,
1818
warn_deprecated,
1919
)
@@ -307,7 +307,7 @@ def add_attachments(self, *other: Attachment) -> None:
307307

308308
def add_attachment_as_ref(self, attachment: Attachment) -> None:
309309
new_attachment = type(attachment).from_path_and_name(
310-
resolve_if_fs_path(attachment.path),
310+
cheap_resolve(attachment.path),
311311
attachment.name,
312312
data_format=attachment.data_format,
313313
dataset_path=self._layer.dataset.resolved_path,
@@ -320,7 +320,7 @@ def add_copy_attachments(self, *other: Attachment) -> None:
320320
self.add_attachment_as_copy(*other)
321321

322322
def add_attachment_as_copy(self, attachment: Attachment) -> None:
323-
new_path = resolve_if_fs_path(
323+
new_path = cheap_resolve(
324324
self._layer.path
325325
/ snake_to_camel_case(TYPE_MAPPING[type(attachment)])
326326
/ _maybe_add_suffix(attachment.name, attachment.data_format)
@@ -345,7 +345,7 @@ def add_symlink_attachments(
345345
stacklevel=2,
346346
)
347347
for attachment in other:
348-
new_path = resolve_if_fs_path(attachment.path)
348+
new_path = cheap_resolve(attachment.path)
349349
if is_fs_path(attachment.path):
350350
new_path = (
351351
self._layer.resolved_path

webknossos/webknossos/dataset/dataset.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,14 @@
6666
from ..client._upload_dataset import LayerToLink
6767

6868
from ..utils import (
69+
cheap_resolve,
6970
copytree,
7071
count_defined_values,
7172
dump_path,
7273
get_executor_for_args,
7374
infer_metadata_type,
7475
is_fs_path,
7576
named_partial,
76-
resolve_if_fs_path,
7777
rmtree,
7878
strip_trailing_slash,
7979
wait_and_ensure_success,
@@ -323,7 +323,7 @@ def __init__(
323323

324324
self._read_only = read_only
325325
self.path: UPath = path
326-
self._resolved_path: UPath = resolve_if_fs_path(path)
326+
self._resolved_path: UPath = cheap_resolve(path)
327327

328328
if count_defined_values((voxel_size, voxel_size_with_unit)) > 1:
329329
raise ValueError(
@@ -456,7 +456,7 @@ def open(cls, dataset_path: str | PathLike, read_only: bool = False) -> "Dataset
456456
dataset = cls.__new__(cls)
457457
dataset.path = dataset_path
458458
dataset._read_only = read_only
459-
dataset._resolved_path = resolve_if_fs_path(dataset_path)
459+
dataset._resolved_path = cheap_resolve(dataset_path)
460460
return dataset._init_from_properties(dataset_properties)
461461

462462
@property
@@ -1626,7 +1626,7 @@ def add_layer_for_existing_files(
16261626
except ValueError:
16271627
continue
16281628
# Mags are only writable if they are local to the dataset
1629-
resolved_mag_path = resolve_if_fs_path(mag_dir)
1629+
resolved_mag_path = cheap_resolve(mag_dir)
16301630
read_only = resolved_mag_path.parent != self.resolved_path / layer_name
16311631
layer._add_mag_for_existing_files(
16321632
mag_dir.name, mag_path=resolved_mag_path, read_only=read_only

webknossos/webknossos/dataset/layer.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,14 @@
4444
from .dataset import Dataset
4545

4646
from ..utils import (
47+
cheap_resolve,
4748
copytree,
4849
dump_path,
4950
enrich_path,
5051
get_executor_for_args,
5152
is_fs_path,
5253
movetree,
5354
named_partial,
54-
resolve_if_fs_path,
5555
rmtree,
5656
warn_deprecated,
5757
)
@@ -176,7 +176,7 @@ def _get_shard_shape(
176176

177177

178178
def _is_foreign_mag(dataset_path: UPath, layer_name: str, mag_path: UPath) -> bool:
179-
return dataset_path / layer_name != resolve_if_fs_path(mag_path).parent
179+
return dataset_path / layer_name != cheap_resolve(mag_path).parent
180180

181181

182182
def _find_mag_path(
@@ -191,9 +191,9 @@ def _find_mag_path(
191191
short_mag_file_path = layer_path / mag.to_layer_name()
192192
long_mag_file_path = layer_path / mag.to_long_layer_name()
193193
if short_mag_file_path.exists():
194-
return resolve_if_fs_path(short_mag_file_path)
194+
return cheap_resolve(short_mag_file_path)
195195
elif long_mag_file_path.exists():
196-
return resolve_if_fs_path(long_mag_file_path)
196+
return cheap_resolve(long_mag_file_path)
197197
else:
198198
raise FileNotFoundError(
199199
f"Could not find any valid mag `{mag}` in `{layer_path}`."
@@ -244,7 +244,7 @@ def __init__(
244244
properties.element_class, properties.num_channels
245245
)
246246
self._mags: dict[Mag, MagView] = {}
247-
resolved_path = resolve_if_fs_path(self.dataset.resolved_path / self.name)
247+
resolved_path = cheap_resolve(self.dataset.resolved_path / self.name)
248248
self._resolved_path: UPath = resolved_path
249249
self._read_only = read_only
250250

@@ -355,9 +355,7 @@ def name(self, layer_name: str) -> None:
355355
if self.path.exists():
356356
self.path.rename(self.dataset.path / layer_name)
357357
self._path = self.dataset.path / layer_name
358-
self._resolved_path = resolve_if_fs_path(
359-
self.dataset.resolved_path / layer_name
360-
)
358+
self._resolved_path = cheap_resolve(self.dataset.resolved_path / layer_name)
361359
del self.dataset.layers[self.name]
362360
self.dataset.layers[layer_name] = self
363361
self._properties.name = layer_name
@@ -1121,7 +1119,7 @@ def _create_dir_for_mag(self, mag: MagLike) -> UPath:
11211119
mag_name = Mag(mag).to_layer_name()
11221120
full_path = self.resolved_path / mag_name
11231121
full_path.mkdir(parents=True, exist_ok=True)
1124-
full_path = resolve_if_fs_path(full_path)
1122+
full_path = cheap_resolve(full_path)
11251123
return full_path
11261124

11271125
def _assert_mag_does_not_exist_yet(self, mag: MagLike) -> None:

0 commit comments

Comments
 (0)