Skip to content

Commit 600af1a

Browse files
authored
Copy refactorings (#1365)
* s3-specific copy * client_kwargs * lint * no custom put/get * optimize copy shape * no conditional write * changelog * better progress bar for check-equality * ci * ci * ci * coverage * recheck_cached=open * return attachments * return attachments in methods * changelog * fixes for attachments * downgrade pytest-cov * pytest-cov * merge * uv.lock * expose unallowed dtypes as constant * add mag arg to add_mag_as_ref * reset deps
1 parent 38ae905 commit 600af1a

File tree

13 files changed

+176
-128
lines changed

13 files changed

+176
-128
lines changed

webknossos/Changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
1313
[Commits](https://github.com/scalableminds/webknossos-libs/compare/v2.5.0...HEAD)
1414

1515
### Breaking Changes
16+
- The `endpoint_url` in `UPath` objects is now stored directly in the `storage_options` dict, instead of being stored in the `storage_options["client_kwargs"]` dict. [#1365](https://github.com/scalableminds/webknossos-libs/pull/1365)
1617
- Removed Docker image `scalableminds/webknossos-cli` build. It will not be released further. [#1376](https://github.com/scalableminds/webknossos-libs/pull/1376)
1718
- `Dataset.add_layer_as_ref(remote_layer.path)` does not work anymore. Please use `Dataset.add_layer_as_ref(remote_layer)` instead. [#1371](https://github.com/scalableminds/webknossos-libs/pull/1371])
1819
- Due to the refactoring, the imports of various classes have changed. Please update your code accordingly. [#1371](https://github.com/scalableminds/webknossos-libs/pull/1371])
@@ -28,6 +29,8 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
2829
- Dataset.add_layer_as_ref() now accepts RemoteLayer objects as well as Layer objects. [#1371](https://github.com/scalableminds/webknossos-libs/pull/1371])
2930

3031
### Changed
32+
- Only use fs-based copy for mags on local file systems and S3. Other protocols, e.g. memory and HTTP, have caveats that break fs-based copying. [#1365](https://github.com/scalableminds/webknossos-libs/pull/1365)
33+
- The `add_*` methods in `Attachments` now return the created attachment objects, similar to `add_layer` and `add_mag`. [#1365](https://github.com/scalableminds/webknossos-libs/pull/1365)
3134
- The returned dataset ID from dataset exploration is now used. [#1378](https://github.com/scalableminds/webknossos-libs/pull/1378)
3235
- Refactored the architecture, by introducing RemoteLayers, RemoteSegmentationLayers and their abstract base classes. [#1371](https://github.com/scalableminds/webknossos-libs/pull/1371])
3336
- Updated the api version of the webknossos-api to 12. [#1371](https://github.com/scalableminds/webknossos-libs/pull/1371])

webknossos/tests/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"s3://testoutput",
2323
key=MINIO_ROOT_USER,
2424
secret=MINIO_ROOT_PASSWORD,
25-
client_kwargs={"endpoint_url": f"http://localhost:{MINIO_PORT}"},
25+
endpoint_url=f"http://localhost:{MINIO_PORT}",
2626
)
2727

2828

webknossos/tests/dataset/test_attachments.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def test_attachments(tmp_upath: UPath) -> None:
4646
seg_layer.attachments.add_agglomerate(
4747
UPath(
4848
"s3://bucket/agglomerate.zarr",
49-
client_kwargs={"endpoint_url": "https://s3.eu-central-1.amazonaws.com"},
49+
endpoint_url="https://s3.eu-central-1.amazonaws.com",
5050
),
5151
name="identity",
5252
data_format=AttachmentDataFormat.Zarr3,
@@ -242,7 +242,7 @@ def test_remote_layer(tmp_upath: UPath) -> None:
242242

243243
mesh_path = UPath(
244244
"s3://bucket/meshfile.zarr",
245-
client_kwargs={"endpoint_url": "https://s3.eu-central-1.amazonaws.com"},
245+
endpoint_url="https://s3.eu-central-1.amazonaws.com",
246246
)
247247

248248
seg_layer.attachments.add_mesh(

webknossos/tests/dataset/test_dataset.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2010,6 +2010,40 @@ def test_add_mag_as_ref(data_format: DataFormat, output_path: UPath) -> None:
20102010
assert (ds_path / "color" / "4").exists()
20112011

20122012

2013+
@pytest.mark.parametrize("data_format,output_path", DATA_FORMATS_AND_OUTPUT_PATHS)
2014+
def test_add_mag_as_ref_with_mag(data_format: DataFormat, output_path: UPath) -> None:
2015+
ds_path = prepare_dataset_path(data_format, output_path, "original")
2016+
new_path = prepare_dataset_path(data_format, output_path, "with_ref")
2017+
2018+
original_ds = Dataset(ds_path, voxel_size=(1, 1, 1))
2019+
original_layer = original_ds.add_layer(
2020+
"color",
2021+
COLOR_CATEGORY,
2022+
dtype_per_channel="uint8",
2023+
bounding_box=BoundingBox((0, 0, 0), (10, 20, 30)),
2024+
)
2025+
original_layer.add_mag(1).write(
2026+
data=(np.random.rand(10, 20, 30) * 255).astype(np.uint8)
2027+
)
2028+
2029+
ds = Dataset(new_path, voxel_size=(1, 1, 1))
2030+
layer = ds.add_layer(
2031+
"color",
2032+
COLOR_CATEGORY,
2033+
dtype_per_channel="uint8",
2034+
bounding_box=BoundingBox((6, 6, 6), (10, 20, 30)),
2035+
)
2036+
layer.add_mag_as_ref(original_layer.get_mag(1), mag="2")
2037+
2038+
assert list(layer.mags.values())[0].mag == Mag("2")
2039+
assert list(layer.mags.values())[0]._properties.path == dump_path(
2040+
ds_path / "color" / "1", new_path
2041+
)
2042+
2043+
assure_exported_properties(ds)
2044+
assure_exported_properties(original_ds)
2045+
2046+
20132047
@pytest.mark.parametrize("data_format", [DataFormat.Zarr, DataFormat.Zarr3])
20142048
def test_remote_add_symlink_layer(data_format: DataFormat) -> None:
20152049
src_dataset_path = copy_simple_dataset(data_format, REMOTE_TESTOUTPUT_DIR)

webknossos/tests/test_utils.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -110,27 +110,27 @@ def test_dump_path(tmp_path: Path) -> None:
110110
dataset_path = tmp_path / "test_dataset"
111111
path = UPath(
112112
"s3://bucket/test.txt",
113-
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
113+
endpoint_url="https://s3.amazonaws.com",
114114
)
115115
assert dump_path(path, dataset_path) == "s3://s3.amazonaws.com/bucket/test.txt"
116116

117117
# s3 relative
118118
dataset_path = UPath(
119119
"s3://bucket/test_dataset",
120-
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
120+
endpoint_url="https://s3.amazonaws.com",
121121
)
122122
path = dataset_path / "test.txt"
123123
assert dump_path(path, dataset_path) == "./test.txt"
124124

125125
# s3 dataset path is a prefix
126126
dataset_path = UPath(
127127
"s3://bucket/test_dataset",
128-
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
128+
endpoint_url="https://s3.amazonaws.com",
129129
)
130130
path = (
131131
UPath(
132132
"s3://bucket/test_dataset_longer",
133-
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
133+
endpoint_url="https://s3.amazonaws.com",
134134
)
135135
/ "test.txt"
136136
)
@@ -142,7 +142,7 @@ def test_dump_path(tmp_path: Path) -> None:
142142
# s3 with ..
143143
dataset_path = UPath(
144144
"s3://bucket/test_dataset",
145-
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
145+
endpoint_url="https://s3.amazonaws.com",
146146
)
147147
path = dataset_path / ".." / "test_dataset2" / "test.txt"
148148
assert (
@@ -163,7 +163,7 @@ def test_dump_path(tmp_path: Path) -> None:
163163
dataset_path = (
164164
UPath(
165165
"s3://bucket/test_dataset",
166-
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
166+
endpoint_url="https://s3.amazonaws.com",
167167
)
168168
/ ".."
169169
/ "test_dataset2"
@@ -173,7 +173,7 @@ def test_dump_path(tmp_path: Path) -> None:
173173

174174
path = UPath(
175175
"s3://bucket/test_dataset/test.txt",
176-
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
176+
endpoint_url="https://s3.amazonaws.com",
177177
)
178178
assert (
179179
dump_path(path, dataset_path)
@@ -183,7 +183,7 @@ def test_dump_path(tmp_path: Path) -> None:
183183
path = (
184184
UPath(
185185
"s3://bucket/",
186-
client_kwargs={"endpoint_url": "https://s3.amazonaws.com"},
186+
endpoint_url="https://s3.amazonaws.com",
187187
)
188188
/ "test_dataset2"
189189
/ "test.txt"

webknossos/webknossos/cli/_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def parse_path(value: str) -> UPath:
193193
if value.startswith("s3://") and "S3_ENDPOINT_URL" in environ:
194194
return UPath(
195195
value,
196-
client_kwargs={"endpoint_url": environ["S3_ENDPOINT_URL"]},
196+
endpoint_url=environ["S3_ENDPOINT_URL"],
197197
)
198198

199199
return UPath(value)

webknossos/webknossos/cli/check_equality.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def main(
103103
f"The datasets {source} and {target} are equal \
104104
(with regard to the layers: {layer_names})"
105105
)
106-
except AssertionError as err:
106+
except RuntimeError as err:
107107
print(f"The datasets are not equal: {err}")
108108
exit(1)
109109

@@ -118,25 +118,32 @@ def compare_layers(
118118
layer_name = source_layer.name
119119
logger.info("Checking layer_name: %s", layer_name)
120120

121-
assert source_layer.bounding_box == target_layer.bounding_box, (
122-
f"The bounding boxes of '{layer_name}' layer of source and target \
121+
if source_layer.bounding_box != target_layer.bounding_box:
122+
raise RuntimeError(
123+
f"The bounding boxes of '{layer_name}' layer of source and target \
123124
are not equal: {source_layer.bounding_box} != {target_layer.bounding_box}"
124-
)
125+
)
125126

126127
source_mags = set(source_layer.mags.keys())
127128
target_mags = set(target_layer.mags.keys())
128129

129-
assert source_mags == target_mags, (
130-
f"The mags of '{layer_name}' layer of source and target are not equal: \
130+
if source_mags != target_mags:
131+
raise RuntimeError(
132+
f"The mags of '{layer_name}' layer of source and target are not equal: \
131133
{source_mags} != {target_mags}"
132-
)
134+
)
133135

134136
for mag in source_mags:
135137
source_mag = source_layer.mags[mag]
136138
target_mag = target_layer.mags[mag]
137139

138140
logger.info("Start verification of %s in mag %s", layer_name, mag)
139141
with get_executor_for_args(args=executor_args) as executor:
140-
assert source_mag.content_is_equal(target_mag, executor=executor), (
141-
f"The contents of {source_mag} and {target_mag} differ."
142-
)
142+
if not source_mag.content_is_equal(
143+
target_mag,
144+
executor=executor,
145+
progress_desc=f"Comparing {layer_name}/{mag}",
146+
):
147+
raise RuntimeError(
148+
f"The contents of {source_mag} and {target_mag} differ."
149+
)

webknossos/webknossos/dataset/dataset.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,25 @@
112112

113113
logger = logging.getLogger(__name__)
114114

115+
_ALLOWED_COLOR_LAYER_DTYPES = (
116+
"uint8",
117+
"uint16",
118+
"uint32",
119+
"int8",
120+
"int16",
121+
"int32",
122+
"float32",
123+
)
124+
_ALLOWED_SEGMENTATION_LAYER_DTYPES = (
125+
"uint8",
126+
"uint16",
127+
"uint32",
128+
"uint64",
129+
"int8",
130+
"int16",
131+
"int32",
132+
"int64",
133+
)
115134

116135
SAFE_LARGE_XY: int = 10_000_000_000 # 10 billion
117136

@@ -1082,36 +1101,17 @@ def add_layer(
10821101

10831102
# assert that the dtype_per_channel is supported by webknossos
10841103
if category == COLOR_CATEGORY:
1085-
color_dtypes = (
1086-
"uint8",
1087-
"uint16",
1088-
"uint32",
1089-
"int8",
1090-
"int16",
1091-
"int32",
1092-
"float32",
1093-
)
1094-
if dtype_per_channel.name not in color_dtypes:
1104+
if dtype_per_channel.name not in _ALLOWED_COLOR_LAYER_DTYPES:
10951105
raise ValueError(
10961106
f"Cannot add color layer with dtype {dtype_per_channel.name}. "
1097-
f"Supported dtypes are: {', '.join(color_dtypes)}."
1107+
f"Supported dtypes are: {', '.join(_ALLOWED_COLOR_LAYER_DTYPES)}."
10981108
"For an overview of supported dtypes, see https://docs.webknossos.org/webknossos/data/upload_ui.html",
10991109
)
11001110
else:
1101-
segmentation_dtypes = (
1102-
"uint8",
1103-
"uint16",
1104-
"uint32",
1105-
"uint64",
1106-
"int8",
1107-
"int16",
1108-
"int32",
1109-
"int64",
1110-
)
1111-
if dtype_per_channel.name not in segmentation_dtypes:
1111+
if dtype_per_channel.name not in _ALLOWED_SEGMENTATION_LAYER_DTYPES:
11121112
raise ValueError(
11131113
f"Cannot add segmentation layer with dtype {dtype_per_channel.name}. "
1114-
f"Supported dtypes are: {', '.join(segmentation_dtypes)}."
1114+
f"Supported dtypes are: {', '.join(_ALLOWED_SEGMENTATION_LAYER_DTYPES)}."
11151115
"For an overview of supported dtypes, see https://docs.webknossos.org/webknossos/data/upload_ui.html",
11161116
)
11171117

0 commit comments

Comments
 (0)