Skip to content

Commit 3960f26

Browse files
authored
update zarrita, relaxes numpy requirement (#932)
* update zarrita, relaxes numpy requirement * fix endian_codec * fix codecs * update zarrita * fix zarr3 testdata * fixes in zarrita * new default shard_shapes for Dataset.from_images * fixes chunk_shape
1 parent b2d156c commit 3960f26

File tree

7 files changed

+133
-25
lines changed

7 files changed

+133
-25
lines changed

webknossos/poetry.lock

Lines changed: 64 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

webknossos/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ JPype1 = { version = "^1.3.0", optional = true }
6363
pims = { version = "^0.6.0", optional = true }
6464
tifffile = { version = ">=2021.11.2", optional = true }
6565
pylibCZIrw = { version = "3.4.0", source = "scm", optional = true }
66-
zarrita = "0.1.0a12"
66+
zarrita = "0.1.0a18"
6767

6868
[tool.poetry.extras]
6969
pims = ["pims"]
Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,32 @@
1-
{"shape": [3, 24, 24, 24], "data_type": "uint8", "chunk_grid": {"configuration": {"chunk_shape": [3, 32, 32, 32]}, "name": "regular"}, "chunk_key_encoding": {"configuration": {"separator": "/"}, "name": "default"}, "fill_value": 0, "attributes": {}, "codecs": [{"configuration": {"chunk_shape": [3, 16, 16, 16], "codecs": []}, "name": "sharding_indexed"}], "dimension_names": null, "zarr_format": 3, "node_type": "array"}
1+
{
2+
"shape": [3, 24, 24, 24],
3+
"data_type": "uint8",
4+
"chunk_grid": {
5+
"configuration": { "chunk_shape": [3, 32, 32, 32] },
6+
"name": "regular"
7+
},
8+
"chunk_key_encoding": {
9+
"configuration": { "separator": "/" },
10+
"name": "default"
11+
},
12+
"fill_value": 0,
13+
"attributes": {},
14+
"codecs": [
15+
{
16+
"configuration": {
17+
"chunk_shape": [3, 16, 16, 16],
18+
"codecs": [
19+
{ "name": "endian", "configuration": { "endian": "little" } }
20+
],
21+
"index_codecs": [
22+
{ "name": "endian", "configuration": { "endian": "little" } },
23+
{ "name": "crc32c" }
24+
]
25+
},
26+
"name": "sharding_indexed"
27+
}
28+
],
29+
"dimension_names": null,
30+
"zarr_format": 3,
31+
"node_type": "array"
32+
}

webknossos/tests/dataset/test_dataset.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,7 @@ def test_chunking_wk(data_format: DataFormat, output_path: Path) -> None:
859859
ds_path = prepare_dataset_path(data_format, output_path)
860860
ds = Dataset(ds_path, voxel_size=(2, 2, 1))
861861
chunk_shape, chunks_per_shard = default_chunk_config(data_format, 8)
862+
shard_shape = chunk_shape * chunks_per_shard
862863

863864
layer = ds.add_layer("color", COLOR_CATEGORY, data_format=data_format)
864865
mag = layer.add_mag(
@@ -874,7 +875,7 @@ def test_chunking_wk(data_format: DataFormat, output_path: Path) -> None:
874875
with get_executor_for_args(None) as executor:
875876
mag.for_each_chunk(
876877
chunk_job,
877-
chunk_shape=(64, 64, 64),
878+
chunk_shape=shard_shape,
878879
executor=executor,
879880
)
880881
assert np.array_equal(original_data + 50, mag.get_view().read()[0])
@@ -885,7 +886,7 @@ def test_chunking_wk(data_format: DataFormat, output_path: Path) -> None:
885886
# Test without executor
886887
mag.for_each_chunk(
887888
chunk_job,
888-
chunk_shape=(64, 64, 64),
889+
chunk_shape=shard_shape,
889890
)
890891
assert np.array_equal(original_data + 50, mag.get_view().read()[0])
891892

webknossos/webknossos/dataset/_array.py

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -485,16 +485,17 @@ def open(cls, path: Path) -> "ZarritaArray":
485485
Array.open_auto(store=path) # check that everything exists
486486
return cls(path)
487487
except Exception as exc:
488-
raise ArrayException(
489-
f"Could not open Zarr array at {path}. `.zarray` not found."
490-
) from exc
488+
raise ArrayException(f"Could not open Zarr array at {path}.") from exc
491489

492490
@staticmethod
493491
def _has_compression_codecs(codecs: List["zarrita.codecs.Codec"]) -> bool:
494-
from zarrita.codecs import BloscCodec, GzipCodec
492+
from zarrita.codecs import BloscCodec, GzipCodec, ZstdCodec
495493

496494
return any(
497-
isinstance(c, BloscCodec) or isinstance(c, GzipCodec) for c in codecs
495+
isinstance(c, BloscCodec)
496+
or isinstance(c, GzipCodec)
497+
or isinstance(c, ZstdCodec)
498+
for c in codecs
498499
)
499500

500501
@property
@@ -504,26 +505,30 @@ def info(self) -> ArrayInfo:
504505

505506
zarray = self._zarray
506507
if isinstance(zarray, Array):
507-
if len(zarray.codecs) == 1 and isinstance(zarray.codecs[0], ShardingCodec):
508-
sharding_codec = zarray.codecs[0]
508+
if len(zarray.codec_pipeline.codecs) == 1 and isinstance(
509+
zarray.codec_pipeline.codecs[0], ShardingCodec
510+
):
511+
sharding_codec = zarray.codec_pipeline.codecs[0]
512+
shard_shape = zarray.metadata.chunk_grid.configuration.chunk_shape
513+
chunk_shape = sharding_codec.configuration.chunk_shape
509514
return ArrayInfo(
510515
data_format=DataFormat.Zarr3,
511516
num_channels=zarray.metadata.shape[0],
512517
voxel_type=zarray.metadata.dtype,
513518
compression_mode=self._has_compression_codecs(
514-
sharding_codec.codecs
519+
sharding_codec.codec_pipeline.codecs
515520
),
516-
chunk_shape=Vec3Int(sharding_codec.configuration.chunk_shape[1:4]),
517-
chunks_per_shard=Vec3Int(
518-
zarray.metadata.chunk_grid.configuration.chunk_shape[1:4]
519-
)
520-
// Vec3Int(sharding_codec.configuration.chunk_shape[1:4]),
521+
chunk_shape=Vec3Int(chunk_shape[1:4]),
522+
chunks_per_shard=Vec3Int(shard_shape[1:4])
523+
// Vec3Int(chunk_shape[1:4]),
521524
)
522525
return ArrayInfo(
523526
data_format=DataFormat.Zarr3,
524527
num_channels=zarray.metadata.shape[0],
525528
voxel_type=zarray.metadata.dtype,
526-
compression_mode=self._has_compression_codecs(zarray.codecs),
529+
compression_mode=self._has_compression_codecs(
530+
zarray.codec_pipeline.codecs
531+
),
527532
chunk_shape=Vec3Int(
528533
zarray.metadata.chunk_grid.configuration.chunk_shape[1:4]
529534
)
@@ -560,10 +565,16 @@ def create(cls, path: Path, array_info: ArrayInfo) -> "ZarritaArray":
560565
+ array_info.chunk_shape.to_tuple(),
561566
codecs=[
562567
zarrita.codecs.transpose_codec("F"),
563-
zarrita.codecs.blosc_codec(),
568+
zarrita.codecs.endian_codec(),
569+
zarrita.codecs.blosc_codec(
570+
typesize=array_info.voxel_type.itemsize
571+
),
564572
]
565573
if array_info.compression_mode
566-
else [zarrita.codecs.transpose_codec("F")],
574+
else [
575+
zarrita.codecs.transpose_codec("F"),
576+
zarrita.codecs.endian_codec(),
577+
],
567578
)
568579
],
569580
)

webknossos/webknossos/dataset/dataset.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737

3838
from webknossos.dataset.defaults import (
3939
DEFAULT_CHUNK_SHAPE,
40+
DEFAULT_CHUNKS_PER_SHARD_FROM_IMAGES,
4041
DEFAULT_CHUNKS_PER_SHARD_ZARR,
4142
)
4243

@@ -1167,6 +1168,10 @@ def add_layer_from_images(
11671168
chunk_shape = DEFAULT_CHUNK_SHAPE.with_z(1)
11681169
if chunks_per_shard is None:
11691170
chunks_per_shard = DEFAULT_CHUNKS_PER_SHARD_ZARR.with_z(1)
1171+
1172+
if chunks_per_shard is None and layer.data_format == DataFormat.Zarr3:
1173+
chunks_per_shard = DEFAULT_CHUNKS_PER_SHARD_FROM_IMAGES
1174+
11701175
mag_view = layer.add_mag(
11711176
mag=mag,
11721177
chunk_shape=chunk_shape,

webknossos/webknossos/dataset/defaults.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
DEFAULT_CHUNK_SHAPE = Vec3Int.full(32)
88
DEFAULT_CHUNKS_PER_SHARD = Vec3Int.full(32)
99
DEFAULT_CHUNKS_PER_SHARD_ZARR = Vec3Int.full(1)
10+
DEFAULT_CHUNKS_PER_SHARD_FROM_IMAGES = Vec3Int(128, 128, 1)

0 commit comments

Comments
 (0)