
Commit 572d1b1

Merge pull request fideus-labs#164 from junelsolis/zarr-v3-compression
Implement compression for OME-Zarr 0.5
2 parents f645f65 + 1681738 commit 572d1b1
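
For context, a minimal usage sketch of what this change enables: passing Zarr v3 compression codecs through to_ngff_zarr when writing OME-Zarr 0.5. The store path, codec settings, and the multiscales object below are illustrative assumptions, not taken from this commit, and assume ngff_zarr with zarr-python >= 3.

import zarr
from ngff_zarr import to_ngff_zarr

# multiscales is assumed to have been built elsewhere, e.g. with to_multiscales(image)
to_ngff_zarr(
    "image.ome.zarr",   # hypothetical output store
    multiscales,
    version="0.5",      # OME-Zarr 0.5 writes Zarr v3
    compressors=[zarr.codecs.BloscCodec(cname="zstd", clevel=5)],  # forwarded via **kwargs
)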

File tree: 3 files changed, +1322 additions, -1074 deletions

ngff_zarr/to_ngff_zarr.py

Lines changed: 49 additions & 42 deletions
@@ -135,7 +135,7 @@ def _write_with_tensorstore(
         },
     }
     if zarr_format == 2:
-        spec["driver"] = "zarr"
+        spec["driver"] = "zarr" if zarr_version_major < 3 else "zarr2"
         spec["metadata"]["chunks"] = chunks
         spec["metadata"]["dimension_separator"] = "/"
         spec["metadata"]["dtype"] = array.dtype.str
@@ -146,6 +146,12 @@ def _write_with_tensorstore(
             "configuration": {"chunk_shape": chunks},
         }
         spec["metadata"]["data_type"] = _numpy_to_zarr_dtype(array.dtype)
+        spec['metadata']["chunk_key_encoding"] = {
+            "name": "default",
+            "configuration": {
+                "separator": "/"
+            }
+        }
         if dimension_names:
             spec["metadata"]["dimension_names"] = dimension_names
         if internal_chunk_shape:
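
For orientation, a sketch of the kind of TensorStore spec this helper assembles for Zarr v3, assuming the tensorstore package and a local file kvstore; the path, shape, chunk shape, and dtype are made-up examples, not values from the commit.

import tensorstore as ts

spec = {
    "driver": "zarr3",
    "kvstore": {"driver": "file", "path": "image.ome.zarr/scale0/image"},
    "metadata": {
        "shape": [1, 256, 256],
        "data_type": "uint16",
        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": [1, 64, 64]}},
        # The new chunk_key_encoding entry requests "/"-separated chunk keys, as OME-Zarr expects.
        "chunk_key_encoding": {"name": "default", "configuration": {"separator": "/"}},
    },
}
dataset = ts.open(spec, create=True).result()  # create the array described by the spec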
@@ -287,7 +293,6 @@ def _configure_sharding(
     if chunks_per_shard is None:
         return {}, None, arr

-    sharding_kwargs = {}
     c0 = tuple([c[0] for c in arr.chunks])

     if isinstance(chunks_per_shard, int):
@@ -302,26 +307,15 @@ def _configure_sharding(
     else:
         raise ValueError("chunks_per_shard must be an int, tuple, or dict")

-    from zarr.codecs.sharding import ShardingCodec
-
-    if "codec" in kwargs:
-        nested_codec = kwargs.pop("codec")
-        sharding_codec = ShardingCodec(
-            chunk_shape=c0,
-            codec=nested_codec,
-        )
-    else:
-        sharding_codec = ShardingCodec(chunk_shape=c0)
-
-    if "codecs" in kwargs:
-        previous_codecs = kwargs.pop("codecs")
-        sharding_kwargs["codecs"] = previous_codecs + [sharding_codec]
-    else:
-        sharding_kwargs["codecs"] = [sharding_codec]
-
     internal_chunk_shape = c0
     arr = arr.rechunk(shards)

+    # Only include 'shards' and 'chunks' in sharding_kwargs
+    sharding_kwargs = {
+        "shards": shards,
+        "chunks": c0,
+    }
+
     return sharding_kwargs, internal_chunk_shape, arr


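The simplification above leans on zarr-python >= 3 building the sharding codec itself when `shards` and `chunks` are passed to zarr.create_array. A minimal sketch, with hypothetical shapes and store path:

import numpy as np
import zarr

# Shard shape = inner chunk shape scaled by chunks_per_shard along each axis.
z = zarr.create_array(
    store="sharded_example.zarr",
    shape=(256, 256),
    chunks=(32, 32),    # inner chunk shape (c0 above)
    shards=(128, 128),  # shard shape
    dtype="uint8",
)
z[:] = np.zeros((256, 256), dtype="uint8")
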
@@ -383,37 +377,44 @@ def _write_array_direct(
     """Write an array directly using dask.array.to_zarr."""
     arr = _prep_for_to_zarr(store, arr)

-    if region is not None and zarr_array is not None:
-        dask.array.to_zarr(
-            arr,
-            zarr_array,
-            region=region,
-            component=path,
-            overwrite=False,
-            compute=True,
-            return_stored=False,
-            **sharding_kwargs,
-            **zarr_kwargs,
-            **format_kwargs,
-            **dimension_names_kwargs,
-            **kwargs,
+    zarr_fmt = format_kwargs.get("zarr_format")
+    to_zarr_kwargs = {
+        **sharding_kwargs,
+        **zarr_kwargs,
+        **format_kwargs,
+        **dimension_names_kwargs,
+        **kwargs,
+    }
+
+    if zarr_fmt == 3 and zarr_array is None:
+        # Zarr v3, use zarr.create_array and assign (whole array or region)
+        array = zarr.create_array(
+            store=store,
+            name=path,
+            shape=arr.shape,
+            dtype=arr.dtype,
+            **to_zarr_kwargs,
         )
+        if region is not None:
+            array[region] = arr.compute()
+        else:
+            array[:] = arr.compute()
     else:
+        # All other cases: use dask.array.to_zarr
+        target = zarr_array if (region is not None and zarr_array is not None) else store
         dask.array.to_zarr(
             arr,
-            store,
+            target,
+            region=region if (region is not None and zarr_array is not None) else None,
             component=path,
             overwrite=False,
             compute=True,
             return_stored=False,
-            **sharding_kwargs,
-            **zarr_kwargs,
-            **format_kwargs,
-            **dimension_names_kwargs,
-            **kwargs,
+            **to_zarr_kwargs,
         )


+
 def _handle_large_array_writing(
     image,
     arr: dask.array.Array,
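
A simplified sketch of the two write paths selected above, with hypothetical array sizes and store names (not code from the repository):

import dask.array as da
import zarr

darr = da.zeros((64, 64), chunks=(16, 16), dtype="uint8")

# Zarr v3 path: create the array first, then assign the computed data (whole array or a region).
z = zarr.create_array(
    store="out_v3.zarr",
    name="scale0/image",
    shape=darr.shape,
    dtype=darr.dtype,
    chunks=(16, 16),
)
z[:] = darr.compute()

# Other cases: hand the dask array to dask.array.to_zarr directly.
da.to_zarr(darr, "out_other.zarr", component="scale0/image", overwrite=False)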
@@ -471,7 +472,7 @@ def _handle_large_array_writing(
     for region_index, region in enumerate(regions):
         if isinstance(progress, NgffProgressCallback):
             progress.add_callback_task(
-                f"[green]Writing scale {index+1} of {nscales}, region {region_index+1} of {len(regions)}"
+                f"[green]Writing scale {index + 1} of {nscales}, region {region_index + 1} of {len(regions)}"
             )

         arr_region = arr[region]
@@ -761,7 +762,7 @@ def to_ngff_zarr(
     :param chunks_per_shard: Number of chunks along each axis in a shard. If None, no sharding. Requires OME-Zarr version >= 0.5.
     :type chunks_per_shard: int, tuple, or dict, optional

-    :param **kwargs: Passed to the zarr.creation.create() function, e.g., compression options.
+    :param **kwargs: Passed to the zarr.create_array() or zarr.creation.create() function, e.g., compression options.
     """
     # Setup and validation
     store_path = str(store) if isinstance(store, (str, Path)) else None
@@ -783,6 +784,12 @@
     zarr_format = 2 if version == "0.4" else 3
     format_kwargs = {"zarr_format": zarr_format} if zarr_version_major >= 3 else {}
     _zarr_kwargs = zarr_kwargs.copy()
+
+    if version == "0.4" and kwargs.get("compressors") is not None:
+        raise ValueError(
+            "The argument `compressors` are not supported for OME-Zarr version 0.4. (Zarr v3). Use `compression` instead."
+        )
+
     if zarr_format == 2 and zarr_version_major >= 3:
         _zarr_kwargs["dimension_separator"] = "/"

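Per the new guard, a hypothetical call combining the Zarr v3 `compressors` argument with OME-Zarr 0.4 would now be rejected (the store path and multiscales object are illustrative):

import zarr
from ngff_zarr import to_ngff_zarr

# Expected to raise ValueError: `compressors` is a Zarr v3 option,
# but version "0.4" writes Zarr v2.
to_ngff_zarr(
    "legacy.ome.zarr",
    multiscales,
    version="0.4",
    compressors=[zarr.codecs.BloscCodec()],
)
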
@@ -864,7 +871,7 @@
         else:
             if isinstance(progress, NgffProgressCallback):
                 progress.add_callback_task(
-                    f"[green]Writing scale {index+1} of {nscales}"
+                    f"[green]Writing scale {index + 1} of {nscales}"
                 )

         # For small arrays, write in one go
