diff --git a/python/zarrs/_internal.pyi b/python/zarrs/_internal.pyi index 8635e06..2aca607 100644 --- a/python/zarrs/_internal.pyi +++ b/python/zarrs/_internal.pyi @@ -41,7 +41,3 @@ class WithSubset: subset: typing.Sequence[slice], shape: typing.Sequence[builtins.int], ) -> WithSubset: ... - -def codec_metadata_v2_to_v3( - filters: typing.Sequence[builtins.str] | None, compressor: builtins.str | None -) -> builtins.list[builtins.str]: ... diff --git a/python/zarrs/pipeline.py b/python/zarrs/pipeline.py index 9e56316..621d9f9 100644 --- a/python/zarrs/pipeline.py +++ b/python/zarrs/pipeline.py @@ -14,8 +14,8 @@ from zarr.core.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata if TYPE_CHECKING: - from collections.abc import Generator, Iterable, Iterator - from typing import Any, Self + from collections.abc import Iterable, Iterator + from typing import Self from zarr.abc.store import ByteGetter, ByteSetter, Store from zarr.core.array_spec import ArraySpec @@ -24,7 +24,7 @@ from zarr.core.indexing import SelectorTuple from zarr.dtype import ZDType -from ._internal import CodecPipelineImpl, codec_metadata_v2_to_v3 +from ._internal import CodecPipelineImpl from .utils import ( CollapsedDimensionError, DiscontiguousArrayError, @@ -66,29 +66,6 @@ def get_codec_pipeline_impl( return None -def codecs_to_dict(codecs: Iterable[Codec]) -> Generator[dict[str, Any], None, None]: - for codec in codecs: - if codec.__class__.__name__ == "V2Codec": - codec_dict = codec.to_dict() - if codec_dict.get("filters", None) is not None: - filters = [ - json.dumps(filter.get_config()) - for filter in codec_dict.get("filters") - ] - else: - filters = None - if codec_dict.get("compressor", None) is not None: - compressor_json = codec_dict.get("compressor").get_config() - compressor = json.dumps(compressor_json) - else: - compressor = None - codecs_v3 = codec_metadata_v2_to_v3(filters, compressor) - for codec in codecs_v3: - yield json.loads(codec) - else: - yield codec.to_dict() - - class ZarrsCodecPipelineState(TypedDict): codec_metadata_json: str codecs: tuple[Codec, ...] diff --git a/src/concurrency.rs b/src/concurrency.rs index 8f04dbf..9b523d1 100644 --- a/src/concurrency.rs +++ b/src/concurrency.rs @@ -1,10 +1,10 @@ -use pyo3::{exceptions::PyRuntimeError, PyErr, PyResult}; +use pyo3::{exceptions::PyRuntimeError, PyResult}; use zarrs::array::{ codec::CodecOptions, concurrency::calc_concurrency_outer_inner, ArrayCodecTraits, RecommendedConcurrency, }; -use crate::{chunk_item::ChunksItem, CodecPipelineImpl}; +use crate::{chunk_item::ChunksItem, utils::PyErrExt as _, CodecPipelineImpl}; pub trait ChunkConcurrentLimitAndCodecOptions { fn get_chunk_concurrent_limit_and_codec_options( @@ -30,7 +30,7 @@ where let codec_concurrency = codec_pipeline_impl .codec_chain .recommended_concurrency(chunk_representation) - .map_err(|err| PyErr::new::(err.to_string()))?; + .map_py_err::()?; let min_concurrent_chunks = std::cmp::min(codec_pipeline_impl.chunk_concurrent_minimum, num_chunks); diff --git a/src/lib.rs b/src/lib.rs index 0fd7553..ec51ede 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,16 +23,20 @@ use zarrs::array::codec::{ StoragePartialDecoder, }; use zarrs::array::{ - copy_fill_value_into, update_array_bytes, Array, ArrayBytes, ArrayBytesFixedDisjointView, + copy_fill_value_into, update_array_bytes, ArrayBytes, ArrayBytesFixedDisjointView, ArrayMetadata, ArraySize, CodecChain, FillValue, }; use zarrs::array_subset::ArraySubset; -use zarrs::storage::store::MemoryStore; +use zarrs::config::global_config; +use zarrs::metadata::v2::data_type_metadata_v2_to_endianness; +use zarrs::metadata::v3::MetadataV3; +use zarrs::metadata_ext::v2_to_v3::{ + codec_metadata_v2_to_v3, data_type_metadata_v2_to_v3, ArrayMetadataV2ToV3Error, +}; use zarrs::storage::{ReadableWritableListableStorage, StorageHandle, StoreKey}; mod chunk_item; mod concurrency; -mod metadata_v2; mod runtime; mod store; #[cfg(test)] @@ -41,7 +45,6 @@ mod utils; use crate::chunk_item::ChunksItem; use crate::concurrency::ChunkConcurrentLimitAndCodecOptions; -use crate::metadata_v2::codec_metadata_v2_to_v3; use crate::store::StoreConfig; use crate::utils::{PyErrExt as _, PyUntypedArrayExt as _}; @@ -203,6 +206,35 @@ impl CodecPipelineImpl { } } +fn array_metadata_to_codec_metadata_v3( + metadata: ArrayMetadata, +) -> Result, ArrayMetadataV2ToV3Error> { + match metadata { + ArrayMetadata::V3(metadata) => Ok(metadata.codecs), + ArrayMetadata::V2(metadata) => { + let config = global_config(); + let endianness = data_type_metadata_v2_to_endianness(&metadata.dtype) + .map_err(ArrayMetadataV2ToV3Error::InvalidEndianness)?; + let data_type = data_type_metadata_v2_to_v3( + &metadata.dtype, + config.data_type_aliases_v2(), + config.data_type_aliases_v3(), + )?; + + codec_metadata_v2_to_v3( + metadata.order, + metadata.shape.len(), + &data_type, + endianness, + &metadata.filters, + &metadata.compressor, + config.codec_aliases_v2(), + config.codec_aliases_v3(), + ) + } + } +} + #[gen_stub_pymethods] #[pymethods] impl CodecPipelineImpl { @@ -226,11 +258,10 @@ impl CodecPipelineImpl { ) -> PyResult { let metadata: ArrayMetadata = serde_json::from_str(array_metadata).map_py_err::()?; - - // TODO: Add a direct metadata -> codec chain method to zarrs - let store = Arc::new(MemoryStore::new()); - let array = Array::new_with_metadata(store, "/", metadata).map_py_err::()?; - let codec_chain = Arc::new(array.codecs().clone()); + let codec_metadata = + array_metadata_to_codec_metadata_v3(metadata).map_py_err::()?; + let codec_chain = + Arc::new(CodecChain::from_metadata(&codec_metadata).map_py_err::()?); let mut codec_options = CodecOptionsBuilder::new(); if let Some(validate_checksums) = validate_checksums { @@ -470,7 +501,6 @@ fn _internal(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_function(wrap_pyfunction!(codec_metadata_v2_to_v3, m)?)?; Ok(()) } diff --git a/src/metadata_v2.rs b/src/metadata_v2.rs deleted file mode 100644 index 9c42050..0000000 --- a/src/metadata_v2.rs +++ /dev/null @@ -1,57 +0,0 @@ -use pyo3::{exceptions::PyRuntimeError, pyfunction, PyErr, PyResult}; -use zarrs::metadata::{ - v2::{ArrayMetadataV2Order, MetadataV2}, - v3::MetadataV3, -}; - -#[pyfunction] -#[pyo3_stub_gen::derive::gen_stub_pyfunction] -#[pyo3(signature = (filters=None, compressor=None))] -pub fn codec_metadata_v2_to_v3( - filters: Option>, - compressor: Option, -) -> PyResult> { - // Try and convert filters/compressor to V2 metadata - let filters = if let Some(filters) = filters { - Some( - filters - .into_iter() - .map(|filter| { - serde_json::from_str::(&filter) - .map_err(|err| PyErr::new::(err.to_string())) - }) - .collect::, _>>()?, - ) - } else { - None - }; - let compressor = if let Some(compressor) = compressor { - Some( - serde_json::from_str::(&compressor) - .map_err(|err| PyErr::new::(err.to_string()))?, - ) - } else { - None - }; - - // FIXME: The array order, dimensionality, data type, and endianness are needed to exhaustively support all Zarr V2 data that zarrs can handle. - // However, CodecPipeline.from_codecs does not supply this information, and CodecPipeline.evolve_from_array_spec is seemingly never called. - let metadata = zarrs::metadata_ext::v2_to_v3::codec_metadata_v2_to_v3( - ArrayMetadataV2Order::C, - 0, // unused with C order - &MetadataV3::new("bool"), // FIXME - None, - &filters, - &compressor, - zarrs::config::global_config().codec_aliases_v2(), - zarrs::config::global_config().codec_aliases_v3(), - ) - .map_err(|err| { - // TODO: More informative error messages from zarrs for ArrayMetadataV2ToV3ConversionError - PyErr::new::(err.to_string()) - })?; - Ok(metadata - .into_iter() - .map(|metadata| serde_json::to_string(&metadata).expect("infallible")) // TODO: Add method to zarrs - .collect()) -}