Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions fixtures/v2/data.zarr/.zmetadata
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,38 @@
],
"zarr_format": 2
},
"3d.chunked.mixed.i2.C.fixedscaleoffset/.zarray": {
"chunks": [
3,
3,
1
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<i2",
"fill_value": 0,
"filters": [
{
"astype": "<i2",
"dtype": "<i2",
"id": "fixedscaleoffset",
"offset": 1,
"scale": 2
}
],
"order": "C",
"shape": [
3,
3,
3
],
"zarr_format": 2
},
"3d.chunked.mixed.i2.C/.zarray": {
"chunks": [
3,
Expand All @@ -496,6 +528,38 @@
],
"zarr_format": 2
},
"3d.chunked.mixed.i2.F.fixedscaleoffset/.zarray": {
"chunks": [
3,
3,
1
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<i2",
"fill_value": 0,
"filters": [
{
"astype": "<i2",
"dtype": "<i2",
"id": "fixedscaleoffset",
"offset": 1,
"scale": 2
}
],
"order": "F",
"shape": [
3,
3,
3
],
"zarr_format": 2
},
"3d.chunked.mixed.i2.F/.zarray": {
"chunks": [
3,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"chunks": [
3,
3,
1
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<i2",
"fill_value": 0,
"filters": [
{
"astype": "<i2",
"dtype": "<i2",
"id": "fixedscaleoffset",
"offset": 1,
"scale": 2
}
],
"order": "C",
"shape": [
3,
3,
3
],
"zarr_format": 2
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified fixtures/v2/data.zipped_from_parent.zarr.zip
Binary file not shown.
Binary file modified fixtures/v2/data.zipped_from_within.zarr.zip
Binary file not shown.
10 changes: 10 additions & 0 deletions packages/@zarrita-ndarray/__tests__/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -271,4 +271,14 @@ describe("ndarray", () => {
expect(res.shape).toStrictEqual([3, 3, 3]);
expect(res.stride).toStrictEqual([1, 3, 9]);
});

it("3d.chunked.mixed.i2.C.fixedscaleoffset", async () => {
  // Open the C-ordered v2 fixture whose chunks pass through a
  // `fixedscaleoffset` filter, then read the whole array back.
  const location = store.resolve("/3d.chunked.mixed.i2.C.fixedscaleoffset");
  const array = await zarr.open.v2(location, { kind: "array" });
  const { data, shape, stride } = await get(array);
  // Decoding must undo the filter and yield the plain 0..26 ramp.
  expect(data).toStrictEqual(new Int16Array(range(27)));
  expect(shape).toStrictEqual([3, 3, 3]);
  expect(stride).toStrictEqual([9, 3, 1]);
});
});
4 changes: 3 additions & 1 deletion packages/zarrita/src/codecs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type { Codec as _Codec } from "numcodecs";
import { BitroundCodec } from "./codecs/bitround.js";
import { BytesCodec } from "./codecs/bytes.js";
import { Crc32cCodec } from "./codecs/crc32c.js";
import { FixedScaleOffsetCodec } from "./codecs/fixedscaleoffset.js";
import { GzipCodec } from "./codecs/gzip.js";
import { JsonCodec } from "./codecs/json2.js";
import { TransposeCodec } from "./codecs/transpose.js";
Expand Down Expand Up @@ -35,7 +36,8 @@ function create_default_registry(): Map<string, () => Promise<CodecEntry>> {
.set("crc32c", () => Crc32cCodec)
.set("vlen-utf8", () => VLenUTF8)
.set("json2", () => JsonCodec)
.set("bitround", () => BitroundCodec);
.set("bitround", () => BitroundCodec)
.set("fixedscaleoffset", () => FixedScaleOffsetCodec);
}

export const registry: Map<string, () => Promise<CodecEntry>> =
Expand Down
56 changes: 56 additions & 0 deletions packages/zarrita/src/codecs/fixedscaleoffset.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import type { Chunk, NumberDataType, TypedArrayConstructor } from "../metadata.js"
import { coerce_dtype, get_ctr } from "../util.js"

type FixedScaleOffsetConfig = {
offset: number;
scale: number;
dtype: string;
astype?: string;
Comment on lines +7 to +8
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

zarr v3 data types can be a string or a JSON object with type {name: string, configuration: object}. See an example here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this one i think needs to be restricted to number types; can numeric types also have that format? is that how endianness is stored?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah I see in the spec:

Each data type is associated with an identifier, which can be used in metadata documents to refer to the data type. For the data types defined in this specification, the identifier is a simple ASCII string. However, extensions may use any JSON value to identify a data type.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and endianness is specified as a bytes codec, I'm inferring.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah the bytes codec sets the endianness of the encoded data, but for decoded data, endianness is up to the implementation

}

export class FixedScaleOffsetCodec<D extends NumberDataType, A extends NumberDataType> {
readonly kind = "array_to_array";

#offset: number;
#scale: number;
#TypedArrayIn: TypedArrayConstructor<D>
#TypedArrayOut: TypedArrayConstructor<A>

constructor(configuration: FixedScaleOffsetConfig) {
const { data_type } = coerce_dtype(configuration.dtype);
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

testing on v3 has made me realize that coerce_dtype is really only meant for v2 data type strings, so this will need to be adjusted.

this.#TypedArrayIn = get_ctr(data_type as NumberDataType);
const { data_type: as_data_type } = coerce_dtype(configuration.astype ?? configuration.dtype);
this.#TypedArrayOut = get_ctr(as_data_type as NumberDataType);

this.#offset = configuration.offset;
this.#scale = configuration.scale;
}

/**
 * Factory entry point: builds a codec instance from its configuration.
 * Equivalent to calling the constructor directly.
 *
 * @param configuration - The `fixedscaleoffset` codec configuration.
 * @returns A new `FixedScaleOffsetCodec` built from `configuration`.
 */
static fromConfig(configuration: FixedScaleOffsetConfig) {
  const codec = new FixedScaleOffsetCodec(configuration);
  return codec;
}

/**
 * Encode a chunk by applying `(value - offset) * scale` to every element,
 * rounding the result, and storing it in a new array of the encoded type.
 *
 * The result is rounded to the nearest integer (ties to even) before it is
 * written. Without the rounding step, integer output arrays would truncate
 * toward zero (e.g. 12.6 -> 12), which disagrees with the numcodecs
 * reference implementation that calls `np.around` before casting.
 *
 * @param arr - The decoded chunk to encode; it is not modified.
 * @returns A new chunk holding the encoded values. `shape` and `stride` are
 *   passed through from `arr` unchanged.
 */
encode(arr: Chunk<D>): Chunk<A> {
  const offset = this.#offset;
  const scale = this.#scale;
  // `Math.round` resolves ties toward +Infinity; step back by one when that
  // lands on an odd integer so ties go to the nearest even value instead.
  const roundHalfEven = (x: number): number => {
    const rounded = Math.round(x);
    const isTie = Math.abs(x % 1) === 0.5;
    return isTie && rounded % 2 !== 0 ? rounded - 1 : rounded;
  };
  const data = new this.#TypedArrayOut(arr.data.length);
  arr.data.forEach((value: number, i: number) => {
    data[i] = roundHalfEven((value - offset) * scale);
  });
  return {
    data,
    shape: arr.shape,
    stride: arr.stride,
  };
}

/**
 * Decode a chunk by applying `value / scale + offset` to every element and
 * storing the result in a new array of the decoded type.
 *
 * @param arr - The encoded chunk; it is not modified.
 * @returns A new chunk holding the decoded values. `shape` and `stride` are
 *   passed through from `arr` unchanged.
 */
decode(arr: Chunk<A>): Chunk<D> {
  const scale = this.#scale;
  const offset = this.#offset;
  const encoded = arr.data;
  const decoded = new this.#TypedArrayIn(encoded.length);
  encoded.forEach((raw: number, index: number) => {
    const restored = raw / scale + offset;
    decoded[index] = restored;
  });
  const { shape, stride } = arr;
  return { data: decoded, shape, stride };
}
}
2 changes: 1 addition & 1 deletion packages/zarrita/src/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ export function create_chunk_key_encoder({
throw new Error(`Unknown chunk key encoding: ${name}`);
}

function coerce_dtype(
export function coerce_dtype(
dtype: string,
): { data_type: DataType } | { data_type: DataType; endian: "little" | "big" } {
if (dtype === "|O") {
Expand Down
21 changes: 20 additions & 1 deletion scripts/generate-v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import zarr
import numpy as np
from numcodecs import Zlib, Blosc, LZ4, Zstd, VLenUTF8
from numcodecs import Zlib, Blosc, LZ4, Zstd, VLenUTF8, FixedScaleOffset

SELF_DIR = pathlib.Path(__file__).parent

Expand Down Expand Up @@ -170,6 +170,25 @@
chunks=(3, 3, 1),
)

# 3d.chunked.mixed.i2.F.fixedscaleoffset
root.create_dataset(
"3d.chunked.mixed.i2.F.fixedscaleoffset",
data=np.arange(27).reshape(3, 3, 3),
order="F",
dtype="i2",
chunks=(3, 3, 1),
filters=[FixedScaleOffset(offset=1, scale=2, dtype="i2")],
)

# 3d.chunked.mixed.i2.C.fixedscaleoffset
root.create_dataset(
"3d.chunked.mixed.i2.C.fixedscaleoffset",
data=np.arange(27).reshape(3, 3, 3),
order="C",
dtype="i2",
chunks=(3, 3, 1),
filters=[FixedScaleOffset(offset=1, scale=2, dtype="i2")],
)

# 3d.chunked.o
data = np.array(
Expand Down
Loading