Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion packages/zarrita/src/codecs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type { Codec as _Codec } from "numcodecs";
import { BitroundCodec } from "./codecs/bitround.js";
import { BytesCodec } from "./codecs/bytes.js";
import { Crc32cCodec } from "./codecs/crc32c.js";
import { FixedScaleOffsetCodec } from "./codecs/fixedscaleoffset.js";
import { GzipCodec } from "./codecs/gzip.js";
import { JsonCodec } from "./codecs/json2.js";
import { TransposeCodec } from "./codecs/transpose.js";
Expand Down Expand Up @@ -35,7 +36,8 @@ function create_default_registry(): Map<string, () => Promise<CodecEntry>> {
.set("crc32c", () => Crc32cCodec)
.set("vlen-utf8", () => VLenUTF8)
.set("json2", () => JsonCodec)
.set("bitround", () => BitroundCodec);
.set("bitround", () => BitroundCodec)
.set("fixedscaleoffset", () => FixedScaleOffsetCodec);
}

export const registry: Map<string, () => Promise<CodecEntry>> =
Expand Down
56 changes: 56 additions & 0 deletions packages/zarrita/src/codecs/fixedscaleoffset.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import type { Chunk, NumberDataType, TypedArrayConstructor } from "../metadata.js"
import { coerce_dtype, get_ctr } from "../util.js"

type FixedScaleOffsetConfig = {
offset: number;
scale: number;
dtype: string;
astype?: string;
Comment on lines +7 to +8
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

zarr v3 data types can be a string or a JSON object with type {name: string, configuration: object}. See an example here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this one needs to be restricted to number types. Can numeric types also have that format? Is that how endianness is stored?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah I see in the spec:

Each data type is associated with an identifier, which can be used in metadata documents to refer to the data type. For the data types defined in this specification, the identifier is a simple ASCII string. However, extensions may use any JSON value to identify a data type.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and endianness is specified as a bytes codec, I'm inferring.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah the bytes codec sets the endianness of the encoded data, but for decoded data, endianness is up to the implementation

}

export class FixedScaleOffsetCodec<D extends NumberDataType, A extends NumberDataType> {
readonly kind = "array_to_array";

#offset: number;
#scale: number;
#TypedArrayIn: TypedArrayConstructor<D>
#TypedArrayOut: TypedArrayConstructor<A>

/**
 * Build the codec from its configuration.
 *
 * Resolves the typed-array constructors for the `dtype` and `astype`
 * settings via `coerce_dtype` + `get_ctr`, and stores `offset` and `scale`
 * for use by `encode` and `decode`.
 *
 * @param configuration - Codec settings (`offset`, `scale`, `dtype`, and
 *   optionally `astype`).
 */
constructor(configuration: FixedScaleOffsetConfig) {
const { data_type } = coerce_dtype(configuration.dtype);
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

testing on v3 has made me realize that coerce_dtype is really only meant for v2 data type strings, so this will need to be adjusted.

this.#TypedArrayIn = get_ctr(data_type as NumberDataType);
// `astype` is optional; when it is absent the output type falls back to `dtype`.
const { data_type: as_data_type } = coerce_dtype(configuration.astype ?? configuration.dtype);
this.#TypedArrayOut = get_ctr(as_data_type as NumberDataType);

this.#offset = configuration.offset;
this.#scale = configuration.scale;
}

/**
 * Factory entry point: create a codec instance from its configuration.
 *
 * @param configuration - Codec settings, forwarded unchanged to the constructor.
 * @returns A newly constructed `FixedScaleOffsetCodec`.
 */
static fromConfig(configuration: FixedScaleOffsetConfig) {
  const codec = new FixedScaleOffsetCodec(configuration);
  return codec;
}

/**
 * Encode a chunk by computing `(value - offset) * scale` for every element
 * and storing the rounded result in a new array of the output type.
 *
 * The scaled value is rounded to the nearest integer, with ties going to the
 * even neighbour, before it is stored. Without this step an integer output
 * array truncates toward zero on assignment, so a scaled value such as
 * 6.999999 (floating-point error on an intended 7) would be stored as 6.
 * Ties-to-even matches the `np.around` step of the numcodecs reference
 * encoder.
 *
 * @param arr - Chunk to encode; it is not modified.
 * @returns A new chunk holding the encoded data with the same `shape` and
 *   `stride` as `arr`.
 */
encode(arr: Chunk<D>): Chunk<A> {
  const offset = this.#offset;
  const scale = this.#scale;
  const data = new this.#TypedArrayOut(arr.data.length);
  arr.data.forEach((value: number, i: number) => {
    const scaled = (value - offset) * scale;
    const rounded = Math.round(scaled);
    // Math.round sends exact .5 ties toward +Infinity; step back by one when
    // that landed on an odd integer so ties resolve to the even neighbour.
    const isOddTie = rounded - scaled === 0.5 && rounded % 2 !== 0;
    data[i] = isOddTie ? rounded - 1 : rounded;
  });
  return {
    data,
    shape: arr.shape,
    stride: arr.stride,
  };
}

/**
 * Decode a chunk by computing `value / scale + offset` for every element and
 * storing the result in a new array of the input (decoded) type.
 *
 * @param arr - Encoded chunk; it is not modified.
 * @returns A new chunk holding the decoded data with the same `shape` and
 *   `stride` as `arr`.
 */
decode(arr: Chunk<A>): Chunk<D> {
  const { data: encoded, shape, stride } = arr;
  const scale = this.#scale;
  const offset = this.#offset;
  const restored = new this.#TypedArrayIn(encoded.length);
  encoded.forEach((value: number, i: number) => {
    restored[i] = value / scale + offset;
  });
  return { data: restored, shape, stride };
}
}
2 changes: 1 addition & 1 deletion packages/zarrita/src/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ export function create_chunk_key_encoder({
throw new Error(`Unknown chunk key encoding: ${name}`);
}

function coerce_dtype(
export function coerce_dtype(
dtype: string,
): { data_type: DataType } | { data_type: DataType; endian: "little" | "big" } {
if (dtype === "|O") {
Expand Down