5 changes: 5 additions & 0 deletions .changeset/vast-things-design.md
@@ -0,0 +1,5 @@
---
"zarrita": minor
---

Add an optional chunk cache for `zarr.get`: pass a `cache` (e.g. a `Map`, or any object with `get`/`set`) in the options to reuse decoded chunks across reads.
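
Not part of the changeset file itself, but roughly how the new option is meant to be used. A minimal sketch, assuming the package-level `zarr.get`, `zarr.root`, `zarr.open.v3`, and `FetchStore` exports; the store URL and array path are illustrative:

```ts
import * as zarr from "zarrita";

// Illustrative store and array; any readable store works the same way.
const store = zarr.root(new zarr.FetchStore("http://localhost:8080/data.zarr"));
const arr = await zarr.open.v3(store.resolve("/2d.chunked.i2"), { kind: "array" });

// Any object with `get(key)` / `set(key, chunk)` works as the cache; a Map is enough.
const cache = new Map();

const first = await zarr.get(arr, null, { cache }); // fetches chunks, fills the cache
const again = await zarr.get(arr, null, { cache }); // decoded chunks come from the cache
```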
201 changes: 201 additions & 0 deletions packages/zarrita/__tests__/indexing/get.test.ts
@@ -5,7 +5,9 @@ import { describe, expect, it } from "vitest";

import * as zarr from "../../src/index.js";
import { get } from "../../src/indexing/ops.js";
import type { ChunkCache } from "../../src/indexing/types.js";
import { range } from "../../src/indexing/util.js";
import type { Chunk, DataType } from "../../src/metadata.js";

let __dirname = path.dirname(url.fileURLToPath(import.meta.url));

@@ -661,3 +663,202 @@ describe("get v3", () => {
expect(res.stride).toStrictEqual([1, 3, 9]);
});
});

describe("chunk caching", () => {
async function get_array_with_cache(): Promise<zarr.Array<"int16">> {
let root = path.resolve(__dirname, "../../../../fixtures/v3/data.zarr");
let store = zarr.root(new FileSystemStore(root));
return zarr.open.v3(store.resolve("/2d.chunked.i2"), {
kind: "array",
}) as Promise<zarr.Array<"int16">>;
}

it("should work without cache (default behavior)", async () => {
let arr = await get_array_with_cache();
let result = await get(arr, null);

expect(result.data).toStrictEqual(new Int16Array([1, 2, 3, 4]));
expect(result.shape).toStrictEqual([2, 2]);
});

it("should work with Map as cache", async () => {
let arr = await get_array_with_cache();
let cache = new Map();

// First call should populate cache
let result1 = await get(arr, null, { cache });
expect(result1.data).toStrictEqual(new Int16Array([1, 2, 3, 4]));
const cacheSize = cache.size;
expect(cacheSize).toBeGreaterThan(0);

// Second call should use cache
let result2 = await get(arr, null, { cache });
expect(result2).toStrictEqual(result1);
expect(cache.size).toBe(cacheSize);
});

it("should cache chunks with proper keys", async () => {
let arr = await get_array_with_cache();
let cache = new Map();

await get(arr, null, { cache });

let keys = Array.from(cache.keys());
expect(keys.length).toBeGreaterThan(0);

// Keys should contain store ID, array path and chunk coordinates
for (let key of keys) {
expect(key).toMatch(
/^store_\d+:\/2d\.chunked\.i2:c[.\\/]\d+([.\\/]\d+)*$/,
);
}
});

it("should reuse cached chunks across multiple calls", async () => {
let arr = await get_array_with_cache();
let cache = new Map();

let originalGetChunk = arr.getChunk;
let getChunkCallCount = 0;
arr.getChunk = async (...args) => {
getChunkCallCount++;
return originalGetChunk.call(arr, ...args);
};

// First call
await get(arr, null, { cache });
let firstCallCount = getChunkCallCount;
expect(firstCallCount).toBeGreaterThan(0);

// Second call should not fetch chunks again
await get(arr, null, { cache });
expect(getChunkCallCount).toBe(firstCallCount);

// Restore original method
arr.getChunk = originalGetChunk;
});

it("should work with custom cache implementation", async () => {
let arr = await get_array_with_cache();

// Custom cache that tracks operations
let operations: string[] = [];
let cache: ChunkCache = {
get(key: string) {
operations.push(`get:${key}`);
return undefined; // Always miss for this test
},
set(key: string, _value: Chunk<DataType>) {
operations.push(`set:${key}`);
},
};

let result = await get(arr, null, { cache });

expect(result.data).toStrictEqual(new Int16Array([1, 2, 3, 4]));
expect(operations.length).toBeGreaterThan(0);
expect(operations.some((op) => op.startsWith("get:"))).toBe(true);
expect(operations.some((op) => op.startsWith("set:"))).toBe(true);
});

it("should handle cache hits correctly", async () => {
let arr = await get_array_with_cache();

// First, find out what cache keys are needed by doing a real call
let cache = new Map();
await get(arr, null, { cache });
let realKeys = Array.from(cache.keys());
let realValues = Array.from(cache.values());

// Clear cache and pre-populate ALL chunks with fake data using the correct keys
cache.clear();
for (let i = 0; i < realKeys.length; i++) {
let fakeChunkData = {
data: new Int16Array([99, 98, 97, 96]), // Use same fake data for all chunks
shape: realValues[i].shape, // Use original shape
stride: realValues[i].stride, // Use original stride
};
cache.set(realKeys[i], fakeChunkData);
}

// Mock getChunk to ensure it's not called
let originalGetChunk = arr.getChunk;
arr.getChunk = async () => {
throw new Error("getChunk should not be called when cache hits");
};

try {
let result = await get(arr, null, { cache });
// Should get some fake data (exact result depends on how chunks are assembled)
expect(result.data).toBeInstanceOf(Int16Array);
expect(result.shape).toStrictEqual([2, 2]);
} finally {
// Restore original method
arr.getChunk = originalGetChunk;
}
});

it("should handle different arrays with separate cache entries", async () => {
let root = path.resolve(__dirname, "../../../../fixtures/v3/data.zarr");
let store = zarr.root(new FileSystemStore(root));

let arr1 = await zarr.open.v3(store.resolve("/1d.contiguous.raw.i2"), {
kind: "array",
});
let arr2 = await zarr.open.v3(store.resolve("/2d.contiguous.i2"), {
kind: "array",
});

let cache = new Map();

await get(arr1, null, { cache });
await get(arr2, null, { cache });

// Should have separate cache entries for different arrays
let keys = Array.from(cache.keys());
expect(keys.some((k) => k.includes("/1d.contiguous.raw.i2:"))).toBe(true);
expect(keys.some((k) => k.includes("/2d.contiguous.i2:"))).toBe(true);
});

it("should handle multiple stores", async () => {
let root1 = path.resolve(__dirname, "../../../../fixtures/v3/data.zarr");
let store1 = zarr.root(new FileSystemStore(root1));
let arr1 = await zarr.open.v3(store1.resolve("/2d.chunked.i2"), {
kind: "array",
});

let root2 = path.resolve(__dirname, "../../../../fixtures/v2/data.zarr");
let store2 = zarr.root(new FileSystemStore(root2));
let arr2 = await zarr.open.v2(store2.resolve("/1d.contiguous.i4"), {
kind: "array",
});

let cache = new Map();

await get(arr1, null, { cache });
await get(arr2, null, { cache });

expect(cache.size).toBeGreaterThan(0);
const storePrefixes = new Set(
Array.from(cache.keys()).map((key) => {
return key.split(":")[0]; // Extract store ID from cache key
}),
);
expect(storePrefixes.size).toBe(2); // Should have entries for both stores
});

it("should work with sliced access using cache", async () => {
let arr = await get_array_with_cache();
let cache = new Map();

// Access a slice
let result = await get(arr, [zarr.slice(0, 1), null], { cache });

expect(result.shape).toStrictEqual([1, 2]);
expect(cache.size).toBeGreaterThan(0);

		// Access another slice that might reuse the same chunks
let result2 = await get(arr, [zarr.slice(1, 2), null], { cache });
expect(result2.shape).toStrictEqual([1, 2]);
});
});
51 changes: 48 additions & 3 deletions packages/zarrita/src/indexing/get.ts
@@ -4,6 +4,7 @@ import { type Array, get_context } from "../hierarchy.js";
import type { Chunk, DataType, Scalar, TypedArray } from "../metadata.js";
import { BasicIndexer } from "./indexer.js";
import type {
ChunkCache,
GetOptions,
Prepare,
SetFromChunk,
@@ -12,6 +13,32 @@ } from "./types.js";
} from "./types.js";
import { create_queue } from "./util.js";

// No-op cache used when the caller does not provide `opts.cache`.
const NULL_CACHE: ChunkCache = {
get: () => undefined,
set: () => {},
};

// WeakMap to assign unique IDs to store instances
const storeIdMap = new WeakMap<object, number>();
let storeIdCounter = 0;

function getStoreId(store: Readable): string {
if (!storeIdMap.has(store)) {
storeIdMap.set(store, storeIdCounter++);
}
return `store_${storeIdMap.get(store)}`;
}

function createCacheKey(
arr: Array<DataType, Readable>,
chunk_coords: number[],
): string {
let context = get_context(arr);
let chunkKey = context.encode_chunk_key(chunk_coords);
let storeId = getStoreId(arr.store);
return `${storeId}:${arr.path}:${chunkKey}`;
}

function unwrap<D extends DataType>(
arr: TypedArray<D>,
idx: number,
@@ -50,11 +77,29 @@ export async function get<
);

let queue = opts.create_queue?.() ?? create_queue();
let cache = opts.cache ?? NULL_CACHE;
for (const { chunk_coords, mapping } of indexer) {
queue.add(async () => {
let { data, shape, stride } = await arr.getChunk(chunk_coords, opts.opts);
let chunk = setter.prepare(data, shape, stride);
setter.set_from_chunk(out, chunk, mapping);
			let cacheKey = createCacheKey(arr, chunk_coords);
			let cachedChunk = cache.get(cacheKey);
			if (!cachedChunk) {
				// Cache miss: fetch and decode the chunk, then keep it for later reads.
				cachedChunk = await arr.getChunk(chunk_coords, opts.opts);
				cache.set(cacheKey, cachedChunk);
			}
			let chunk = setter.prepare(
				cachedChunk.data as TypedArray<D>,
				cachedChunk.shape,
				cachedChunk.stride,
			);
			setter.set_from_chunk(out, chunk, mapping);
});
}

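For reference, a key produced by `createCacheKey` looks roughly like the following (this matches the pattern asserted in the tests above); the chunk-key portion comes from the array's `chunk_key_encoding`, so the separator may be `/` or `.`, and the numeric store ID simply reflects the order in which stores were first seen:

```ts
// "store_0:/2d.chunked.i2:c/0/1"
//    │       │              └─ context.encode_chunk_key([0, 1])
//    │       └─ arr.path
//    └─ getStoreId(arr.store): WeakMap-backed counter, unique per store instance
```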
6 changes: 6 additions & 0 deletions packages/zarrita/src/indexing/types.ts
@@ -34,6 +34,11 @@ export type SetFromChunk<D extends DataType, NdArray extends Chunk<D>> = (
proj: Projection[],
) => void;

export interface ChunkCache {
get(key: string): Chunk<DataType> | undefined;
set(key: string, value: Chunk<DataType>): void;
}

export type Setter<D extends DataType, Arr extends Chunk<D>> = {
prepare: Prepare<D, Arr>;
set_from_chunk: SetFromChunk<D, Arr>;
@@ -42,6 +47,7 @@ export type Setter<D extends DataType, Arr extends Chunk<D>> = {

export type Options = {
create_queue?: () => ChunkQueue;
cache?: ChunkCache;
};

export type GetOptions<O> = Options & { opts?: O };
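
Because `ChunkCache` only requires `get` and `set`, a plain `Map` never evicts anything. A size-capped cache is a small wrapper; below is a rough sketch, not part of this PR — the FIFO eviction policy and default limit are illustrative choices, and the import paths assume a file next to the tests above:

```ts
import type { ChunkCache } from "../../src/indexing/types.js";
import type { Chunk, DataType } from "../../src/metadata.js";

// Size-capped cache with first-in-first-out eviction (not a true LRU).
class BoundedChunkCache implements ChunkCache {
	#chunks = new Map<string, Chunk<DataType>>();

	constructor(private maxEntries = 64) {}

	get(key: string): Chunk<DataType> | undefined {
		return this.#chunks.get(key);
	}

	set(key: string, value: Chunk<DataType>): void {
		if (!this.#chunks.has(key) && this.#chunks.size >= this.maxEntries) {
			// Drop the oldest inserted entry to stay within the limit.
			const oldest = this.#chunks.keys().next().value;
			if (oldest !== undefined) this.#chunks.delete(oldest);
		}
		this.#chunks.set(key, value);
	}
}

// e.g. await get(arr, null, { cache: new BoundedChunkCache(128) });
```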