diff --git a/packages/hub/src/lib/list-files.spec.ts b/packages/hub/src/lib/list-files.spec.ts index 2626380184..ee59a78a02 100644 --- a/packages/hub/src/lib/list-files.spec.ts +++ b/packages/hub/src/lib/list-files.spec.ts @@ -60,7 +60,7 @@ describe("listFiles", () => { type: "file", }, ]); - }, 30_000); + }); it("should fetch the list of files from the repo, including last commit", async () => { const cursor = listFiles({ @@ -146,7 +146,7 @@ describe("listFiles", () => { type: "file", }, ]); - }, 30_000); + }); it("should fetch the list of files from the repo, including subfolders", async () => { const cursor = listFiles({ @@ -165,5 +165,5 @@ describe("listFiles", () => { } assert(files.some((file) => file.path === "data/XSUM-EMNLP18-Summary-Data-Original.tar.gz")); - }, 30_000); + }); }); diff --git a/packages/hub/src/lib/parse-safetensors-metadata.spec.ts b/packages/hub/src/lib/parse-safetensors-metadata.spec.ts index 71077e3bbb..d96f5ed650 100644 --- a/packages/hub/src/lib/parse-safetensors-metadata.spec.ts +++ b/packages/hub/src/lib/parse-safetensors-metadata.spec.ts @@ -49,7 +49,7 @@ describe("parseSafetensorsMetadata", () => { assert.deepStrictEqual(parse.parameterCount, { BF16: 176_247_271_424 }); assert.deepStrictEqual(sum(Object.values(parse.parameterCount)), 176_247_271_424); // total params = 176B - }, 30_000); + }); it("fetch info for single-file with multiple dtypes", async () => { const parse = await parseSafetensorsMetadata({ diff --git a/packages/hub/src/lib/upload-file.spec.ts b/packages/hub/src/lib/upload-file.spec.ts index cc760de919..af75349631 100644 --- a/packages/hub/src/lib/upload-file.spec.ts +++ b/packages/hub/src/lib/upload-file.spec.ts @@ -94,5 +94,5 @@ describe("uploadFile", () => { hubUrl: TEST_HUB_URL, }); } - }, 30_000); + }); }); diff --git a/packages/hub/src/utils/RangeList.spec.ts b/packages/hub/src/utils/RangeList.spec.ts new file mode 100644 index 0000000000..e05f85a8fa --- /dev/null +++ b/packages/hub/src/utils/RangeList.spec.ts @@ -0,0 +1,96 @@ +import { describe, it, expect } from "vitest"; +import { RangeList } from "./RangeList"; + +describe("RangeList", () => { + it("should add a single range", () => { + const rangeList = new RangeList(); + rangeList.add(1, 100); + + const ranges = rangeList.getAllRanges(); + expect(ranges).toHaveLength(1); + expect(ranges[0]).toEqual({ + start: 1, + end: 100, + refCount: 1, + data: null, + }); + }); + + it("should handle overlapping ranges", () => { + const rangeList = new RangeList(); + rangeList.add(1, 100); + rangeList.add(30, 50); + + const ranges = rangeList.getAllRanges(); + expect(ranges).toHaveLength(3); + expect(ranges).toEqual([ + { start: 1, end: 30, refCount: 1, data: null }, + { start: 30, end: 50, refCount: 2, data: null }, + { start: 50, end: 100, refCount: 1, data: null }, + ]); + }); + + it("should remove a range at existing boundaries", () => { + const rangeList = new RangeList(); + rangeList.add(1, 100); + rangeList.add(30, 50); + rangeList.remove(30, 50); + + const ranges = rangeList.getAllRanges(); + expect(ranges).toHaveLength(3); + expect(ranges).toEqual([ + { start: 1, end: 30, refCount: 1, data: null }, + { start: 30, end: 50, refCount: 1, data: null }, + { start: 50, end: 100, refCount: 1, data: null }, + ]); + }); + + it("should throw error when removing range at non-existing boundaries", () => { + const rangeList = new RangeList(); + rangeList.add(1, 100); + rangeList.add(30, 50); + + expect(() => rangeList.remove(2, 50)).toThrow("Range boundaries must match existing boundaries"); + 
}); + + it("should get ranges within boundaries", () => { + const rangeList = new RangeList(); + rangeList.add(1, 100); + rangeList.add(30, 50); + + const ranges = rangeList.getRanges(30, 100); + expect(ranges).toHaveLength(2); + expect(ranges).toEqual([ + { start: 30, end: 50, refCount: 2, data: null }, + { start: 50, end: 100, refCount: 1, data: null }, + ]); + }); + + it("should throw error when end is less than or equal to start", () => { + const rangeList = new RangeList(); + + expect(() => rangeList.add(100, 1)).toThrow("End must be greater than start"); + expect(() => rangeList.add(1, 1)).toThrow("End must be greater than start"); + expect(() => rangeList.remove(100, 1)).toThrow("End must be greater than start"); + expect(() => rangeList.remove(1, 1)).toThrow("End must be greater than start"); + expect(() => rangeList.getRanges(100, 1)).toThrow("End must be greater than start"); + expect(() => rangeList.getRanges(1, 1)).toThrow("End must be greater than start"); + }); + + it("should handle multiple overlapping ranges", () => { + const rangeList = new RangeList(); + rangeList.add(1, 100); + rangeList.add(30, 50); + rangeList.add(40, 60); + + const ranges = rangeList.getAllRanges(); + expect(ranges).toHaveLength(5); + expect(ranges).toEqual([ + { start: 1, end: 30, refCount: 1, data: null }, + { start: 30, end: 40, refCount: 2, data: null }, + { start: 40, end: 50, refCount: 3, data: null }, + { start: 50, end: 60, refCount: 2, data: null }, + { start: 60, end: 100, refCount: 1, data: null }, + ]); + }); +}); diff --git a/packages/hub/src/utils/RangeList.ts b/packages/hub/src/utils/RangeList.ts new file mode 100644 index 0000000000..69137fe69f --- /dev/null +++ b/packages/hub/src/utils/RangeList.ts @@ -0,0 +1,179 @@ +/** + * Code generated with this prompt by Cursor: + * + * I want to build a class to manage ranges + * + * I can add ranges to it with a start & an end (both integers, end > start). It should store those ranges efficiently. + * + * When several ranges overlap, e.g. [1, 100] and [30, 50], I want the class to split the range into non-overlapping ranges, and add a "ref counter" to the ranges. For example, [1, 30], [30, 50] * 2, [50, 100] + * + * I also want to be able to remove ranges, which will decrease the ref counter or remove the range altogether. I can only remove ranges at existing boundaries. For example, with the [1, 30], [30, 50] * 2, [50, 100] configuration + * + * - removing [1, 100] => the only range remaining is [30, 50] + * - removing [2, 50] => error, because "2" is not a boundary + * - removing [30, 50] => [1, 30], [30, 50], [50, 100] (do not "merge" the ranges back together) + * + * I want to be able to associate data with each range. And I want to be able to get the ranges inside boundaries. For example, with the [1, 30], [30, 50] * 2, [50, 100] configuration + * + * - getting [30, 100] => I receive [30, 50] * 2, [50, 100], and I can get / modify the data associated with each range by accessing their data prop. Note the "*2" is just the ref counter, there is only one range object for the interval returned + * - getting [2, 50] => I get [30, 50] * 2 + * + * ---- + * + * Could optimize with binary search, but the ranges we want to handle are not that many. + */ +interface Range<T> { + start: number; + end: number; + refCount: number; + data: T | null; +} + +export class RangeList<T> { + private ranges: Range<T>[] = []; + + /** + * Add a range to the list. If it overlaps with existing ranges, + * it will split them and increment reference counts accordingly.
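+ * + * For example, adding [1, 100] and then [30, 50] stores [1, 30] (refCount 1), [30, 50] (refCount 2) and [50, 100] (refCount 1).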
+ */ + add(start: number, end: number): void { + if (end <= start) { + throw new TypeError("End must be greater than start"); + } + + // Find all ranges that overlap with the new range + const overlappingRanges: { index: number; range: Range }[] = []; + for (let i = 0; i < this.ranges.length; i++) { + const range = this.ranges[i]; + if (start < range.end && end > range.start) { + overlappingRanges.push({ index: i, range }); + } + if (range.data !== null) { + throw new Error("Overlapping range already has data"); + } + } + + if (overlappingRanges.length === 0) { + // No overlaps, just add the new range + this.ranges.push({ start, end, refCount: 1, data: null }); + this.ranges.sort((a, b) => a.start - b.start); + return; + } + + // Handle overlaps by splitting ranges + const newRanges: Range[] = []; + let currentPos = start; + + for (let i = 0; i < overlappingRanges.length; i++) { + const { range } = overlappingRanges[i]; + + // Add range before overlap if exists + if (currentPos < range.start) { + newRanges.push({ + start: currentPos, + end: range.start, + refCount: 1, + data: null, + }); + } else if (range.start < currentPos) { + newRanges.push({ + start: range.start, + end: currentPos, + refCount: range.refCount, + data: null, + }); + } + + // Add overlapping part with increased ref count + newRanges.push({ + start: Math.max(currentPos, range.start), + end: Math.min(end, range.end), + refCount: range.refCount + 1, + data: null, + }); + + // Add remaining part of existing range if exists + if (range.end > end) { + newRanges.push({ + start: end, + end: range.end, + refCount: range.refCount, + data: null, + }); + } + + currentPos = Math.max(currentPos, range.end); + } + + // Add remaining part after last overlap if exists + if (currentPos < end) { + newRanges.push({ + start: currentPos, + end, + refCount: 1, + data: null, + }); + } + + // Remove old overlapping ranges and insert new ones + const firstIndex = overlappingRanges[0].index; + const lastIndex = overlappingRanges[overlappingRanges.length - 1].index; + this.ranges.splice(firstIndex, lastIndex - firstIndex + 1, ...newRanges); + this.ranges.sort((a, b) => a.start - b.start); + } + + /** + * Remove a range from the list. The range must start and end at existing boundaries. + */ + remove(start: number, end: number): void { + if (end <= start) { + throw new TypeError("End must be greater than start"); + } + + // Find ranges that need to be modified + const affectedRanges: { index: number; range: Range }[] = []; + for (let i = 0; i < this.ranges.length; i++) { + const range = this.ranges[i]; + if (start < range.end && end > range.start) { + affectedRanges.push({ index: i, range }); + } + } + + if (affectedRanges.length === 0) { + throw new Error("No ranges found to remove"); + } + + // Verify boundaries match + if (start !== affectedRanges[0].range.start || end !== affectedRanges[affectedRanges.length - 1].range.end) { + throw new Error("Range boundaries must match existing boundaries"); + } + + // Todo: also check if there's a gap in the middle but it should not happen with our usage + + for (let i = 0; i < affectedRanges.length; i++) { + const { range } = affectedRanges[i]; + + range.refCount--; + } + + this.ranges = this.ranges.filter((range) => range.refCount > 0); + } + + /** + * Get all ranges within the specified boundaries. 
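+ * + * For example, with ranges [1, 30], [30, 50] (refCount 2) and [50, 100], calling getRanges(30, 100) returns the [30, 50] and [50, 100] range objects.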
+ */ + getRanges(start: number, end: number): Range[] { + if (end <= start) { + throw new TypeError("End must be greater than start"); + } + + return this.ranges.filter((range) => start < range.end && end > range.start); + } + + /** + * Get all ranges in the list + */ + getAllRanges(): Range[] { + return [...this.ranges]; + } +} diff --git a/packages/hub/src/utils/WebBlob.spec.ts b/packages/hub/src/utils/WebBlob.spec.ts index 2cbd641980..68ad69e0d3 100644 --- a/packages/hub/src/utils/WebBlob.spec.ts +++ b/packages/hub/src/utils/WebBlob.spec.ts @@ -58,7 +58,7 @@ describe("WebBlob", () => { expect(webBlob).toBeInstanceOf(WebBlob); expect(webBlob).toMatchObject({ url }); expect(await webBlob.slice(10, 22).text()).toBe("__metadata__"); - }, 30_000); + }); it("should lazy load a Xet file hosted on Hugging Face", async () => { const stableDiffusionUrl = @@ -70,7 +70,7 @@ describe("WebBlob", () => { expect(webBlob).toBeInstanceOf(WebBlob); expect(webBlob).toMatchObject({ url }); expect(await webBlob.slice(10, 22).text()).toBe("__metadata__"); - }, 30_000); + }); it("should create a slice on the file", async () => { const expectedText = fullText.slice(10, 20); diff --git a/packages/hub/src/utils/XetBlob.spec.ts b/packages/hub/src/utils/XetBlob.spec.ts index 04ab2c330b..3539ab3473 100644 --- a/packages/hub/src/utils/XetBlob.spec.ts +++ b/packages/hub/src/utils/XetBlob.spec.ts @@ -1,5 +1,7 @@ import { describe, expect, it } from "vitest"; +import type { ReconstructionInfo } from "./XetBlob"; import { bg4_regoup_bytes, XetBlob } from "./XetBlob"; +import { sum } from "./sum"; describe("XetBlob", () => { it("should lazy load the first 22 bytes", async () => { @@ -13,7 +15,7 @@ describe("XetBlob", () => { }); expect(await blob.slice(10, 22).text()).toBe("__metadata__"); - }, 30_000); + }); it("should load the first chunk correctly", async () => { let xorbCount = 0; @@ -44,7 +46,7 @@ describe("XetBlob", () => { expect(new Uint8Array(xetDownload)).toEqual(new Uint8Array(bridgeDownload)); expect(xorbCount).toBe(1); - }, 30_000); + }); it("should load just past the first chunk correctly", async () => { let xorbCount = 0; @@ -73,15 +75,47 @@ describe("XetBlob", () => { } ).then((res) => res.arrayBuffer()); - expect(new Uint8Array(xetDownload)).toEqual(new Uint8Array(bridgeDownload)); expect(xetDownload.byteLength).toBe(29929); + expect(new Uint8Array(xetDownload)).toEqual(new Uint8Array(bridgeDownload)); expect(xorbCount).toBe(2); }); - // In github actions, this test doesn't work inside the browser, but it works locally - // inside both chrome and chromium browsers - // TODO: figure out why + // Doesn't work in chrome due to caching issues, it caches the partial output when the + // fetch is interrupted in the previous test and then uses that cached output in this test (that requires more data) if (typeof window === "undefined") { + it("should load the first 200kB correctly", async () => { + let xorbCount = 0; + const blob = new XetBlob({ + repo: { + type: "model", + name: "celinah/xet-experiments", + }, + hash: "7b3b6d07673a88cf467e67c1f7edef1a8c268cbf66e9dd9b0366322d4ab56d9b", + size: 5_234_139_343, + fetch: async (url, opts) => { + if (typeof url === "string" && url.includes("/xorbs/")) { + xorbCount++; + } + return fetch(url, opts); + }, + internalLogging: true, + }); + + const xetDownload = await blob.slice(0, 200_000).arrayBuffer(); + const bridgeDownload = await fetch( + "https://huggingface.co/celinah/xet-experiments/resolve/main/model5GB.safetensors", + { + headers: { + Range: "bytes=0-199999", + 
}, + } + ).then((res) => res.arrayBuffer()); + + expect(xetDownload.byteLength).toBe(200_000); + expect(new Uint8Array(xetDownload)).toEqual(new Uint8Array(bridgeDownload)); + expect(xorbCount).toBe(2); + }, 60_000); + it("should load correctly when loading far into a chunk range", async () => { const blob = new XetBlob({ repo: { @@ -90,6 +124,7 @@ describe("XetBlob", () => { }, hash: "7b3b6d07673a88cf467e67c1f7edef1a8c268cbf66e9dd9b0366322d4ab56d9b", size: 5_234_139_343, + internalLogging: true, }); const xetDownload = await blob.slice(10_000_000, 10_100_000).arrayBuffer(); @@ -103,8 +138,9 @@ describe("XetBlob", () => { ).then((res) => res.arrayBuffer()); console.log("xet", xetDownload.byteLength, "bridge", bridgeDownload.byteLength); + expect(new Uint8Array(xetDownload).length).toEqual(100_000); expect(new Uint8Array(xetDownload)).toEqual(new Uint8Array(bridgeDownload)); - }, 30_000); + }); } it("should load text correctly when offset_into_range starts in a chunk further than the first", async () => { @@ -155,7 +191,7 @@ describe("XetBlob", () => { console.log("xet", text.length, "bridge", bridgeDownload.length); expect(text.length).toBe(bridgeDownload.length); - }, 30_000); + }); describe("bg4_regoup_bytes", () => { it("should regroup bytes when the array is %4 length", () => { @@ -182,4 +218,711 @@ describe("XetBlob", () => { ); }); }); + + describe("when mocked", () => { + describe("loading many chunks every read", () => { + it("should load different slices", async () => { + const chunk1Content = "hello"; + const chunk2Content = "world!"; + const debugged: Array<{ event: "read" | string } & Record> = []; + + const chunks = Array(1000) + .fill(0) + .flatMap(() => [makeChunk(chunk1Content), makeChunk(chunk2Content)]); + + const mergedChunks = await new Blob(chunks).arrayBuffer(); + const wholeText = (chunk1Content + chunk2Content).repeat(1000); + + const totalSize = wholeText.length; + let fetchCount = 0; + + const blob = new XetBlob({ + hash: "test", + repo: { + name: "test", + type: "model", + }, + size: totalSize, + hubUrl: "https://huggingface.co", + listener: (e) => debugged.push(e), + fetch: async function (_url, opts) { + const url = new URL(_url as string); + const headers = opts?.headers as Record | undefined; + + switch (url.hostname) { + case "huggingface.co": { + // This is a token + return new Response( + JSON.stringify({ + casUrl: "https://cas.co", + accessToken: "boo", + exp: 1_000_000, + }) + ); + } + case "cas.co": { + // This is the reconstruction info + const range = headers?.["Range"]?.slice("bytes=".length).split("-").map(Number); + + const start = range?.[0] ?? 0; + // const end = range?.[1] ?? 
(totalSize - 1); + + return new Response( + JSON.stringify({ + terms: Array(1000) + .fill(0) + .map(() => ({ + hash: "test", + range: { + start: 0, + end: 2, + }, + unpacked_length: chunk1Content.length + chunk2Content.length, + })), + fetch_info: { + test: [ + { + url: "https://fetch.co", + range: { start: 0, end: 2 }, + url_range: { + start: 0, + end: mergedChunks.byteLength / 1000 - 1, + }, + }, + ], + }, + offset_into_first_range: start, + } satisfies ReconstructionInfo) + ); + } + case "fetch.co": { + fetchCount++; + return new Response( + new ReadableStream({ + pull(controller) { + controller.enqueue(new Uint8Array(mergedChunks)); + controller.close(); + }, + }) + //mergedChunks + ); + } + default: + throw new Error("Unhandled URL"); + } + }, + }); + + const startIndexes = [0, 5, 11, 6, 12, 100, 2000, totalSize - 12, totalSize - 2]; + + for (const index of startIndexes) { + console.log("slice", index); + const content = await blob.slice(index).text(); + expect(content.length).toBe(wholeText.length - index); + expect(content.slice(0, 1000)).toEqual(wholeText.slice(index).slice(0, 1000)); + expect(debugged.filter((e) => e.event === "read").length).toBe(2); // 1 read + 1 undefined + expect(fetchCount).toEqual(1); + + fetchCount = 0; + debugged.length = 0; + } + }); + + it("should load different slices when working with different XORBS", async () => { + const chunk1Content = "hello"; + const chunk2Content = "world!"; + const debugged: Array<{ event: "read" | string } & Record> = []; + + const chunks = Array(1000) + .fill(0) + .flatMap(() => [makeChunk(chunk1Content), makeChunk(chunk2Content)]); + + const mergedChunks = await new Blob(chunks).arrayBuffer(); + const wholeText = (chunk1Content + chunk2Content).repeat(1000); + + const totalSize = wholeText.length; + let fetchCount = 0; + + const blob = new XetBlob({ + hash: "test", + repo: { + name: "test", + type: "model", + }, + size: totalSize, + hubUrl: "https://huggingface.co", + listener: (e) => debugged.push(e), + fetch: async function (_url, opts) { + const url = new URL(_url as string); + const headers = opts?.headers as Record | undefined; + + switch (url.hostname) { + case "huggingface.co": { + // This is a token + return new Response( + JSON.stringify({ + casUrl: "https://cas.co", + accessToken: "boo", + exp: 1_000_000, + }) + ); + } + case "cas.co": { + // This is the reconstruction info + const range = headers?.["Range"]?.slice("bytes=".length).split("-").map(Number); + + const start = range?.[0] ?? 0; + // const end = range?.[1] ?? 
(totalSize - 1); + + return new Response( + JSON.stringify({ + terms: Array(1000) + .fill(0) + .map((_, i) => ({ + hash: "test" + (i % 2), + range: { + start: 0, + end: 2, + }, + unpacked_length: chunk1Content.length + chunk2Content.length, + })), + fetch_info: { + test0: [ + { + url: "https://fetch.co", + range: { start: 0, end: 2 }, + url_range: { + start: 0, + end: mergedChunks.byteLength - 1, + }, + }, + ], + test1: [ + { + url: "https://fetch.co", + range: { start: 0, end: 2 }, + url_range: { + start: 0, + end: mergedChunks.byteLength - 1, + }, + }, + ], + }, + offset_into_first_range: start, + } satisfies ReconstructionInfo) + ); + } + case "fetch.co": { + fetchCount++; + return new Response( + new ReadableStream({ + pull(controller) { + controller.enqueue(new Uint8Array(mergedChunks)); + controller.close(); + }, + }) + //mergedChunks + ); + } + default: + throw new Error("Unhandled URL"); + } + }, + }); + + const startIndexes = [0, 5, 11, 6, 12, 100, 2000, totalSize - 12, totalSize - 2]; + + for (const index of startIndexes) { + console.log("slice", index); + const content = await blob.slice(index).text(); + expect(content.length).toBe(wholeText.length - index); + expect(content.slice(0, 1000)).toEqual(wholeText.slice(index).slice(0, 1000)); + expect(debugged.filter((e) => e.event === "read").length).toBe(4); // 1 read + 1 undefined + expect(fetchCount).toEqual(2); + + fetchCount = 0; + debugged.length = 0; + } + }); + }); + + describe("loading one chunk at a time", () => { + it("should load different slices but not till the end", async () => { + const chunk1Content = "hello"; + const chunk2Content = "world!"; + const debugged: Array<{ event: "read" | string } & Record> = []; + + const chunks = Array(1000) + .fill(0) + .flatMap(() => [makeChunk(chunk1Content), makeChunk(chunk2Content)]); + + const totalChunkLength = sum(chunks.map((x) => x.byteLength)); + const wholeText = (chunk1Content + chunk2Content).repeat(1000); + + const totalSize = wholeText.length; + let fetchCount = 0; + + const blob = new XetBlob({ + hash: "test", + repo: { + name: "test", + type: "model", + }, + size: totalSize, + hubUrl: "https://huggingface.co", + listener: (e) => debugged.push(e), + fetch: async function (_url, opts) { + const url = new URL(_url as string); + const headers = opts?.headers as Record | undefined; + + switch (url.hostname) { + case "huggingface.co": { + // This is a token + return new Response( + JSON.stringify({ + casUrl: "https://cas.co", + accessToken: "boo", + exp: 1_000_000, + }) + ); + } + case "cas.co": { + // This is the reconstruction info + const range = headers?.["Range"]?.slice("bytes=".length).split("-").map(Number); + + const start = range?.[0] ?? 0; + // const end = range?.[1] ?? 
(totalSize - 1); + + return new Response( + JSON.stringify({ + terms: [ + { + hash: "test", + range: { + start: 0, + end: 2000, + }, + unpacked_length: chunk1Content.length + chunk2Content.length, + }, + ], + fetch_info: { + test: [ + { + url: "https://fetch.co", + range: { start: 0, end: 2000 }, + url_range: { + start: 0, + end: totalChunkLength - 1, + }, + }, + ], + }, + offset_into_first_range: start, + } satisfies ReconstructionInfo) + ); + } + case "fetch.co": { + fetchCount++; + return new Response( + new ReadableStream({ + pull(controller) { + for (const chunk of chunks) { + controller.enqueue(chunk); + } + controller.close(); + }, + }), + { + headers: { + "Content-Range": `bytes 0-${totalChunkLength - 1}/${totalChunkLength}`, + ETag: `"test"`, + "Content-Length": `${totalChunkLength}`, + }, + } + ); + } + default: + throw new Error("Unhandled URL"); + } + }, + }); + + const startIndexes = [0, 5, 11, 6, 12, 100, 2000]; + + for (const index of startIndexes) { + console.log("slice", index); + const content = await blob.slice(index, 4000).text(); + expect(content.length).toBe(4000 - index); + expect(content.slice(0, 1000)).toEqual(wholeText.slice(index).slice(0, 1000)); + expect(fetchCount).toEqual(1); + + fetchCount = 0; + debugged.length = 0; + } + }); + + it("should load different slices", async () => { + const chunk1Content = "hello"; + const chunk2Content = "world!"; + const debugged: Array<{ event: "read" | string } & Record> = []; + + const chunks = Array(1000) + .fill(0) + .flatMap(() => [makeChunk(chunk1Content), makeChunk(chunk2Content)]); + + const totalChunkLength = sum(chunks.map((x) => x.byteLength)); + const wholeText = (chunk1Content + chunk2Content).repeat(1000); + + const totalSize = wholeText.length; + let fetchCount = 0; + + const blob = new XetBlob({ + hash: "test", + repo: { + name: "test", + type: "model", + }, + size: totalSize, + hubUrl: "https://huggingface.co", + listener: (e) => debugged.push(e), + fetch: async function (_url, opts) { + const url = new URL(_url as string); + const headers = opts?.headers as Record | undefined; + + switch (url.hostname) { + case "huggingface.co": { + // This is a token + return new Response( + JSON.stringify({ + casUrl: "https://cas.co", + accessToken: "boo", + exp: 1_000_000, + }) + ); + } + case "cas.co": { + // This is the reconstruction info + const range = headers?.["Range"]?.slice("bytes=".length).split("-").map(Number); + + const start = range?.[0] ?? 0; + // const end = range?.[1] ?? 
(totalSize - 1); + + return new Response( + JSON.stringify({ + terms: Array(1000) + .fill(0) + .map(() => ({ + hash: "test", + range: { + start: 0, + end: 2, + }, + unpacked_length: chunk1Content.length + chunk2Content.length, + })), + fetch_info: { + test: [ + { + url: "https://fetch.co", + range: { start: 0, end: 2 }, + url_range: { + start: 0, + end: totalChunkLength - 1, + }, + }, + ], + }, + offset_into_first_range: start, + } satisfies ReconstructionInfo) + ); + } + case "fetch.co": { + fetchCount++; + return new Response( + new ReadableStream({ + pull(controller) { + for (const chunk of chunks) { + controller.enqueue(chunk); + } + controller.close(); + }, + }) + ); + } + default: + throw new Error("Unhandled URL"); + } + }, + }); + + const startIndexes = [0, 5, 11, 6, 12, 100, 2000, totalSize - 12, totalSize - 2]; + + for (const index of startIndexes) { + console.log("slice", index); + const content = await blob.slice(index).text(); + expect(content.length).toBe(wholeText.length - index); + expect(content.slice(0, 1000)).toEqual(wholeText.slice(index).slice(0, 1000)); + expect(debugged.filter((e) => e.event === "read").length).toBe(2000 + 1); // 1 read for each chunk + 1 undefined + expect(fetchCount).toEqual(1); + + fetchCount = 0; + debugged.length = 0; + } + }); + }); + + describe("loading at 29 bytes intervals", () => { + it("should load different slices", async () => { + const chunk1Content = "hello"; + const chunk2Content = "world!"; + const debugged: Array<{ event: "read" | string } & Record> = []; + + const chunks = Array(1000) + .fill(0) + .flatMap(() => [makeChunk(chunk1Content), makeChunk(chunk2Content)]); + const mergedChunks = await new Blob(chunks).arrayBuffer(); + const splitChunks = splitChunk(new Uint8Array(mergedChunks), 29); + + const totalChunkLength = sum(chunks.map((x) => x.byteLength)); + const wholeText = (chunk1Content + chunk2Content).repeat(1000); + + const totalSize = wholeText.length; + let fetchCount = 0; + + const blob = new XetBlob({ + hash: "test", + repo: { + name: "test", + type: "model", + }, + size: totalSize, + hubUrl: "https://huggingface.co", + listener: (e) => debugged.push(e), + fetch: async function (_url, opts) { + const url = new URL(_url as string); + const headers = opts?.headers as Record | undefined; + + switch (url.hostname) { + case "huggingface.co": { + // This is a token + return new Response( + JSON.stringify({ + casUrl: "https://cas.co", + accessToken: "boo", + exp: 1_000_000, + }) + ); + } + case "cas.co": { + // This is the reconstruction info + const range = headers?.["Range"]?.slice("bytes=".length).split("-").map(Number); + + const start = range?.[0] ?? 0; + // const end = range?.[1] ?? 
(totalSize - 1); + + return new Response( + JSON.stringify({ + terms: Array(1000) + .fill(0) + .map(() => ({ + hash: "test", + range: { + start: 0, + end: 2, + }, + unpacked_length: chunk1Content.length + chunk2Content.length, + })), + fetch_info: { + test: [ + { + url: "https://fetch.co", + range: { start: 0, end: 2 }, + url_range: { + start: 0, + end: totalChunkLength - 1, + }, + }, + ], + }, + offset_into_first_range: start, + } satisfies ReconstructionInfo) + ); + } + case "fetch.co": { + fetchCount++; + return new Response( + new ReadableStream({ + pull(controller) { + for (const chunk of splitChunks) { + controller.enqueue(chunk); + } + controller.close(); + }, + }) + ); + } + default: + throw new Error("Unhandled URL"); + } + }, + }); + + const startIndexes = [0, 5, 11, 6, 12, 100, 2000, totalSize - 12, totalSize - 2]; + + for (const index of startIndexes) { + console.log("slice", index); + const content = await blob.slice(index).text(); + expect(content.length).toBe(wholeText.length - index); + expect(content.slice(0, 1000)).toEqual(wholeText.slice(index).slice(0, 1000)); + expect(debugged.filter((e) => e.event === "read").length).toBe(Math.ceil(totalChunkLength / 29) + 1); // 1 read for each chunk + 1 undefined + expect(fetchCount).toEqual(1); + + fetchCount = 0; + debugged.length = 0; + } + }); + }); + + describe("loading one byte at a time", () => { + it("should load different slices", async () => { + const chunk1Content = "hello"; + const chunk2Content = "world!"; + const debugged: Array<{ event: "read" | string } & Record> = []; + + const chunks = Array(100) + .fill(0) + .flatMap(() => [makeChunk(chunk1Content), makeChunk(chunk2Content)]) + .flatMap((x) => splitChunk(x, 1)); + + const totalChunkLength = sum(chunks.map((x) => x.byteLength)); + const wholeText = (chunk1Content + chunk2Content).repeat(100); + + const totalSize = wholeText.length; + let fetchCount = 0; + + const blob = new XetBlob({ + hash: "test", + repo: { + name: "test", + type: "model", + }, + size: totalSize, + hubUrl: "https://huggingface.co", + listener: (e) => debugged.push(e), + fetch: async function (_url, opts) { + const url = new URL(_url as string); + const headers = opts?.headers as Record | undefined; + + switch (url.hostname) { + case "huggingface.co": { + // This is a token + return new Response( + JSON.stringify({ + casUrl: "https://cas.co", + accessToken: "boo", + exp: 1_000_000, + }) + ); + } + case "cas.co": { + // This is the reconstruction info + const range = headers?.["Range"]?.slice("bytes=".length).split("-").map(Number); + + const start = range?.[0] ?? 0; + // const end = range?.[1] ?? 
(totalSize - 1); + + return new Response( + JSON.stringify({ + terms: Array(100) + .fill(0) + .map(() => ({ + hash: "test", + range: { + start: 0, + end: 2, + }, + unpacked_length: chunk1Content.length + chunk2Content.length, + })), + fetch_info: { + test: [ + { + url: "https://fetch.co", + range: { start: 0, end: 2 }, + url_range: { + start: 0, + end: totalChunkLength - 1, + }, + }, + ], + }, + offset_into_first_range: start, + } satisfies ReconstructionInfo) + ); + } + case "fetch.co": { + fetchCount++; + return new Response( + new ReadableStream({ + pull(controller) { + for (const chunk of chunks) { + controller.enqueue(chunk); + } + controller.close(); + }, + }) + ); + } + default: + throw new Error("Unhandled URL"); + } + }, + }); + + const startIndexes = [0, 5, 11, 6, 12, 100, totalSize - 12, totalSize - 2]; + + for (const index of startIndexes) { + console.log("slice", index); + const content = await blob.slice(index).text(); + expect(content.length).toBe(wholeText.length - index); + expect(content.slice(0, 1000)).toEqual(wholeText.slice(index).slice(0, 1000)); + expect(debugged.filter((e) => e.event === "read").length).toBe(totalChunkLength + 1); // 1 read for each chunk + 1 undefined + expect(fetchCount).toEqual(1); + + fetchCount = 0; + debugged.length = 0; + } + }); + }); + }); }); + +function makeChunk(content: string) { + const encoded = new TextEncoder().encode(content); + + const array = new Uint8Array(encoded.length + 8); + + const dataView = new DataView(array.buffer); + dataView.setUint8(0, 0); // version + dataView.setUint8(1, encoded.length % 256); // Compressed length + dataView.setUint8(2, (encoded.length >> 8) % 256); // Compressed length + dataView.setUint8(3, (encoded.length >> 16) % 256); // Compressed length + dataView.setUint8(4, 0); // Compression scheme + dataView.setUint8(5, encoded.length % 256); // Uncompressed length + dataView.setUint8(6, (encoded.length >> 8) % 256); // Uncompressed length + dataView.setUint8(7, (encoded.length >> 16) % 256); // Uncompressed length + + array.set(encoded, 8); + + return array; +} + +function splitChunk(chunk: Uint8Array, toLength: number): Uint8Array[] { + const dataView = new DataView(chunk.buffer); + return new Array(Math.ceil(chunk.byteLength / toLength)).fill(0).map((_, i) => { + const array = new Uint8Array(Math.min(toLength, chunk.byteLength - i * toLength)); + + for (let j = 0; j < array.byteLength; j++) { + array[j] = dataView.getUint8(i * toLength + j); + } + return array; + }); +} diff --git a/packages/hub/src/utils/XetBlob.ts b/packages/hub/src/utils/XetBlob.ts index a316c7a4b6..ca91e5cbab 100644 --- a/packages/hub/src/utils/XetBlob.ts +++ b/packages/hub/src/utils/XetBlob.ts @@ -4,6 +4,7 @@ import type { CredentialsParams, RepoDesignation, RepoId } from "../types/public import { checkCredentials } from "./checkCredentials"; import { toRepoId } from "./toRepoId"; import { decompress as lz4_decompress } from "../vendor/lz4js"; +import { RangeList } from "./RangeList"; const JWT_SAFETY_PERIOD = 60_000; const JWT_CACHE_SIZE = 1_000; @@ -17,9 +18,11 @@ type XetBlobCreateOptions = { hash: string; hubUrl?: string; size: number; + listener?: (arg: { event: "read" } | { event: "progress"; progress: { read: number; total: number } }) => void; + internalLogging?: boolean; } & Partial; -interface ReconstructionInfo { +export interface ReconstructionInfo { /** * List of CAS blocks */ @@ -41,7 +44,11 @@ interface ReconstructionInfo { url: string; /** Chunk range */ range: { start: number; end: number }; - /** Byte range, 
when making the call to the URL */ + /** + * Byte range, when making the call to the URL. + * + * We assume that we're given non-overlapping ranges for each hash + */ url_range: { start: number; end: number }; }> >; @@ -83,7 +90,9 @@ export class XetBlob extends Blob { hash: string; start = 0; end = 0; + internalLogging = false; reconstructionInfo: ReconstructionInfo | undefined; + listener: XetBlobCreateOptions["listener"]; constructor(params: XetBlobCreateOptions) { super([]); @@ -94,6 +103,8 @@ export class XetBlob extends Blob { this.hubUrl = params.hubUrl ?? HUB_URL; this.end = params.size; this.hash = params.hash; + this.listener = params.listener; + this.internalLogging = params.internalLogging ?? false; this.hubUrl; } @@ -114,6 +125,8 @@ export class XetBlob extends Blob { blob.start = this.start; blob.end = this.end; blob.reconstructionInfo = this.reconstructionInfo; + blob.listener = this.listener; + blob.internalLogging = this.internalLogging; return blob; } @@ -145,7 +158,7 @@ export class XetBlob extends Blob { this.#reconstructionInfoPromise = (async () => { const connParams = await getAccessToken(this.repoId, this.accessToken, this.fetch, this.hubUrl); - // console.log( + // debug( // `curl '${connParams.casUrl}/reconstruction/${this.hash}' -H 'Authorization: Bearer ${connParams.accessToken}'` // ); @@ -173,6 +186,24 @@ export class XetBlob extends Blob { await this.#loadReconstructionInfo(); } + const rangeLists = new Map>(); + + if (!this.reconstructionInfo) { + throw new Error("Failed to load reconstruction info"); + } + + for (const term of this.reconstructionInfo.terms) { + let rangeList = rangeLists.get(term.hash); + if (!rangeList) { + rangeList = new RangeList(); + rangeLists.set(term.hash, rangeList); + } + + rangeList.add(term.range.start, term.range.end); + } + const listener = this.listener; + const log = this.internalLogging ? (...args: unknown[]) => console.log(...args) : () => {}; + async function* readData( reconstructionInfo: ReconstructionInfo, customFetch: typeof fetch, @@ -187,6 +218,46 @@ export class XetBlob extends Blob { break; } + const rangeList = rangeLists.get(term.hash); + if (!rangeList) { + throw new Error(`Failed to find range list for term ${term.hash}`); + } + + { + const termRanges = rangeList.getRanges(term.range.start, term.range.end); + + if (termRanges.every((range) => range.data)) { + log("all data available for term", term.hash, readBytesToSkip); + rangeLoop: for (const range of termRanges) { + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + for (let chunk of range.data!) { + if (readBytesToSkip) { + const skipped = Math.min(readBytesToSkip, chunk.byteLength); + chunk = chunk.slice(skipped); + readBytesToSkip -= skipped; + if (!chunk.byteLength) { + continue; + } + } + if (chunk.byteLength > maxBytes - totalBytesRead) { + chunk = chunk.slice(0, maxBytes - totalBytesRead); + } + totalBytesRead += chunk.byteLength; + // The stream consumer can decide to transfer ownership of the chunk, so we need to return a clone + // if there's more than one range for the same term + yield range.refCount > 1 ? 
chunk.slice() : chunk; + listener?.({ event: "progress", progress: { read: totalBytesRead, total: maxBytes } }); + + if (totalBytesRead >= maxBytes) { + break rangeLoop; + } + } + } + rangeList.remove(term.range.start, term.range.end); + continue; + } + } + const fetchInfo = reconstructionInfo.fetch_info[term.hash].find( (info) => info.range.start <= term.range.start && info.range.end >= term.range.end ); @@ -197,6 +268,10 @@ export class XetBlob extends Blob { ); } + log("term", term); + log("fetchinfo", fetchInfo); + log("readBytesToSkip", readBytesToSkip); + let resp = await customFetch(fetchInfo.url, { headers: { Range: `bytes=${fetchInfo.url_range.start}-${fetchInfo.url_range.end}`, @@ -217,131 +292,165 @@ export class XetBlob extends Blob { throw await createApiError(resp); } + log( + "expected content length", + resp.headers.get("content-length"), + "range", + fetchInfo.url_range, + resp.headers.get("content-range") + ); + const reader = resp.body?.getReader(); if (!reader) { throw new Error("Failed to get reader from response body"); } let done = false; - let chunksToSkip = term.range.start - fetchInfo.range.start; - let chunksToRead = term.range.end - term.range.start; - let bytesToSkip = 0; + let chunkIndex = fetchInfo.range.start; + const ranges = rangeList.getRanges(fetchInfo.range.start, fetchInfo.range.end); let leftoverBytes: Uint8Array | undefined = undefined; + let totalFetchBytes = 0; - readChunks: while (!done && totalBytesRead < maxBytes) { + fetchData: while (!done && totalBytesRead < maxBytes) { const result = await reader.read(); + listener?.({ event: "read" }); + done = result.done; - if (result.value) { - while (totalBytesRead < maxBytes && chunksToRead) { - if (bytesToSkip) { - if (bytesToSkip >= result.value.length) { - bytesToSkip -= result.value.length; - continue readChunks; - } - result.value = result.value.slice(bytesToSkip); - bytesToSkip = 0; - } - if (leftoverBytes) { - result.value = new Uint8Array([...leftoverBytes, ...result.value]); - leftoverBytes = undefined; - } - if (result.value.length < 8) { - // We need 8 bytes to parse the chunk header - leftoverBytes = result.value; - continue readChunks; - } + log("read", result.value?.byteLength, "bytes", "total read", totalBytesRead, "toSkip", readBytesToSkip); + + if (!result.value) { + log("no data in result, cancelled", result); + continue; + } - const header = new DataView(result.value.buffer, result.value.byteOffset, CHUNK_HEADER_BYTES); - const chunkHeader: ChunkHeader = { - version: header.getUint8(0), - compressed_length: header.getUint8(1) | (header.getUint8(2) << 8) | (header.getUint8(3) << 16), - compression_scheme: header.getUint8(4), - uncompressed_length: header.getUint8(5) | (header.getUint8(6) << 8) | (header.getUint8(7) << 16), - }; + totalFetchBytes += result.value.byteLength; - if (chunkHeader.version !== 0) { - throw new Error(`Unsupported chunk version ${chunkHeader.version}`); - } + if (leftoverBytes) { + result.value = new Uint8Array([...leftoverBytes, ...result.value]); + leftoverBytes = undefined; + } - if ( - chunkHeader.compression_scheme !== CompressionScheme.None && - chunkHeader.compression_scheme !== CompressionScheme.LZ4 && - chunkHeader.compression_scheme !== CompressionScheme.ByteGroupingLZ4 - ) { - throw new Error( - `Unsupported compression scheme ${ - compressionSchemeLabels[chunkHeader.compression_scheme] ?? 
chunkHeader.compression_scheme - }` - ); - } + while (totalBytesRead < maxBytes && result.value.byteLength) { + if (result.value.byteLength < 8) { + // We need 8 bytes to parse the chunk header + leftoverBytes = result.value; + continue fetchData; + } - if (chunksToSkip) { - chunksToSkip--; - result.value = result.value.slice(CHUNK_HEADER_BYTES); - bytesToSkip = chunkHeader.compressed_length; - continue; - } + const header = new DataView(result.value.buffer, result.value.byteOffset, CHUNK_HEADER_BYTES); + const chunkHeader: ChunkHeader = { + version: header.getUint8(0), + compressed_length: header.getUint8(1) | (header.getUint8(2) << 8) | (header.getUint8(3) << 16), + compression_scheme: header.getUint8(4), + uncompressed_length: header.getUint8(5) | (header.getUint8(6) << 8) | (header.getUint8(7) << 16), + }; - if (readBytesToSkip >= chunkHeader.uncompressed_length) { - readBytesToSkip -= chunkHeader.uncompressed_length; - result.value = result.value.slice(CHUNK_HEADER_BYTES); - bytesToSkip = chunkHeader.compressed_length; - chunksToRead--; - continue; - } + log("chunk header", chunkHeader, "to skip", readBytesToSkip); - if (result.value.length < chunkHeader.compressed_length + CHUNK_HEADER_BYTES) { - // We need more data to read the full chunk - leftoverBytes = result.value; - continue readChunks; - } + if (chunkHeader.version !== 0) { + throw new Error(`Unsupported chunk version ${chunkHeader.version}`); + } + + if ( + chunkHeader.compression_scheme !== CompressionScheme.None && + chunkHeader.compression_scheme !== CompressionScheme.LZ4 && + chunkHeader.compression_scheme !== CompressionScheme.ByteGroupingLZ4 + ) { + throw new Error( + `Unsupported compression scheme ${ + compressionSchemeLabels[chunkHeader.compression_scheme] ?? chunkHeader.compression_scheme + }` + ); + } - result.value = result.value.slice(CHUNK_HEADER_BYTES); - - const uncompressed = - chunkHeader.compression_scheme === CompressionScheme.LZ4 - ? lz4_decompress( - result.value.slice(0, chunkHeader.compressed_length), - chunkHeader.uncompressed_length - ) - : chunkHeader.compression_scheme === CompressionScheme.ByteGroupingLZ4 - ? bg4_regoup_bytes( - lz4_decompress( - result.value.slice(0, chunkHeader.compressed_length), - chunkHeader.uncompressed_length - ) - ) - : result.value.slice(0, chunkHeader.compressed_length); - - let bytesToYield: Uint8Array; + if (result.value.byteLength < chunkHeader.compressed_length + CHUNK_HEADER_BYTES) { + // We need more data to read the full chunk + leftoverBytes = result.value; + continue fetchData; + } + + result.value = result.value.slice(CHUNK_HEADER_BYTES); + + let uncompressed = + chunkHeader.compression_scheme === CompressionScheme.LZ4 + ? lz4_decompress(result.value.slice(0, chunkHeader.compressed_length), chunkHeader.uncompressed_length) + : chunkHeader.compression_scheme === CompressionScheme.ByteGroupingLZ4 + ? bg4_regoup_bytes( + lz4_decompress( + result.value.slice(0, chunkHeader.compressed_length), + chunkHeader.uncompressed_length + ) + ) + : result.value.slice(0, chunkHeader.compressed_length); + + const range = ranges.find((range) => chunkIndex >= range.start && chunkIndex < range.end); + const shouldYield = chunkIndex >= term.range.start && chunkIndex < term.range.end; + const minRefCountToStore = shouldYield ? 
2 : 1; + let stored = false; + + // Assuming non-overlapping fetch_info ranges for the same hash + if (range && range.refCount >= minRefCountToStore) { + range.data ??= []; + range.data.push(uncompressed); + stored = true; + } + + if (shouldYield) { if (readBytesToSkip) { - const remainingBytes = Math.min(uncompressed.length - readBytesToSkip, maxBytes - totalBytesRead); - bytesToYield = uncompressed.slice(readBytesToSkip, readBytesToSkip + remainingBytes); - readBytesToSkip = 0; - } else { - bytesToYield = uncompressed.slice(0, Math.min(uncompressed.length, maxBytes - totalBytesRead)); + const skipped = Math.min(readBytesToSkip, uncompressed.byteLength); + uncompressed = uncompressed.slice(readBytesToSkip); + readBytesToSkip -= skipped; } - totalBytesRead += bytesToYield.length; - yield bytesToYield; - chunksToRead--; + if (uncompressed.byteLength > maxBytes - totalBytesRead) { + uncompressed = uncompressed.slice(0, maxBytes - totalBytesRead); + } - result.value = result.value.slice(chunkHeader.compressed_length); + if (uncompressed.byteLength) { + log( + "yield", + uncompressed.byteLength, + "bytes", + result.value.byteLength, + "total read", + totalBytesRead, + stored + ); + totalBytesRead += uncompressed.byteLength; + yield stored ? uncompressed.slice() : uncompressed; + listener?.({ event: "progress", progress: { read: totalBytesRead, total: maxBytes } }); + } } + + chunkIndex++; + result.value = result.value.slice(chunkHeader.compressed_length); } } + if ( + done && + totalBytesRead < maxBytes && + totalFetchBytes < fetchInfo.url_range.end - fetchInfo.url_range.start + 1 + ) { + log("done", done, "total read", totalBytesRead, maxBytes, totalFetchBytes); + log("failed to fetch all data for term", term.hash); + throw new Error( + `Failed to fetch all data for term ${term.hash}, fetched ${totalFetchBytes} bytes out of ${ + fetchInfo.url_range.end - fetchInfo.url_range.start + 1 + }` + ); + } + + log("done", done, "total read", totalBytesRead, maxBytes, totalFetchBytes); + // Release the reader + log("cancel reader"); await reader.cancel(); } } - if (!this.reconstructionInfo) { - throw new Error("Failed to load reconstruction info"); - } - const iterator = readData( this.reconstructionInfo, this.fetch, @@ -437,33 +546,33 @@ export function bg4_regoup_bytes(bytes: Uint8Array): Uint8Array { // todo: optimize to do it in-place - const split = Math.floor(bytes.length / 4); - const rem = bytes.length % 4; + const split = Math.floor(bytes.byteLength / 4); + const rem = bytes.byteLength % 4; const g1_pos = split + (rem >= 1 ? 1 : 0); const g2_pos = g1_pos + split + (rem >= 2 ? 1 : 0); const g3_pos = g2_pos + split + (rem == 3 ? 
1 : 0); - const ret = new Uint8Array(bytes.length); - for (let i = 0, j = 0; i < bytes.length; i += 4, j++) { + const ret = new Uint8Array(bytes.byteLength); + for (let i = 0, j = 0; i < bytes.byteLength; i += 4, j++) { ret[i] = bytes[j]; } - for (let i = 1, j = g1_pos; i < bytes.length; i += 4, j++) { + for (let i = 1, j = g1_pos; i < bytes.byteLength; i += 4, j++) { ret[i] = bytes[j]; } - for (let i = 2, j = g2_pos; i < bytes.length; i += 4, j++) { + for (let i = 2, j = g2_pos; i < bytes.byteLength; i += 4, j++) { ret[i] = bytes[j]; } - for (let i = 3, j = g3_pos; i < bytes.length; i += 4, j++) { + for (let i = 3, j = g3_pos; i < bytes.byteLength; i += 4, j++) { ret[i] = bytes[j]; } return ret; // alternative implementation (to benchmark which one is faster) - // for (let i = 0; i < bytes.length - 3; i += 4) { + // for (let i = 0; i < bytes.byteLength - 3; i += 4) { // ret[i] = bytes[i / 4]; // ret[i + 1] = bytes[g1_pos + i / 4]; // ret[i + 2] = bytes[g2_pos + i / 4]; @@ -471,14 +580,14 @@ export function bg4_regoup_bytes(bytes: Uint8Array): Uint8Array { // } // if (rem === 1) { - // ret[bytes.length - 1] = bytes[g1_pos - 1]; + // ret[bytes.byteLength - 1] = bytes[g1_pos - 1]; // } else if (rem === 2) { - // ret[bytes.length - 2] = bytes[g1_pos - 1]; - // ret[bytes.length - 1] = bytes[g2_pos - 1]; + // ret[bytes.byteLength - 2] = bytes[g1_pos - 1]; + // ret[bytes.byteLength - 1] = bytes[g2_pos - 1]; // } else if (rem === 3) { - // ret[bytes.length - 3] = bytes[g1_pos - 1]; - // ret[bytes.length - 2] = bytes[g2_pos - 1]; - // ret[bytes.length - 1] = bytes[g3_pos - 1]; + // ret[bytes.byteLength - 3] = bytes[g1_pos - 1]; + // ret[bytes.byteLength - 2] = bytes[g2_pos - 1]; + // ret[bytes.byteLength - 1] = bytes[g3_pos - 1]; // } } diff --git a/packages/hub/vitest-browser.config.mts b/packages/hub/vitest-browser.config.mts index a8a0091d65..e2e1e87f98 100644 --- a/packages/hub/vitest-browser.config.mts +++ b/packages/hub/vitest-browser.config.mts @@ -2,6 +2,7 @@ import { configDefaults, defineConfig } from "vitest/config"; export default defineConfig({ test: { + testTimeout: 30_000, exclude: [ ...configDefaults.exclude, "src/utils/FileBlob.spec.ts", diff --git a/packages/hub/vitest.config.mts b/packages/hub/vitest.config.mts new file mode 100644 index 0000000000..88747caaed --- /dev/null +++ b/packages/hub/vitest.config.mts @@ -0,0 +1,7 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + testTimeout: 30_000, + }, +});
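To make the intent of the XetBlob changes concrete, here is a rough, simplified sketch of the caching pattern the new readData generator builds on top of RangeList: every term's chunk range is registered up front so ranges shared by several terms get a refCount above 1, decoded chunks are kept on a range while another term still needs them, and a term whose ranges are fully populated is served from the cache and then removed. onChunkDecoded and serveFromCache are hypothetical helper names used only for illustration; the real logic lives inline in readData and additionally handles byte skipping, maxBytes and the minRefCountToStore rule.

import { RangeList } from "./RangeList";

// Register every term's chunk range first, so ranges shared by several terms get a higher refCount.
const cache = new RangeList<Uint8Array[]>();
const terms = [
	{ start: 0, end: 2 },
	{ start: 0, end: 2 },
];
for (const term of terms) {
	cache.add(term.start, term.end);
}

// After decoding a chunk for one term, keep it on the matching range only when
// another term still references that range (refCount > 1).
function onChunkDecoded(term: { start: number; end: number }, chunkIndex: number, chunk: Uint8Array): void {
	const range = cache
		.getRanges(term.start, term.end)
		.find((r) => chunkIndex >= r.start && chunkIndex < r.end);
	if (range && range.refCount > 1) {
		range.data ??= [];
		range.data.push(chunk);
	}
}

// A later term whose ranges all carry data can be served from the cache; removing
// the term afterwards decrements the refCounts so the cached chunks are released.
function serveFromCache(term: { start: number; end: number }): Uint8Array[] | undefined {
	const ranges = cache.getRanges(term.start, term.end);
	if (!ranges.every((r) => r.data)) {
		return undefined;
	}
	const chunks = ranges.flatMap((r) => r.data ?? []);
	cache.remove(term.start, term.end);
	return chunks;
}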