Skip to content

Commit f8be34d

Browse files
committed
more failures
1 parent 9eca789 commit f8be34d

File tree

2 files changed

+52
-2
lines changed

2 files changed

+52
-2
lines changed

packages/hub/src/utils/XetBlob.spec.ts

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,55 @@ describe("XetBlob", () => {
101101
expect(new Uint8Array(xetDownload)).toEqual(new Uint8Array(bridgeDownload));
102102
});
103103

104+
it("should load text correctly when offset_into_range starts in a chunk further than the first", async () => {
105+
const blob = new XetBlob({
106+
repo: {
107+
type: "model",
108+
name: "celinah/xet-experiments",
109+
},
110+
hash: "794efea76d8cb372bbe1385d9e51c3384555f3281e629903ecb6abeff7d54eec",
111+
size: 62_914_580,
112+
});
113+
114+
// Reconstruction info
115+
// {
116+
// "offset_into_first_range": 600000,
117+
// "terms":
118+
// [
119+
// {
120+
// "hash": "be748f77930d5929cabd510a15f2c30f2f460b639804ef79dea46affa04fd8b2",
121+
// "unpacked_length": 655360,
122+
// "range": { "start": 0, "end": 5 },
123+
// },
124+
// {
125+
// "hash": "be748f77930d5929cabd510a15f2c30f2f460b639804ef79dea46affa04fd8b2",
126+
// "unpacked_length": 655360,
127+
// "range": { "start": 0, "end": 5 },
128+
// },
129+
// ],
130+
// "fetch_info":
131+
// {
132+
// "be748f77930d5929cabd510a15f2c30f2f460b639804ef79dea46affa04fd8b2":
133+
// [
134+
// {
135+
// "range": { "start": 0, "end": 5 },
136+
// "url": "...",
137+
// "url_range": { "start": 0, "end": 2839 },
138+
// },
139+
// ],
140+
// },
141+
// }
142+
143+
const text = await blob.slice(600_000, 700_000).text();
144+
const bridgeDownload = await fetch("https://huggingface.co/celinah/xet-experiments/resolve/main/large_text.txt", {
145+
headers: {
146+
Range: "bytes=600000-699999",
147+
},
148+
}).then((res) => res.text());
149+
150+
expect(text.length).toBe(bridgeDownload.length);
151+
});
152+
104153
describe("bg4_regoup_bytes", () => {
105154
it("should regroup bytes when the array is %4 length", () => {
106155
expect(bg4_regoup_bytes(new Uint8Array([1, 5, 2, 6, 3, 7, 4, 8]))).toEqual(

packages/hub/src/utils/XetBlob.ts

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -183,6 +183,7 @@ export class XetBlob extends Blob {
183183
let readBytesToSkip = reconstructionInfo.offset_into_first_range;
184184

185185
for (const term of reconstructionInfo.terms) {
186+
// console.log("term", term, readBytesToSkip, totalBytesRead);
186187
if (totalBytesRead >= maxBytes) {
187188
break;
188189
}
@@ -277,14 +278,14 @@ export class XetBlob extends Blob {
277278

278279
if (chunksToSkip) {
279280
chunksToSkip--;
280-
leftoverBytes = result.value.slice(CHUNK_HEADER_BYTES);
281+
result.value = result.value.slice(CHUNK_HEADER_BYTES);
281282
bytesToSkip = chunkHeader.compressed_length;
282283
continue;
283284
}
284285

285286
if (readBytesToSkip >= chunkHeader.uncompressed_length) {
286287
readBytesToSkip -= chunkHeader.uncompressed_length;
287-
leftoverBytes = result.value.slice(CHUNK_HEADER_BYTES);
288+
result.value = result.value.slice(CHUNK_HEADER_BYTES);
288289
bytesToSkip = chunkHeader.compressed_length;
289290
chunksToRead--;
290291
continue;

0 commit comments

Comments
 (0)