Skip to content

Commit 6cf55bf

Browse files
committed
fix: handle HuggingFace rate limit response
1 parent e7c1604 commit 6cf55bf

File tree

3 files changed

+85
-6
lines changed

3 files changed

+85
-6
lines changed

src/utils/parseModelUri.ts

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import filenamify from "filenamify";
2+
import prettyMilliseconds from "pretty-ms";
23
import {normalizeGgufDownloadUrl} from "../gguf/utils/normalizeGgufDownloadUrl.js";
34
import {getFilenameForBinarySplitGgufPartUrls, resolveBinarySplitGgufPartUrls} from "../gguf/utils/resolveBinarySplitGgufPartUrls.js";
45
import {createSplitPartFilename, getGgufSplitPartsInfo} from "../gguf/utils/resolveSplitGgufParts.js";
@@ -7,9 +8,19 @@ import {isUrl} from "./isUrl.js";
78
import {ModelFileAccessTokens, resolveModelFileAccessTokensTryHeaders} from "./modelFileAccessTokens.js";
89
import {isHuggingFaceUrl, ModelDownloadEndpoints, resolveHuggingFaceEndpoint} from "./modelDownloadEndpoints.js";
910
import {parseModelFileName} from "./parseModelFileName.js";
11+
import {getConsoleLogPrefix} from "./getConsoleLogPrefix.js";
12+
import {signalSleep} from "./signalSleep.js";
1013

1114
const defaultHuggingFaceBranch = "main";
1215
const defaultHuggingFaceFileQuantization = "Q4_K_M";
16+
/**
 * Tuning values for handling HTTP 429 (rate limit) responses when fetching
 * a Hugging Face model manifest.
 *
 * - `wait.min` / `wait.max`: lower and upper bounds, in milliseconds, that the
 *   wait time derived from the `t` field of the `ratelimit` response header
 *   is clamped to (`max` is 5 minutes).
 * - `wait.default`: wait time, in milliseconds, used when the response has no
 *   parsable `t` field in its `ratelimit` header.
 * - `retries`: upper bound on the number of extra passes over the headers to
 *   try that a rate limit response may schedule.
 */
const huggingFaceRateLimit = {
    wait: {
        min: 1000,
        max: 60 * 5 * 1000,
        default: 1000
    },
    retries: 4
} as const;
1324

1425
export const genericFilePartNumber = "{:\n{number}\n:}" as const;
1526

@@ -208,9 +219,12 @@ async function fetchHuggingFaceModelManifest({
208219
{},
209220
await resolveModelFileAccessTokensTryHeaders(manifestUrl, tokens, endpoints)
210221
];
222+
let rateLimitPendingRetries = 0;
211223

212-
while (headersToTry.length > 0) {
213-
const headers = headersToTry.shift();
224+
for (let i = 0; i < headersToTry.length * (1 + rateLimitPendingRetries); i++) {
225+
const headers = headersToTry[i % headersToTry.length];
226+
if (headers == null)
227+
continue;
214228

215229
let response: Awaited<ReturnType<typeof fetch>> | undefined;
216230
try {
@@ -226,10 +240,52 @@ async function fetchHuggingFaceModelManifest({
226240
signal
227241
});
228242
} catch (err) {
243+
if (signal?.aborted && err === signal?.reason)
244+
throw err;
245+
229246
throw new Error(`Failed to fetch manifest for resolving URI ${JSON.stringify(fullUri)}: ${err}`);
230247
}
231248

232-
if ((response.status >= 500 || response.status === 429 || response.status === 401) && headersToTry.length > 0)
249+
if (response.status === 429) {
250+
const doneRetires = Math.floor(i / headersToTry.length);
251+
rateLimitPendingRetries = Math.min(doneRetires + 1, huggingFaceRateLimit.retries);
252+
253+
if (i % headersToTry.length === headersToTry.length - 1 && i !== headersToTry.length * (1 + rateLimitPendingRetries) - 1) {
254+
const [,secondsUntilResetString] = response.headers.get("ratelimit")
255+
?.split(";")
256+
.map((part) => part.split("="))
257+
.find(([key, value]) => key === "t" && !isNaN(Number(value))) ?? [];
258+
259+
if (secondsUntilResetString != null) {
260+
const timeToWait = Math.min(
261+
huggingFaceRateLimit.wait.max,
262+
Math.max(
263+
huggingFaceRateLimit.wait.min,
264+
Number(secondsUntilResetString) * 1000
265+
)
266+
);
267+
console.info(
268+
getConsoleLogPrefix() +
269+
"Received a rate limit response from Hugging Face, waiting for " + (
270+
prettyMilliseconds(timeToWait, {
271+
keepDecimalsOnWholeSeconds: true,
272+
secondsDecimalDigits: 0,
273+
compact: true,
274+
verbose: true
275+
})
276+
) + " before retrying..."
277+
);
278+
await signalSleep(timeToWait, signal);
279+
} else
280+
await signalSleep(huggingFaceRateLimit.wait.default, signal);
281+
}
282+
283+
continue;
284+
}
285+
286+
if ((response.status >= 500 || response.status === 429 || response.status === 401) &&
287+
i < headersToTry.length * (1 + rateLimitPendingRetries) - 1
288+
)
233289
continue;
234290

235291
if (response.status === 400 || response.status === 404)

src/utils/signalSleep.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/**
 * Sleep for `delay` milliseconds, ending early if `abortSignal` is aborted.
 *
 * @param delay - Time to wait, in milliseconds (passed to `setTimeout`).
 * @param abortSignal - Optional signal that cancels the wait.
 * @returns A promise that resolves once the delay has elapsed, or rejects with
 * `abortSignal.reason` if the signal is already aborted when called or becomes
 * aborted before the delay elapses.
 */
export function signalSleep(delay: number, abortSignal?: AbortSignal): Promise<void> {
    return new Promise<void>((accept, reject) => {
        // Already aborted: reject right away without scheduling a timer
        if (abortSignal?.aborted)
            return void reject(abortSignal.reason);

        let timeout: ReturnType<typeof setTimeout> | undefined = undefined;

        function onAbort() {
            // Cancel the pending timer so it neither fires later nor keeps the process alive.
            // The listener was registered with `once`, so it has already been removed here.
            clearTimeout(timeout);
            timeout = undefined;
            reject(abortSignal?.reason);
        }

        function onTimeout() {
            timeout = undefined;
            // The wait completed normally: detach from the signal so a long-lived
            // signal doesn't accumulate listeners across many sleeps
            abortSignal?.removeEventListener("abort", onAbort);
            accept();
        }

        abortSignal?.addEventListener("abort", onAbort, {once: true});
        timeout = setTimeout(onTimeout, delay);
    });
}

test/standalone/cli/recommendedModels.test.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import {recommendedModels} from "../../../src/cli/recommendedModels.js";
44

55
describe("cli", () => {
66
describe("recommended models", () => {
7-
test("all URIs resolve correctly", async () => {
7+
test("all URIs resolve correctly", {timeout: 1000 * 60 * 6}, async () => {
88
const unresolvedUris = (
99
await Promise.all(
1010
recommendedModels
@@ -18,10 +18,11 @@ describe("cli", () => {
1818
try {
1919
await resolveParsedModelUri(parseModelUri(uri));
2020
return null;
21-
} catch (err) {
21+
} catch (err: Error | any) {
2222
return {
2323
modelName,
24-
uri
24+
uri,
25+
error: String(err?.stack ?? err)
2526
};
2627
}
2728
})

0 commit comments

Comments
 (0)