11import filenamify from "filenamify" ;
2+ import prettyMilliseconds from "pretty-ms" ;
23import { normalizeGgufDownloadUrl } from "../gguf/utils/normalizeGgufDownloadUrl.js" ;
34import { getFilenameForBinarySplitGgufPartUrls , resolveBinarySplitGgufPartUrls } from "../gguf/utils/resolveBinarySplitGgufPartUrls.js" ;
45import { createSplitPartFilename , getGgufSplitPartsInfo } from "../gguf/utils/resolveSplitGgufParts.js" ;
@@ -7,9 +8,19 @@ import {isUrl} from "./isUrl.js";
78import { ModelFileAccessTokens , resolveModelFileAccessTokensTryHeaders } from "./modelFileAccessTokens.js" ;
89import { isHuggingFaceUrl , ModelDownloadEndpoints , resolveHuggingFaceEndpoint } from "./modelDownloadEndpoints.js" ;
910import { parseModelFileName } from "./parseModelFileName.js" ;
11+ import { getConsoleLogPrefix } from "./getConsoleLogPrefix.js" ;
12+ import { signalSleep } from "./signalSleep.js" ;
1013
// Default Hugging Face branch name and file quantization label.
// NOTE(review): presumably applied when a model URI omits these parts; the usage is outside this view - confirm.
1114const defaultHuggingFaceBranch = "main" ;
1215const defaultHuggingFaceFileQuantization = "Q4_K_M" ;
// Settings for handling HTTP 429 (rate limit) responses while fetching a Hugging Face model manifest.
16+ const huggingFaceRateLimit = {
// Wait durations, in milliseconds (they are passed to `signalSleep` and `prettyMilliseconds`).
17+ wait : {
// Lower bound applied to the wait derived from the `t` value of the `ratelimit` response header.
18+ min : 1000 ,
// Upper bound applied to that same header-derived wait (5 minutes).
19+ max : 60 * 5 * 1000 ,
// Wait used when the response has no usable `t` value in its `ratelimit` header.
20+ default : 1000
21+ } ,
// Cap on how many additional passes over the candidate request headers are scheduled after 429 responses.
22+ retries : 4
23+ } as const ;
1324
// Placeholder token for a file part number.
// NOTE(review): presumably substituted with the actual part number of a split model file; the usage is outside this view - confirm.
1425export const genericFilePartNumber = "{:\n{number}\n:}" as const ;
1526
@@ -208,9 +219,12 @@ async function fetchHuggingFaceModelManifest({
208219 { } ,
209220 await resolveModelFileAccessTokensTryHeaders ( manifestUrl , tokens , endpoints )
210221 ] ;
222+ let rateLimitPendingRetries = 0 ;
211223
212- while ( headersToTry . length > 0 ) {
213- const headers = headersToTry . shift ( ) ;
224+ for ( let i = 0 ; i < headersToTry . length * ( 1 + rateLimitPendingRetries ) ; i ++ ) {
225+ const headers = headersToTry [ i % headersToTry . length ] ;
226+ if ( headers == null )
227+ continue ;
214228
215229 let response : Awaited < ReturnType < typeof fetch > > | undefined ;
216230 try {
@@ -226,10 +240,52 @@ async function fetchHuggingFaceModelManifest({
226240 signal
227241 } ) ;
228242 } catch ( err ) {
243+ if ( signal ?. aborted && err === signal ?. reason )
244+ throw err ;
245+
229246 throw new Error ( `Failed to fetch manifest for resolving URI ${ JSON . stringify ( fullUri ) } : ${ err } ` ) ;
230247 }
231248
232- if ( ( response . status >= 500 || response . status === 429 || response . status === 401 ) && headersToTry . length > 0 )
249+ if ( response . status === 429 ) {
250+ const doneRetires = Math . floor ( i / headersToTry . length ) ;
251+ rateLimitPendingRetries = Math . min ( doneRetires + 1 , huggingFaceRateLimit . retries ) ;
252+
253+ if ( i % headersToTry . length === headersToTry . length - 1 && i !== headersToTry . length * ( 1 + rateLimitPendingRetries ) - 1 ) {
254+ const [ , secondsUntilResetString ] = response . headers . get ( "ratelimit" )
255+ ?. split ( ";" )
256+ . map ( ( part ) => part . split ( "=" ) )
257+ . find ( ( [ key , value ] ) => key === "t" && ! isNaN ( Number ( value ) ) ) ?? [ ] ;
258+
259+ if ( secondsUntilResetString != null ) {
260+ const timeToWait = Math . min (
261+ huggingFaceRateLimit . wait . max ,
262+ Math . max (
263+ huggingFaceRateLimit . wait . min ,
264+ Number ( secondsUntilResetString ) * 1000
265+ )
266+ ) ;
267+ console . info (
268+ getConsoleLogPrefix ( ) +
269+ "Received a rate limit response from Hugging Face, waiting for " + (
270+ prettyMilliseconds ( timeToWait , {
271+ keepDecimalsOnWholeSeconds : true ,
272+ secondsDecimalDigits : 0 ,
273+ compact : true ,
274+ verbose : true
275+ } )
276+ ) + " before retrying..."
277+ ) ;
278+ await signalSleep ( timeToWait , signal ) ;
279+ } else
280+ await signalSleep ( huggingFaceRateLimit . wait . default , signal ) ;
281+ }
282+
283+ continue ;
284+ }
285+
286+ if ( ( response . status >= 500 || response . status === 429 || response . status === 401 ) &&
287+ i < headersToTry . length * ( 1 + rateLimitPendingRetries ) - 1
288+ )
233289 continue ;
234290
235291 if ( response . status === 400 || response . status === 404 )
0 commit comments