@@ -448,6 +448,265 @@ describe("OpenAICompatibleEmbedder", () => {
448448 } )
449449 } )
450450
// Simulates one 429 rejection that carries provider retry hints, followed by a
// successful response, and expects the retry to be logged as provider-specified.
it("should honor Retry-After header when present", async () => {
	// Successful payload: three floats delivered as a base64-encoded Float32Array
	const expectedVector = new Float32Array([0.25, 0.5, 0.75])
	const encodedVector = Buffer.from(expectedVector.buffer).toString("base64")
	const successResponse = {
		data: [{ embedding: encodedVector }],
		usage: { prompt_tokens: 10, total_tokens: 15 },
	}

	// 429 error exposing the hint both as a header (3 seconds) and as rateLimitInfo (3000 ms)
	const rateLimitError: any = {
		status: 429,
		message: "Rate limit exceeded",
		headers: { "retry-after": "3" },
		rateLimitInfo: { retryAfterMs: 3000 },
	}

	// First call rejects, second call resolves
	mockEmbeddingsCreate.mockRejectedValueOnce(rateLimitError)
	mockEmbeddingsCreate.mockResolvedValueOnce(successResponse)

	const pending = embedder.createEmbeddings(["Hello world"])

	// Let the first (failing) attempt settle
	await vitest.advanceTimersByTimeAsync(100)

	// The test budgets the provider's 3 seconds plus a 1 second buffer (4s total)
	await vitest.advanceTimersByTimeAsync(4000)

	const outcome = await pending

	expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2)
	expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("(using provider-specified delay)"))
	expect(outcome).toEqual({
		embeddings: [[0.25, 0.5, 0.75]],
		usage: { promptTokens: 10, totalTokens: 15 },
	})
})
488+
// Simulates a 429 fetch response whose Retry-After header is an HTTP-date about
// five seconds ahead, followed by a successful response, and expects one retry.
it("should parse Retry-After header as HTTP-date", async () => {
	const testTexts = ["Hello world"]
	const fullUrl = "https://api.example.com/v1/embeddings"
	const embedder = new OpenAICompatibleEmbedder(fullUrl, testApiKey, testModelId)

	// Future date 5 seconds from now, rendered in HTTP-date form
	const futureDate = new Date(Date.now() + 5000)
	const httpDate = futureDate.toUTCString()

	const mockFetch = global.fetch as MockedFunction<typeof fetch>
	mockFetch
		.mockResolvedValueOnce({
			ok: false,
			status: 429,
			headers: {
				// Fetch's Headers.get matches names case-insensitively; mirror that so the
				// test does not depend on the casing the implementation passes in.
				get: (name: string) => (name.toLowerCase() === "retry-after" ? httpDate : null),
			},
			text: async () => "Rate limited",
		} as any)
		.mockResolvedValueOnce({
			ok: true,
			status: 200,
			json: async () => ({
				data: [{ embedding: [0.1, 0.2, 0.3] }],
				usage: { prompt_tokens: 10, total_tokens: 15 },
			}),
		} as any)

	const resultPromise = embedder.createEmbeddings(testTexts)

	// First attempt fails
	await vitest.advanceTimersByTimeAsync(100)

	// Should wait approximately 5 seconds (plus buffer)
	await vitest.advanceTimersByTimeAsync(6000)

	const result = await resultPromise

	expect(mockFetch).toHaveBeenCalledTimes(2)
	expect(result.embeddings).toEqual([[0.1, 0.2, 0.3]])
})
530+
// Simulates a 429 fetch response carrying only an X-RateLimit-Reset-After header
// of "2", followed by a successful response, and expects one retry.
it("should handle X-RateLimit-Reset-After header", async () => {
	const testTexts = ["Hello world"]
	const fullUrl = "https://api.example.com/v1/embeddings"
	const embedder = new OpenAICompatibleEmbedder(fullUrl, testApiKey, testModelId)

	const mockFetch = global.fetch as MockedFunction<typeof fetch>
	mockFetch
		.mockResolvedValueOnce({
			ok: false,
			status: 429,
			headers: {
				// Fetch's Headers.get matches names case-insensitively; mirror that so the
				// test does not depend on the casing the implementation passes in.
				get: (name: string) => (name.toLowerCase() === "x-ratelimit-reset-after" ? "2" : null),
			},
			text: async () => "Rate limited",
		} as any)
		.mockResolvedValueOnce({
			ok: true,
			status: 200,
			json: async () => ({
				data: [{ embedding: [0.1, 0.2, 0.3] }],
				usage: { prompt_tokens: 10, total_tokens: 15 },
			}),
		} as any)

	const resultPromise = embedder.createEmbeddings(testTexts)

	// First attempt fails
	await vitest.advanceTimersByTimeAsync(100)

	// Should wait 2 seconds (plus buffer)
	await vitest.advanceTimersByTimeAsync(3000)

	const result = await resultPromise

	expect(mockFetch).toHaveBeenCalledTimes(2)
	expect(result.embeddings).toEqual([[0.1, 0.2, 0.3]])
})
568+
// Simulates a 429 fetch response carrying only an X-RateLimit-Reset header set to
// a Unix timestamp (seconds) about four seconds ahead, then a successful response.
it("should handle X-RateLimit-Reset header with Unix timestamp", async () => {
	const testTexts = ["Hello world"]
	const fullUrl = "https://api.example.com/v1/embeddings"
	const embedder = new OpenAICompatibleEmbedder(fullUrl, testApiKey, testModelId)

	// Unix timestamp 4 seconds in the future (floored to whole seconds)
	const resetTimestamp = Math.floor((Date.now() + 4000) / 1000)

	const mockFetch = global.fetch as MockedFunction<typeof fetch>
	mockFetch
		.mockResolvedValueOnce({
			ok: false,
			status: 429,
			headers: {
				// Fetch's Headers.get matches names case-insensitively; mirror that so the
				// test does not depend on the casing the implementation passes in.
				get: (name: string) =>
					name.toLowerCase() === "x-ratelimit-reset" ? resetTimestamp.toString() : null,
			},
			text: async () => "Rate limited",
		} as any)
		.mockResolvedValueOnce({
			ok: true,
			status: 200,
			json: async () => ({
				data: [{ embedding: [0.1, 0.2, 0.3] }],
				usage: { prompt_tokens: 10, total_tokens: 15 },
			}),
		} as any)

	const resultPromise = embedder.createEmbeddings(testTexts)

	// First attempt fails
	await vitest.advanceTimersByTimeAsync(100)

	// Should wait approximately 4 seconds (plus buffer)
	await vitest.advanceTimersByTimeAsync(5000)

	const result = await resultPromise

	expect(mockFetch).toHaveBeenCalledTimes(2)
	expect(result.embeddings).toEqual([[0.1, 0.2, 0.3]])
})
609+
// Simulates a 429 fetch response with no retry headers whose JSON body carries a
// `retry_delay` of "10s" under error.details[].metadata, then a successful response.
it("should handle Gemini-style structured retry info in error body", async () => {
	const geminiEndpoint = "https://generativelanguage.googleapis.com/v1beta/openai/embeddings"
	const geminiEmbedder = new OpenAICompatibleEmbedder(geminiEndpoint, testApiKey, testModelId)
	const inputs = ["Hello world"]

	// Serialized error payload returned by the first (rate-limited) response
	const serializedError = JSON.stringify({
		error: {
			code: 429,
			message: "Resource exhausted",
			details: [{ metadata: { retry_delay: "10s" } }],
		},
	})

	// No header carries a retry hint; the delay is only present in the body
	const rateLimitedResponse = {
		ok: false,
		status: 429,
		headers: { get: () => null },
		text: async () => serializedError,
	} as any

	const successResponse = {
		ok: true,
		status: 200,
		json: async () => ({
			data: [{ embedding: [0.1, 0.2, 0.3] }],
			usage: { prompt_tokens: 10, total_tokens: 15 },
		}),
	} as any

	const fetchMock = global.fetch as MockedFunction<typeof fetch>
	fetchMock.mockResolvedValueOnce(rateLimitedResponse)
	fetchMock.mockResolvedValueOnce(successResponse)

	const pending = geminiEmbedder.createEmbeddings(inputs)

	// Let the first (failing) attempt settle
	await vitest.advanceTimersByTimeAsync(100)

	// The test budgets the 10 second delay plus a buffer
	await vitest.advanceTimersByTimeAsync(11000)

	const outcome = await pending

	expect(fetchMock).toHaveBeenCalledTimes(2)
	expect(outcome.embeddings).toEqual([[0.1, 0.2, 0.3]])
})
661+
// Calls the embedder's private parseDurationString directly and checks the
// millisecond value for "s"/"m"/"h" inputs and undefined for unusable inputs.
it("should parse duration strings correctly", async () => {
	const durationEmbedder = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, testModelId)

	// Reach into the private method; bind so `this` still refers to the instance
	const parse = (durationEmbedder as any).parseDurationString.bind(durationEmbedder)

	// Inputs paired with the expected number of milliseconds
	const parseable: Array<[string, number]> = [
		["10s", 10000],
		["2m", 120000],
		["1h", 3600000],
	]
	for (const [input, expectedMs] of parseable) {
		expect(parse(input)).toBe(expectedMs)
	}

	// Inputs for which the test expects undefined
	const unparseable: Array<string | null> = ["invalid", null, ""]
	for (const input of unparseable) {
		expect(parse(input)).toBeUndefined()
	}
})
675+
// Simulates one 429 rejection without any retry hints, followed by a successful
// response, and expects the retry to be logged as exponential backoff.
it("should fall back to exponential backoff when no Retry-After is provided", async () => {
	// Successful payload: three floats delivered as a base64-encoded Float32Array
	const expectedVector = new Float32Array([0.25, 0.5, 0.75])
	const encodedVector = Buffer.from(expectedVector.buffer).toString("base64")
	const successResponse = {
		data: [{ embedding: encodedVector }],
		usage: { prompt_tokens: 10, total_tokens: 15 },
	}

	// Deliberately carries no headers and no rateLimitInfo
	const bareRateLimitError = {
		status: 429,
		message: "Rate limit exceeded",
	}

	// First call rejects, second call resolves
	mockEmbeddingsCreate.mockRejectedValueOnce(bareRateLimitError)
	mockEmbeddingsCreate.mockResolvedValueOnce(successResponse)

	const pending = embedder.createEmbeddings(["Hello world"])

	// Let the first (failing) attempt settle
	await vitest.advanceTimersByTimeAsync(100)

	// The test budgets 5 seconds for the first exponential-backoff retry
	await vitest.advanceTimersByTimeAsync(5000)

	const outcome = await pending

	expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2)
	expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("(using exponential backoff)"))
	expect(outcome).toEqual({
		embeddings: [[0.25, 0.5, 0.75]],
		usage: { promptTokens: 10, totalTokens: 15 },
	})
})
709+
451710 it ( "should not retry on non-rate-limit errors" , async ( ) => {
452711 const testTexts = [ "Hello world" ]
453712 const authError = new Error ( "Unauthorized" )
0 commit comments