@@ -4,6 +4,20 @@ import { t } from "../../../i18n"
44import { WatsonXAI } from "@ibm-cloud/watsonx-ai"
55import { IamAuthenticator , CloudPakForDataAuthenticator } from "ibm-cloud-sdk-core"
66
/**
 * Tunable settings that control how embedding requests are throttled.
 */
interface RateLimitConfig {
	/** When true, the delay is adjusted according to API responses */
	adaptive: boolean
	/** Starting delay between requests, in milliseconds */
	baseDelay: number
	/** Ceiling for the adaptive delay between requests, in milliseconds */
	maxDelay: number
	/** Number of texts submitted concurrently in one batch */
	maxConcurrent: number
}
20+
721/**
822 * IBM watsonx embedder implementation using the native IBM Cloud watsonx.ai package.
923 *
@@ -15,6 +29,10 @@ export class WatsonxEmbedder implements IEmbedder {
1529 private static readonly DEFAULT_MODEL = "ibm/slate-125m-english-rtrvr-v2"
1630 private readonly modelId : string
1731 private readonly projectId ?: string
32+ private readonly rateLimitConfig : RateLimitConfig
33+ private currentDelay : number
34+ private lastRequestTime : number = 0
35+ private rateLimitHits : number = 0
1836
1937 /**
2038 * Creates a new watsonx embedder
@@ -36,13 +54,23 @@ export class WatsonxEmbedder implements IEmbedder {
3654 region : string = "us-south" ,
3755 username ?: string ,
3856 password ?: string ,
57+ rateLimitConfig ?: Partial < RateLimitConfig > ,
3958 ) {
4059 if ( ! apiKey && ! ( username && password ) ) {
4160 throw new Error ( t ( "embeddings:validation.apiKeyRequired" ) )
4261 }
4362 this . modelId = modelId || WatsonxEmbedder . DEFAULT_MODEL
4463 this . projectId = projectId
4564
65+ // Initialize rate limit configuration with defaults
66+ this . rateLimitConfig = {
67+ baseDelay : rateLimitConfig ?. baseDelay ?? 500 ,
68+ maxDelay : rateLimitConfig ?. maxDelay ?? 5000 ,
69+ adaptive : rateLimitConfig ?. adaptive ?? true ,
70+ maxConcurrent : rateLimitConfig ?. maxConcurrent ?? 1 ,
71+ }
72+ this . currentDelay = this . rateLimitConfig . baseDelay
73+
4674 let options : any = {
4775 version : WatsonxEmbedder . WATSONX_VERSION ,
4876 }
@@ -57,6 +85,11 @@ export class WatsonxEmbedder implements IEmbedder {
5785 throw new Error ( "Base URL is required for IBM Cloud Pak for Data" )
5886 }
5987
88+ // Validate URL format for Cloud Pak
89+ if ( ! this . isValidUrl ( baseUrl ) ) {
90+ throw new Error ( "Invalid URL format for IBM Cloud Pak for Data base URL" )
91+ }
92+
6093 if ( username ) {
6194 if ( password ) {
6295 options . authenticator = new CloudPakForDataAuthenticator ( {
@@ -104,17 +137,66 @@ export class WatsonxEmbedder implements IEmbedder {
104137 return knownDimensions [ modelId ] || 768
105138 }
106139
/**
 * Checks whether a string parses as an absolute HTTP or HTTPS URL.
 * @param url The candidate URL string
 * @returns True when the string parses and its scheme is http: or https:, false otherwise
 */
private isValidUrl(url: string): boolean {
	let scheme: string
	try {
		scheme = new URL(url).protocol
	} catch {
		// The URL constructor throws on input it cannot parse
		return false
	}
	return ["http:", "https:"].includes(scheme)
}
153+
/**
 * Adapts the delay between requests to the outcome of the most recent request.
 *
 * Does nothing when adaptive rate limiting is disabled. A rate-limited request
 * increments the hit counter and doubles the current delay, capped at `maxDelay`.
 * A successful request decrements the hit counter; once the counter is drained,
 * every further success shrinks the delay by 10% until it is back at `baseDelay`.
 *
 * @param isRateLimited Whether the last request hit a rate limit
 */
private adjustRateLimit(isRateLimited: boolean): void {
	if (!this.rateLimitConfig.adaptive) {
		return
	}

	const { baseDelay, maxDelay } = this.rateLimitConfig

	if (isRateLimited) {
		this.rateLimitHits++
		// Exponential backoff when rate limited
		this.currentDelay = Math.min(this.currentDelay * 2, maxDelay)
		return
	}

	if (this.rateLimitHits > 0) {
		this.rateLimitHits--
	}

	// Keep decaying on every success after the hit counter is drained. Decaying only
	// on the single success that drains the counter would leave the delay stuck above
	// baseDelay for the lifetime of the instance.
	if (this.rateLimitHits === 0 && this.currentDelay > baseDelay) {
		this.currentDelay = Math.max(baseDelay, this.currentDelay * 0.9)
	}
}
175+
/**
 * Waits until at least `currentDelay` milliseconds separate this request from the
 * previously scheduled one.
 *
 * The start slot is reserved in `lastRequestTime` before awaiting, so callers that
 * enter concurrently (a batch larger than one) are spaced one delay apart instead of
 * all reading the same timestamp and firing together.
 *
 * @returns A promise that resolves when the caller may issue its request
 */
private async waitForRateLimit(): Promise<void> {
	const now = Date.now()
	const earliestStart = Math.max(now, this.lastRequestTime + this.currentDelay)

	// Claim the slot synchronously, before yielding to other callers
	this.lastRequestTime = earliestStart

	if (earliestStart > now) {
		await delay(earliestStart - now)
	}
}
190+
107191 async createEmbeddings ( texts : string [ ] , model ?: string ) : Promise < EmbeddingResponse > {
108192 const MAX_RETRIES = 3
109- const INITIAL_DELAY_MS = 1000
110- const MAX_CONCURRENT_REQUESTS = 1
111- const REQUEST_DELAY_MS = 500
112193 const modelToUse = model || this . modelId
113194 const embeddings : number [ ] [ ] = [ ]
114195 let promptTokens = 0
115196 let totalTokens = 0
116- for ( let i = 0 ; i < texts . length ; i += MAX_CONCURRENT_REQUESTS ) {
117- const batch = texts . slice ( i , i + MAX_CONCURRENT_REQUESTS )
197+
198+ for ( let i = 0 ; i < texts . length ; i += this . rateLimitConfig . maxConcurrent ) {
199+ const batch = texts . slice ( i , i + this . rateLimitConfig . maxConcurrent )
118200 const batchResults = await Promise . all (
119201 batch . map ( async ( text , batchIndex ) => {
120202 const textIndex = i + batchIndex
@@ -135,7 +217,9 @@ export class WatsonxEmbedder implements IEmbedder {
135217 let lastError
136218 for ( let attempt = 0 ; attempt < MAX_RETRIES ; attempt ++ ) {
137219 try {
138- await delay ( 1000 )
220+ // Apply rate limiting
221+ await this . waitForRateLimit ( )
222+
139223 const response = await this . watsonxClient . embedText ( {
140224 modelId : modelToUse ,
141225 inputs : [ text ] ,
@@ -163,6 +247,8 @@ export class WatsonxEmbedder implements IEmbedder {
163247 }
164248
165249 const tokens = response . result . input_token_count || 0
250+ // Successful request, adjust rate limit if adaptive
251+ this . adjustRateLimit ( false )
166252 return { index : textIndex , embedding, tokens }
167253 } else {
168254 console . warn ( `No embedding results for text at index ${ textIndex } ` )
@@ -177,11 +263,22 @@ export class WatsonxEmbedder implements IEmbedder {
177263 } catch ( error ) {
178264 lastError = error
179265
266+ // Check if this is a rate limit error
267+ const isRateLimitError = this . isRateLimitError ( error )
268+ if ( isRateLimitError ) {
269+ this . adjustRateLimit ( true )
270+ }
271+
180272 if ( attempt < MAX_RETRIES - 1 ) {
181- const delayMs = INITIAL_DELAY_MS * Math . pow ( 2 , attempt )
273+ // Use adaptive delay if rate limited, otherwise exponential backoff
274+ const delayMs = isRateLimitError
275+ ? this . currentDelay
276+ : this . rateLimitConfig . baseDelay * Math . pow ( 2 , attempt )
277+
182278 console . warn (
183- `IBM watsonx API call failed, retrying in ${ delayMs } ms (attempt ${ attempt + 1 } /${ MAX_RETRIES } )` ,
279+ `IBM watsonx API call failed${ isRateLimitError ? " (rate limited)" : "" } , retrying in ${ delayMs } ms (attempt ${ attempt + 1 } /${ MAX_RETRIES } )` ,
184280 )
281+ await delay ( delayMs )
185282 }
186283 }
187284 }
@@ -194,8 +291,9 @@ export class WatsonxEmbedder implements IEmbedder {
194291 } ) ,
195292 )
196293
197- if ( i + MAX_CONCURRENT_REQUESTS < texts . length ) {
198- await new Promise ( ( resolve ) => setTimeout ( resolve , REQUEST_DELAY_MS * 2 ) )
294+ // Add delay between batches if not the last batch
295+ if ( i + this . rateLimitConfig . maxConcurrent < texts . length ) {
296+ await delay ( this . currentDelay )
199297 }
200298
201299 // Process batch results
@@ -218,6 +316,25 @@ export class WatsonxEmbedder implements IEmbedder {
218316 }
219317 }
220318
/**
 * Checks if an error is a rate limit error.
 *
 * An error counts as rate limited when any of its `code`, `status` or `statusCode`
 * fields equals 429 (as a number or as the string "429"), or when its string
 * `message` contains "rate limit", "too many requests" or "quota exceeded"
 * (case-insensitive).
 *
 * @param error The error to check
 * @returns True if the error is a rate limit error
 */
private isRateLimitError(error: any): boolean {
	if (!error) return false

	// Inspect each status-like field on its own: a truthy non-HTTP code such as
	// "ECONNRESET" must not hide a 429 carried in `status` or `statusCode`.
	const statusCandidates = [error.code, error.status, error.statusCode]
	if (statusCandidates.some((value) => value === 429 || value === "429")) {
		return true
	}

	// Only call string methods on an actual string; a non-string message would throw.
	const errorMessage = typeof error.message === "string" ? error.message.toLowerCase() : ""

	return (
		errorMessage.includes("rate limit") ||
		errorMessage.includes("too many requests") ||
		errorMessage.includes("quota exceeded")
	)
}
337+
221338 /**
222339 * Validates the watsonx embedder configuration by testing the API key and connection
223340 * @returns Promise resolving to validation result with success status and optional error message
0 commit comments