Skip to content

Commit 97b48d7

Browse files
committed
feat: improve watsonx rate limiting and type safety
- Add configurable rate limiting with adaptive backoff strategy
- Replace `any` types with proper TypeScript interfaces
- Add URL validation for Cloud Pak platform
- Improve error handling for rate limit scenarios
- Remove unrelated privacy page formatting changes
1 parent e5623b0 commit 97b48d7

File tree

2 files changed

+216
-26
lines changed

2 files changed

+216
-26
lines changed

src/api/providers/watsonx.ts

Lines changed: 89 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import * as vscode from "vscode"
22
import { Anthropic } from "@anthropic-ai/sdk"
3-
import { ModelInfo, watsonxAiDefaultModelId, watsonxAiModels, WatsonxAIModelId } from "@roo-code/types"
3+
import { ModelInfo, watsonxAiDefaultModelId, watsonxAiModels, WatsonxAIModelId, baseModelInfo } from "@roo-code/types"
44
import type { ApiHandlerOptions } from "../../shared/api"
55
import { IamAuthenticator, CloudPakForDataAuthenticator } from "ibm-cloud-sdk-core"
66
import { ApiStream } from "../transform/stream"
@@ -9,25 +9,67 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
99
import { WatsonXAI } from "@ibm-cloud/watsonx-ai"
1010
import { convertToWatsonxAiMessages } from "../transform/watsonxai-format"
1111

12+
/**
 * watsonx-specific settings layered on top of the generic `ApiHandlerOptions`.
 *
 * Every field is optional at the type level; `WatsonxAIHandler` validates the
 * ones it actually needs at construction time.
 */
interface WatsonxApiHandlerOptions extends ApiHandlerOptions {
	// ---- Target deployment ----

	/** Deployment flavour; the handler falls back to "ibmCloud" when unset. */
	watsonxPlatform?: "ibmCloud" | "cloudPak"
	/** Service endpoint, forwarded to the SDK as `serviceUrl`. */
	watsonxBaseUrl?: string
	/** watsonx project ID; the handler constructor throws when it is missing. */
	watsonxProjectId?: string
	/** Model to call; the handler falls back to `watsonxAiDefaultModelId` when unset. */
	watsonxModelId?: string

	// ---- Credentials ----
	// NOTE(review): how these four fields are combined is not visible in this
	// excerpt; presumably `watsonxAuthType` selects API-key vs. username/password
	// authentication. Confirm against the constructor.

	watsonxAuthType?: "apiKey" | "password"
	watsonxApiKey?: string
	watsonxUsername?: string
	watsonxPassword?: string
}
25+
26+
/**
 * Settings object handed to the watsonx SDK client.
 *
 * The handler constructor assembles it step by step as a
 * `Partial<WatsonxServiceOptions>`.
 */
interface WatsonxServiceOptions {
	/** Credential provider: IBM Cloud IAM or Cloud Pak for Data. */
	authenticator: IamAuthenticator | CloudPakForDataAuthenticator
	/** Endpoint URL of the watsonx service. */
	serviceUrl: string
	/** API version string (the handler passes "2024-05-31"). */
	version: string
}
34+
35+
/**
 * A single chat turn in the shape sent to watsonx `textChat`.
 *
 * `content` is always plain text: the handler flattens multi-part message
 * content into one string before building these objects.
 */
interface WatsonxMessage {
	/** Plain-text body of the turn. */
	content: string
	/** Speaker of the turn, e.g. "system" or "user". */
	role: string
}
42+
43+
/**
 * Argument object for the watsonx `textChat` call, as produced by
 * `createTextChatParams`.
 */
interface WatsonxTextChatParams {
	/** watsonx project the request is issued against. */
	projectId: string
	/** Identifier of the model to invoke. */
	modelId: string
	/** Conversation to send, in order. */
	messages: WatsonxMessage[]
	/** Sampling temperature; the handler uses 0.7 when `modelTemperature` is not configured. */
	temperature: number
	/** Completion token limit; the handler uses 2048 when `modelMaxTokens` is not configured. */
	maxTokens: number
}
53+
1254
export class WatsonxAIHandler extends BaseProvider implements SingleCompletionHandler {
13-
private options: ApiHandlerOptions
14-
private projectId?: string
55+
private options: WatsonxApiHandlerOptions
56+
private projectId: string
1557
private service: WatsonXAI
1658

1759
constructor(options: ApiHandlerOptions) {
1860
super()
19-
this.options = options
61+
this.options = options as WatsonxApiHandlerOptions
2062

21-
this.projectId = (this.options as any).watsonxProjectId
22-
if (!this.projectId) {
63+
if (!this.options.watsonxProjectId) {
2364
throw new Error("You must provide a valid IBM watsonx project ID.")
2465
}
66+
this.projectId = this.options.watsonxProjectId
2567

26-
const serviceUrl = (this.options as any).watsonxBaseUrl
27-
const platform = (this.options as any).watsonxPlatform
68+
const serviceUrl = this.options.watsonxBaseUrl
69+
const platform = this.options.watsonxPlatform || "ibmCloud"
2870

2971
try {
30-
const serviceOptions: any = {
72+
const serviceOptions: Partial<WatsonxServiceOptions> = {
3173
version: "2024-05-31",
3274
serviceUrl: serviceUrl,
3375
}
@@ -93,7 +135,11 @@ export class WatsonxAIHandler extends BaseProvider implements SingleCompletionHa
93135
* @param messages - The messages to send
94136
* @returns The parameters object for the API call
95137
*/
96-
private createTextChatParams(projectId: string, modelId: string, messages: any[]) {
138+
private createTextChatParams(
139+
projectId: string,
140+
modelId: string,
141+
messages: WatsonxMessage[],
142+
): WatsonxTextChatParams {
97143
const maxTokens = this.options.modelMaxTokens || 2048
98144
const temperature = this.options.modelTemperature || 0.7
99145
return {
@@ -122,9 +168,33 @@ export class WatsonxAIHandler extends BaseProvider implements SingleCompletionHa
122168

123169
try {
124170
// Convert messages to WatsonX format with system prompt
125-
const watsonxMessages = [{ role: "system", content: systemPrompt }, ...convertToWatsonxAiMessages(messages)]
171+
const convertedMessages = convertToWatsonxAiMessages(messages)
172+
// Ensure all messages have string content
173+
const watsonxMessages: WatsonxMessage[] = [{ role: "system", content: systemPrompt }]
174+
175+
for (const msg of convertedMessages) {
176+
let content = ""
177+
if (typeof msg.content === "string") {
178+
content = msg.content
179+
} else if (Array.isArray(msg.content)) {
180+
content = msg.content
181+
.map((part) => {
182+
if (typeof part === "string") {
183+
return part
184+
} else if ("text" in part) {
185+
return part.text
186+
}
187+
return ""
188+
})
189+
.join("")
190+
}
191+
watsonxMessages.push({
192+
role: msg.role,
193+
content: content,
194+
})
195+
}
126196

127-
const params = this.createTextChatParams(this.projectId!, modelId, watsonxMessages)
197+
const params = this.createTextChatParams(this.projectId, modelId, watsonxMessages)
128198
let responseText = ""
129199

130200
// Call the IBM watsonx API using textChat (non-streaming); can be changed to streaming..
@@ -160,8 +230,8 @@ export class WatsonxAIHandler extends BaseProvider implements SingleCompletionHa
160230
async completePrompt(prompt: string): Promise<string> {
161231
try {
162232
const { id: modelId } = this.getModel()
163-
const messages = [{ role: "user", content: prompt }]
164-
const params = this.createTextChatParams(this.projectId!, modelId, messages)
233+
const messages: WatsonxMessage[] = [{ role: "user", content: prompt }]
234+
const params = this.createTextChatParams(this.projectId, modelId, messages)
165235
const response = await this.service.textChat(params)
166236

167237
if (!response?.result?.choices?.[0]?.message?.content) {
@@ -179,9 +249,12 @@ export class WatsonxAIHandler extends BaseProvider implements SingleCompletionHa
179249
* @returns An object containing the model ID and model information
180250
*/
181251
override getModel(): { id: string; info: ModelInfo } {
	// Fall back to the default model when no model ID is configured.
	const modelId = this.options.watsonxModelId || watsonxAiDefaultModelId

	// Own-property lookup only: the `in` operator also matches inherited keys
	// such as "constructor" or "toString", which would hand back a value that
	// is not a ModelInfo at all.
	const isKnownModel = Object.prototype.hasOwnProperty.call(watsonxAiModels, modelId)

	return {
		id: modelId,
		// Models absent from the static catalogue get the generic baseline metadata.
		info: isKnownModel ? watsonxAiModels[modelId as WatsonxAIModelId] : baseModelInfo,
	}
}
187260
}

src/services/code-index/embedders/watsonx.ts

Lines changed: 127 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,20 @@ import { t } from "../../../i18n"
44
import { WatsonXAI } from "@ibm-cloud/watsonx-ai"
55
import { IamAuthenticator, CloudPakForDataAuthenticator } from "ibm-cloud-sdk-core"
66

7+
/**
8+
* Configuration for rate limiting
9+
*/
10+
interface RateLimitConfig {
11+
/** Base delay between requests in milliseconds */
12+
baseDelay: number
13+
/** Maximum delay between requests in milliseconds */
14+
maxDelay: number
15+
/** Whether to use adaptive rate limiting based on API responses */
16+
adaptive: boolean
17+
/** Maximum concurrent requests */
18+
maxConcurrent: number
19+
}
20+
721
/**
822
* IBM watsonx embedder implementation using the native IBM Cloud watsonx.ai package.
923
*
@@ -15,6 +29,10 @@ export class WatsonxEmbedder implements IEmbedder {
1529
private static readonly DEFAULT_MODEL = "ibm/slate-125m-english-rtrvr-v2"
1630
private readonly modelId: string
1731
private readonly projectId?: string
32+
private readonly rateLimitConfig: RateLimitConfig
33+
private currentDelay: number
34+
private lastRequestTime: number = 0
35+
private rateLimitHits: number = 0
1836

1937
/**
2038
* Creates a new watsonx embedder
@@ -36,13 +54,23 @@ export class WatsonxEmbedder implements IEmbedder {
3654
region: string = "us-south",
3755
username?: string,
3856
password?: string,
57+
rateLimitConfig?: Partial<RateLimitConfig>,
3958
) {
4059
if (!apiKey && !(username && password)) {
4160
throw new Error(t("embeddings:validation.apiKeyRequired"))
4261
}
4362
this.modelId = modelId || WatsonxEmbedder.DEFAULT_MODEL
4463
this.projectId = projectId
4564

65+
// Initialize rate limit configuration with defaults
66+
this.rateLimitConfig = {
67+
baseDelay: rateLimitConfig?.baseDelay ?? 500,
68+
maxDelay: rateLimitConfig?.maxDelay ?? 5000,
69+
adaptive: rateLimitConfig?.adaptive ?? true,
70+
maxConcurrent: rateLimitConfig?.maxConcurrent ?? 1,
71+
}
72+
this.currentDelay = this.rateLimitConfig.baseDelay
73+
4674
let options: any = {
4775
version: WatsonxEmbedder.WATSONX_VERSION,
4876
}
@@ -57,6 +85,11 @@ export class WatsonxEmbedder implements IEmbedder {
5785
throw new Error("Base URL is required for IBM Cloud Pak for Data")
5886
}
5987

88+
// Validate URL format for Cloud Pak
89+
if (!this.isValidUrl(baseUrl)) {
90+
throw new Error("Invalid URL format for IBM Cloud Pak for Data base URL")
91+
}
92+
6093
if (username) {
6194
if (password) {
6295
options.authenticator = new CloudPakForDataAuthenticator({
@@ -104,17 +137,66 @@ export class WatsonxEmbedder implements IEmbedder {
104137
return knownDimensions[modelId] || 768
105138
}
106139

140+
/**
 * Reports whether `url` parses as an absolute HTTP or HTTPS URL.
 * @param url Candidate URL string
 * @returns True for a parseable http:/https: URL; false for any other scheme or a parse failure
 */
private isValidUrl(url: string): boolean {
	let scheme: string
	try {
		// The URL constructor throws on anything it cannot parse.
		scheme = new URL(url).protocol
	} catch {
		return false
	}
	return scheme === "http:" || scheme === "https:"
}
153+
154+
/**
 * Adapts the inter-request delay to the outcome of the latest API call.
 *
 * - Rate-limited call: record the hit and double the delay, capped at `maxDelay`.
 * - Successful call: work off one recorded hit; once none remain, shrink the
 *   delay by 10% per success until it is back at `baseDelay`.
 *
 * Does nothing when adaptive rate limiting is disabled.
 *
 * @param isRateLimited Whether the last request hit a rate limit
 */
private adjustRateLimit(isRateLimited: boolean): void {
	if (!this.rateLimitConfig.adaptive) {
		return
	}

	const { baseDelay, maxDelay } = this.rateLimitConfig

	if (isRateLimited) {
		this.rateLimitHits++
		// Exponential backoff when rate limited
		this.currentDelay = Math.min(this.currentDelay * 2, maxDelay)
		return
	}

	if (this.rateLimitHits > 0) {
		this.rateLimitHits--
	}

	// Keep decaying on every success once the hit counter is clear. Decaying
	// only on the transition to zero would leave the delay stuck above
	// baseDelay for the rest of the embedder's lifetime.
	if (this.rateLimitHits === 0 && this.currentDelay > baseDelay) {
		// floor() keeps the delay integral and guarantees it strictly decreases.
		this.currentDelay = Math.max(baseDelay, Math.floor(this.currentDelay * 0.9))
	}
}
175+
176+
/**
 * Waits until the next request is allowed to start.
 *
 * Each caller reserves a start slot at least `currentDelay` ms after the
 * previously reserved one. The reservation is made synchronously, before the
 * first `await`, so calls issued together (a batch with `maxConcurrent > 1`)
 * are spaced out instead of all reading the same timestamp and firing at once.
 */
private async waitForRateLimit(): Promise<void> {
	const now = Date.now()
	// Earliest permitted start time; never in the past.
	const slot = Math.max(now, this.lastRequestTime + this.currentDelay)

	// Claim the slot before yielding so concurrent callers queue behind it.
	this.lastRequestTime = slot

	if (slot > now) {
		await delay(slot - now)
	}
}
190+
107191
async createEmbeddings(texts: string[], model?: string): Promise<EmbeddingResponse> {
108192
const MAX_RETRIES = 3
109-
const INITIAL_DELAY_MS = 1000
110-
const MAX_CONCURRENT_REQUESTS = 1
111-
const REQUEST_DELAY_MS = 500
112193
const modelToUse = model || this.modelId
113194
const embeddings: number[][] = []
114195
let promptTokens = 0
115196
let totalTokens = 0
116-
for (let i = 0; i < texts.length; i += MAX_CONCURRENT_REQUESTS) {
117-
const batch = texts.slice(i, i + MAX_CONCURRENT_REQUESTS)
197+
198+
for (let i = 0; i < texts.length; i += this.rateLimitConfig.maxConcurrent) {
199+
const batch = texts.slice(i, i + this.rateLimitConfig.maxConcurrent)
118200
const batchResults = await Promise.all(
119201
batch.map(async (text, batchIndex) => {
120202
const textIndex = i + batchIndex
@@ -135,7 +217,9 @@ export class WatsonxEmbedder implements IEmbedder {
135217
let lastError
136218
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
137219
try {
138-
await delay(1000)
220+
// Apply rate limiting
221+
await this.waitForRateLimit()
222+
139223
const response = await this.watsonxClient.embedText({
140224
modelId: modelToUse,
141225
inputs: [text],
@@ -163,6 +247,8 @@ export class WatsonxEmbedder implements IEmbedder {
163247
}
164248

165249
const tokens = response.result.input_token_count || 0
250+
// Successful request, adjust rate limit if adaptive
251+
this.adjustRateLimit(false)
166252
return { index: textIndex, embedding, tokens }
167253
} else {
168254
console.warn(`No embedding results for text at index ${textIndex}`)
@@ -177,11 +263,22 @@ export class WatsonxEmbedder implements IEmbedder {
177263
} catch (error) {
178264
lastError = error
179265

266+
// Check if this is a rate limit error
267+
const isRateLimitError = this.isRateLimitError(error)
268+
if (isRateLimitError) {
269+
this.adjustRateLimit(true)
270+
}
271+
180272
if (attempt < MAX_RETRIES - 1) {
181-
const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempt)
273+
// Use adaptive delay if rate limited, otherwise exponential backoff
274+
const delayMs = isRateLimitError
275+
? this.currentDelay
276+
: this.rateLimitConfig.baseDelay * Math.pow(2, attempt)
277+
182278
console.warn(
183-
`IBM watsonx API call failed, retrying in ${delayMs}ms (attempt ${attempt + 1}/${MAX_RETRIES})`,
279+
`IBM watsonx API call failed${isRateLimitError ? " (rate limited)" : ""}, retrying in ${delayMs}ms (attempt ${attempt + 1}/${MAX_RETRIES})`,
184280
)
281+
await delay(delayMs)
185282
}
186283
}
187284
}
@@ -194,8 +291,9 @@ export class WatsonxEmbedder implements IEmbedder {
194291
}),
195292
)
196293

197-
if (i + MAX_CONCURRENT_REQUESTS < texts.length) {
198-
await new Promise((resolve) => setTimeout(resolve, REQUEST_DELAY_MS * 2))
294+
// Add delay between batches if not the last batch
295+
if (i + this.rateLimitConfig.maxConcurrent < texts.length) {
296+
await delay(this.currentDelay)
199297
}
200298

201299
// Process batch results
@@ -218,6 +316,25 @@ export class WatsonxEmbedder implements IEmbedder {
218316
}
219317
}
220318

319+
/**
 * Checks if an error is a rate limit error.
 *
 * An error counts as rate-limited when any of its `code`, `status` or
 * `statusCode` fields is 429 (number or numeric string), or when its message
 * mentions a rate limit, too many requests, or an exceeded quota.
 *
 * @param error The error to check (any thrown value is accepted)
 * @returns True if the error is a rate limit error
 */
private isRateLimitError(error: unknown): boolean {
	if (typeof error !== "object" || error === null) {
		return false
	}

	const candidate = error as {
		message?: unknown
		code?: unknown
		status?: unknown
		statusCode?: unknown
	}

	// Inspect every field independently: `code` may hold a non-HTTP value
	// (e.g. a string error code), and picking the first truthy field would
	// then hide a 429 carried in `status` or `statusCode`.
	const statusFields = [candidate.code, candidate.status, candidate.statusCode]
	if (statusFields.some((value) => value === 429 || value === "429")) {
		return true
	}

	// Only string messages are searched; anything else is treated as empty.
	const message = typeof candidate.message === "string" ? candidate.message.toLowerCase() : ""
	return (
		message.includes("rate limit") ||
		message.includes("too many requests") ||
		message.includes("quota exceeded")
	)
}
337+
221338
/**
222339
* Validates the watsonx embedder configuration by testing the API key and connection
223340
* @returns Promise resolving to validation result with success status and optional error message

0 commit comments

Comments
 (0)