From f2255d900d7bdfd78b560e4a8330e68836f0e5a1 Mon Sep 17 00:00:00 2001
From: hannesrudolph <hrudolph@gmail.com>
Date: Wed, 2 Jul 2025 13:23:35 -0600
Subject: [PATCH 1/2] fix: prevent indexer hanging on OpenAI insufficient quota
 errors (#5350)

---
 src/i18n/locales/ca/embeddings.json           |  2 +
 src/i18n/locales/de/embeddings.json           |  2 +
 src/i18n/locales/en/embeddings.json           |  2 +
 src/i18n/locales/es/embeddings.json           |  2 +
 src/i18n/locales/fr/embeddings.json           |  2 +
 src/i18n/locales/hi/embeddings.json           |  2 +
 src/i18n/locales/id/embeddings.json           |  2 +
 src/i18n/locales/it/embeddings.json           |  2 +
 src/i18n/locales/ja/embeddings.json           |  2 +
 src/i18n/locales/ko/embeddings.json           |  2 +
 src/i18n/locales/nl/embeddings.json           |  2 +
 src/i18n/locales/pl/embeddings.json           |  2 +
 src/i18n/locales/pt-BR/embeddings.json        |  2 +
 src/i18n/locales/ru/embeddings.json           |  2 +
 src/i18n/locales/tr/embeddings.json           |  2 +
 src/i18n/locales/vi/embeddings.json           |  2 +
 src/i18n/locales/zh-CN/embeddings.json        |  2 +
 src/i18n/locales/zh-TW/embeddings.json        |  2 +
 .../__tests__/openai-compatible.spec.ts       | 44 +++++++++++++++++++
 .../embedders/__tests__/openai.spec.ts        | 40 +++++++++++++++++
 .../code-index/embedders/openai-compatible.ts | 35 ++++++++++++++-
 src/services/code-index/embedders/openai.ts   | 35 ++++++++++++++-
 22 files changed, 188 insertions(+), 2 deletions(-)

diff --git a/src/i18n/locales/ca/embeddings.json b/src/i18n/locales/ca/embeddings.json
index 3302ff7acd..c77c3c50cf 100644
--- a/src/i18n/locales/ca/embeddings.json
+++ b/src/i18n/locales/ca/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "No s'han pogut crear les incrustacions: ha fallat l'autenticació. Comproveu la vostra clau d'API.",
 	"failedWithStatus": "No s'han pogut crear les incrustacions després de {{attempts}} intents: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "No s'han pogut crear les incrustacions després de {{attempts}} intents: {{errorMessage}}",
+	"insufficientQuota": "No s'han pogut crear les incrustacions: Quota insuficient. Comproveu el saldo del vostre compte OpenAI i afegiu crèdits per continuar.",
 	"failedMaxAttempts": "No s'han pogut crear les incrustacions després de {{attempts}} intents",
 	"textExceedsTokenLimit": "El text a l'índex {{index}} supera el límit màxim de testimonis ({{itemTokens}} > {{maxTokens}}). S'està ometent.",
+	"textWithPrefixExceedsTokenLimit": "El text a l'índex {{index}} amb prefix supera el límit màxim de tokens ({{estimatedTokens}} > {{maxTokens}}). No s'afegirà el prefix.",
 	"rateLimitRetry": "S'ha assolit el límit de velocitat, es torna a intentar en {{delayMs}}ms (intent {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "No s'ha pogut llegir el cos de l'error",
diff --git a/src/i18n/locales/de/embeddings.json b/src/i18n/locales/de/embeddings.json
index 300899fd1b..5536155ff2 100644
--- a/src/i18n/locales/de/embeddings.json
+++ b/src/i18n/locales/de/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "Erstellung von Einbettungen fehlgeschlagen: Authentifizierung fehlgeschlagen. Bitte überprüfe deinen API-Schlüssel.",
 	"failedWithStatus": "Erstellung von Einbettungen nach {{attempts}} Versuchen fehlgeschlagen: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "Erstellung von Einbettungen nach {{attempts}} Versuchen fehlgeschlagen: {{errorMessage}}",
+	"insufficientQuota": "Erstellung von Einbettungen fehlgeschlagen: Unzureichendes Kontingent. Bitte überprüfe dein OpenAI-Kontoguthaben und füge Credits hinzu, um fortzufahren.",
 	"failedMaxAttempts": "Erstellung von Einbettungen nach {{attempts}} Versuchen fehlgeschlagen",
 	"textExceedsTokenLimit": "Text bei Index {{index}} überschreitet das maximale Token-Limit ({{itemTokens}} > {{maxTokens}}). Wird übersprungen.",
+	"textWithPrefixExceedsTokenLimit": "Text bei Index {{index}} mit Präfix überschreitet das maximale Token-Limit ({{estimatedTokens}} > {{maxTokens}}). Präfix wird nicht hinzugefügt.",
 	"rateLimitRetry": "Ratenlimit erreicht, Wiederholung in {{delayMs}}ms (Versuch {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "Fehlerinhalt konnte nicht gelesen werden",
diff --git a/src/i18n/locales/en/embeddings.json b/src/i18n/locales/en/embeddings.json
index e57f3de0e8..cbe1c3b1e9 100644
--- a/src/i18n/locales/en/embeddings.json
+++ b/src/i18n/locales/en/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "Failed to create embeddings: Authentication failed. Please check your API key.",
 	"failedWithStatus": "Failed to create embeddings after {{attempts}} attempts: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "Failed to create embeddings after {{attempts}} attempts: {{errorMessage}}",
+	"insufficientQuota": "Failed to create embeddings: Insufficient quota. Please check your OpenAI account balance and add credits to continue.",
 	"failedMaxAttempts": "Failed to create embeddings after {{attempts}} attempts",
 	"textExceedsTokenLimit": "Text at index {{index}} exceeds maximum token limit ({{itemTokens}} > {{maxTokens}}). Skipping.",
+	"textWithPrefixExceedsTokenLimit": "Text at index {{index}} with prefix exceeds maximum token limit ({{estimatedTokens}} > {{maxTokens}}). Not adding prefix.",
 	"rateLimitRetry": "Rate limit hit, retrying in {{delayMs}}ms (attempt {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "Could not read error body",
diff --git a/src/i18n/locales/es/embeddings.json b/src/i18n/locales/es/embeddings.json
index c2d7795362..cb1142b0a1 100644
--- a/src/i18n/locales/es/embeddings.json
+++ b/src/i18n/locales/es/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "No se pudieron crear las incrustaciones: Error de autenticación. Comprueba tu clave de API.",
 	"failedWithStatus": "No se pudieron crear las incrustaciones después de {{attempts}} intentos: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "No se pudieron crear las incrustaciones después de {{attempts}} intentos: {{errorMessage}}",
+	"insufficientQuota": "No se pudieron crear las incrustaciones: Cuota insuficiente. Comprueba el saldo de tu cuenta de OpenAI y añade créditos para continuar.",
 	"failedMaxAttempts": "No se pudieron crear las incrustaciones después de {{attempts}} intentos",
 	"textExceedsTokenLimit": "El texto en el índice {{index}} supera el límite máximo de tokens ({{itemTokens}} > {{maxTokens}}). Omitiendo.",
+	"textWithPrefixExceedsTokenLimit": "El texto en el índice {{index}} con prefijo excede el límite máximo de tokens ({{estimatedTokens}} > {{maxTokens}}). No se agrega el prefijo.",
 	"rateLimitRetry": "Límite de velocidad alcanzado, reintentando en {{delayMs}}ms (intento {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "No se pudo leer el cuerpo del error",
diff --git a/src/i18n/locales/fr/embeddings.json b/src/i18n/locales/fr/embeddings.json
index 4dbbe6218b..a2a99c4f5d 100644
--- a/src/i18n/locales/fr/embeddings.json
+++ b/src/i18n/locales/fr/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "Échec de la création des embeddings : Échec de l'authentification. Veuillez vérifier votre clé API.",
 	"failedWithStatus": "Échec de la création des embeddings après {{attempts}} tentatives : HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "Échec de la création des embeddings après {{attempts}} tentatives : {{errorMessage}}",
+	"insufficientQuota": "Échec de la création des embeddings : Quota insuffisant. Veuillez vérifier le solde de votre compte OpenAI et ajouter des crédits pour continuer.",
 	"failedMaxAttempts": "Échec de la création des embeddings après {{attempts}} tentatives",
 	"textExceedsTokenLimit": "Le texte à l'index {{index}} dépasse la limite maximale de tokens ({{itemTokens}} > {{maxTokens}}). Ignoré.",
+	"textWithPrefixExceedsTokenLimit": "Le texte à l'index {{index}} avec préfixe dépasse la limite maximale de tokens ({{estimatedTokens}} > {{maxTokens}}). Le préfixe n'est pas ajouté.",
 	"rateLimitRetry": "Limite de débit atteinte, nouvelle tentative dans {{delayMs}}ms (tentative {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "Impossible de lire le corps de l'erreur",
diff --git a/src/i18n/locales/hi/embeddings.json b/src/i18n/locales/hi/embeddings.json
index 312d42e69c..4aee3893e2 100644
--- a/src/i18n/locales/hi/embeddings.json
+++ b/src/i18n/locales/hi/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "एम्बेडिंग बनाने में विफल: प्रमाणीकरण विफल। कृपया अपनी एपीआई कुंजी जांचें।",
 	"failedWithStatus": "{{attempts}} प्रयासों के बाद एम्बेडिंग बनाने में विफल: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "{{attempts}} प्रयासों के बाद एम्बेडिंग बनाने में विफल: {{errorMessage}}",
+	"insufficientQuota": "एम्बेडिंग बनाने में विफल: अपर्याप्त कोटा। कृपया अपने OpenAI खाते की शेष राशि जांचें और जारी रखने के लिए क्रेडिट जोड़ें।",
 	"failedMaxAttempts": "{{attempts}} प्रयासों के बाद एम्बेडिंग बनाने में विफल",
 	"textExceedsTokenLimit": "अनुक्रमणिका {{index}} पर पाठ अधिकतम टोकन सीमा ({{itemTokens}} > {{maxTokens}}) से अधिक है। छोड़ा जा रहा है।",
+	"textWithPrefixExceedsTokenLimit": "उपसर्ग के साथ इंडेक्स {{index}} पर टेक्स्ट अधिकतम टोकन सीमा ({{estimatedTokens}} > {{maxTokens}}) से अधिक है। उपसर्ग नहीं जोड़ा जा रहा है।",
 	"rateLimitRetry": "दर सीमा समाप्त, {{delayMs}}ms में पुन: प्रयास किया जा रहा है (प्रयास {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "त्रुटि सामग्री पढ़ नहीं सका",
diff --git a/src/i18n/locales/id/embeddings.json b/src/i18n/locales/id/embeddings.json
index abfa9cb354..792cb6f6ec 100644
--- a/src/i18n/locales/id/embeddings.json
+++ b/src/i18n/locales/id/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "Gagal membuat embeddings: Autentikasi gagal. Silakan periksa API key Anda.",
 	"failedWithStatus": "Gagal membuat embeddings setelah {{attempts}} percobaan: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "Gagal membuat embeddings setelah {{attempts}} percobaan: {{errorMessage}}",
+	"insufficientQuota": "Gagal membuat embeddings: Kuota tidak mencukupi. Silakan periksa saldo akun OpenAI Anda dan tambahkan kredit untuk melanjutkan.",
 	"failedMaxAttempts": "Gagal membuat embeddings setelah {{attempts}} percobaan",
 	"textExceedsTokenLimit": "Teks pada indeks {{index}} melebihi batas maksimum token ({{itemTokens}} > {{maxTokens}}). Dilewati.",
+	"textWithPrefixExceedsTokenLimit": "Teks di indeks {{index}} dengan awalan melebihi batas token maksimum ({{estimatedTokens}} > {{maxTokens}}). Tidak menambahkan awalan.",
 	"rateLimitRetry": "Batas rate tercapai, mencoba lagi dalam {{delayMs}}ms (percobaan {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "Tidak dapat membaca body error",
diff --git a/src/i18n/locales/it/embeddings.json b/src/i18n/locales/it/embeddings.json
index 5bd7164886..ef009ad952 100644
--- a/src/i18n/locales/it/embeddings.json
+++ b/src/i18n/locales/it/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "Creazione degli embedding non riuscita: Autenticazione fallita. Controlla la tua chiave API.",
 	"failedWithStatus": "Creazione degli embedding non riuscita dopo {{attempts}} tentativi: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "Creazione degli embedding non riuscita dopo {{attempts}} tentativi: {{errorMessage}}",
+	"insufficientQuota": "Creazione degli embedding non riuscita: Quota insufficiente. Controlla il saldo del tuo account OpenAI e aggiungi crediti per continuare.",
 	"failedMaxAttempts": "Creazione degli embedding non riuscita dopo {{attempts}} tentativi",
 	"textExceedsTokenLimit": "Il testo all'indice {{index}} supera il limite massimo di token ({{itemTokens}} > {{maxTokens}}). Saltato.",
+	"textWithPrefixExceedsTokenLimit": "Il testo all'indice {{index}} con prefisso supera il limite massimo di token ({{estimatedTokens}} > {{maxTokens}}). Non aggiungo il prefisso.",
 	"rateLimitRetry": "Limite di velocità raggiunto, nuovo tentativo tra {{delayMs}}ms (tentativo {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "Impossibile leggere il corpo dell'errore",
diff --git a/src/i18n/locales/ja/embeddings.json b/src/i18n/locales/ja/embeddings.json
index 862270a364..dffbb54712 100644
--- a/src/i18n/locales/ja/embeddings.json
+++ b/src/i18n/locales/ja/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "埋め込みの作成に失敗しました：認証に失敗しました。APIキーを確認してください。",
 	"failedWithStatus": "{{attempts}}回試行しましたが、埋め込みの作成に失敗しました：HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "{{attempts}}回試行しましたが、埋め込みの作成に失敗しました：{{errorMessage}}",
+	"insufficientQuota": "埋め込みの作成に失敗しました：クォータが不足しています。OpenAIアカウントの残高を確認し、続行するためにクレジットを追加してください。",
 	"failedMaxAttempts": "{{attempts}}回試行しましたが、埋め込みの作成に失敗しました",
 	"textExceedsTokenLimit": "インデックス{{index}}のテキストが最大トークン制限を超えています（{{itemTokens}}> {{maxTokens}}）。スキップします。",
+	"textWithPrefixExceedsTokenLimit": "インデックス {{index}} のプレフィックス付きテキストが最大トークン制限 ({{estimatedTokens}} > {{maxTokens}}) を超えています。プレフィックスを追加しません。",
 	"rateLimitRetry": "レート制限に達しました。{{delayMs}}ミリ秒後に再試行します（試行{{attempt}}/{{maxRetries}}）",
 	"ollama": {
 		"couldNotReadErrorBody": "エラー本文を読み取れませんでした",
diff --git a/src/i18n/locales/ko/embeddings.json b/src/i18n/locales/ko/embeddings.json
index 37877bfa97..4a296a694f 100644
--- a/src/i18n/locales/ko/embeddings.json
+++ b/src/i18n/locales/ko/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "임베딩 생성 실패: 인증에 실패했습니다. API 키를 확인하세요.",
 	"failedWithStatus": "{{attempts}}번 시도 후 임베딩 생성 실패: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "{{attempts}}번 시도 후 임베딩 생성 실패: {{errorMessage}}",
+	"insufficientQuota": "임베딩 생성 실패: 할당량이 부족합니다. OpenAI 계정 잔액을 확인하고 계속하려면 크레딧을 추가하세요.",
 	"failedMaxAttempts": "{{attempts}}번 시도 후 임베딩 생성 실패",
 	"textExceedsTokenLimit": "인덱스 {{index}}의 텍스트가 최대 토큰 제한({{itemTokens}} > {{maxTokens}})을 초과했습니다. 건너뜁니다.",
+	"textWithPrefixExceedsTokenLimit": "접두사가 있는 인덱스 {{index}}의 텍스트가 최대 토큰 제한({{estimatedTokens}} > {{maxTokens}})을 초과했습니다. 접두사를 추가하지 않습니다.",
 	"rateLimitRetry": "속도 제한에 도달했습니다. {{delayMs}}ms 후에 다시 시도합니다(시도 {{attempt}}/{{maxRetries}}).",
 	"ollama": {
 		"couldNotReadErrorBody": "오류 본문을 읽을 수 없습니다",
diff --git a/src/i18n/locales/nl/embeddings.json b/src/i18n/locales/nl/embeddings.json
index 7256b0973b..d69e901925 100644
--- a/src/i18n/locales/nl/embeddings.json
+++ b/src/i18n/locales/nl/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "Insluitingen maken mislukt: Authenticatie mislukt. Controleer je API-sleutel.",
 	"failedWithStatus": "Insluitingen maken mislukt na {{attempts}} pogingen: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "Insluitingen maken mislukt na {{attempts}} pogingen: {{errorMessage}}",
+	"insufficientQuota": "Insluitingen maken mislukt: Onvoldoende quota. Controleer je OpenAI-accountsaldo en voeg tegoed toe om door te gaan.",
 	"failedMaxAttempts": "Insluitingen maken mislukt na {{attempts}} pogingen",
 	"textExceedsTokenLimit": "Tekst op index {{index}} overschrijdt de maximale tokenlimiet ({{itemTokens}} > {{maxTokens}}). Wordt overgeslagen.",
+	"textWithPrefixExceedsTokenLimit": "Tekst op index {{index}} met prefix overschrijdt de maximale tokenlimiet ({{estimatedTokens}} > {{maxTokens}}). Prefix wordt niet toegevoegd.",
 	"rateLimitRetry": "Snelheidslimiet bereikt, opnieuw proberen over {{delayMs}}ms (poging {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "Kon foutinhoud niet lezen",
diff --git a/src/i18n/locales/pl/embeddings.json b/src/i18n/locales/pl/embeddings.json
index c3e160869b..199e02d79c 100644
--- a/src/i18n/locales/pl/embeddings.json
+++ b/src/i18n/locales/pl/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "Nie udało się utworzyć osadzeń: Uwierzytelnianie nie powiodło się. Sprawdź swój klucz API.",
 	"failedWithStatus": "Nie udało się utworzyć osadzeń po {{attempts}} próbach: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "Nie udało się utworzyć osadzeń po {{attempts}} próbach: {{errorMessage}}",
+	"insufficientQuota": "Nie udało się utworzyć osadzeń: Niewystarczający limit. Sprawdź saldo konta OpenAI i dodaj środki, aby kontynuować.",
 	"failedMaxAttempts": "Nie udało się utworzyć osadzeń po {{attempts}} próbach",
 	"textExceedsTokenLimit": "Tekst w indeksie {{index}} przekracza maksymalny limit tokenów ({{itemTokens}} > {{maxTokens}}). Pomijanie.",
+	"textWithPrefixExceedsTokenLimit": "Tekst w indeksie {{index}} z prefiksem przekracza maksymalny limit tokenów ({{estimatedTokens}} > {{maxTokens}}). Nie dodaję prefiksu.",
 	"rateLimitRetry": "Osiągnięto limit szybkości, ponawianie za {{delayMs}}ms (próba {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "Nie można odczytać treści błędu",
diff --git a/src/i18n/locales/pt-BR/embeddings.json b/src/i18n/locales/pt-BR/embeddings.json
index 6b97475265..7300ffd5a6 100644
--- a/src/i18n/locales/pt-BR/embeddings.json
+++ b/src/i18n/locales/pt-BR/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "Falha ao criar embeddings: Falha na autenticação. Verifique sua chave de API.",
 	"failedWithStatus": "Falha ao criar embeddings após {{attempts}} tentativas: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "Falha ao criar embeddings após {{attempts}} tentativas: {{errorMessage}}",
+	"insufficientQuota": "Falha ao criar embeddings: Quota insuficiente. Verifique o saldo da sua conta OpenAI e adicione créditos para continuar.",
 	"failedMaxAttempts": "Falha ao criar embeddings após {{attempts}} tentativas",
 	"textExceedsTokenLimit": "O texto no índice {{index}} excede o limite máximo de tokens ({{itemTokens}} > {{maxTokens}}). Ignorando.",
+	"textWithPrefixExceedsTokenLimit": "O texto no índice {{index}} com prefixo excede o limite máximo de tokens ({{estimatedTokens}} > {{maxTokens}}). Não adicionando prefixo.",
 	"rateLimitRetry": "Limite de taxa atingido, tentando novamente em {{delayMs}}ms (tentativa {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "Não foi possível ler o corpo do erro",
diff --git a/src/i18n/locales/ru/embeddings.json b/src/i18n/locales/ru/embeddings.json
index c6143816e8..ae9f036232 100644
--- a/src/i18n/locales/ru/embeddings.json
+++ b/src/i18n/locales/ru/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "Не удалось создать вложения: Ошибка аутентификации. Проверьте свой ключ API.",
 	"failedWithStatus": "Не удалось создать вложения после {{attempts}} попыток: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "Не удалось создать вложения после {{attempts}} попыток: {{errorMessage}}",
+	"insufficientQuota": "Не удалось создать вложения: Недостаточно квоты. Пожалуйста, проверьте баланс вашей учетной записи OpenAI и добавьте кредиты, чтобы продолжить.",
 	"failedMaxAttempts": "Не удалось создать вложения после {{attempts}} попыток",
 	"textExceedsTokenLimit": "Текст в индексе {{index}} превышает максимальный лимит токенов ({{itemTokens}} > {{maxTokens}}). Пропускается.",
+	"textWithPrefixExceedsTokenLimit": "Текст по индексу {{index}} с префиксом превышает максимальный лимит токенов ({{estimatedTokens}} > {{maxTokens}}). Не добавляю префикс.",
 	"rateLimitRetry": "Достигнут лимит скорости, повторная попытка через {{delayMs}} мс (попытка {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "Не удалось прочитать тело ошибки",
diff --git a/src/i18n/locales/tr/embeddings.json b/src/i18n/locales/tr/embeddings.json
index 10ad965f0f..0ab80ae841 100644
--- a/src/i18n/locales/tr/embeddings.json
+++ b/src/i18n/locales/tr/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "Gömülmeler oluşturulamadı: Kimlik doğrulama başarısız oldu. Lütfen API anahtarınızı kontrol edin.",
 	"failedWithStatus": "{{attempts}} denemeden sonra gömülmeler oluşturulamadı: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "{{attempts}} denemeden sonra gömülmeler oluşturulamadı: {{errorMessage}}",
+	"insufficientQuota": "Gömülmeler oluşturulamadı: Yetersiz kota. Lütfen OpenAI hesap bakiyenizi kontrol edin ve devam etmek için kredi ekleyin.",
 	"failedMaxAttempts": "{{attempts}} denemeden sonra gömülmeler oluşturulamadı",
 	"textExceedsTokenLimit": "{{index}} dizinindeki metin maksimum jeton sınırını aşıyor ({{itemTokens}} > {{maxTokens}}). Atlanıyor.",
+	"textWithPrefixExceedsTokenLimit": "{{index}} dizinindeki önekli metin maksimum jeton sınırını aşıyor ({{estimatedTokens}} > {{maxTokens}}). Önek eklenmiyor.",
 	"rateLimitRetry": "Hız sınırına ulaşıldı, {{delayMs}}ms içinde yeniden deneniyor (deneme {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "Hata gövdesi okunamadı",
diff --git a/src/i18n/locales/vi/embeddings.json b/src/i18n/locales/vi/embeddings.json
index a533aaac07..c10a44e6c9 100644
--- a/src/i18n/locales/vi/embeddings.json
+++ b/src/i18n/locales/vi/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "Không thể tạo nhúng: Xác thực không thành công. Vui lòng kiểm tra khóa API của bạn.",
 	"failedWithStatus": "Không thể tạo nhúng sau {{attempts}} lần thử: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "Không thể tạo nhúng sau {{attempts}} lần thử: {{errorMessage}}",
+	"insufficientQuota": "Không thể tạo nhúng: Không đủ hạn ngạch. Vui lòng kiểm tra số dư tài khoản OpenAI của bạn và nạp thêm tín dụng để tiếp tục.",
 	"failedMaxAttempts": "Không thể tạo nhúng sau {{attempts}} lần thử",
 	"textExceedsTokenLimit": "Văn bản tại chỉ mục {{index}} vượt quá giới hạn mã thông báo tối đa ({{itemTokens}} > {{maxTokens}}). Bỏ qua.",
+	"textWithPrefixExceedsTokenLimit": "Văn bản tại chỉ mục {{index}} có tiền tố vượt quá giới hạn mã thông báo tối đa ({{estimatedTokens}} > {{maxTokens}}). Không thêm tiền tố.",
 	"rateLimitRetry": "Đã đạt đến giới hạn tốc độ, thử lại sau {{delayMs}}ms (lần thử {{attempt}}/{{maxRetries}})",
 	"ollama": {
 		"couldNotReadErrorBody": "Không thể đọc nội dung lỗi",
diff --git a/src/i18n/locales/zh-CN/embeddings.json b/src/i18n/locales/zh-CN/embeddings.json
index dba5282844..5db7512419 100644
--- a/src/i18n/locales/zh-CN/embeddings.json
+++ b/src/i18n/locales/zh-CN/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "创建嵌入失败：身份验证失败。请检查您的 API 密钥。",
 	"failedWithStatus": "尝试 {{attempts}} 次后创建嵌入失败：HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "尝试 {{attempts}} 次后创建嵌入失败：{{errorMessage}}",
+	"insufficientQuota": "创建嵌入失败：配额不足。请检查您的 OpenAI 帐户余额并添加积分以继续。",
 	"failedMaxAttempts": "尝试 {{attempts}} 次后创建嵌入失败",
 	"textExceedsTokenLimit": "索引 {{index}} 处的文本超过最大令牌限制 ({{itemTokens}} > {{maxTokens}})。正在跳过。",
+	"textWithPrefixExceedsTokenLimit": "索引 {{index}} 处带前缀的文本超过了最大令牌限制 ({{estimatedTokens}} > {{maxTokens}})。不添加前缀。",
 	"rateLimitRetry": "已达到速率限制，将在 {{delayMs}} 毫秒后重试（尝试次数 {{attempt}}/{{maxRetries}}）",
 	"ollama": {
 		"couldNotReadErrorBody": "无法读取错误内容",
diff --git a/src/i18n/locales/zh-TW/embeddings.json b/src/i18n/locales/zh-TW/embeddings.json
index 71a5a482f2..a31b61848a 100644
--- a/src/i18n/locales/zh-TW/embeddings.json
+++ b/src/i18n/locales/zh-TW/embeddings.json
@@ -3,8 +3,10 @@
 	"authenticationFailed": "建立內嵌失敗：驗證失敗。請檢查您的 API 金鑰。",
 	"failedWithStatus": "嘗試 {{attempts}} 次後建立內嵌失敗：HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "嘗試 {{attempts}} 次後建立內嵌失敗：{{errorMessage}}",
+	"insufficientQuota": "建立內嵌失敗：額度不足。請檢查您的 OpenAI 帳戶餘額並新增點數以繼續。",
 	"failedMaxAttempts": "嘗試 {{attempts}} 次後建立內嵌失敗",
 	"textExceedsTokenLimit": "索引 {{index}} 處的文字超過最大權杖限制 ({{itemTokens}} > {{maxTokens}})。正在略過。",
+	"textWithPrefixExceedsTokenLimit": "索引 {{index}} 處帶前綴的文字超過最大權杖限制 ({{estimatedTokens}} > {{maxTokens}})。不新增前綴。",
 	"rateLimitRetry": "已達到速率限制，將在 {{delayMs}} 毫秒後重試（嘗試次數 {{attempt}}/{{maxRetries}}）",
 	"ollama": {
 		"couldNotReadErrorBody": "無法讀取錯誤內容",
diff --git a/src/services/code-index/embedders/__tests__/openai-compatible.spec.ts b/src/services/code-index/embedders/__tests__/openai-compatible.spec.ts
index 107f3af24d..a591fca74f 100644
--- a/src/services/code-index/embedders/__tests__/openai-compatible.spec.ts
+++ b/src/services/code-index/embedders/__tests__/openai-compatible.spec.ts
@@ -21,6 +21,8 @@ vitest.mock("../../../../i18n", () => ({
 			"embeddings:textExceedsTokenLimit": `Text at index ${params?.index} exceeds maximum token limit (${params?.itemTokens} > ${params?.maxTokens}). Skipping.`,
 			"embeddings:rateLimitRetry": `Rate limit hit, retrying in ${params?.delayMs}ms (attempt ${params?.attempt}/${params?.maxRetries})`,
 			"embeddings:unknownError": "Unknown error",
+			"embeddings:insufficientQuota":
+				"Failed to create embeddings: Insufficient quota. Please check your OpenAI account balance and add credits to continue.",
 		}
 		return translations[key] || key
 	},
@@ -405,6 +407,48 @@ describe("OpenAICompatibleEmbedder", () => {
 				expect(console.warn).not.toHaveBeenCalledWith(expect.stringContaining("Rate limit hit"))
 			})
 
+			it("should not retry on insufficient quota errors", async () => {
+				const testTexts = ["Hello world"]
+				const quotaError = new Error(
+					"You exceeded your current quota, please check your plan and billing details.",
+				)
+				;(quotaError as any).status = 429
+
+				mockEmbeddingsCreate.mockRejectedValue(quotaError)
+
+				await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
+					"Failed to create embeddings: Insufficient quota. Please check your OpenAI account balance and add credits to continue.",
+				)
+
+				expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(1)
+				expect(console.warn).not.toHaveBeenCalledWith(expect.stringContaining("Rate limit hit"))
+			})
+
+			it("should retry on regular rate limit errors", async () => {
+				const testTexts = ["Hello world"]
+				const rateLimitError = new Error("Rate limit exceeded")
+				;(rateLimitError as any).status = 429
+
+				// Create base64 encoded embedding for successful response
+				const testEmbedding = new Float32Array([0.25, 0.5, 0.75])
+				const base64String = Buffer.from(testEmbedding.buffer).toString("base64")
+
+				mockEmbeddingsCreate.mockRejectedValueOnce(rateLimitError).mockResolvedValueOnce({
+					data: [{ embedding: base64String }],
+					usage: { prompt_tokens: 10, total_tokens: 15 },
+				})
+
+				const resultPromise = embedder.createEmbeddings(testTexts)
+				await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS)
+				const result = await resultPromise
+
+				expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2)
+				expect(result).toEqual({
+					embeddings: [[0.25, 0.5, 0.75]],
+					usage: { promptTokens: 10, totalTokens: 15 },
+				})
+			})
+
 			it("should throw error immediately on non-retryable errors", async () => {
 				const testTexts = ["Hello world"]
 				const serverError = new Error("Internal server error")
diff --git a/src/services/code-index/embedders/__tests__/openai.spec.ts b/src/services/code-index/embedders/__tests__/openai.spec.ts
index c93c049844..b6283267e8 100644
--- a/src/services/code-index/embedders/__tests__/openai.spec.ts
+++ b/src/services/code-index/embedders/__tests__/openai.spec.ts
@@ -18,6 +18,8 @@ vitest.mock("../../../../i18n", () => ({
 			"embeddings:failedMaxAttempts": `Failed to create embeddings after ${params?.attempts} attempts`,
 			"embeddings:textExceedsTokenLimit": `Text at index ${params?.index} exceeds maximum token limit (${params?.itemTokens} > ${params?.maxTokens}). Skipping.`,
 			"embeddings:rateLimitRetry": `Rate limit hit, retrying in ${params?.delayMs}ms (attempt ${params?.attempt}/${params?.maxRetries})`,
+			"embeddings:insufficientQuota":
+				"Failed to create embeddings: Insufficient quota. Please check your OpenAI account balance and add credits to continue.",
 		}
 		return translations[key] || key
 	},
@@ -307,6 +309,44 @@ describe("OpenAiEmbedder", () => {
 				expect(console.warn).not.toHaveBeenCalledWith(expect.stringContaining("Rate limit hit"))
 			})
 
+			it("should not retry on insufficient quota errors", async () => {
+				const testTexts = ["Hello world"]
+				const quotaError = new Error(
+					"You exceeded your current quota, please check your plan and billing details.",
+				)
+				;(quotaError as any).status = 429
+
+				mockEmbeddingsCreate.mockRejectedValue(quotaError)
+
+				await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
+					"Failed to create embeddings: Insufficient quota. Please check your OpenAI account balance and add credits to continue.",
+				)
+
+				expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(1)
+				expect(console.warn).not.toHaveBeenCalledWith(expect.stringContaining("Rate limit hit"))
+			})
+
+			it("should retry on regular rate limit errors", async () => {
+				const testTexts = ["Hello world"]
+				const rateLimitError = new Error("Rate limit exceeded")
+				;(rateLimitError as any).status = 429
+
+				mockEmbeddingsCreate.mockRejectedValueOnce(rateLimitError).mockResolvedValueOnce({
+					data: [{ embedding: [0.1, 0.2, 0.3] }],
+					usage: { prompt_tokens: 10, total_tokens: 15 },
+				})
+
+				const resultPromise = embedder.createEmbeddings(testTexts)
+				await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS)
+				const result = await resultPromise
+
+				expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2)
+				expect(result).toEqual({
+					embeddings: [[0.1, 0.2, 0.3]],
+					usage: { promptTokens: 10, totalTokens: 15 },
+				})
+			})
+
 			it("should throw error immediately on non-retryable errors", async () => {
 				const testTexts = ["Hello world"]
 				const serverError = new Error("Internal server error")
diff --git a/src/services/code-index/embedders/openai-compatible.ts b/src/services/code-index/embedders/openai-compatible.ts
index 88eced8a0a..68afcacaf1 100644
--- a/src/services/code-index/embedders/openai-compatible.ts
+++ b/src/services/code-index/embedders/openai-compatible.ts
@@ -276,7 +276,10 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 				const isRateLimitError = httpError?.status === 429
 				const hasMoreAttempts = attempts < MAX_RETRIES - 1
 
-				if (isRateLimitError && hasMoreAttempts) {
+				// Quota exhaustion is also reported as HTTP 429, but retrying cannot fix it
+				const isQuotaError = this.isInsufficientQuotaError(error)
+
+				if (isRateLimitError && !isQuotaError && hasMoreAttempts) {
 					const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts)
 					console.warn(
 						t("embeddings:rateLimitRetry", {
@@ -287,6 +290,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 					)
 					await new Promise((resolve) => setTimeout(resolve, delayMs))
 					continue
+				} else if (isQuotaError) {
+					// Throw specific quota error immediately
+					throw new Error(t("embeddings:insufficientQuota"))
 				}
 
 				// Log the error for debugging
@@ -331,4 +337,31 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 			name: "openai-compatible",
 		}
 	}
+
+	/**
+	 * Detects if an error is due to insufficient quota/credits
+	 * @param error The error object to check
+	 * @returns True if the error indicates insufficient quota
+	 */
+	private isInsufficientQuotaError(error: any): boolean {
+		if (error?.status !== 429) return false
+
+		const errorMessage =
+			error?.message?.toLowerCase() ||
+			error?.response?.data?.error?.message?.toLowerCase() ||
+			error?.error?.message?.toLowerCase() ||
+			""
+
+		const quotaKeywords = [
+			"insufficient_quota",
+			"insufficient quota",
+			"quota exceeded",
+			"insufficient funds",
+			"billing",
+			"payment required",
+			"credits",
+		]
+
+		return quotaKeywords.some((keyword) => errorMessage.includes(keyword))
+	}
 }
diff --git a/src/services/code-index/embedders/openai.ts b/src/services/code-index/embedders/openai.ts
index 667c2f46d4..79010ca0b1 100644
--- a/src/services/code-index/embedders/openai.ts
+++ b/src/services/code-index/embedders/openai.ts
@@ -141,7 +141,10 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 				const isRateLimitError = error?.status === 429
 				const hasMoreAttempts = attempts < MAX_RETRIES - 1
 
-				if (isRateLimitError && hasMoreAttempts) {
+				// Add quota detection
+				const isQuotaError = this.isInsufficientQuotaError(error)
+
+				if (isRateLimitError && !isQuotaError && hasMoreAttempts) {
 					const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts)
 					console.warn(
 						t("embeddings:rateLimitRetry", {
@@ -152,6 +155,9 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 					)
 					await new Promise((resolve) => setTimeout(resolve, delayMs))
 					continue
+				} else if (isQuotaError) {
+					// Throw specific quota error immediately
+					throw new Error(t("embeddings:insufficientQuota"))
 				}
 
 				// Log the error for debugging
@@ -193,4 +199,31 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 			name: "openai",
 		}
 	}
+
+	/**
+	 * Detects if an error is due to insufficient quota/credits
+	 * @param error The error object to check
+	 * @returns True if the error indicates insufficient quota
+	 */
+	private isInsufficientQuotaError(error: any): boolean {
+		if (error?.status !== 429) return false
+
+		const errorMessage =
+			error?.message?.toLowerCase() ||
+			error?.response?.data?.error?.message?.toLowerCase() ||
+			error?.error?.message?.toLowerCase() ||
+			""
+
+		const quotaKeywords = [
+			"insufficient_quota",
+			"insufficient quota",
+			"quota exceeded",
+			"insufficient funds",
+			"billing",
+			"payment required",
+			"credits",
+		]
+
+		return quotaKeywords.some((keyword) => errorMessage.includes(keyword))
+	}
 }

From 3905dcbd5d60f9ea93ab0d1b8c6a6618b0f59a96 Mon Sep 17 00:00:00 2001
From: Daniel Riccio <ricciodaniel98@gmail.com>
Date: Fri, 4 Jul 2025 17:32:42 -0500
Subject: [PATCH 2/2] fix: address review comments - extract shared quota
 detection utility and fix translation typos

---
 src/i18n/locales/vi/embeddings.json           |  2 +-
 src/i18n/locales/zh-CN/embeddings.json        |  2 +-
 src/i18n/locales/zh-TW/embeddings.json        |  4 +--
 .../code-index/embedders/openai-compatible.ts | 30 ++-----------------
 src/services/code-index/embedders/openai.ts   | 30 ++-----------------
 .../embedders/utils/quota-detection.ts        | 30 +++++++++++++++++++
 6 files changed, 38 insertions(+), 60 deletions(-)
 create mode 100644 src/services/code-index/embedders/utils/quota-detection.ts

diff --git a/src/i18n/locales/vi/embeddings.json b/src/i18n/locales/vi/embeddings.json
index c10a44e6c9..15fe3b418b 100644
--- a/src/i18n/locales/vi/embeddings.json
+++ b/src/i18n/locales/vi/embeddings.json
@@ -3,7 +3,7 @@
 	"authenticationFailed": "Không thể tạo nhúng: Xác thực không thành công. Vui lòng kiểm tra khóa API của bạn.",
 	"failedWithStatus": "Không thể tạo nhúng sau {{attempts}} lần thử: HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "Không thể tạo nhúng sau {{attempts}} lần thử: {{errorMessage}}",
-	"insufficientQuota": "Không tạo được embedding: Không đủ hạn ngạch. Vui lòng kiểm tra số dư tài khoản OpenAI của bạn và nạp thêm tín dụng để tiếp tục.",
+	"insufficientQuota": "Không tạo được nhúng: Không đủ hạn ngạch. Vui lòng kiểm tra số dư tài khoản OpenAI của bạn và nạp thêm tín dụng để tiếp tục.",
 	"failedMaxAttempts": "Không thể tạo nhúng sau {{attempts}} lần thử",
 	"textExceedsTokenLimit": "Văn bản tại chỉ mục {{index}} vượt quá giới hạn mã thông báo tối đa ({{itemTokens}} > {{maxTokens}}). Bỏ qua.",
 	"textWithPrefixExceedsTokenLimit": "Văn bản tại chỉ mục {{index}} có tiền tố vượt quá giới hạn mã thông báo tối đa ({{estimatedTokens}} > {{maxTokens}}). Không thêm tiền tố.",
diff --git a/src/i18n/locales/zh-CN/embeddings.json b/src/i18n/locales/zh-CN/embeddings.json
index 5db7512419..b682ff3411 100644
--- a/src/i18n/locales/zh-CN/embeddings.json
+++ b/src/i18n/locales/zh-CN/embeddings.json
@@ -3,7 +3,7 @@
 	"authenticationFailed": "创建嵌入失败：身份验证失败。请检查您的 API 密钥。",
 	"failedWithStatus": "尝试 {{attempts}} 次后创建嵌入失败：HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "尝试 {{attempts}} 次后创建嵌入失败：{{errorMessage}}",
-	"insufficientQuota": "创建嵌入失败：配额不足。请检查您的 OpenAI 帐户余额並添加积分以继续。",
+	"insufficientQuota": "创建嵌入失败：配额不足。请检查您的 OpenAI 帐户余额并添加积分以继续。",
 	"failedMaxAttempts": "尝试 {{attempts}} 次后创建嵌入失败",
 	"textExceedsTokenLimit": "索引 {{index}} 处的文本超过最大令牌限制 ({{itemTokens}} > {{maxTokens}})。正在跳过。",
 	"textWithPrefixExceedsTokenLimit": "索引 {{index}} 处带前缀的文本超过了最大令牌限制 ({{estimatedTokens}} > {{maxTokens}})。不添加前缀。",
diff --git a/src/i18n/locales/zh-TW/embeddings.json b/src/i18n/locales/zh-TW/embeddings.json
index a31b61848a..464f58b5c0 100644
--- a/src/i18n/locales/zh-TW/embeddings.json
+++ b/src/i18n/locales/zh-TW/embeddings.json
@@ -3,10 +3,10 @@
 	"authenticationFailed": "建立內嵌失敗：驗證失敗。請檢查您的 API 金鑰。",
 	"failedWithStatus": "嘗試 {{attempts}} 次後建立內嵌失敗：HTTP {{statusCode}} - {{errorMessage}}",
 	"failedWithError": "嘗試 {{attempts}} 次後建立內嵌失敗：{{errorMessage}}",
-	"insufficientQuota": "建立嵌入失敗：額度不足。請檢查您的 OpenAI 帳戶餘額並新增點數以繼續。",
+	"insufficientQuota": "建立內嵌失敗：額度不足。請檢查您的 OpenAI 帳戶餘額並新增點數以繼續。",
 	"failedMaxAttempts": "嘗試 {{attempts}} 次後建立內嵌失敗",
 	"textExceedsTokenLimit": "索引 {{index}} 處的文字超過最大權杖限制 ({{itemTokens}} > {{maxTokens}})。正在略過。",
-	"textWithPrefixExceedsTokenLimit": "索引 {{index}} 處帶前綴的文本超過了最大令牌限制 ({{estimatedTokens}} > {{maxTokens}})。不添加前綴。",
+	"textWithPrefixExceedsTokenLimit": "索引 {{index}} 處帶前綴的文本超過了最大權杖限制 ({{estimatedTokens}} > {{maxTokens}})。不添加前綴。",
 	"rateLimitRetry": "已達到速率限制，將在 {{delayMs}} 毫秒後重試（嘗試次數 {{attempt}}/{{maxRetries}}）",
 	"ollama": {
 		"couldNotReadErrorBody": "無法讀取錯誤內容",
diff --git a/src/services/code-index/embedders/openai-compatible.ts b/src/services/code-index/embedders/openai-compatible.ts
index 68afcacaf1..622a83a805 100644
--- a/src/services/code-index/embedders/openai-compatible.ts
+++ b/src/services/code-index/embedders/openai-compatible.ts
@@ -8,6 +8,7 @@ import {
 } from "../constants"
 import { getDefaultModelId, getModelQueryPrefix } from "../../../shared/embeddingModels"
 import { t } from "../../../i18n"
+import { isInsufficientQuotaError } from "./utils/quota-detection"
 
 interface EmbeddingItem {
 	embedding: string | number[]
@@ -277,7 +278,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 				const hasMoreAttempts = attempts < MAX_RETRIES - 1
 
 				// Add quota detection
-				const isQuotaError = this.isInsufficientQuotaError(error)
+				const isQuotaError = isInsufficientQuotaError(error)
 
 				if (isRateLimitError && !isQuotaError && hasMoreAttempts) {
 					const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts)
@@ -337,31 +338,4 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 			name: "openai-compatible",
 		}
 	}
-
-	/**
-	 * Detects if an error is due to insufficient quota/credits
-	 * @param error The error object to check
-	 * @returns True if the error indicates insufficient quota
-	 */
-	private isInsufficientQuotaError(error: any): boolean {
-		if (error?.status !== 429) return false
-
-		const errorMessage =
-			error?.message?.toLowerCase() ||
-			error?.response?.data?.error?.message?.toLowerCase() ||
-			error?.error?.message?.toLowerCase() ||
-			""
-
-		const quotaKeywords = [
-			"insufficient_quota",
-			"insufficient quota",
-			"quota exceeded",
-			"insufficient funds",
-			"billing",
-			"payment required",
-			"credits",
-		]
-
-		return quotaKeywords.some((keyword) => errorMessage.includes(keyword))
-	}
 }
diff --git a/src/services/code-index/embedders/openai.ts b/src/services/code-index/embedders/openai.ts
index 79010ca0b1..f2e14be6bf 100644
--- a/src/services/code-index/embedders/openai.ts
+++ b/src/services/code-index/embedders/openai.ts
@@ -8,6 +8,7 @@ import {
 	MAX_BATCH_RETRIES as MAX_RETRIES,
 	INITIAL_RETRY_DELAY_MS as INITIAL_DELAY_MS,
 } from "../constants"
+import { isInsufficientQuotaError } from "./utils/quota-detection"
 import { getModelQueryPrefix } from "../../../shared/embeddingModels"
 import { t } from "../../../i18n"
 
@@ -142,7 +143,7 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 				const hasMoreAttempts = attempts < MAX_RETRIES - 1
 
 				// Add quota detection
-				const isQuotaError = this.isInsufficientQuotaError(error)
+				const isQuotaError = isInsufficientQuotaError(error)
 
 				if (isRateLimitError && !isQuotaError && hasMoreAttempts) {
 					const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts)
@@ -199,31 +200,4 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 			name: "openai",
 		}
 	}
-
-	/**
-	 * Detects if an error is due to insufficient quota/credits
-	 * @param error The error object to check
-	 * @returns True if the error indicates insufficient quota
-	 */
-	private isInsufficientQuotaError(error: any): boolean {
-		if (error?.status !== 429) return false
-
-		const errorMessage =
-			error?.message?.toLowerCase() ||
-			error?.response?.data?.error?.message?.toLowerCase() ||
-			error?.error?.message?.toLowerCase() ||
-			""
-
-		const quotaKeywords = [
-			"insufficient_quota",
-			"insufficient quota",
-			"quota exceeded",
-			"insufficient funds",
-			"billing",
-			"payment required",
-			"credits",
-		]
-
-		return quotaKeywords.some((keyword) => errorMessage.includes(keyword))
-	}
 }
diff --git a/src/services/code-index/embedders/utils/quota-detection.ts b/src/services/code-index/embedders/utils/quota-detection.ts
new file mode 100644
index 0000000000..28dc3aa0cb
--- /dev/null
+++ b/src/services/code-index/embedders/utils/quota-detection.ts
@@ -0,0 +1,30 @@
+/**
+ * Utility functions for detecting quota-related errors from OpenAI API
+ */
+
+/**
+ * Detects whether a 429 error signals exhausted quota/credits rather than a transient rate limit.
+ * Scans every known message location (non-strings ignored) so a generic top-level message cannot hide the detail.
+ * @param error The error object to check
+ * @returns True if the error indicates insufficient quota
+ */
+export function isInsufficientQuotaError(error: any): boolean {
+	if (error?.status !== 429) return false
+
+	const errorText = [error?.message, error?.response?.data?.error?.message, error?.error?.message]
+		.filter((candidate): candidate is string => typeof candidate === "string")
+		.join("\n")
+		.toLowerCase()
+
+	const quotaKeywords = [
+		"insufficient_quota",
+		"insufficient quota",
+		"quota exceeded",
+		"insufficient funds",
+		"billing",
+		"payment required",
+		"credits",
+	]
+
+	return quotaKeywords.some((keyword) => errorText.includes(keyword))
+}