
Commit bee73b7

refactor: migrate from jest to mocha, add abort support, and refactor
1 parent c4fce97 commit bee73b7

File tree

10 files changed, +2090 -2983 lines changed

RateLimitManager.js

Lines changed: 44 additions & 8 deletions
@@ -1,6 +1,9 @@
 import TokenBucket from './TokenBucket.js';
+import { sleep } from './Utility.js';
 
 class RateLimitManager {
+  static #instances = new Map(); // bucketId -> instance
+
   /**
    * @param {Object} config
    * @param {number} config.requestsPerMinute - Max requests per minute
@@ -12,15 +15,52 @@ class RateLimitManager {
     // llmTokenBucket: limits number of LLM text tokens per minute
     this.llmTokenBucket = new TokenBucket(llmTokensPerMinute, llmTokensPerMinute / 60); // refill per second
   }
+
+  /**
+   * Get or create a rate limit manager instance for the given bucketId
+   * @param {string} bucketId - The service identifier
+   * @param {Object} config - Rate limit configuration
+   * @returns {RateLimitManager} - The rate limit manager instance
+   */
+  static getInstance(bucketId, config) {
+    if (!this.#instances.has(bucketId)) {
+      this.#instances.set(bucketId, new RateLimitManager(config));
+    }
+    return this.#instances.get(bucketId);
+  }
+
+  /**
+   * Clear a rate limit manager instance for the given bucketId
+   * @param {string} bucketId - The service identifier
+   */
+  static clear(bucketId) {
+    this.#instances.delete(bucketId);
+  }
 
   /**
    * Attempt to acquire a request slot and the required number of LLM tokens.
    * Waits until both are available.
    * @param {number} llmTokenCount
    */
-  async acquire(llmTokenCount = 1) {
-    while (!(this.requestBucket.tryRemoveToken() && this.llmTokenBucket.tryRemoveToken(llmTokenCount))) {
-      await this._sleep(100);
+  async acquire(llmTokenCount = 1, abortSignal) {
+    // Check abort signal before entering loop
+    if (abortSignal?.aborted) {
+      const error = new Error(abortSignal.reason || 'Operation was aborted');
+      error.name = 'AbortError';
+      throw error;
+    }
+
+    console.log('Awaiting rate limit...');
+    while (!abortSignal?.aborted && !(this.requestBucket.tryRemoveToken() && this.llmTokenBucket.tryRemoveToken(llmTokenCount))) {
+      await sleep(100, abortSignal);
+    }
+    console.log('Wait for rate limit complete...');
+
+    // Final check after loop - if aborted during sleep, throw error
+    if (abortSignal?.aborted) {
+      const error = new Error(abortSignal.reason || 'Operation was aborted');
+      error.name = 'AbortError';
+      throw error;
     }
   }
 
@@ -55,11 +95,7 @@ class RateLimitManager {
     if (info.llmTokensPerMinute) {
      this.llmTokenBucket.update({ capacity: info.llmTokensPerMinute, refillRate: info.llmTokensPerMinute / 60 });
     }
-  }
-
-  _sleep(ms) {
-    return new Promise(resolve => setTimeout(resolve, ms));
-  }
+  }
 }
 
 export default RateLimitManager;
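
Taken together, these changes turn RateLimitManager into a per-bucket singleton and make acquire() abort-aware. A minimal usage sketch follows; the bucket id and rate-limit numbers are illustrative, not taken from this commit:

import RateLimitManager from './RateLimitManager.js';

// One shared manager per service bucket (id and limits assumed for illustration)
const limiter = RateLimitManager.getInstance('openai', {
  requestsPerMinute: 10,
  llmTokensPerMinute: 150000
});

const controller = new AbortController();

try {
  // Waits for a request slot plus ~500 LLM tokens, polling every 100 ms;
  // intended to throw an AbortError if the signal fires before or during the wait
  await limiter.acquire(500, controller.signal);
  // ...make the LLM request here...
} catch (err) {
  if (err.name === 'AbortError') {
    console.log('Rate-limit wait was cancelled');
  } else {
    throw err;
  }
}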

ResilientLLM.js

Lines changed: 26 additions & 15 deletions
@@ -29,16 +29,10 @@ class ResilientLLM {
     this.topP = options?.topP || process.env.AI_TOP_P || 0.95;
     // Add rate limit config options if provided
     this.rateLimitConfig = options?.rateLimitConfig || { requestsPerMinute: 10, llmTokensPerMinute: 150000 };
-    // Instantiate ResilientOperation for LLM calls
-    this.resilientOperation = new ResilientOperation({
-      bucketId: this.aiService,
-      rateLimitConfig: this.rateLimitConfig,
-      retries: options?.retries || 3,
-      timeout: this.timeout,
-      backoffFactor: options?.backoffFactor || 2,
-      onRateLimitUpdate: options?.onRateLimitUpdate,
-      cacheStore: this.cacheStore
-    });
+    this.retries = options?.retries || 3;
+    this.backoffFactor = options?.backoffFactor || 2;
+    this.onRateLimitUpdate = options?.onRateLimitUpdate;
+    this._abortController = null;
   }
 
   getApiUrl(aiService) {
@@ -159,11 +153,24 @@ class ResilientLLM {
       throw new Error('Invalid provider specified. Use "anthropic" or "openai" or "gemini" or "ollama".');
     }
     try{
+      // Instantiate ResilientOperation for LLM calls
+      this.resilientOperation = new ResilientOperation({
+        bucketId: this.aiService,
+        rateLimitConfig: this.rateLimitConfig,
+        retries: this.retries,
+        timeout: this.timeout,
+        backoffFactor: this.backoffFactor,
+        onRateLimitUpdate: this.onRateLimitUpdate,
+        cacheStore: this.cacheStore
+      });
+      // Use single instance of abort controller for all operations
+      this._abortController = this._abortController || new AbortController();
       // Wrap the LLM API call in ResilientOperation for rate limiting, retries, etc.
       const { data, statusCode } = await this.resilientOperation
         .withTokens(estimatedLLMTokens)
         .withCache()
-        .execute(this._makeHttpRequest, apiUrl, requestBody, headers);
+        .withAbortControl(this._abortController)
+        .execute(this._makeHttpRequest, apiUrl, requestBody, headers, this._abortController.signal);
       /**
        * OpenAI chat completion response
        * {
@@ -256,6 +263,8 @@ class ResilientLLM {
    * @returns {Promise<{data: any, statusCode: number}>}
    */
   async _makeHttpRequest(apiUrl, requestBody, headers, abortSignal) {
+    console.log("Making HTTP request to:", apiUrl);
+    console.log("You may cancel it by calling abort() method on the ResilientLLM instance");
     const startTime = Date.now();
 
     try {
@@ -291,7 +300,8 @@ class ResilientLLM {
 
   /**
    * Parse errors from various LLM APIs to create uniform error communication
-   * @param {*} error
+   * @param {number|null} statusCode - HTTP status code or null for general errors
+   * @param {Error|Object|null} error - Error object
    * @reference https://platform.openai.com/docs/guides/error-codes/api-error-codes
    * @reference https://docs.anthropic.com/en/api/errors
    */
@@ -305,8 +315,6 @@ class ResilientLLM {
         throw new Error(error?.message || "Invalid API Key");
       case 403:
         throw new Error(error?.message || "You are not authorized to access this resource");
-      case 400:
-        throw new Error(error?.message || "Bad request");
       case 429:
         throw new Error(error?.message || "Rate limit exceeded");
       case 404:
@@ -380,7 +388,10 @@ class ResilientLLM {
     return data?.choices?.[0]?.message?.content;
   }
 
-
+  abort(){
+    this._abortController?.abort();
+    this._abortController = null;
+  }
 
   /**
    * Estimate the number of tokens in a text
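
With the ResilientOperation now created per call and a shared AbortController stored on the instance, an in-flight request can be cancelled from outside via abort(). A sketch of how calling code might use it; the chat() method name is an assumption for illustration, only abort() and the constructor options shown come from this file:

import ResilientLLM from './ResilientLLM.js';

const llm = new ResilientLLM({ retries: 3, backoffFactor: 2 });

// Cancel the call if it has not finished within 10 seconds (hypothetical policy)
const timer = setTimeout(() => llm.abort(), 10_000);

try {
  const reply = await llm.chat('Summarize this changelog'); // chat() is an assumed entry point
  console.log(reply);
} catch (err) {
  // Aborting clears the shared controller; waiting/requesting code rejects with an AbortError
  if (err.name === 'AbortError') console.log('LLM call was aborted');
  else throw err;
} finally {
  clearTimeout(timer);
}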
