diff --git a/assets/featureFlag/alpha.json b/assets/featureFlag/alpha.json index 70db0bd0..e61838ed 100644 --- a/assets/featureFlag/alpha.json +++ b/assets/featureFlag/alpha.json @@ -46,4 +46,4 @@ ] } } -} +} \ No newline at end of file diff --git a/assets/featureFlag/beta.json b/assets/featureFlag/beta.json index 37a1b7a3..75397205 100644 --- a/assets/featureFlag/beta.json +++ b/assets/featureFlag/beta.json @@ -45,4 +45,4 @@ ] } } -} +} \ No newline at end of file diff --git a/assets/featureFlag/prod.json b/assets/featureFlag/prod.json index 0c467924..81c3f603 100644 --- a/assets/featureFlag/prod.json +++ b/assets/featureFlag/prod.json @@ -41,4 +41,4 @@ ] } } -} +} \ No newline at end of file diff --git a/src/stacks/actions/StackActionOperations.ts b/src/stacks/actions/StackActionOperations.ts index 2fff2fd6..327dc20d 100644 --- a/src/stacks/actions/StackActionOperations.ts +++ b/src/stacks/actions/StackActionOperations.ts @@ -274,6 +274,7 @@ export async function cleanupReviewStack( maxRetries: 3, initialDelayMs: 1000, operationName: `Delete stack ${workflow.stackName}`, + totalTimeoutMs: 30_000, }, logger, ); @@ -298,6 +299,7 @@ export async function deleteChangeSet( maxRetries: 3, initialDelayMs: 1000, operationName: `Delete change set ${workflow.changeSetName}`, + totalTimeoutMs: 30_000, }, logger, ); diff --git a/src/utils/Retry.ts b/src/utils/Retry.ts index 7ad43ef7..211eee61 100644 --- a/src/utils/Retry.ts +++ b/src/utils/Retry.ts @@ -1,4 +1,5 @@ import { Logger } from 'pino'; +import { extractErrorMessage } from './Errors'; export type RetryOptions = { maxRetries?: number; @@ -6,8 +7,8 @@ export type RetryOptions = { maxDelayMs?: number; backoffMultiplier?: number; jitterFactor?: number; - operationName?: string; - totalTimeoutMs?: number; + operationName: string; + totalTimeoutMs: number; }; function sleep(ms: number): Promise { @@ -16,65 +17,79 @@ function sleep(ms: number): Promise { }); } -function checkTotalTimeout(startTime: number, totalTimeoutMs: number | undefined, operationName: string): void { - if (totalTimeoutMs !== undefined && Date.now() - startTime > totalTimeoutMs) { - throw new Error(`${operationName} timed out after ${totalTimeoutMs}ms`); - } -} - -function calculateNextDelay( - currentDelay: number, +function calculateDelay( + attempt: number, + initialDelayMs: number, jitterFactor: number, backoffMultiplier: number, maxDelayMs: number, ): number { - const nextDelay = currentDelay * backoffMultiplier; - const jitter = jitterFactor > 0 ? Math.random() * jitterFactor * nextDelay : 0; - return Math.min(nextDelay + jitter, maxDelayMs); + // 1. Exponential Backoff: initial * 2^0, initial * 2^1, etc. + const exponentialDelay = initialDelayMs * Math.pow(backoffMultiplier, attempt); + + // 2. Cap at Max Delay + const cappedDelay = Math.min(exponentialDelay, maxDelayMs); + + // 3. Add Jitter (randomized percentage of the current delay), capped at maxDelayMs + const jitter = jitterFactor > 0 ? Math.random() * jitterFactor * cappedDelay : 0; + + return Math.min(cappedDelay + jitter, maxDelayMs); } -/** - * Retry a function with exponential backoff - */ export async function retryWithExponentialBackoff( fn: () => Promise, options: RetryOptions, log: Logger, + sleepFn: (ms: number) => Promise = (ms: number) => { + return sleep(ms); + }, ): Promise { - const maxRetries = options.maxRetries ?? 3; - const initialDelayMs = options.initialDelayMs ?? 1000; - const maxDelayMs = options.maxDelayMs ?? 30_000; - const backoffMultiplier = options.backoffMultiplier ?? 2; - const jitterFactor = options.jitterFactor ?? 0; - const operationName = options.operationName ?? 'Operation'; - const totalTimeoutMs = options.totalTimeoutMs; + const { + maxRetries = 3, + initialDelayMs = 1000, + maxDelayMs = 5000, + backoffMultiplier = 2, + jitterFactor = 0.1, + operationName, + totalTimeoutMs, + } = options; - let lastError: Error = new Error('No attempts made'); - let currentDelay = initialDelayMs; - const startTime = Date.now(); + if (backoffMultiplier < 1) { + throw new Error('Backoff multiplier must be greater than or equal to 1'); + } + + if (totalTimeoutMs <= 0) { + throw new Error('Total timeout must be greater than 0'); + } - for (let attempt = 0; attempt <= maxRetries; attempt++) { - checkTotalTimeout(startTime, totalTimeoutMs, operationName); + let lastError: Error | undefined = undefined; + const startTime = performance.now(); + + // If maxRetries is 3, we run: 0 (initial), 1, 2, 3. Total 4 attempts. + const attempts = maxRetries + 1; + for (let attemptIdx = 0; attemptIdx < attempts; attemptIdx++) { + if (attemptIdx > 0 && performance.now() - startTime >= totalTimeoutMs) { + const message = `${operationName} timed out after ${performance.now() - startTime}ms, on attempt #${attemptIdx + 1}/${attempts}`; + const errorMsg = lastError ? `${message}. Last error: ${lastError.message}` : message; + throw new Error(errorMsg); + } try { return await fn(); } catch (error) { - lastError = error instanceof Error ? error : new Error(String(error)); - - if (attempt === maxRetries) { - throw new Error( - `${operationName} failed after ${maxRetries + 1} attempts. Last error: ${lastError.message}`, - ); + lastError = error instanceof Error ? error : new Error(extractErrorMessage(error)); + if (attemptIdx === attempts - 1) { + throw new Error(`${operationName} failed after ${attempts} attempts. Last error: ${lastError.message}`); } + const delay = calculateDelay(attemptIdx, initialDelayMs, jitterFactor, backoffMultiplier, maxDelayMs); log.warn( - `${operationName} attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${currentDelay}ms...`, + `${operationName} attempt ${attemptIdx + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay)}ms...`, ); - await sleep(currentDelay); - currentDelay = calculateNextDelay(currentDelay, jitterFactor, backoffMultiplier, maxDelayMs); + await sleepFn(delay); } } - throw lastError; + throw new Error('Something went wrong, this is not reachable'); } diff --git a/tst/unit/services/cfnLint/PyodideWorkerManager.test.ts b/tst/unit/services/cfnLint/PyodideWorkerManager.test.ts index d7f89157..b37663c5 100644 --- a/tst/unit/services/cfnLint/PyodideWorkerManager.test.ts +++ b/tst/unit/services/cfnLint/PyodideWorkerManager.test.ts @@ -645,35 +645,6 @@ describe('PyodideWorkerManager', () => { }); describe('retry logic', () => { - test('should not retry when maxRetries is 0', async () => { - // Create a worker manager with retries disabled - const noRetryWorkerManager = new PyodideWorkerManager( - { - maxRetries: 0, - initialDelayMs: 10, - maxDelayMs: 100, - backoffMultiplier: 2, - totalTimeoutMs: 5000, - }, - createDefaultCfnLintSettings(), - - mockLogging, - ); - - // Mock worker creation to fail - workerConstructor.mockImplementation(() => { - throw new Error('Worker creation failed'); - }); - - // Expect initialization to fail immediately without retries - await expect(noRetryWorkerManager.initialize()).rejects.toThrow( - 'Pyodide initialization failed after 1 attempts. Last error: Worker creation failed', - ); - - // Verify no retry attempts were logged - expect(mockLogging.warn.callCount).toBe(0); // No retry warnings - }); - test('should retry initialization on failure and eventually succeed', async () => { // Create a worker manager with retry enabled const retryWorkerManager = new PyodideWorkerManager( @@ -954,9 +925,7 @@ describe('PyodideWorkerManager', () => { }); // Expect initialization to fail with timeout error - await expect(retryWorkerManager.initialize()).rejects.toThrow( - 'Pyodide initialization timed out after 300ms', - ); + await expect(retryWorkerManager.initialize()).rejects.toThrow(/Pyodide initialization timed out after 3/); const totalTime = Date.now() - startTime; @@ -968,162 +937,6 @@ describe('PyodideWorkerManager', () => { expect(attemptCount).toBeGreaterThan(1); expect(attemptCount).toBeLessThan(11); // Should timeout before reaching max retries }); - - test('should work without totalTimeoutMs (backward compatibility)', async () => { - // Create a worker manager without totalTimeoutMs (uses default from DefaultSettings) - const retryWorkerManager = new PyodideWorkerManager( - { - maxRetries: 2, - initialDelayMs: 10, - maxDelayMs: 50, - backoffMultiplier: 2, - totalTimeoutMs: 120_000, // Uses default value from DefaultSettings - }, - createDefaultCfnLintSettings(), - - mockLogging, - ); - - let attemptCount = 0; - - workerConstructor.mockImplementation(() => { - attemptCount++; - throw new Error('Worker creation failed'); - }); - - // Should fail after max retries, not timeout (since timeout is very large) - await expect(retryWorkerManager.initialize()).rejects.toThrow( - 'Pyodide initialization failed after 3 attempts. Last error: Worker creation failed', - ); - - // Should have made all retry attempts (initial + 2 retries = 3 total) - expect(attemptCount).toBe(3); - }); - - test('should prefer explicit totalTimeoutMs over calculated timeout', async () => { - // Create a worker manager where explicit totalTimeoutMs is much shorter than calculated timeout - const retryWorkerManager = new PyodideWorkerManager( - { - maxRetries: 5, // Would normally allow 6 attempts - initialDelayMs: 100, - maxDelayMs: 1000, // Calculated timeout would be 1000 * 6 = 6000ms - backoffMultiplier: 2, - totalTimeoutMs: 200, // But explicit timeout is only 200ms - }, - createDefaultCfnLintSettings(), - - mockLogging, - ); - - const startTime = Date.now(); - let attemptCount = 0; - - workerConstructor.mockImplementation(() => { - attemptCount++; - throw new Error('Worker creation failed'); - }); - - // Should timeout quickly due to explicit totalTimeoutMs - await expect(retryWorkerManager.initialize()).rejects.toThrow( - 'Pyodide initialization timed out after 200ms', - ); - - const totalTime = Date.now() - startTime; - - // Should respect the explicit 200ms timeout, not the calculated 6000ms - // Allow more variance for CI environments - expect(totalTime).toBeGreaterThanOrEqual(180); - expect(totalTime).toBeLessThan(350); - - // Should have made fewer attempts due to quick timeout - expect(attemptCount).toBeLessThan(6); // Should not reach max retries - }); - - test('should handle zero totalTimeoutMs (immediate timeout)', async () => { - // Create a worker manager with zero totalTimeoutMs - const retryWorkerManager = new PyodideWorkerManager( - { - maxRetries: 3, - initialDelayMs: 100, - maxDelayMs: 500, - backoffMultiplier: 2, - totalTimeoutMs: 0, // Immediate timeout - }, - createDefaultCfnLintSettings(), - - mockLogging, - ); - - let attemptCount = 0; - - workerConstructor.mockImplementation(() => { - attemptCount++; - throw new Error('Worker creation failed'); - }); - - // Should timeout immediately after first attempt - await expect(retryWorkerManager.initialize()).rejects.toThrow('Pyodide initialization timed out after 0ms'); - - // Should have made at least one attempt before timing out - expect(attemptCount).toBe(1); - }); - - test('should handle very large totalTimeoutMs', async () => { - // Create a worker manager with very large totalTimeoutMs - const retryWorkerManager = new PyodideWorkerManager( - { - maxRetries: 2, - initialDelayMs: 10, - maxDelayMs: 50, - backoffMultiplier: 2, - totalTimeoutMs: 10000, // Very large timeout (10 seconds) - }, - createDefaultCfnLintSettings(), - - mockLogging, - ); - - let attemptCount = 0; - - workerConstructor.mockImplementation(() => { - attemptCount++; - throw new Error('Worker creation failed'); - }); - - // Should fail due to max retries, not timeout - await expect(retryWorkerManager.initialize()).rejects.toThrow( - 'Pyodide initialization failed after 3 attempts. Last error: Worker creation failed', - ); - - // Should have made all retry attempts - expect(attemptCount).toBe(3); - }); - - test('should use default operation name in error messages', async () => { - // Create a worker manager without specifying operation name - const retryWorkerManager = new PyodideWorkerManager( - { - maxRetries: 1, - initialDelayMs: 10, - maxDelayMs: 100, - backoffMultiplier: 2, - totalTimeoutMs: 5000, - }, - createDefaultCfnLintSettings(), - - mockLogging, - ); - - // Always fail worker creation - workerConstructor.mockImplementation(() => { - throw new Error('Worker creation failed'); - }); - - // Expect initialization to fail with default operation name - await expect(retryWorkerManager.initialize()).rejects.toThrow( - 'Pyodide initialization failed after 2 attempts. Last error: Worker creation failed', - ); - }); }); describe('race conditions and edge cases', () => { diff --git a/tst/unit/stackActions/StackActionWorkflowOperations.test.ts b/tst/unit/stackActions/StackActionWorkflowOperations.test.ts index a69c06da..c8479eab 100644 --- a/tst/unit/stackActions/StackActionWorkflowOperations.test.ts +++ b/tst/unit/stackActions/StackActionWorkflowOperations.test.ts @@ -265,6 +265,7 @@ describe('StackActionWorkflowOperations', () => { maxRetries: 3, initialDelayMs: 1000, operationName: 'Delete stack test-stack', + totalTimeoutMs: 30000, }, expect.any(Object), // logger ); @@ -293,6 +294,7 @@ describe('StackActionWorkflowOperations', () => { maxRetries: 3, initialDelayMs: 1000, operationName: 'Delete change set changeset-123', + totalTimeoutMs: 30000, }, expect.any(Object), // logger ); diff --git a/tst/unit/utils/Retry.test.ts b/tst/unit/utils/Retry.test.ts index 62b4c20f..8931a7c9 100644 --- a/tst/unit/utils/Retry.test.ts +++ b/tst/unit/utils/Retry.test.ts @@ -1,26 +1,35 @@ -/* eslint-disable vitest/valid-expect */ +import { Logger } from 'pino'; +import * as sinon from 'sinon'; import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { retryWithExponentialBackoff, RetryOptions } from '../../../src/utils/Retry'; +import { RetryOptions, retryWithExponentialBackoff } from '../../../src/utils/Retry'; describe('retryWithExponentialBackoff', () => { - const mockLog = { - warn: vi.fn(), - } as any; + const options: RetryOptions = { + maxRetries: 2, + initialDelayMs: 2, + maxDelayMs: 10, + backoffMultiplier: 1.5, + jitterFactor: 0.5, + operationName: 'SomeOperation', + totalTimeoutMs: 250, + }; + + const mockLog = {} as unknown as Logger; + const sleepFn = sinon.stub().resolves(); beforeEach(() => { - vi.useFakeTimers(); - vi.clearAllMocks(); + sleepFn.resetHistory(); + mockLog.warn = vi.fn(); }); afterEach(() => { - vi.useRealTimers(); + sinon.restore(); }); it('should succeed on first attempt', async () => { const mockFn = vi.fn().mockResolvedValue('success'); - const options: RetryOptions = { maxRetries: 3 }; - const result = await retryWithExponentialBackoff(mockFn, options, mockLog); + const result = await retryWithExponentialBackoff(mockFn, options, mockLog, sleepFn); expect(result).toBe('success'); expect(mockFn).toHaveBeenCalledTimes(1); @@ -33,18 +42,7 @@ describe('retryWithExponentialBackoff', () => { .mockRejectedValueOnce(new Error('Second failure')) .mockResolvedValue('success'); - const options: RetryOptions = { maxRetries: 3, initialDelayMs: 100 }; - - const promise = retryWithExponentialBackoff(mockFn, options, mockLog); - - // Let the first attempt fail and start the first delay - await vi.runOnlyPendingTimersAsync(); - // Let the second attempt fail and start the second delay - await vi.runOnlyPendingTimersAsync(); - // Let the third attempt succeed - await vi.runOnlyPendingTimersAsync(); - - const result = await promise; + const result = await retryWithExponentialBackoff(mockFn, options, mockLog, sleepFn); expect(result).toBe('success'); expect(mockFn).toHaveBeenCalledTimes(3); @@ -52,77 +50,155 @@ describe('retryWithExponentialBackoff', () => { it('should fail after max retries exceeded', async () => { const mockFn = vi.fn().mockRejectedValue(new Error('Persistent failure')); - const options: RetryOptions = { maxRetries: 2, initialDelayMs: 100 }; - - const promise = expect(retryWithExponentialBackoff(mockFn, options, mockLog)).rejects.toThrow( - 'Operation failed after 3 attempts', - ); - await vi.advanceTimersByTimeAsync(300); - await promise; + const promise = retryWithExponentialBackoff(mockFn, options, mockLog); + await expect(promise).rejects.toThrow('Persistent failure'); expect(mockFn).toHaveBeenCalledTimes(3); }); it('should respect total timeout', async () => { const mockFn = vi.fn().mockRejectedValue(new Error('Failure')); - const options: RetryOptions = { - maxRetries: 10, - initialDelayMs: 1000, - totalTimeoutMs: 2000, - operationName: 'TestOperation', - }; - - const startTime = 1000; - vi.spyOn(Date, 'now') - .mockReturnValueOnce(startTime) // Initial call - .mockReturnValueOnce(startTime + 2500); // Timeout check - - const promise = retryWithExponentialBackoff(mockFn, options, mockLog); + const promise = retryWithExponentialBackoff( + mockFn, + { + ...options, + initialDelayMs: 10, + totalTimeoutMs: 1, + }, + mockLog, + ); - await expect(promise).rejects.toThrow('TestOperation timed out after 2000ms'); + try { + await promise; + } catch (err) { + expect(err).instanceof(Error); + const message = (err as Error).message; + expect(message).contains('SomeOperation timed out after'); + expect(message).contains('on attempt #2/3. Last error: Failure'); + return; + } + + throw new Error('Tests have failed'); }); it('should apply exponential backoff correctly', async () => { const mockFn = vi.fn().mockRejectedValue(new Error('Failure')); - const options: RetryOptions = { - maxRetries: 3, - initialDelayMs: 100, - backoffMultiplier: 2, - maxDelayMs: 1000, - }; - const promise: Promise = expect(retryWithExponentialBackoff(mockFn, options, mockLog)).rejects.toThrow(); + const promise = retryWithExponentialBackoff( + mockFn, + { + ...options, + + maxRetries: 5, + initialDelayMs: 10, + maxDelayMs: 100, + backoffMultiplier: 2.5, + jitterFactor: 0.01, + totalTimeoutMs: 10_000, + }, + mockLog, + sleepFn, + ); + + await expect(promise).rejects.toThrow('Failure'); + expect(mockFn).toHaveBeenCalledTimes(6); + expect(sleepFn.callCount).toBe(5); + + expect(checkBounds(sleepFn.args[0][0], 10, 25)).toBe(true); + expect(checkBounds(sleepFn.args[1][0], 25, 62.5)).toBe(true); + expect(checkBounds(sleepFn.args[2][0], 62.5, 100)).toBe(true); + expect(sleepFn.args[3][0]).toBe(100); + expect(sleepFn.args[4][0]).toBe(100); + }); - await vi.advanceTimersByTimeAsync(1000); - await promise; + it('should throw error when backoffMultiplier is less than 1', async () => { + const mockFn = vi.fn().mockResolvedValue('success'); - expect(mockFn).toHaveBeenCalledTimes(4); + await expect( + retryWithExponentialBackoff(mockFn, { ...options, backoffMultiplier: 0.5 }, mockLog, sleepFn), + ).rejects.toThrow('Backoff multiplier must be greater than or equal to 1'); + expect(mockFn).not.toHaveBeenCalled(); + }); + + it('should throw error when totalTimeoutMs is 0 or negative', async () => { + const mockFn = vi.fn().mockResolvedValue('success'); + + await expect( + retryWithExponentialBackoff(mockFn, { ...options, totalTimeoutMs: 0 }, mockLog, sleepFn), + ).rejects.toThrow('Total timeout must be greater than 0'); + + await expect( + retryWithExponentialBackoff(mockFn, { ...options, totalTimeoutMs: -100 }, mockLog, sleepFn), + ).rejects.toThrow('Total timeout must be greater than 0'); + expect(mockFn).not.toHaveBeenCalled(); }); it('should handle non-Error exceptions', async () => { - const mockFn = vi.fn().mockRejectedValue('String error'); - const options: RetryOptions = { maxRetries: 1, initialDelayMs: 100 }; + const mockFn = vi + .fn() + .mockRejectedValueOnce('string error') + .mockRejectedValueOnce({ message: 'object error' }) + .mockResolvedValue('success'); - const promise: Promise = expect(retryWithExponentialBackoff(mockFn, options, mockLog)).rejects.toThrow( - 'Operation failed after 2 attempts. Last error: String error', - ); + const result = await retryWithExponentialBackoff(mockFn, options, mockLog, sleepFn); - await vi.advanceTimersByTimeAsync(200); - await promise; + expect(result).toBe('success'); + expect(mockFn).toHaveBeenCalledTimes(3); }); it('should use default options when not provided', async () => { const mockFn = vi.fn().mockRejectedValue(new Error('Failure')); - const options: RetryOptions = {}; + const minimalOptions: RetryOptions = { + operationName: 'TestOp', + totalTimeoutMs: 60_000, + }; + + const promise = retryWithExponentialBackoff(mockFn, minimalOptions, mockLog, sleepFn); + + await expect(promise).rejects.toThrow('TestOp failed after 4 attempts'); + expect(mockFn).toHaveBeenCalledTimes(4); // default maxRetries is 3, so 4 attempts + }); + + it('should not add jitter when jitterFactor is 0', async () => { + const mockFn = vi.fn().mockRejectedValue(new Error('Failure')); - const promise: Promise = expect(retryWithExponentialBackoff(mockFn, options, mockLog)).rejects.toThrow( - 'Operation failed after 4 attempts', + const promise = retryWithExponentialBackoff( + mockFn, + { + ...options, + maxRetries: 2, + initialDelayMs: 10, + backoffMultiplier: 2, + jitterFactor: 0, + maxDelayMs: 1000, + totalTimeoutMs: 10_000, + }, + mockLog, + sleepFn, ); - await vi.advanceTimersByTimeAsync(8000); - await promise; + await expect(promise).rejects.toThrow('Failure'); + expect(sleepFn.args[0][0]).toBe(10); // 10 * 2^0 = 10 + expect(sleepFn.args[1][0]).toBe(20); // 10 * 2^1 = 20 + }); + + it('should log warnings on retry attempts', async () => { + const mockFn = vi.fn().mockRejectedValueOnce(new Error('Test error')).mockResolvedValue('success'); + + await retryWithExponentialBackoff(mockFn, options, mockLog, sleepFn); - expect(mockFn).toHaveBeenCalledTimes(4); + expect(mockLog.warn).toHaveBeenCalledTimes(1); + expect(mockLog.warn).toHaveBeenCalledWith( + expect.stringContaining('SomeOperation attempt 1 failed: Test error'), + ); }); }); + +function checkBounds(value: number, lowerLimit: number, upperLimit: number) { + if (value > lowerLimit && value < upperLimit) { + return true; + } + + throw new Error(`${value} is either lower than ${lowerLimit} or greater than ${upperLimit}`); +}