Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion assets/featureFlag/alpha.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@
]
}
}
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was the newline at the end removed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ya, tests read/write the file so new lines get removed when you run tests

2 changes: 1 addition & 1 deletion assets/featureFlag/beta.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@
]
}
}
}
}
2 changes: 1 addition & 1 deletion assets/featureFlag/prod.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@
]
}
}
}
}
2 changes: 2 additions & 0 deletions src/stacks/actions/StackActionOperations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ export async function cleanupReviewStack(
maxRetries: 3,
initialDelayMs: 1000,
operationName: `Delete stack ${workflow.stackName}`,
totalTimeoutMs: 30_000,
},
logger,
);
Expand All @@ -298,6 +299,7 @@ export async function deleteChangeSet(
maxRetries: 3,
initialDelayMs: 1000,
operationName: `Delete change set ${workflow.changeSetName}`,
totalTimeoutMs: 30_000,
},
logger,
);
Expand Down
91 changes: 53 additions & 38 deletions src/utils/Retry.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import { Logger } from 'pino';
import { extractErrorMessage } from './Errors';

export type RetryOptions = {
maxRetries?: number;
initialDelayMs?: number;
maxDelayMs?: number;
backoffMultiplier?: number;
jitterFactor?: number;
operationName?: string;
totalTimeoutMs?: number;
operationName: string;
totalTimeoutMs: number;
};

function sleep(ms: number): Promise<void> {
Expand All @@ -16,65 +17,79 @@ function sleep(ms: number): Promise<void> {
});
}

function checkTotalTimeout(startTime: number, totalTimeoutMs: number | undefined, operationName: string): void {
if (totalTimeoutMs !== undefined && Date.now() - startTime > totalTimeoutMs) {
throw new Error(`${operationName} timed out after ${totalTimeoutMs}ms`);
}
}

function calculateNextDelay(
currentDelay: number,
function calculateDelay(
attempt: number,
initialDelayMs: number,
jitterFactor: number,
backoffMultiplier: number,
maxDelayMs: number,
): number {
const nextDelay = currentDelay * backoffMultiplier;
const jitter = jitterFactor > 0 ? Math.random() * jitterFactor * nextDelay : 0;
return Math.min(nextDelay + jitter, maxDelayMs);
// 1. Exponential Backoff: initial * 2^0, initial * 2^1, etc.
const exponentialDelay = initialDelayMs * Math.pow(backoffMultiplier, attempt);

// 2. Cap at Max Delay
const cappedDelay = Math.min(exponentialDelay, maxDelayMs);

// 3. Add Jitter (randomized percentage of the current delay), capped at maxDelayMs
const jitter = jitterFactor > 0 ? Math.random() * jitterFactor * cappedDelay : 0;

return Math.min(cappedDelay + jitter, maxDelayMs);
}

/**
* Retry a function with exponential backoff
*/
export async function retryWithExponentialBackoff<T>(
fn: () => Promise<T>,
options: RetryOptions,
log: Logger,
sleepFn: (ms: number) => Promise<void> = (ms: number) => {
return sleep(ms);
},
): Promise<T> {
const maxRetries = options.maxRetries ?? 3;
const initialDelayMs = options.initialDelayMs ?? 1000;
const maxDelayMs = options.maxDelayMs ?? 30_000;
const backoffMultiplier = options.backoffMultiplier ?? 2;
const jitterFactor = options.jitterFactor ?? 0;
const operationName = options.operationName ?? 'Operation';
const totalTimeoutMs = options.totalTimeoutMs;
const {
maxRetries = 3,
initialDelayMs = 1000,
maxDelayMs = 5000,
backoffMultiplier = 2,
jitterFactor = 0.1,
operationName,
totalTimeoutMs,
} = options;

let lastError: Error = new Error('No attempts made');
let currentDelay = initialDelayMs;
const startTime = Date.now();
if (backoffMultiplier < 1) {
throw new Error('Backoff multiplier must be greater than or equal to 1');
}

if (totalTimeoutMs <= 0) {
throw new Error('Total timeout must be greater than 0');
}

for (let attempt = 0; attempt <= maxRetries; attempt++) {
checkTotalTimeout(startTime, totalTimeoutMs, operationName);
let lastError: Error | undefined = undefined;
const startTime = performance.now();

// If maxRetries is 3, we run: 0 (initial), 1, 2, 3. Total 4 attempts.
const attempts = maxRetries + 1;
for (let attemptIdx = 0; attemptIdx < attempts; attemptIdx++) {
if (attemptIdx > 0 && performance.now() - startTime >= totalTimeoutMs) {
const message = `${operationName} timed out after ${performance.now() - startTime}ms, on attempt #${attemptIdx + 1}/${attempts}`;
const errorMsg = lastError ? `${message}. Last error: ${lastError.message}` : message;
throw new Error(errorMsg);
}

try {
return await fn();
} catch (error) {
lastError = error instanceof Error ? error : new Error(String(error));

if (attempt === maxRetries) {
throw new Error(
`${operationName} failed after ${maxRetries + 1} attempts. Last error: ${lastError.message}`,
);
lastError = error instanceof Error ? error : new Error(extractErrorMessage(error));
if (attemptIdx === attempts - 1) {
throw new Error(`${operationName} failed after ${attempts} attempts. Last error: ${lastError.message}`);
}

const delay = calculateDelay(attemptIdx, initialDelayMs, jitterFactor, backoffMultiplier, maxDelayMs);
log.warn(
`${operationName} attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${currentDelay}ms...`,
`${operationName} attempt ${attemptIdx + 1} failed: ${lastError.message}. Retrying in ${Math.round(delay)}ms...`,
);

await sleep(currentDelay);
currentDelay = calculateNextDelay(currentDelay, jitterFactor, backoffMultiplier, maxDelayMs);
await sleepFn(delay);
}
}

throw lastError;
throw new Error('Something went wrong, this is not reachable');
}
189 changes: 1 addition & 188 deletions tst/unit/services/cfnLint/PyodideWorkerManager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -645,35 +645,6 @@ describe('PyodideWorkerManager', () => {
});

describe('retry logic', () => {
test('should not retry when maxRetries is 0', async () => {
// Create a worker manager with retries disabled
const noRetryWorkerManager = new PyodideWorkerManager(
{
maxRetries: 0,
initialDelayMs: 10,
maxDelayMs: 100,
backoffMultiplier: 2,
totalTimeoutMs: 5000,
},
createDefaultCfnLintSettings(),

mockLogging,
);

// Mock worker creation to fail
workerConstructor.mockImplementation(() => {
throw new Error('Worker creation failed');
});

// Expect initialization to fail immediately without retries
await expect(noRetryWorkerManager.initialize()).rejects.toThrow(
'Pyodide initialization failed after 1 attempts. Last error: Worker creation failed',
);

// Verify no retry attempts were logged
expect(mockLogging.warn.callCount).toBe(0); // No retry warnings
});

test('should retry initialization on failure and eventually succeed', async () => {
// Create a worker manager with retry enabled
const retryWorkerManager = new PyodideWorkerManager(
Expand Down Expand Up @@ -954,9 +925,7 @@ describe('PyodideWorkerManager', () => {
});

// Expect initialization to fail with timeout error
await expect(retryWorkerManager.initialize()).rejects.toThrow(
'Pyodide initialization timed out after 300ms',
);
await expect(retryWorkerManager.initialize()).rejects.toThrow(/Pyodide initialization timed out after 3/);

const totalTime = Date.now() - startTime;

Expand All @@ -968,162 +937,6 @@ describe('PyodideWorkerManager', () => {
expect(attemptCount).toBeGreaterThan(1);
expect(attemptCount).toBeLessThan(11); // Should timeout before reaching max retries
});

test('should work without totalTimeoutMs (backward compatibility)', async () => {
// Create a worker manager without totalTimeoutMs (uses default from DefaultSettings)
const retryWorkerManager = new PyodideWorkerManager(
{
maxRetries: 2,
initialDelayMs: 10,
maxDelayMs: 50,
backoffMultiplier: 2,
totalTimeoutMs: 120_000, // Uses default value from DefaultSettings
},
createDefaultCfnLintSettings(),

mockLogging,
);

let attemptCount = 0;

workerConstructor.mockImplementation(() => {
attemptCount++;
throw new Error('Worker creation failed');
});

// Should fail after max retries, not timeout (since timeout is very large)
await expect(retryWorkerManager.initialize()).rejects.toThrow(
'Pyodide initialization failed after 3 attempts. Last error: Worker creation failed',
);

// Should have made all retry attempts (initial + 2 retries = 3 total)
expect(attemptCount).toBe(3);
});

test('should prefer explicit totalTimeoutMs over calculated timeout', async () => {
// Create a worker manager where explicit totalTimeoutMs is much shorter than calculated timeout
const retryWorkerManager = new PyodideWorkerManager(
{
maxRetries: 5, // Would normally allow 6 attempts
initialDelayMs: 100,
maxDelayMs: 1000, // Calculated timeout would be 1000 * 6 = 6000ms
backoffMultiplier: 2,
totalTimeoutMs: 200, // But explicit timeout is only 200ms
},
createDefaultCfnLintSettings(),

mockLogging,
);

const startTime = Date.now();
let attemptCount = 0;

workerConstructor.mockImplementation(() => {
attemptCount++;
throw new Error('Worker creation failed');
});

// Should timeout quickly due to explicit totalTimeoutMs
await expect(retryWorkerManager.initialize()).rejects.toThrow(
'Pyodide initialization timed out after 200ms',
);

const totalTime = Date.now() - startTime;

// Should respect the explicit 200ms timeout, not the calculated 6000ms
// Allow more variance for CI environments
expect(totalTime).toBeGreaterThanOrEqual(180);
expect(totalTime).toBeLessThan(350);

// Should have made fewer attempts due to quick timeout
expect(attemptCount).toBeLessThan(6); // Should not reach max retries
});

test('should handle zero totalTimeoutMs (immediate timeout)', async () => {
// Create a worker manager with zero totalTimeoutMs
const retryWorkerManager = new PyodideWorkerManager(
{
maxRetries: 3,
initialDelayMs: 100,
maxDelayMs: 500,
backoffMultiplier: 2,
totalTimeoutMs: 0, // Immediate timeout
},
createDefaultCfnLintSettings(),

mockLogging,
);

let attemptCount = 0;

workerConstructor.mockImplementation(() => {
attemptCount++;
throw new Error('Worker creation failed');
});

// Should timeout immediately after first attempt
await expect(retryWorkerManager.initialize()).rejects.toThrow('Pyodide initialization timed out after 0ms');

// Should have made at least one attempt before timing out
expect(attemptCount).toBe(1);
});

test('should handle very large totalTimeoutMs', async () => {
// Create a worker manager with very large totalTimeoutMs
const retryWorkerManager = new PyodideWorkerManager(
{
maxRetries: 2,
initialDelayMs: 10,
maxDelayMs: 50,
backoffMultiplier: 2,
totalTimeoutMs: 10000, // Very large timeout (10 seconds)
},
createDefaultCfnLintSettings(),

mockLogging,
);

let attemptCount = 0;

workerConstructor.mockImplementation(() => {
attemptCount++;
throw new Error('Worker creation failed');
});

// Should fail due to max retries, not timeout
await expect(retryWorkerManager.initialize()).rejects.toThrow(
'Pyodide initialization failed after 3 attempts. Last error: Worker creation failed',
);

// Should have made all retry attempts
expect(attemptCount).toBe(3);
});

test('should use default operation name in error messages', async () => {
// Create a worker manager without specifying operation name
const retryWorkerManager = new PyodideWorkerManager(
{
maxRetries: 1,
initialDelayMs: 10,
maxDelayMs: 100,
backoffMultiplier: 2,
totalTimeoutMs: 5000,
},
createDefaultCfnLintSettings(),

mockLogging,
);

// Always fail worker creation
workerConstructor.mockImplementation(() => {
throw new Error('Worker creation failed');
});

// Expect initialization to fail with default operation name
await expect(retryWorkerManager.initialize()).rejects.toThrow(
'Pyodide initialization failed after 2 attempts. Last error: Worker creation failed',
);
});
});

describe('race conditions and edge cases', () => {
Expand Down
2 changes: 2 additions & 0 deletions tst/unit/stackActions/StackActionWorkflowOperations.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ describe('StackActionWorkflowOperations', () => {
maxRetries: 3,
initialDelayMs: 1000,
operationName: 'Delete stack test-stack',
totalTimeoutMs: 30000,
},
expect.any(Object), // logger
);
Expand Down Expand Up @@ -293,6 +294,7 @@ describe('StackActionWorkflowOperations', () => {
maxRetries: 3,
initialDelayMs: 1000,
operationName: 'Delete change set changeset-123',
totalTimeoutMs: 30000,
},
expect.any(Object), // logger
);
Expand Down
Loading
Loading