Skip to content

Commit 2fc445c

Browse files
committed
feat(integ-runner): detect bootstrap errors and retry tests in valid regions
Add automatic detection of bootstrap-related failures during integration test runs.

When a region is not bootstrapped, the integ-runner now:

- Detects bootstrap errors using a strongly-typed BootstrapError class
- Removes the non-bootstrapped region from the environment pool
- Re-queues failed tests to run in the remaining valid regions
- Provides clear feedback with the exact `cdk bootstrap` command needed

This prevents wasted CI time from repeatedly scheduling tests in non-bootstrapped regions and gives users actionable error messages.
1 parent 6386a54 commit 2fc445c

File tree

23 files changed

+2839
-79
lines changed

23 files changed

+2839
-79
lines changed

.projenrc.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,6 +1447,7 @@ const integRunner = configureProject(
14471447
'@types/yargs',
14481448
'constructs@^10',
14491449
'@aws-cdk/[email protected]',
1450+
'fast-check@^3.23.2',
14501451
],
14511452
allowPrivateDeps: true,
14521453
tsconfig: {

packages/@aws-cdk/integ-runner/.projen/deps.json

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/@aws-cdk/integ-runner/lib/cli.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import type { IntegTest, IntegTestInfo } from './runner/integration-tests';
88
import { IntegrationTests } from './runner/integration-tests';
99
import { processUnstableFeatures, availableFeaturesDescription } from './unstable-features';
1010
import type { IntegRunnerMetrics, IntegTestWorkerConfig, DestructiveChange } from './workers';
11-
import { runSnapshotTests, runIntegrationTests } from './workers';
11+
import { runSnapshotTests, runIntegrationTests, printRemovedEnvironmentsSummary } from './workers';
1212
import { watchIntegrationTest } from './workers/integ-watch-worker';
1313

1414
// https://github.com/yargs/yargs/issues/1929
@@ -184,7 +184,7 @@ async function run(options: ReturnType<typeof parseCliArgs>) {
184184

185185
// run integration tests if `--update-on-failed` OR `--force` is used
186186
if (options.runUpdateOnFailed || options.force) {
187-
const { success, metrics } = await runIntegrationTests({
187+
const { success, metrics, removedEnvironments } = await runIntegrationTests({
188188
pool,
189189
tests: testsToRun,
190190
regions: options.testRegions,
@@ -197,6 +197,9 @@ async function run(options: ReturnType<typeof parseCliArgs>) {
197197
});
198198
testsSucceeded = success;
199199

200+
// Print summary of removed environments due to bootstrap errors
201+
printRemovedEnvironmentsSummary(removedEnvironments);
202+
200203
if (options.clean === false) {
201204
logger.warning('Not cleaning up stacks since "--no-clean" was used');
202205
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import { ToolkitError } from '@aws-cdk/toolkit-lib';
2+
3+
/**
 * Outcome of inspecting an error for bootstrap-related causes.
 *
 * `region` and `account` are optional: they are only filled in when the
 * inspected error was recognized as a bootstrap error.
 */
export interface BootstrapErrorInfo {
  /** Whether the inspected error was recognized as a bootstrap error */
  readonly isBootstrapError: boolean;

  /** Text of the inspected error */
  readonly message: string;

  /** Region of the environment the bootstrap error refers to */
  readonly region?: string;

  /** Account of the environment the bootstrap error refers to */
  readonly account?: string;
}
12+
13+
/**
 * Classifies an arbitrary thrown value as a bootstrap error or not.
 *
 * @param error - the value to inspect; may be anything that was thrown
 * @returns the classification; region and account are included only when
 * the value is a strongly-typed bootstrap error
 */
export function detectBootstrapError(error: unknown): BootstrapErrorInfo {
  // Anything not tagged as a bootstrap error is reported with just its text
  if (!ToolkitError.isBootstrapError(error)) {
    const text = error instanceof Error ? error.message : String(error);
    return { isBootstrapError: false, message: text };
  }

  // Strongly-typed bootstrap error: it carries the affected environment
  const { environment, message } = error;
  return {
    isBootstrapError: true,
    region: environment.region,
    account: environment.account,
    message,
  };
}

packages/@aws-cdk/integ-runner/lib/workers/common.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { format } from 'util';
22
import type { ResourceImpact } from '@aws-cdk/cloudformation-diff';
33
import * as chalk from 'chalk';
44
import * as logger from '../logger';
5+
import type { TestEnvironment, RemovedEnvironmentInfo } from './environment-pool';
56
import type { IntegTestInfo } from '../runner/integration-tests';
67

78
/**
@@ -37,6 +38,41 @@ export interface IntegTestWorkerConfig extends IntegTestInfo {
3738
readonly destructiveChanges?: DestructiveChange[];
3839
}
3940

41+
/**
 * Asks for a single environment to be taken out of the pool
 */
export interface EnvironmentRemovalRequest {
  /** Which environment should be removed */
  readonly environment: TestEnvironment;

  /** Why the environment should be removed, phrased for humans */
  readonly reason: string;

  /** The AWS account ID, when it is known */
  readonly account?: string;
}
60+
61+
/**
 * Describes a failed test that is a candidate for being retried
 */
export interface RetryableTestFailure extends IntegTestInfo {
  /** Environment in which the test failed */
  readonly failedEnvironment: TestEnvironment;

  /** The error, phrased for humans */
  readonly errorMessage: string;
}
75+
4076
/**
4177
* Information on any destructive changes
4278
*/
@@ -118,6 +154,16 @@ export interface IntegBatchResponse {
118154
* list represents metrics from a single worker (account + region).
119155
*/
120156
readonly metrics: IntegRunnerMetrics[];
157+
158+
/**
159+
* Tests that failed but may succeed if retried in a different environment
160+
*/
161+
readonly retryableFailures?: RetryableTestFailure[];
162+
163+
/**
164+
* Environments that should be removed from the pool
165+
*/
166+
readonly environmentRemovals?: EnvironmentRemovalRequest[];
121167
}
122168

123169
/**
@@ -344,3 +390,24 @@ export function formatError(error: any): string {
344390

345391
return `${name}: ${message}`;
346392
}
393+
394+
/**
 * Logs, as warnings, every environment that was removed due to bootstrap
 * errors, together with the `cdk bootstrap` command that makes it usable.
 *
 * Nothing is logged when the list is empty.
 *
 * @param removedEnvironments - environments that were removed during the run
 */
export function printRemovedEnvironmentsSummary(removedEnvironments: RemovedEnvironmentInfo[]): void {
  if (removedEnvironments.length === 0) {
    return;
  }

  logger.warning('\n%s', chalk.bold('Environments removed due to bootstrap errors:'));

  for (const env of removedEnvironments) {
    const profileStr = env.profile ? `${env.profile}/` : '';

    // `cdk bootstrap` only accepts environments spelled `aws://ACCOUNT/REGION`;
    // a bare region is rejected, so use a placeholder when the account is unknown.
    const target = `aws://${env.account || 'ACCOUNT-NUMBER'}/${env.region}`;

    // The command must run with the same credentials the failed test used.
    const profileFlag = env.profile ? ` --profile ${env.profile}` : '';

    logger.warning(' • %s%s', profileStr, env.region);
    logger.warning(' Run: %s', chalk.blue(`cdk bootstrap ${target}${profileFlag}`));
  }

  logger.warning('');
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/**
 * A profile+region pair that tests can be executed against (an "environment")
 */
export interface TestEnvironment {
  /** Region of the environment */
  readonly region: string;

  /** Profile of the environment; may be left out */
  readonly profile?: string;
}
8+
9+
/**
 * A removed environment, annotated with the details of its removal
 */
export interface RemovedEnvironmentInfo extends TestEnvironment {
  /** Why the environment was removed */
  readonly reason: string;

  /** When the environment was removed */
  readonly removedAt: Date;

  /** Account of the environment, when it is known */
  readonly account?: string;
}
17+
18+
/**
 * Manages a pool of test environments for integration test workers.
 *
 * This class serves as a centralized pool for test environments, handling:
 * - Tracking which environments are available vs removed
 * - Recording removal reasons for reporting
 *
 * Environments are stored as given instead of being rebuilt from their
 * string key, so profile names containing `:` and an explicit profile named
 * `default` are reported back unchanged.
 *
 * Future extensions could include:
 * - Load balancing across environments
 * - Rate limiting per environment
 * - Environment health scoring
 * - Automatic environment recovery
 */
export class EnvironmentPool {
  /** Environments still available for tests, indexed by their pool key */
  private readonly availableEnvironments: Map<string, TestEnvironment> = new Map();

  /** Environments that were removed, indexed by their pool key */
  private readonly removedEnvironments: Map<string, RemovedEnvironmentInfo> = new Map();

  /**
   * @param environments - the environments that are initially available;
   * entries that map to the same key are only tracked once
   */
  constructor(environments: TestEnvironment[]) {
    for (const env of environments) {
      const key = this.makeKey(env);
      if (!this.availableEnvironments.has(key)) {
        // Keep only the two identifying fields, whatever else the caller's object carries
        this.availableEnvironments.set(key, { profile: env.profile, region: env.region });
      }
    }
  }

  /**
   * Creates a unique key for a profile+region combination
   *
   * A missing profile and a profile literally named `default` share one key.
   */
  private makeKey(env: TestEnvironment): string {
    return `${env.profile ?? 'default'}:${env.region}`;
  }

  /**
   * Marks an environment as removed (unavailable for future tests)
   *
   * Does nothing when the environment is not currently available, so the
   * details of the first removal are never overwritten.
   *
   * @param env - the environment to remove
   * @param reason - why the environment is being removed
   * @param account - account of the environment, if known
   */
  public removeEnvironment(env: TestEnvironment, reason: string, account?: string): void {
    const key = this.makeKey(env);
    if (!this.availableEnvironments.delete(key)) {
      return;
    }

    this.removedEnvironments.set(key, {
      ...env,
      reason,
      account,
      removedAt: new Date(),
    });
  }

  /**
   * Checks if an environment is still available
   */
  public isAvailable(env: TestEnvironment): boolean {
    return this.availableEnvironments.has(this.makeKey(env));
  }

  /**
   * Gets all available environments, in the order they were given
   *
   * @returns fresh objects on every call
   */
  public getAvailableEnvironments(): TestEnvironment[] {
    return Array.from(this.availableEnvironments.values(), env => ({ ...env }));
  }

  /**
   * Gets all removed environments with their removal info
   */
  public getRemovedEnvironments(): RemovedEnvironmentInfo[] {
    return Array.from(this.removedEnvironments.values());
  }
}

0 commit comments

Comments
 (0)