Skip to content
Merged
60 changes: 38 additions & 22 deletions packages/@aws-cdk-testing/cli-integ/lib/aws.ts
Original file line number Diff line number Diff line change
Expand Up @@ -276,28 +276,14 @@ export class AwsClients {
}

/**
 * Wait until the given role can be assumed (i.e. until it has replicated).
 *
 * Repeatedly issues an `AssumeRole` call for `roleArn`. Errors named
 * `AccessDenied` are retried until a 60 second deadline; any other error
 * is propagated by `retryOnMatchingErrors`.
 *
 * Delegating to `retryOnMatchingErrors` (instead of a hand-rolled loop)
 * means there is a pause between attempts rather than a tight loop that
 * hits STS back-to-back while the role is still propagating.
 *
 * @param roleArn ARN of the role to probe
 */
public async waitForAssumeRole(roleArn: string) {
  await retryOnMatchingErrors(
    () => this.sts.send(new AssumeRoleCommand({
      RoleArn: roleArn,
      RoleSessionName: 'test-existence',
    })),
    ['AccessDenied'],
    retry.forSeconds(60),
  );
}

public async deleteRole(name: string) {
Expand Down Expand Up @@ -381,6 +367,36 @@ export async function sleep(ms: number) {
return new Promise((ok) => setTimeout(ok, ms));
}

/**
 * Retry an async operation with error filtering until a deadline is hit.
 *
 * Use `retry.forSeconds()` to construct a deadline relative to right now.
 *
 * The operation is always attempted at least once. The deadline is only
 * consulted after a failed attempt, so an attempt that is already running
 * is never interrupted.
 *
 * @param operation  the async operation to attempt
 * @param errorNames error names (matched against the thrown value's `name`
 *                   property) that should trigger another attempt
 * @param deadline   point in time after which a retryable failure is no
 *                   longer retried
 * @param interval   milliseconds to wait between attempts
 * @returns the result of the first successful attempt
 * @throws the original thrown value, unchanged, if its `name` is not listed
 *         in `errorNames` (including values that have no string `name`)
 * @throws Error if a retryable error occurs after the deadline has passed
 */
export async function retryOnMatchingErrors<T>(
  operation: () => Promise<T>,
  errorNames: string[],
  deadline: Date,
  interval: number = 5000,
): Promise<T> {
  let attempts = 0;
  while (true) {
    attempts++;
    try {
      return await operation();
    } catch (e: unknown) {
      // Rejections may be arbitrary values (even null/undefined), so read
      // `name` defensively instead of assuming an Error instance.
      const name = (e as { name?: unknown } | null | undefined)?.name;

      // Filter first: an unexpected error must surface as-is, and must not be
      // disguised as a timeout just because the deadline also happens to
      // have passed.
      if (typeof name !== 'string' || !errorNames.includes(name)) {
        throw e;
      }

      if (Date.now() > deadline.getTime()) {
        throw new Error(`Operation did not succeed after ${attempts} attempts: ${e}`);
      }

      await sleep(interval);
    }
  }
}

function chainableCredentials(region: string): AwsCredentialIdentityProvider {
if ((process.env.CODEBUILD_BUILD_ARN || process.env.GITHUB_RUN_ID) && process.env.AWS_PROFILE) {
// in codebuild we must assume the role that the cdk uses
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { GetCallerIdentityCommand } from '@aws-sdk/client-sts';
// eslint-disable-next-line import/no-relative-packages
import type { DockerDomainCredentialSource } from '../../../../../@aws-cdk/cdk-assets-lib/lib/private/docker-credentials';
import type { TestFixture } from '../../../lib';
import { integTest, withDefaultFixture, withRetry } from '../../../lib';
import { integTest, withDefaultFixture, withRetry, retry } from '../../../lib';

jest.setTimeout(2 * 60 * 60_000); // Includes the time to acquire locks, worst-case single-threaded runtime

Expand Down Expand Up @@ -83,13 +83,17 @@ async function testDockerCredential(fixture: TestFixture, credSource: DockerDoma
fs.writeFileSync(input, `${domain}\n`);

await fixture.cdkAssets.makeCliAvailable();
const output = await fixture.shell(['docker-credential-cdk-assets', 'get'], {
modEnv: {
...fixture.cdkShellEnv(),
CDK_DOCKER_CREDS_FILE: credsFilePath,
},
stdio: [fs.openSync(input, 'r')],
captureStderr: false,
let output: string = '';

await retry(process.stdout, 'Getting docker credentials', retry.forSeconds(60), async () => {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be retry, waitForAssumeRole, or even retryOnMatchingErrors?

Copy link
Contributor Author

@abidhasan-aws abidhasan-aws Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should not be waitForAssumeRole.

We retry this call because it fetches credentials, and that needs a wait period for IAM eventual consistency. We could use retryOnMatchingErrors, but there is no clean way to catch the error and retry based on it: on failure, the package just exits with code 1.

So we are retrying anyway and not based on errors, thus the generic retry.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Final thought: should we add a sleep time to the retry function? Could it work like the wait-and-retry we do in retryOnMatchingErrors? Would that add any value?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It already has a sleep time of 5 seconds.

output = await fixture.shell(['docker-credential-cdk-assets', 'get'], {
modEnv: {
...fixture.cdkShellEnv(),
CDK_DOCKER_CREDS_FILE: credsFilePath,
},
stdio: [fs.openSync(input, 'r')],
captureStderr: false,
});
});

const response = JSON.parse(output);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import { deploysSuccessfully } from './testcase';
import { integTest, withCDKMigrateFixture } from '../../../lib';
import { integTest, withCDKMigrateFixture, withRetry } from '../../../lib';

const language = 'java';

jest.setTimeout(2 * 60 * 60_000); // Includes the time to acquire locks, worst-case single-threaded runtime

// Registers the Java `cdk migrate` deployment test. The fixture callback is
// wrapped in `withRetry` (presumably so a failing run is attempted again —
// confirm against the helper's definition in `lib`).
integTest(
  `cdk migrate ${language} deploys successfully`,
  withRetry(withCDKMigrateFixture(language, async (fixture) => {
    await deploysSuccessfully(fixture, language);
  })),
);