Skip to content

Commit f475a6c

Browse files
authored
fix(ec2): increase robustness of checking ssm agent ping status. (aws#6621)
## Problem As part of the EC2 Connect process, we check if the SSM agent is pingable on the target instance. This is done here: https://github.com/aws/aws-toolkit-vscode/blob/b9af56c3097242fb796995479f864d95098bf713/packages/core/src/awsService/ec2/model.ts#L153-L162 This check can fail, and currently does fail a decent amount of the time according to telemetry. ## Solution - Wrap the check in a `waitUntil`, retrying every half second up to 10 times. --- - Treat all work as PUBLIC. Private `feature/x` branches will not be squash-merged at release time. - Your code changes must meet the guidelines in [CONTRIBUTING.md](https://github.com/aws/aws-toolkit-vscode/blob/master/CONTRIBUTING.md#guidelines). - License: I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent 0989414 commit f475a6c

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed

packages/core/src/awsService/ec2/model.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ import {
2929
testSshConnection,
3030
} from '../../shared/extensions/ssh'
3131
import { getLogger } from '../../shared/logger/logger'
32-
import { CancellationError, Timeout } from '../../shared/utilities/timeoutUtils'
32+
import { CancellationError, Timeout, waitUntil } from '../../shared/utilities/timeoutUtils'
3333
import { showMessageWithCancel } from '../../shared/utilities/messages'
3434
import { SshConfig } from '../../shared/sshConfig'
3535
import { SshKeyPair } from './sshKeyPair'
@@ -150,8 +150,14 @@ export class Ec2Connecter implements vscode.Disposable {
150150
}
151151
}
152152

153-
private async checkForInstanceSsmError(selection: Ec2Selection): Promise<void> {
154-
const isSsmAgentRunning = (await this.ssmClient.getInstanceAgentPingStatus(selection.instanceId)) === 'Online'
153+
public async checkForInstanceSsmError(
154+
selection: Ec2Selection,
155+
options?: Partial<{ interval: number; timeout: number }>
156+
): Promise<void> {
157+
const isSsmAgentRunning = await waitUntil(
158+
async () => (await this.ssmClient.getInstanceAgentPingStatus(selection.instanceId)) === 'Online',
159+
{ interval: options?.interval ?? 500, timeout: options?.timeout ?? 5000 }
160+
)
155161

156162
if (!isSsmAgentRunning) {
157163
this.throwConnectionError('Is SSM Agent running on the target instance?', selection, {

packages/core/src/test/awsService/ec2/model.test.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,17 @@ describe('Ec2ConnectClient', function () {
125125
}
126126
})
127127

128+
it('retries if agent status is not online', async function () {
129+
const instanceAgentStatus = sinon.stub(SsmClient.prototype, 'getInstanceAgentPingStatus')
130+
instanceAgentStatus.onFirstCall().resolves('Offline')
131+
instanceAgentStatus.onSecondCall().resolves('Online')
132+
try {
133+
await client.checkForInstanceSsmError(instanceSelection, { interval: 10, timeout: 100 })
134+
} catch (err) {
135+
assert.ok(false, `checkForInstanceSsmError failed with error '${err}'`)
136+
}
137+
})
138+
128139
it('does not throw an error if all checks pass', async function () {
129140
sinon.stub(Ec2Connecter.prototype, 'isInstanceRunning').resolves(true)
130141
sinon.stub(Ec2Connecter.prototype, 'getAttachedIamRole').resolves({ Arn: 'testRole' } as IAM.Role)

0 commit comments

Comments
 (0)