Skip to content

Commit 88a283a

Browse files
authored
fix(codecatalyst): DevEnv connect sometimes fails #3257
Problem: - Connecting to a CodeCatalyst DevEnv sometimes fails. - After doing "Update" on a connected DevEnv, polling may fail with this service error: [ERROR]: aws.codecatalyst.openDevEnv: [ConflictException: Cannot start Dev Environment because update process is still going on Solution: - Catch `ConflictException` and continue waiting. - If the DevEnv transitions from STARTING > STOPPING, wait for STARTING a bit before giving up. - Log state transitions for visibility: ``` 2023-… [DEBUG]: devenv not started, waiting (time: 18.023 s): 9981abbc-87f6-4c28- / [ STOPPED/4956ms STARTING/11974ms] ... 2023-… [DEBUG]: devenv not started, waiting (time: 41.723 s): 9981abbc-87f6-4c28- / [ STOPPED/4956ms STARTING/35674ms] 2023-… [DEBUG]: devenv not started, waiting (time: 65.485 s): 9981abbc-87f6-4c28- / [ STOPPED/4956ms STARTING/59436ms] 2023-… [INFO]: devenv started after 66.572 s: 9981abbc-87f6-4c28- / [ STOPPED/4956ms STARTING/60523ms RUNNING/1ms] ```
1 parent 1ddf52a commit 88a283a

File tree

2 files changed

+69
-21
lines changed

2 files changed

+69
-21
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "Bug Fix",
3+
"description": "CodeCatalyst: connecting to a Dev Environment while it is updating sometimes fails"
4+
}

src/shared/clients/codecatalystClient.ts

Lines changed: 65 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -561,62 +561,105 @@ class CodeCatalystClientInternal {
561561
status: string,
562562
timeout: Timeout = new Timeout(180000)
563563
): Promise<DevEnvironment> {
564-
let lastStatus: undefined | string
564+
// Track the status changes chronologically so that we can
565+
// 1. reason about hysterisis (weird flip-flops)
566+
// 2. have visibility in the logs
567+
const lastStatus = new Array<{ status: string; start: number }>()
568+
let alias: string | undefined
569+
565570
try {
566571
const devenv = await this.getDevEnvironment(args)
567-
lastStatus = devenv.status
568-
if (status === 'RUNNING' && lastStatus === 'RUNNING') {
572+
alias = devenv.alias
573+
lastStatus.push({ status: devenv.status, start: Date.now() })
574+
if (status === 'RUNNING' && devenv.status === 'RUNNING') {
569575
// "Debounce" in case caller did not check if the environment was already running.
570576
return devenv
571577
}
572578
} catch {
573-
lastStatus = undefined
579+
lastStatus.length = 0
580+
}
581+
582+
function statusesToString() {
583+
let s = ''
584+
for (let i = 0; i < lastStatus.length; i++) {
585+
const item = lastStatus[i]
586+
const nextItem = i < lastStatus.length - 1 ? lastStatus[i + 1] : undefined
587+
const nextTime = nextItem ? nextItem.start : Date.now()
588+
const elapsed = nextTime - item.start
589+
s += `${s ? ' ' : ''}${item.status}/${elapsed}ms`
590+
}
591+
return `[${s}]`
592+
}
593+
594+
const doLog = (kind: 'debug' | 'info', msg: string) => {
595+
const name = (alias ? alias : args.id).substring(0, 19)
596+
const fmt = `${msg} (time: %d s): %s %s`
597+
if (kind === 'debug') {
598+
this.log.debug(fmt, timeout.elapsedTime / 1000, name, statusesToString())
599+
} else {
600+
this.log.info(fmt, timeout.elapsedTime / 1000, name, statusesToString())
601+
}
574602
}
575603

576604
const progress = await showMessageWithCancel(
577-
localize('AWS.CodeCatalyst.startMde.message', 'CodeCatalyst'),
605+
localize('AWS.CodeCatalyst.devenv.message', 'CodeCatalyst'),
578606
timeout
579607
)
580-
progress.report({ message: localize('AWS.CodeCatalyst.startMde.checking', 'checking status...') })
608+
progress.report({ message: localize('AWS.CodeCatalyst.devenv.checking', 'checking status...') })
581609

582610
const pollDevEnv = waitUntil(
583611
async () => {
584-
this.log.debug(
585-
'devenv not started, waiting (time: %d seconds): %s',
586-
timeout.elapsedTime / 1000,
587-
args.id
588-
)
612+
doLog('debug', 'devenv not started, waiting')
589613
// technically this will continue to be called until it reaches its own timeout, need a better way to 'cancel' a `waitUntil`
590614
if (timeout.completed) {
591615
return
592616
}
593617

594618
const resp = await this.getDevEnvironment(args)
595-
if (lastStatus === 'STARTING' && (resp.status === 'STOPPED' || resp.status === 'STOPPING')) {
619+
const startingStates = lastStatus.filter(o => o.status === 'STARTING').length
620+
if (
621+
startingStates > 1 &&
622+
lastStatus[0].status === 'STARTING' &&
623+
(resp.status === 'STOPPED' || resp.status === 'STOPPING')
624+
) {
596625
throw new ToolkitError('Dev Environment failed to start', { code: 'BadDevEnvState' })
597626
}
598627

599628
if (resp.status === 'STOPPED') {
600629
progress.report({
601-
message: localize('AWS.CodeCatalyst.startMde.stopStart', 'Resuming Dev Environment...'),
630+
message: localize('AWS.CodeCatalyst.devenv.stopStart', 'Resuming Dev Environment...'),
602631
})
603-
await this.startDevEnvironment(args)
632+
try {
633+
await this.startDevEnvironment(args)
634+
} catch (e) {
635+
const err = e as AWS.AWSError
636+
// May happen after "Update Dev Environment":
637+
// ConflictException: "Cannot start Dev Environment because update process is still going on"
638+
// Continue retrying in that case.
639+
if (err.code === 'ConflictException') {
640+
doLog('debug', 'devenv not started (ConflictException), waiting')
641+
} else {
642+
throw new ToolkitError('Dev Environment failed to start', {
643+
code: 'BadDevEnvState',
644+
cause: err,
645+
})
646+
}
647+
}
604648
} else if (resp.status === 'STOPPING') {
605649
progress.report({
606-
message: localize(
607-
'AWS.CodeCatalyst.startMde.resuming',
608-
'Waiting for Dev Environment to stop...'
609-
),
650+
message: localize('AWS.CodeCatalyst.devenv.resuming', 'Waiting for Dev Environment to stop...'),
610651
})
611652
} else if (resp.status === 'FAILED') {
612653
throw new ToolkitError('Dev Environment failed to start', { code: 'FailedDevEnv' })
613654
} else {
614655
progress.report({
615-
message: localize('AWS.CodeCatalyst.startMde.starting', 'Opening Dev Environment...'),
656+
message: localize('AWS.CodeCatalyst.devenv.starting', 'Opening Dev Environment...'),
616657
})
617658
}
618659

619-
lastStatus = resp.status
660+
if (lastStatus[lastStatus.length - 1].status !== resp.status) {
661+
lastStatus.push({ status: resp.status, start: Date.now() })
662+
}
620663
return resp.status === 'RUNNING' ? resp : undefined
621664
},
622665
// note: the `waitUntil` will resolve prior to the real timeout if it is refreshed
@@ -625,9 +668,10 @@ class CodeCatalystClientInternal {
625668

626669
const devenv = await waitTimeout(pollDevEnv, timeout)
627670
if (!devenv) {
671+
doLog('info', 'devenv failed to start')
628672
throw new ToolkitError('Dev Environment failed to start', { code: 'Timeout' })
629673
}
630-
this.log.info('devenv started after %d seconds: %s', timeout.elapsedTime / 1000, args.id)
674+
doLog('info', 'devenv started')
631675

632676
return devenv
633677
}

0 commit comments

Comments
 (0)