Skip to content

Commit 3976e1f

Browse files
authored
Try to delete services created by CloudDebug integration tests when deinstrument fails (#2411)
- On deinstrument failure, the ECS services were not cleaned up properly. This has led to multiple zombie services in our integration test accounts.
- Also simplify the code using waiters.
1 parent 9032b18 commit 3976e1f

File tree

4 files changed

+36
-39
lines changed

4 files changed

+36
-39
lines changed

jetbrains-core/it/software/aws/toolkits/jetbrains/services/clouddebug/CloudDebugTestCase.kt

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@ import com.nhaarman.mockitokotlin2.mock
99
import org.junit.After
1010
import org.junit.Before
1111
import org.junit.Rule
12+
import software.amazon.awssdk.core.waiters.WaiterOverrideConfiguration
1213
import software.amazon.awssdk.http.apache.ApacheHttpClient
1314
import software.amazon.awssdk.regions.Region
1415
import software.amazon.awssdk.services.cloudformation.CloudFormationClient
1516
import software.amazon.awssdk.services.ecs.EcsClient
1617
import software.amazon.awssdk.services.ecs.model.AssignPublicIp
18+
import software.amazon.awssdk.services.ecs.model.DescribeServicesRequest
1719
import software.amazon.awssdk.services.ecs.model.LaunchType
1820
import software.amazon.awssdk.services.ecs.model.Service
1921
import software.aws.toolkits.core.region.AwsRegion
@@ -29,8 +31,7 @@ import software.aws.toolkits.jetbrains.services.ecs.EcsUtils
2931
import software.aws.toolkits.jetbrains.services.ecs.resources.EcsResources
3032
import software.aws.toolkits.jetbrains.utils.rules.CloudFormationLazyInitRule
3133
import java.nio.file.Paths
32-
import java.util.concurrent.CountDownLatch
33-
import java.util.concurrent.TimeUnit
34+
import java.time.Duration
3435

3536
abstract class CloudDebugTestCase(private val taskDefName: String) {
3637
protected lateinit var service: Service
@@ -69,7 +70,7 @@ abstract class CloudDebugTestCase(private val taskDefName: String) {
6970
runUnderRealCredentials(getProject()) {
7071
println("Instrumenting service")
7172
instrumentService()
72-
val instrumentedServiceName = "cloud-debug-${EcsUtils.serviceArnToName(service.serviceArn())}"
73+
val instrumentedServiceName = instrumentedServiceName()
7374
println("Waiting for $instrumentedServiceName to stabilize")
7475
ecsRule.ecsClient.waiter().waitUntilServicesStable {
7576
it.cluster(service.clusterArn())
@@ -87,15 +88,30 @@ abstract class CloudDebugTestCase(private val taskDefName: String) {
8788

8889
@After
8990
open fun tearDown() {
91+
try {
92+
deinstrumentService()
93+
} finally {
94+
// If deinstrumenting fails, or initialization doesn't work properly, we still want to try to delete the services, so kick that off
95+
runCatching { ecsClient.deleteService { it.cluster(service.clusterArn()).service(service.serviceArn()).force(true) } }
96+
runCatching { ecsClient.deleteService { it.cluster(service.clusterArn()).service(instrumentedServiceName()).force(true) } }
97+
}
98+
}
99+
100+
private fun deinstrumentService() {
90101
// TODO: this doesn't wait for the revert command to complete, but it fulfills our need to clean up
91102
if (::instrumentedService.isInitialized) {
92103
runUnderRealCredentials(getProject()) {
93-
deinstrumentService()
104+
DeinstrumentResourceFromExplorerAction.performAction(
105+
getProject(),
106+
service.clusterArn(),
107+
EcsUtils.originalServiceName(instrumentedService.serviceName()),
108+
null
109+
)
94110
println("Waiting for ${instrumentedService.serviceArn()} to be deinstrumented")
95-
ecsClient.waiter().waitUntilServicesInactive {
96-
it.cluster(instrumentedService.clusterArn())
97-
it.services(instrumentedService.serviceArn())
98-
}
111+
ecsClient.waiter().waitUntilServicesInactive(
112+
DescribeServicesRequest.builder().cluster(instrumentedService.clusterArn()).services(instrumentedService.serviceArn()).build(),
113+
WaiterOverrideConfiguration.builder().waitTimeout(Duration.ofMinutes(5)).build()
114+
)
99115
}
100116
// TODO: verify that no error toasts were created, or similar mechanism
101117
}
@@ -146,31 +162,17 @@ abstract class CloudDebugTestCase(private val taskDefName: String) {
146162
}
147163
}
148164

149-
private fun awaitCli(latch: CountDownLatch) = { result: Boolean ->
150-
latch.countDown()
151-
if (!result) {
152-
throw RuntimeException("CLI didn't complete successfully!")
153-
}
154-
}
155-
156165
private fun instrumentService() {
157-
val latch = CountDownLatch(1)
158-
InstrumentResourceAction.performAction(getProject(), service.clusterArn(), service.serviceArn(), instrumentationRole, null, awaitCli(latch))
159-
latch.await(5, TimeUnit.MINUTES)
160-
}
161-
162-
private fun deinstrumentService() {
163-
val latch = CountDownLatch(1)
164-
DeinstrumentResourceFromExplorerAction.performAction(
165-
getProject(),
166-
service.clusterArn(),
167-
EcsUtils.originalServiceName(instrumentedService.serviceName()),
168-
null,
169-
awaitCli(latch)
166+
InstrumentResourceAction.performAction(getProject(), service.clusterArn(), service.serviceArn(), instrumentationRole, null)
167+
println("Waiting for ${service.serviceArn()} to be instrumented")
168+
ecsClient.waiter().waitUntilServicesStable(
169+
DescribeServicesRequest.builder().cluster(service.clusterArn()).services(instrumentedServiceName()).build(),
170+
WaiterOverrideConfiguration.builder().waitTimeout(Duration.ofMinutes(5)).build()
170171
)
171-
latch.await(5, TimeUnit.MINUTES)
172172
}
173173

174+
private fun instrumentedServiceName() = "cloud-debug-${EcsUtils.serviceArnToName(service.serviceArn())}"
175+
174176
abstract fun getProject(): Project
175177

176178
companion object {

jetbrains-core/src/software/aws/toolkits/jetbrains/services/clouddebug/actions/DeinstrumentResourceAction.kt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@ class DeinstrumentResourceFromExplorerAction : SingleResourceNodeAction<EcsServi
4444
project: Project,
4545
clusterArn: String,
4646
instrumentedResourceName: String,
47-
selected: EcsServiceNode?,
48-
callback: ((Boolean) -> Unit)? = null
47+
selected: EcsServiceNode?
4948
) {
5049
val originalServiceName = EcsUtils.originalServiceName(instrumentedResourceName)
5150
DeinstrumentAction(
@@ -55,7 +54,7 @@ class DeinstrumentResourceFromExplorerAction : SingleResourceNodeAction<EcsServi
5554
message("cloud_debug.instrument_resource.disable"),
5655
message("cloud_debug.instrument_resource.disable.success", originalServiceName),
5756
message("cloud_debug.instrument_resource.disable.failed", originalServiceName)
58-
).runAction(selected, callback)
57+
).runAction(selected)
5958
}
6059
}
6160
}

jetbrains-core/src/software/aws/toolkits/jetbrains/services/clouddebug/actions/InstrumentResourceAction.kt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,7 @@ class InstrumentResourceAction(
7474
clusterArn: String,
7575
serviceArn: String,
7676
roleArn: String,
77-
selected: EcsServiceNode?,
78-
callback: ((Boolean) -> Unit)? = null
77+
selected: EcsServiceNode?
7978
) {
8079
InstrumentAction(
8180
project,
@@ -85,7 +84,7 @@ class InstrumentResourceAction(
8584
message("cloud_debug.instrument_resource.enable"),
8685
message("cloud_debug.instrument_resource.enable.success", EcsUtils.serviceArnToName(serviceArn)),
8786
message("cloud_debug.instrument_resource.enable.fail", EcsUtils.serviceArnToName(serviceArn))
88-
).runAction(selected, callback)
87+
).runAction(selected)
8988
}
9089
}
9190
}

jetbrains-core/src/software/aws/toolkits/jetbrains/services/clouddebug/actions/PseCliAction.kt

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ abstract class PseCliAction(val project: Project, val actionName: String, privat
5858
abstract fun buildCommandLine(cmd: GeneralCommandLine)
5959
protected abstract fun produceTelemetry(startTime: Instant, result: Result, version: String?)
6060

61-
fun runAction(selectedNode: AbstractTreeNode<*>? = null, callback: ((Boolean) -> Unit)? = null) {
61+
fun runAction(selectedNode: AbstractTreeNode<*>? = null) {
6262
ProgressManager.getInstance().run(
6363
object : Task.Backgroundable(
6464
project,
@@ -105,7 +105,6 @@ abstract class PseCliAction(val project: Project, val actionName: String, privat
105105
null
106106
}
107107
}.toCompletableFuture().join() ?: run {
108-
callback?.invoke(false)
109108
return
110109
}
111110

@@ -162,14 +161,12 @@ abstract class PseCliAction(val project: Project, val actionName: String, privat
162161
)
163162
// reset the cache
164163
project.clearResourceForCurrentConnection(CloudDebuggingResources.LIST_INSTRUMENTED_RESOURCES)
165-
callback?.invoke(true)
166164
} else {
167165
notifyError(
168166
actionName,
169167
failureMessage,
170168
project
171169
)
172-
callback?.invoke(false)
173170
}
174171

175172
// Redraw cluster level if the action was taken from a node

0 commit comments

Comments
 (0)