Skip to content

Commit e42c3a6

Browse files
authored
Only remove finalizer after there are no instances to termiante (#265)
1 parent 113f999 commit e42c3a6

File tree

6 files changed

+79
-29
lines changed

6 files changed

+79
-29
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1212
- Change `renovate` configuration to only get upgrades for CAPA compatible versions of k8s dependencies.
1313
- Don't reconcile the `ShareReconciler` if only the status field has changed.
1414

15+
### Fixed
16+
17+
- Only remove `finalizer` after there are no more karpenter instances to terminate.
18+
1519
## [0.19.0] - 2025-06-17
1620

1721
### Added

controllers/karpentermachinepool_controller.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,10 +213,18 @@ func (r *KarpenterMachinePoolReconciler) reconcileDelete(ctx context.Context, lo
213213

214214
// Terminate EC2 instances with the karpenter.sh/nodepool tag matching the KarpenterMachinePool name
215215
logger.Info("Terminating EC2 instances for KarpenterMachinePool", "karpenterMachinePoolName", karpenterMachinePool.Name)
216-
err = ec2Client.TerminateInstancesByTag(ctx, logger, "karpenter.sh/nodepool", karpenterMachinePool.Name)
216+
instanceIDs, err := ec2Client.TerminateInstancesByTag(ctx, logger, "karpenter.sh/nodepool", karpenterMachinePool.Name)
217217
if err != nil {
218218
return reconcile.Result{}, fmt.Errorf("failed to terminate EC2 instances: %w", err)
219219
}
220+
221+
logger.Info("Found instances", "instanceIDs", instanceIDs)
222+
223+
// Requeue if we find instances to terminate. Once there are no instances to terminate, we proceed to remove the finalizer.
224+
// We do this when the cluster is being deleted, to avoid removing the finalizer before karpenter launches a new instance that would be left over.
225+
if len(instanceIDs) > 0 {
226+
return reconcile.Result{RequeueAfter: 30 * time.Second}, nil
227+
}
220228
}
221229

222230
// Create deep copy of the reconciled object so we can change it

controllers/karpentermachinepool_controller_test.go

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -261,15 +261,42 @@ var _ = Describe("KarpenterMachinePool reconciler", func() {
261261
err = fakeCtrlClient.Delete(ctx, cluster)
262262
Expect(err).NotTo(HaveOccurred())
263263
})
264+
// This test is a bit cumbersome because we are deleting CRs, so we can't use different `It` blocks or the CRs would be gone.
265+
// We first mock the call to `TerminateInstancesByTag` to return some instances so that we can test
266+
// the behavior when there are pending instances to remove.
267+
// Then we manually/explicitly call the reconciler inside the test again, to be able to test the behavior
268+
// when there are no instances to remove.
269+
When("there are ec2 instances from karpenter", func() {
270+
BeforeEach(func() {
271+
ec2Client.TerminateInstancesByTagReturnsOnCall(0, []string{"i-abc123", "i-def456"}, nil)
272+
})
264273

265-
It("deletes KarpenterMachinePool ec2 instances and finalizer", func() {
266-
Expect(reconcileErr).NotTo(HaveOccurred())
267-
Expect(ec2Client.TerminateInstancesByTagCallCount()).To(Equal(1))
274+
It("deletes KarpenterMachinePool ec2 instances and finalizer", func() {
275+
Expect(reconcileErr).NotTo(HaveOccurred())
276+
Expect(ec2Client.TerminateInstancesByTagCallCount()).To(Equal(1))
277+
Expect(reconcileResult.RequeueAfter).To(Equal(30 * time.Second))
268278

269-
karpenterMachinePoolList := &karpenterinfra.KarpenterMachinePoolList{}
270-
err := fakeCtrlClient.List(ctx, karpenterMachinePoolList)
271-
Expect(err).NotTo(HaveOccurred())
272-
Expect(karpenterMachinePoolList.Items).To(HaveLen(0))
279+
karpenterMachinePoolList := &karpenterinfra.KarpenterMachinePoolList{}
280+
err := fakeCtrlClient.List(ctx, karpenterMachinePoolList)
281+
Expect(err).NotTo(HaveOccurred())
282+
// Finalizer should be there blocking the deletion of the CR
283+
Expect(karpenterMachinePoolList.Items).To(HaveLen(1))
284+
285+
ec2Client.TerminateInstancesByTagReturnsOnCall(0, nil, nil)
286+
287+
reconcileResult, reconcileErr = reconciler.Reconcile(ctx, ctrl.Request{
288+
NamespacedName: types.NamespacedName{
289+
Namespace: KarpenterMachinePoolNamespace,
290+
Name: KarpenterMachinePoolName,
291+
},
292+
})
293+
294+
karpenterMachinePoolList = &karpenterinfra.KarpenterMachinePoolList{}
295+
err = fakeCtrlClient.List(ctx, karpenterMachinePoolList)
296+
Expect(err).NotTo(HaveOccurred())
297+
// Finalizer should've been removed and the CR should be gone
298+
Expect(karpenterMachinePoolList.Items).To(HaveLen(0))
299+
})
273300
})
274301
})
275302
})

pkg/aws/ec2client.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,9 @@ func getEc2Tags(t map[string]string) []*ec2.Tag {
172172
}
173173

174174
// TerminateInstancesByTag terminates all EC2 instances that have the specified tag key and value.
175-
func (a *AWSEC2) TerminateInstancesByTag(ctx context.Context, logger logr.Logger, tagKey, tagValue string) error {
175+
func (a *AWSEC2) TerminateInstancesByTag(ctx context.Context, logger logr.Logger, tagKey, tagValue string) ([]string, error) {
176+
ids := []string{}
177+
176178
logger.Info("Finding EC2 instances with tag", "tagKey", tagKey, "tagValue", tagValue)
177179

178180
// Create filter for the tag
@@ -188,7 +190,7 @@ func (a *AWSEC2) TerminateInstancesByTag(ctx context.Context, logger logr.Logger
188190
Filters: filter,
189191
})
190192
if err != nil {
191-
return errors.WithStack(err)
193+
return ids, errors.WithStack(err)
192194
}
193195

194196
// Collect instance IDs
@@ -206,7 +208,7 @@ func (a *AWSEC2) TerminateInstancesByTag(ctx context.Context, logger logr.Logger
206208
// If no instances found, return
207209
if len(instanceIds) == 0 {
208210
logger.Info("No instances found with the specified tag")
209-
return nil
211+
return ids, nil
210212
}
211213

212214
// Terminate the instances
@@ -215,9 +217,13 @@ func (a *AWSEC2) TerminateInstancesByTag(ctx context.Context, logger logr.Logger
215217
InstanceIds: instanceIds,
216218
})
217219
if err != nil {
218-
return errors.WithStack(err)
220+
return nil, errors.WithStack(err)
219221
}
220222

221-
logger.Info("Successfully requested termination of instances", "count", len(instanceIds), "tagKey", tagKey, "tagValue", tagValue)
222-
return nil
223+
for _, id := range instanceIds {
224+
ids = append(ids, *id)
225+
}
226+
227+
logger.Info("Successfully requested termination of instances", "count", len(ids), "instances", ids, "tagKey", tagKey, "tagValue", tagValue)
228+
return ids, nil
223229
}

pkg/resolver/package.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ type AWSClients interface {
2626
type EC2Client interface {
2727
CreateSecurityGroupForResolverEndpoints(ctx context.Context, vpcId, groupName string, tags map[string]string) (string, error)
2828
DeleteSecurityGroupForResolverEndpoints(ctx context.Context, logger logr.Logger, vpcId, groupName string) error
29-
TerminateInstancesByTag(ctx context.Context, logger logr.Logger, tagKey, tagValue string) error
29+
TerminateInstancesByTag(ctx context.Context, logger logr.Logger, tagKey, tagValue string) ([]string, error)
3030
}
3131

3232
type ResourceShare struct {

pkg/resolver/resolverfakes/fake_ec2client.go

Lines changed: 19 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)