Skip to content

Commit d681482

Browse files
weshayutin, kaovilai, and claude
authored
E2E Fix: remove the finalizers on HCP to allow force delete after timeout expires (#1848)
* Propagate errors from HCP deletion checks

  Previously, IsHCDeleted and IsHCPDeleted only returned a boolean, causing actual errors to be lost and reported as generic timeout errors. This made debugging difficult when deletion checks failed due to API errors rather than the resource still existing.

  Changes:
  - Modified IsHCDeleted to return (bool, error)
  - Modified IsHCPDeleted to return (bool, error)
  - Updated WaitForHCDeletion and WaitForHCPDeletion to handle errors
  - Updated test cases to handle the new error returns

  This provides better error visibility for troubleshooting HCP deletion timeouts.

  🤖 Generated with [Claude Code](https://claude.ai/code)

  Co-Authored-By: Claude <[email protected]>
  Signed-off-by: Tiger Kaovilai <[email protected]>

* nuke hcp via finalizers

---------

Signed-off-by: Tiger Kaovilai <[email protected]>
Co-authored-by: Tiger Kaovilai <[email protected]>
Co-authored-by: Claude <[email protected]>
1 parent b42c3dc commit d681482

File tree

2 files changed

+101
-19
lines changed

2 files changed

+101
-19
lines changed

tests/e2e/lib/hcp/hcp.go

Lines changed: 97 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,39 @@ func (h *HCHandler) RemoveHCP(timeout time.Duration) error {
6666
log.Printf("\tWaiting for the HC to be deleted")
6767
err := wait.PollUntilContextTimeout(h.Ctx, time.Second*5, timeout, true, func(ctx context.Context) (bool, error) {
6868
log.Printf("\tAttempting to verify HC deletion...")
69-
result := IsHCDeleted(h)
70-
log.Printf("\tHC deletion check result: %v", result)
71-
return result, nil
69+
deleted, err := IsHCDeleted(h)
70+
if err != nil {
71+
log.Printf("\tHC deletion check error: %v", err)
72+
return false, err
73+
}
74+
log.Printf("\tHC deletion check result: %v", deleted)
75+
return deleted, nil
7276
})
7377

7478
if err != nil {
75-
return fmt.Errorf("failed to wait for HC deletion: %v", err)
79+
log.Printf("HC deletion timed out, attempting to nuke resources with finalizers")
80+
if nukeErr := h.NukeHostedCluster(); nukeErr != nil {
81+
return fmt.Errorf("failed to wait for HC deletion (timeout: %v) and failed to nuke resources: %v", err, nukeErr)
82+
}
83+
84+
// Try deletion again after nuking finalizers
85+
log.Printf("Retrying HC deletion after removing finalizers")
86+
retryErr := wait.PollUntilContextTimeout(h.Ctx, time.Second*5, time.Minute*5, true, func(ctx context.Context) (bool, error) {
87+
log.Printf("\tRetry: Attempting to verify HC deletion...")
88+
deleted, err := IsHCDeleted(h)
89+
if err != nil {
90+
log.Printf("\tRetry: HC deletion check error: %v", err)
91+
return false, err
92+
}
93+
log.Printf("\tRetry: HC deletion check result: %v", deleted)
94+
return deleted, nil
95+
})
96+
97+
if retryErr != nil {
98+
return fmt.Errorf("failed to wait for HC deletion even after removing finalizers (original timeout: %v, retry error: %v)", err, retryErr)
99+
}
100+
101+
log.Printf("\tHC successfully deleted after removing finalizers")
76102
}
77103

78104
return nil
@@ -221,22 +247,76 @@ func (h *HCHandler) DeleteHCSecrets() error {
221247

222248
// WaitForHCDeletion waits for the HostedCluster to be deleted
223249
func (h *HCHandler) WaitForHCDeletion() error {
224-
return wait.PollUntilContextTimeout(h.Ctx, WaitForNextCheckTimeout, Wait10Min, true, func(ctx context.Context) (bool, error) {
225-
return IsHCDeleted(h), nil
250+
err := wait.PollUntilContextTimeout(h.Ctx, WaitForNextCheckTimeout, Wait10Min, true, func(ctx context.Context) (bool, error) {
251+
deleted, err := IsHCDeleted(h)
252+
if err != nil {
253+
// Return the error to stop polling and propagate the error details
254+
return false, err
255+
}
256+
return deleted, nil
226257
})
258+
259+
if err != nil {
260+
log.Printf("HC deletion timed out in WaitForHCDeletion, attempting to nuke resources with finalizers")
261+
if nukeErr := h.NukeHostedCluster(); nukeErr != nil {
262+
return fmt.Errorf("failed to wait for HC deletion (timeout: %v) and failed to nuke resources: %v", err, nukeErr)
263+
}
264+
265+
// Try deletion again after nuking finalizers
266+
log.Printf("Retrying HC deletion after removing finalizers in WaitForHCDeletion")
267+
retryErr := wait.PollUntilContextTimeout(h.Ctx, WaitForNextCheckTimeout, time.Minute*5, true, func(ctx context.Context) (bool, error) {
268+
deleted, err := IsHCDeleted(h)
269+
if err != nil {
270+
return false, err
271+
}
272+
return deleted, nil
273+
})
274+
275+
if retryErr != nil {
276+
return fmt.Errorf("failed to wait for HC deletion even after removing finalizers (original timeout: %v, retry error: %v)", err, retryErr)
277+
}
278+
279+
log.Printf("HC successfully deleted after removing finalizers in WaitForHCDeletion")
280+
}
281+
282+
return nil
227283
}
228284

229285
// WaitForHCPDeletion waits for the HostedControlPlane to be deleted
230286
func (h *HCHandler) WaitForHCPDeletion(hcp *hypershiftv1.HostedControlPlane) error {
231287
return wait.PollUntilContextTimeout(h.Ctx, WaitForNextCheckTimeout, Wait10Min, true, func(ctx context.Context) (bool, error) {
232-
return IsHCPDeleted(h, hcp), nil
288+
deleted, err := IsHCPDeleted(h, hcp)
289+
if err != nil {
290+
// Return the error to stop polling and propagate the error details
291+
return false, err
292+
}
293+
return deleted, nil
233294
})
234295
}
235296

236297
// NukeHostedCluster removes all resources associated with a HostedCluster
237298
func (h *HCHandler) NukeHostedCluster() error {
238299
// List of resource types to check
239300
log.Printf("\tNuking HostedCluster")
301+
302+
// First, handle HostedCluster resources in the clusters namespace
303+
if h.HostedCluster != nil {
304+
log.Printf("\tNUKE: Checking HostedCluster %s in namespace %s for finalizers", h.HostedCluster.Name, h.HostedCluster.Namespace)
305+
hc := &hypershiftv1.HostedCluster{}
306+
err := h.Client.Get(h.Ctx, types.NamespacedName{
307+
Name: h.HostedCluster.Name,
308+
Namespace: h.HostedCluster.Namespace,
309+
}, hc)
310+
if err == nil && len(hc.GetFinalizers()) > 0 {
311+
log.Printf("\tNUKE: Removing finalizers from HostedCluster %s", hc.Name)
312+
hc.SetFinalizers([]string{})
313+
if err := h.Client.Update(h.Ctx, hc); err != nil {
314+
return fmt.Errorf("\tNUKE: Error removing finalizers from HostedCluster %s: %v", hc.Name, err)
315+
}
316+
}
317+
}
318+
319+
// Then handle other resources in the HCP namespace
240320
resourceTypes := []struct {
241321
kind string
242322
gvk schema.GroupVersionKind
@@ -513,10 +593,10 @@ func handleDeploymentValidationFailure(ctx context.Context, ocClient client.Clie
513593
}
514594

515595
// IsHCPDeleted checks if a HostedControlPlane has been deleted
516-
func IsHCPDeleted(h *HCHandler, hcp *hypershiftv1.HostedControlPlane) bool {
596+
func IsHCPDeleted(h *HCHandler, hcp *hypershiftv1.HostedControlPlane) (bool, error) {
517597
if hcp == nil {
518598
log.Printf("\tNo HCP provided, assuming deleted")
519-
return true
599+
return true, nil
520600
}
521601
log.Printf("\tChecking if HCP %s is deleted...", hcp.Name)
522602
newHCP := &hypershiftv1.HostedControlPlane{}
@@ -526,20 +606,20 @@ func IsHCPDeleted(h *HCHandler, hcp *hypershiftv1.HostedControlPlane) bool {
526606
if err != nil {
527607
if apierrors.IsNotFound(err) {
528608
log.Printf("\tHCP %s is confirmed deleted", hcp.Name)
529-
return true
609+
return true, nil
530610
}
531611
log.Printf("\tHCP %s deletion check failed with error: %v", hcp.Name, err)
532-
return false
612+
return false, fmt.Errorf("failed to check HCP deletion: %w", err)
533613
}
534614
log.Printf("\tHCP %s still exists", hcp.Name)
535-
return false
615+
return false, nil
536616
}
537617

538618
// IsHCDeleted checks if a HostedCluster has been deleted
539-
func IsHCDeleted(h *HCHandler) bool {
619+
func IsHCDeleted(h *HCHandler) (bool, error) {
540620
if h.HostedCluster == nil {
541621
log.Printf("\tNo HostedCluster provided, assuming deleted")
542-
return true
622+
return true, nil
543623
}
544624
log.Printf("\tChecking if HC %s is deleted...", h.HostedCluster.Name)
545625
newHC := &hypershiftv1.HostedCluster{}
@@ -549,13 +629,13 @@ func IsHCDeleted(h *HCHandler) bool {
549629
if err != nil {
550630
if apierrors.IsNotFound(err) {
551631
log.Printf("\tHC %s is confirmed deleted", h.HostedCluster.Name)
552-
return true
632+
return true, nil
553633
}
554634
log.Printf("\tHC %s deletion check failed with error: %v", h.HostedCluster.Name, err)
555-
return false
635+
return false, fmt.Errorf("failed to check HC deletion: %w", err)
556636
}
557637
log.Printf("\tHC %s still exists", h.HostedCluster.Name)
558-
return false
638+
return false, nil
559639
}
560640

561641
// GetHCPNamespace returns the namespace for a HostedControlPlane

tests/e2e/lib/hcp/hcp_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,8 @@ func TestIsHCPDeleted(t *testing.T) {
218218
}
219219

220220
// Call IsHCPDeleted
221-
result := IsHCPDeleted(h, tt.hcp)
221+
result, err := IsHCPDeleted(h, tt.hcp)
222+
g.Expect(err).ToNot(gomega.HaveOccurred())
222223
g.Expect(result).To(gomega.Equal(tt.expectedResult))
223224
})
224225
}
@@ -277,7 +278,8 @@ func TestIsHCDeleted(t *testing.T) {
277278
}
278279

279280
// Call IsHCDeleted
280-
result := IsHCDeleted(h)
281+
result, err := IsHCDeleted(h)
282+
g.Expect(err).ToNot(gomega.HaveOccurred())
281283
g.Expect(result).To(gomega.Equal(tt.expectedResult))
282284
})
283285
}

0 commit comments

Comments
 (0)