Skip to content

Commit d61b0f6

Browse files
author
sivakami
committed
Change pod delete logic.
1 parent 7326c93 commit d61b0f6

File tree

2 files changed

+212
-9
lines changed

2 files changed

+212
-9
lines changed

test/integration/swiftv2/helpers/az_helpers.go

Lines changed: 155 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package helpers
22

33
import (
4+
"context"
45
"fmt"
56
"os/exec"
67
"strings"
@@ -75,43 +76,191 @@ func EnsureNamespaceExists(kubeconfig, namespace string) error {
7576
return nil
7677
}
7778

78-
// DeletePod deletes a pod in the specified namespace and waits for it to be fully removed.
//
// The sequence is:
//  1. Run "kubectl delete pod", bounded by a 90s timeout.
//  2. Poll "kubectl get pod" up to 30 times, 2s apart, until it reports nothing.
//  3. If the pod is still present, issue a force delete (grace period 0) and
//     wait a further 10s.
//
// An error is returned only when the initial delete fails for a reason other
// than the 90s timeout. A failed force delete is logged as a warning and the
// function still returns nil (best-effort cleanup).
func DeletePod(kubeconfig, namespace, podName string) error {
	fmt.Printf("Deleting pod %s in namespace %s...\n", podName, namespace)

	// Bound the delete: kubectl waits for the pod to disappear before returning.
	deleteCtx, deleteCancel := context.WithTimeout(context.Background(), 90*time.Second)
	defer deleteCancel()

	cmd := exec.CommandContext(deleteCtx, "kubectl", "--kubeconfig", kubeconfig, "delete", "pod", podName, "-n", namespace, "--ignore-not-found=true")
	out, err := cmd.CombinedOutput()
	if err != nil {
		if deleteCtx.Err() != context.DeadlineExceeded {
			return fmt.Errorf("failed to delete pod %s in namespace %s: %s\n%s", podName, namespace, err, string(out))
		}
		// A timeout is not fatal: fall through to polling, then force delete.
		fmt.Printf("kubectl delete pod command timed out after 90s, polling for removal before force delete...\n")
	}

	// Wait for pod to be completely gone (critical for IP release)
	fmt.Printf("Waiting for pod %s to be fully removed...\n", podName)
	start := time.Now()
	for attempt := 1; attempt <= 30; attempt++ {
		checkCtx, checkCancel := context.WithTimeout(context.Background(), 10*time.Second)
		checkCmd := exec.CommandContext(checkCtx, "kubectl", "--kubeconfig", kubeconfig, "get", "pod", podName, "-n", namespace, "--ignore-not-found=true", "-o", "name")
		checkOut, checkErr := checkCmd.CombinedOutput()
		checkCancel()

		switch {
		case checkErr != nil:
			// A failed or timed-out check proves nothing about the pod; empty
			// output here must not be mistaken for "pod is gone".
			fmt.Printf("Warning: failed to check pod %s (attempt %d/30): %s\n%s\n", podName, attempt, checkErr, string(checkOut))
		case strings.TrimSpace(string(checkOut)) == "":
			fmt.Printf("Pod %s fully removed after %d seconds\n", podName, int(time.Since(start).Seconds()))
			// Extra wait to ensure IP reservation is released in DNC
			time.Sleep(5 * time.Second)
			return nil
		case attempt%5 == 0:
			fmt.Printf("Pod %s still terminating (attempt %d/30)...\n", podName, attempt)
		}
		time.Sleep(2 * time.Second)
	}

	// If pod still exists after 60 seconds, force delete
	fmt.Printf("Pod %s still exists after 60s, attempting force delete...\n", podName)
	forceCtx, forceCancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer forceCancel()

	forceCmd := exec.CommandContext(forceCtx, "kubectl", "--kubeconfig", kubeconfig, "delete", "pod", podName, "-n", namespace, "--grace-period=0", "--force", "--ignore-not-found=true")
	forceOut, forceErr := forceCmd.CombinedOutput()
	if forceErr != nil {
		// Best effort: report the failure but do not fail the cleanup.
		fmt.Printf("Warning: Force delete failed: %s\n%s\n", forceErr, string(forceOut))
	}

	// Wait a bit more for force delete to complete
	time.Sleep(10 * time.Second)
	fmt.Printf("Pod %s deletion completed (may have required force)\n", podName)
	return nil
}
87134

88-
// DeletePodNetworkInstance deletes a PodNetworkInstance and waits for it to be removed.
//
// The sequence is:
//  1. Run "kubectl delete podnetworkinstance", bounded by a 90s timeout so a
//     stuck finalizer cannot block the call forever.
//  2. Poll "kubectl get" up to 60 times, 2s apart, until it reports nothing.
//     Every 10th attempt the resource is described to report whether a
//     ReservationInUse condition is present.
//  3. If the resource is still present, patch its finalizers to an empty list.
//
// An error is returned only when the initial delete fails for a reason other
// than the 90s timeout. A failed finalizer patch is logged as a warning and
// the function still returns nil (best-effort cleanup).
func DeletePodNetworkInstance(kubeconfig, namespace, pniName string) error {
	fmt.Printf("Deleting PodNetworkInstance %s in namespace %s...\n", pniName, namespace)

	// Initiate PNI deletion. kubectl waits for finalizers by default, so the
	// call is bounded; otherwise the fallback below could never be reached.
	deleteCtx, deleteCancel := context.WithTimeout(context.Background(), 90*time.Second)
	defer deleteCancel()

	cmd := exec.CommandContext(deleteCtx, "kubectl", "--kubeconfig", kubeconfig, "delete", "podnetworkinstance", pniName, "-n", namespace, "--ignore-not-found=true")
	out, err := cmd.CombinedOutput()
	if err != nil {
		if deleteCtx.Err() != context.DeadlineExceeded {
			return fmt.Errorf("failed to delete PodNetworkInstance %s: %s\n%s", pniName, err, string(out))
		}
		fmt.Printf("kubectl delete podnetworkinstance command timed out after 90s, continuing to wait for removal...\n")
	}

	// Wait for PNI to be completely gone (it may take time for DNC to release reservations)
	fmt.Printf("Waiting for PodNetworkInstance %s to be fully removed...\n", pniName)
	start := time.Now()
	for attempt := 1; attempt <= 60; attempt++ {
		checkCtx, checkCancel := context.WithTimeout(context.Background(), 10*time.Second)
		checkCmd := exec.CommandContext(checkCtx, "kubectl", "--kubeconfig", kubeconfig, "get", "podnetworkinstance", pniName, "-n", namespace, "--ignore-not-found=true", "-o", "name")
		checkOut, checkErr := checkCmd.CombinedOutput()
		checkCancel()

		if checkErr != nil {
			// A failed or timed-out check proves nothing about the resource;
			// empty output here must not be mistaken for "PNI is gone".
			fmt.Printf("Warning: failed to check PodNetworkInstance %s (attempt %d/60): %s\n%s\n", pniName, attempt, checkErr, string(checkOut))
		} else if strings.TrimSpace(string(checkOut)) == "" {
			fmt.Printf("PodNetworkInstance %s fully removed after %d seconds\n", pniName, int(time.Since(start).Seconds()))
			return nil
		}

		if attempt%10 == 0 {
			// Check for ReservationInUse errors. The describe output is used
			// for diagnostics only, so its error is intentionally ignored.
			descCtx, descCancel := context.WithTimeout(context.Background(), 10*time.Second)
			descCmd := exec.CommandContext(descCtx, "kubectl", "--kubeconfig", kubeconfig, "describe", "podnetworkinstance", pniName, "-n", namespace)
			descOut, _ := descCmd.CombinedOutput()
			descCancel()

			if strings.Contains(string(descOut), "ReservationInUse") {
				fmt.Printf("PNI %s still has active reservations (attempt %d/60). Waiting for DNC to release...\n", pniName, attempt)
			} else {
				fmt.Printf("PNI %s still terminating (attempt %d/60)...\n", pniName, attempt)
			}
		}
		time.Sleep(2 * time.Second)
	}

	// If PNI still exists after 120 seconds, try to remove finalizers
	fmt.Printf("PNI %s still exists after 120s, attempting to remove finalizers...\n", pniName)
	patchCtx, patchCancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer patchCancel()

	patchCmd := exec.CommandContext(patchCtx, "kubectl", "--kubeconfig", kubeconfig, "patch", "podnetworkinstance", pniName, "-n", namespace, "-p", `{"metadata":{"finalizers":[]}}`, "--type=merge")
	patchOut, patchErr := patchCmd.CombinedOutput()
	if patchErr != nil {
		// Best effort: report the failure but do not fail the cleanup.
		fmt.Printf("Warning: Failed to remove finalizers: %s\n%s\n", patchErr, string(patchOut))
	} else {
		fmt.Printf("Finalizers removed, waiting for deletion...\n")
		time.Sleep(5 * time.Second)
	}

	fmt.Printf("PodNetworkInstance %s deletion completed\n", pniName)
	return nil
}
97186

98-
// DeletePodNetwork deletes a PodNetwork and waits for it to be removed.
//
// The sequence is:
//  1. Run "kubectl delete podnetwork", bounded by a 90s timeout so a stuck
//     finalizer cannot block the call forever.
//  2. Poll "kubectl get" up to 30 times, 2s apart, until it reports nothing.
//  3. If the resource is still present, patch its finalizers to an empty list
//     and wait a further 5s.
//
// An error is returned only when the initial delete fails for a reason other
// than the 90s timeout. A failed finalizer patch is logged as a warning and
// the function still returns nil (best-effort cleanup).
func DeletePodNetwork(kubeconfig, pnName string) error {
	fmt.Printf("Deleting PodNetwork %s...\n", pnName)

	// kubectl waits for finalizers by default, so the call is bounded;
	// otherwise the fallback below could never be reached.
	deleteCtx, deleteCancel := context.WithTimeout(context.Background(), 90*time.Second)
	defer deleteCancel()

	cmd := exec.CommandContext(deleteCtx, "kubectl", "--kubeconfig", kubeconfig, "delete", "podnetwork", pnName, "--ignore-not-found=true")
	out, err := cmd.CombinedOutput()
	if err != nil {
		if deleteCtx.Err() != context.DeadlineExceeded {
			return fmt.Errorf("failed to delete PodNetwork %s: %s\n%s", pnName, err, string(out))
		}
		fmt.Printf("kubectl delete podnetwork command timed out after 90s, continuing to wait for removal...\n")
	}

	// Wait for PN to be completely gone
	fmt.Printf("Waiting for PodNetwork %s to be fully removed...\n", pnName)
	start := time.Now()
	for attempt := 1; attempt <= 30; attempt++ {
		checkCtx, checkCancel := context.WithTimeout(context.Background(), 10*time.Second)
		checkCmd := exec.CommandContext(checkCtx, "kubectl", "--kubeconfig", kubeconfig, "get", "podnetwork", pnName, "--ignore-not-found=true", "-o", "name")
		checkOut, checkErr := checkCmd.CombinedOutput()
		checkCancel()

		switch {
		case checkErr != nil:
			// A failed or timed-out check proves nothing about the resource;
			// empty output here must not be mistaken for "PodNetwork is gone".
			fmt.Printf("Warning: failed to check PodNetwork %s (attempt %d/30): %s\n%s\n", pnName, attempt, checkErr, string(checkOut))
		case strings.TrimSpace(string(checkOut)) == "":
			fmt.Printf("PodNetwork %s fully removed after %d seconds\n", pnName, int(time.Since(start).Seconds()))
			return nil
		case attempt%10 == 0:
			fmt.Printf("PodNetwork %s still terminating (attempt %d/30)...\n", pnName, attempt)
		}
		time.Sleep(2 * time.Second)
	}

	// Try to remove finalizers if still stuck
	fmt.Printf("PodNetwork %s still exists, attempting to remove finalizers...\n", pnName)
	patchCtx, patchCancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer patchCancel()

	patchCmd := exec.CommandContext(patchCtx, "kubectl", "--kubeconfig", kubeconfig, "patch", "podnetwork", pnName, "-p", `{"metadata":{"finalizers":[]}}`, "--type=merge")
	patchOut, patchErr := patchCmd.CombinedOutput()
	if patchErr != nil {
		// Best effort: report the failure but do not fail the cleanup.
		fmt.Printf("Warning: Failed to remove finalizers: %s\n%s\n", patchErr, string(patchOut))
	}

	time.Sleep(5 * time.Second)
	fmt.Printf("PodNetwork %s deletion completed\n", pnName)
	return nil
}
107226

108-
// DeleteNamespace deletes a namespace and waits for it to be removed.
//
// The sequence is:
//  1. Run "kubectl delete namespace", bounded by a 90s timeout so a namespace
//     stuck in termination cannot block the call forever.
//  2. Poll "kubectl get" up to 60 times, 2s apart, until it reports nothing.
//  3. If the namespace is still present, patch its metadata finalizers to an
//     empty list and wait a further 5s.
//
// An error is returned only when the initial delete fails for a reason other
// than the 90s timeout. A failed finalizer patch is logged as a warning and
// the function still returns nil (best-effort cleanup).
func DeleteNamespace(kubeconfig, namespace string) error {
	fmt.Printf("Deleting namespace %s...\n", namespace)

	// kubectl waits for the namespace to terminate by default, so the call is
	// bounded; otherwise the fallback below could never be reached.
	deleteCtx, deleteCancel := context.WithTimeout(context.Background(), 90*time.Second)
	defer deleteCancel()

	cmd := exec.CommandContext(deleteCtx, "kubectl", "--kubeconfig", kubeconfig, "delete", "namespace", namespace, "--ignore-not-found=true")
	out, err := cmd.CombinedOutput()
	if err != nil {
		if deleteCtx.Err() != context.DeadlineExceeded {
			return fmt.Errorf("failed to delete namespace %s: %s\n%s", namespace, err, string(out))
		}
		fmt.Printf("kubectl delete namespace command timed out after 90s, continuing to wait for removal...\n")
	}

	// Wait for namespace to be completely gone
	fmt.Printf("Waiting for namespace %s to be fully removed...\n", namespace)
	start := time.Now()
	for attempt := 1; attempt <= 60; attempt++ {
		checkCtx, checkCancel := context.WithTimeout(context.Background(), 10*time.Second)
		checkCmd := exec.CommandContext(checkCtx, "kubectl", "--kubeconfig", kubeconfig, "get", "namespace", namespace, "--ignore-not-found=true", "-o", "name")
		checkOut, checkErr := checkCmd.CombinedOutput()
		checkCancel()

		switch {
		case checkErr != nil:
			// A failed or timed-out check proves nothing about the namespace;
			// empty output here must not be mistaken for "namespace is gone".
			fmt.Printf("Warning: failed to check namespace %s (attempt %d/60): %s\n%s\n", namespace, attempt, checkErr, string(checkOut))
		case strings.TrimSpace(string(checkOut)) == "":
			fmt.Printf("Namespace %s fully removed after %d seconds\n", namespace, int(time.Since(start).Seconds()))
			return nil
		case attempt%15 == 0:
			fmt.Printf("Namespace %s still terminating (attempt %d/60)...\n", namespace, attempt)
		}
		time.Sleep(2 * time.Second)
	}

	// Try to remove finalizers if still stuck
	// NOTE(review): namespaces also carry spec.finalizers, which this
	// metadata-only merge patch presumably does not clear — confirm that this
	// fallback actually unblocks a stuck namespace.
	fmt.Printf("Namespace %s still exists, attempting to remove finalizers...\n", namespace)
	patchCtx, patchCancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer patchCancel()

	patchCmd := exec.CommandContext(patchCtx, "kubectl", "--kubeconfig", kubeconfig, "patch", "namespace", namespace, "-p", `{"metadata":{"finalizers":[]}}`, "--type=merge")
	patchOut, patchErr := patchCmd.CombinedOutput()
	if patchErr != nil {
		// Best effort: report the failure but do not fail the cleanup.
		fmt.Printf("Warning: Failed to remove finalizers: %s\n%s\n", patchErr, string(patchOut))
	}

	time.Sleep(5 * time.Second)
	fmt.Printf("Namespace %s deletion completed\n", namespace)
	return nil
}
117266

test/integration/swiftv2/longRunningCluster/datapath.go

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -442,14 +442,68 @@ func CreateAllScenarios(testScenarios TestScenarios) error {
442442
}
443443

444444
// DeleteAllScenarios deletes resources for all test scenarios
445+
// Strategy: Delete all pods first, then delete shared PNI/PN/Namespace resources
445446
func DeleteAllScenarios(testScenarios TestScenarios) error {
447+
// Phase 1: Delete all pods first
448+
fmt.Printf("\n=== Phase 1: Deleting all pods ===\n")
446449
for _, scenario := range testScenarios.Scenarios {
447-
fmt.Printf("\n=== Deleting scenario: %s ===\n", scenario.Name)
448-
err := DeleteScenarioResources(scenario, testScenarios.BuildID)
450+
kubeconfig := fmt.Sprintf("/tmp/%s.kubeconfig", scenario.Cluster)
451+
vnetShort := strings.TrimPrefix(scenario.VnetName, "cx_vnet_")
452+
vnetShort = strings.ReplaceAll(vnetShort, "_", "-")
453+
subnetNameSafe := strings.ReplaceAll(scenario.SubnetName, "_", "-")
454+
pnName := fmt.Sprintf("pn-%s-%s-%s", testScenarios.BuildID, vnetShort, subnetNameSafe)
455+
podName := fmt.Sprintf("pod-%s", scenario.PodNameSuffix)
456+
457+
fmt.Printf("Deleting pod for scenario: %s\n", scenario.Name)
458+
err := helpers.DeletePod(kubeconfig, pnName, podName)
449459
if err != nil {
450-
return err
460+
fmt.Printf("Warning: Failed to delete pod for scenario %s: %v\n", scenario.Name, err)
461+
}
462+
}
463+
464+
// Phase 2: Delete shared PNI/PN/Namespace resources (grouped by vnet/subnet/cluster)
465+
fmt.Printf("\n=== Phase 2: Deleting shared PNI/PN/Namespace resources ===\n")
466+
resourceGroups := make(map[string]bool)
467+
468+
for _, scenario := range testScenarios.Scenarios {
469+
kubeconfig := fmt.Sprintf("/tmp/%s.kubeconfig", scenario.Cluster)
470+
vnetShort := strings.TrimPrefix(scenario.VnetName, "cx_vnet_")
471+
vnetShort = strings.ReplaceAll(vnetShort, "_", "-")
472+
subnetNameSafe := strings.ReplaceAll(scenario.SubnetName, "_", "-")
473+
pnName := fmt.Sprintf("pn-%s-%s-%s", testScenarios.BuildID, vnetShort, subnetNameSafe)
474+
pniName := fmt.Sprintf("pni-%s-%s-%s", testScenarios.BuildID, vnetShort, subnetNameSafe)
475+
476+
// Create unique key for this vnet/subnet/cluster combination
477+
resourceKey := fmt.Sprintf("%s:%s", scenario.Cluster, pnName)
478+
479+
// Skip if we already deleted resources for this combination
480+
if resourceGroups[resourceKey] {
481+
continue
482+
}
483+
resourceGroups[resourceKey] = true
484+
485+
fmt.Printf("\nDeleting shared resources for %s/%s on %s\n", scenario.VnetName, scenario.SubnetName, scenario.Cluster)
486+
487+
// Delete PodNetworkInstance
488+
err := helpers.DeletePodNetworkInstance(kubeconfig, pnName, pniName)
489+
if err != nil {
490+
fmt.Printf("Warning: Failed to delete PNI %s: %v\n", pniName, err)
491+
}
492+
493+
// Delete PodNetwork
494+
err = helpers.DeletePodNetwork(kubeconfig, pnName)
495+
if err != nil {
496+
fmt.Printf("Warning: Failed to delete PN %s: %v\n", pnName, err)
497+
}
498+
499+
// Delete namespace
500+
err = helpers.DeleteNamespace(kubeconfig, pnName)
501+
if err != nil {
502+
fmt.Printf("Warning: Failed to delete namespace %s: %v\n", pnName, err)
451503
}
452504
}
505+
506+
fmt.Printf("\n=== All scenarios deleted ===\n")
453507
return nil
454508
}
455509

0 commit comments

Comments
 (0)