2929 echo "KARPENTER_NODE_ROLE: ${{ vars.KARPENTER_NODE_ROLE }}"
3030 echo "KARPENTER_INSTANCE_PROFILE: ${{ vars.KARPENTER_INSTANCE_PROFILE }}"
3131 echo "KARPENTER_NAMESPACE: ${{ vars.KARPENTER_NAMESPACE }}"
32- if [[ -z "${{ vars.KARPENTER_NODEPOOL_NAME }}" ]]; then
33- echo "WARNING: KARPENTER_NODEPOOL_NAME variable not found. Karpenter resources may not be deployed."
34- fi
35- if [[ -z "${{ vars.KARPENTER_NODE_ROLE }}" ]]; then
36- echo "WARNING: KARPENTER_NODE_ROLE variable not found. Karpenter resources may not be deployed."
37- fi
38- if [[ -z "${{ vars.KARPENTER_INSTANCE_PROFILE }}" ]]; then
39- echo "WARNING: KARPENTER_INSTANCE_PROFILE variable not found. Karpenter resources may not be deployed."
40- fi
41- if [[ -z "${{ vars.KARPENTER_NAMESPACE }}" ]]; then
42- echo "WARNING: KARPENTER_NAMESPACE variable not found. Karpenter resources may not be deployed."
43- fi
4432 if [[ -z "${{ vars.CLUSTER_NAME }}" ]]; then
4533 echo "ERROR: CLUSTER_NAME variable not found. Infrastructure may not be deployed."
4634 exit 1
@@ -72,81 +60,69 @@ jobs:
7260 continue-on-error : true
7361
7462 # ---------------------------
75- # Delete ArgoCD Applications
63+ # Delete ArgoCD Applications with timeout
7664 # ---------------------------
7765 - name : Delete ArgoCD Applications
7866 run : |
79- kubectl delete application ${{ vars.APP_NAME }} -n ${{ vars.ARGOCD_NAMESPACE }} --ignore-not-found
80- kubectl delete application kube-prometheus-stack -n ${{ vars.ARGOCD_NAMESPACE }} --ignore-not-found
67+ kubectl delete application ${{ vars.APP_NAME }} -n ${{ vars.ARGOCD_NAMESPACE }} --ignore-not-found --timeout=60s || true
68+ kubectl delete application kube-prometheus-stack -n ${{ vars.ARGOCD_NAMESPACE }} --ignore-not-found --timeout=60s || true
8169 continue-on-error : true
8270
8371 # ---------------------------
84- # Delete Karpenter Resources (Updated)
85- # ---------------------------
86-
87- # Replace the Karpenter cleanup section in your workflow with this enhanced version
88-
89- # ---------------------------
90- # Scale down workloads FIRST
72+ # Scale down workloads FIRST with timeouts
9173 # ---------------------------
9274 - name : Scale down all workloads before cleanup
9375 run : |
9476 echo "Scaling down all deployments and deleting services to trigger LB cleanup..."
9577
96- # Scale down ALL deployments across ALL namespaces (not just app namespace)
78+ # Scale down ALL deployments across ALL namespaces with timeout
9779 kubectl get deployments --all-namespaces -o json | jq -r '.items[] | "\(.metadata.namespace) \(.metadata.name)"' | while read namespace deployment; do
9880 echo "Scaling down deployment $deployment in namespace $namespace"
99- kubectl scale deployment $deployment --replicas=0 -n $namespace || true
81+ kubectl scale deployment $deployment --replicas=0 -n $namespace --timeout=30s || true
10082 done
10183
10284 # Scale down daemonsets that might be running
10385 kubectl get daemonsets --all-namespaces -o json | jq -r '.items[] | "\(.metadata.namespace) \(.metadata.name)"' | while read namespace daemonset; do
10486 echo "Deleting daemonset $daemonset in namespace $namespace"
105- kubectl delete daemonset $daemonset -n $namespace --ignore-not-found || true
87+ kubectl delete daemonset $daemonset -n $namespace --ignore-not-found --timeout=60s || true
10688 done
10789
10890 # Delete ALL services of type LoadBalancer IMMEDIATELY
10991 echo "Deleting LoadBalancer services..."
11092 kubectl get services --all-namespaces -o json | jq -r '.items[] | select(.spec.type=="LoadBalancer") | "\(.metadata.namespace) \(.metadata.name)"' | while read namespace service; do
11193 echo "Deleting service $service in namespace $namespace"
112- kubectl delete service $service -n $namespace --ignore-not-found || true
94+ kubectl delete service $service -n $namespace --ignore-not-found --timeout=60s || true
11395 done
96+
97+ # Wait for LoadBalancers to be cleaned up
98+ echo "Waiting for LoadBalancers to be cleaned up..."
99+ sleep 60
114100
101+ # ---------------------------
102+ # Clean up Karpenter Resources with enhanced error handling
103+ # ---------------------------
115104 - name : Delete Karpenter Provisioners and AWSNodeTemplates
116105 run : |
117106 echo "Deleting Karpenter Provisioners..."
118- kubectl delete provisioner ${{ vars.KARPENTER_NODEPOOL_NAME }} --ignore-not-found -n ${{ vars.KARPENTER_NAMESPACE }} || true
119- kubectl delete provisioner --all -n ${{ vars.KARPENTER_NAMESPACE }} --ignore-not-found || true
120-
107+ kubectl delete provisioner ${{ vars.KARPENTER_NODEPOOL_NAME }} --ignore-not-found -n ${{ vars.KARPENTER_NAMESPACE }} --timeout=60s || true
108+ kubectl delete provisioner --all -n ${{ vars.KARPENTER_NAMESPACE }} --ignore-not-found --timeout=60s || true
109+
121110 echo "Deleting Karpenter AWSNodeTemplates..."
122- kubectl delete awsnodetemplate ${{ vars.KARPENTER_NODECLASS_NAME }} --ignore-not-found -n ${{ vars.KARPENTER_NAMESPACE }} || true
123- kubectl delete awsnodetemplate --all -n ${{ vars.KARPENTER_NAMESPACE }} --ignore-not-found || true
124-
125- echo "Waiting for resources to be cleaned up..."
126- sleep 10
127- continue-on-error : true
128-
129- - name : Clean up Karpenter CRDs and Webhooks
130- run : |
131- echo "Deleting Karpenter CRDs..."
132- kubectl delete crd provisioners.karpenter.sh --ignore-not-found || true
133- kubectl delete crd awsnodetemplates.karpenter.k8s.aws --ignore-not-found || true
111+ kubectl delete awsnodetemplate ${{ vars.KARPENTER_NODECLASS_NAME }} --ignore-not-found -n ${{ vars.KARPENTER_NAMESPACE }} --timeout=60s || true
112+ kubectl delete awsnodetemplate --all -n ${{ vars.KARPENTER_NAMESPACE }} --ignore-not-found --timeout=60s || true
134113
135- echo "Deleting Karpenter webhooks..."
136- kubectl delete validatingwebhookconfiguration defaulting.webhook.karpenter.sh --ignore-not-found || true
137- kubectl delete validatingwebhookconfiguration validation.webhook.karpenter.sh --ignore-not-found || true
138- kubectl delete mutatingwebhookconfiguration defaulting.webhook.karpenter.sh --ignore-not-found || true
114+ echo "Deleting new Karpenter resources (NodePools, EC2NodeClasses)..."
115+ kubectl delete nodepool --all --ignore-not-found --timeout=60s || true
116+ kubectl delete ec2nodeclass --all --ignore-not-found --timeout=60s || true
139117
140- echo "Deleting Karpenter finalizers if stuck..."
141- kubectl patch crd provisioners.karpenter.sh -p '{"metadata":{"finalizers":[]}}' --type=merge || true
142- kubectl patch crd awsnodetemplates.karpenter.k8s.aws -p '{"metadata":{"finalizers":[]}}' --type=merge || true
118+ echo "Waiting for resources to be cleaned up..."
119+ sleep 30
143120 continue-on-error : true
144121
145-
146122 - name : Uninstall Karpenter Helm Release
147123 run : |
148124 echo "Uninstalling Karpenter Helm release..."
149- helm uninstall karpenter -n ${{ vars.KARPENTER_NAMESPACE }} || true
125+ helm uninstall karpenter -n ${{ vars.KARPENTER_NAMESPACE }} --timeout=300s || true
150126
151127 echo "Waiting for pods to terminate..."
152128 kubectl wait --for=delete pod -l app.kubernetes.io/name=karpenter -n ${{ vars.KARPENTER_NAMESPACE }} --timeout=120s || true
@@ -158,82 +134,115 @@ jobs:
158134 - name : Clean up Karpenter CRDs and Webhooks
159135 run : |
160136 echo "Deleting Karpenter CRDs..."
161- kubectl delete crd nodepools .karpenter.sh --ignore-not-found || true
162- kubectl delete crd provisioners .karpenter.sh --ignore-not-found || true
163- kubectl delete crd awsnodetemplates .karpenter.k8s.aws --ignore-not-found || true
164- kubectl delete crd ec2nodeclasses.karpenter.k8s.aws --ignore-not-found || true
137+ kubectl delete crd provisioners.karpenter.sh --ignore-not-found --timeout=60s || true
138+ kubectl delete crd awsnodetemplates.karpenter.k8s.aws --ignore-not-found --timeout=60s || true
139+ kubectl delete crd nodepools.karpenter.sh --ignore-not-found --timeout=60s || true
140+ kubectl delete crd ec2nodeclasses.karpenter.k8s.aws --ignore-not-found --timeout=60s || true
165141
166142 echo "Deleting Karpenter webhooks..."
167143 kubectl delete validatingwebhookconfiguration defaulting.webhook.karpenter.sh --ignore-not-found || true
168144 kubectl delete validatingwebhookconfiguration validation.webhook.karpenter.sh --ignore-not-found || true
169145 kubectl delete mutatingwebhookconfiguration defaulting.webhook.karpenter.sh --ignore-not-found || true
170146
171- echo "Deleting Karpenter finalizers if stuck..."
147+ echo "Removing finalizers from stuck CRDs..."
148+ kubectl patch crd provisioners.karpenter.sh -p '{"metadata":{"finalizers":[]}}' --type=merge || true
149+ kubectl patch crd awsnodetemplates.karpenter.k8s.aws -p '{"metadata":{"finalizers":[]}}' --type=merge || true
172150 kubectl patch crd nodepools.karpenter.sh -p '{"metadata":{"finalizers":[]}}' --type=merge || true
173151 kubectl patch crd ec2nodeclasses.karpenter.k8s.aws -p '{"metadata":{"finalizers":[]}}' --type=merge || true
174152 continue-on-error : true
175153
176154 # ---------------------------
177- # Uninstall Other Helm Releases
155+ # Uninstall Other Helm Releases with timeouts
178156 # ---------------------------
179157 - name : Uninstall Helm Releases
180158 run : |
181- helm uninstall ${{ vars.APP_NAME }} -n ${{ vars.APP_NAMESPACE }} || true
182- helm uninstall kube-prometheus-stack -n ${{ vars.MONITORING_NAMESPACE }} || true
183- helm uninstall ingress-nginx -n ingress-nginx || true
184- helm uninstall argocd -n ${{ vars.ARGOCD_NAMESPACE }} || true
159+ helm uninstall ${{ vars.APP_NAME }} -n ${{ vars.APP_NAMESPACE }} --timeout=300s || true
160+ helm uninstall kube-prometheus-stack -n ${{ vars.MONITORING_NAMESPACE }} --timeout=300s || true
161+ helm uninstall ingress-nginx -n ingress-nginx --timeout=300s || true
162+ helm uninstall argocd -n ${{ vars.ARGOCD_NAMESPACE }} --timeout=300s || true
185163 continue-on-error : true
186164
187165 # ---------------------------
188- # Delete Namespaces
189- # ---------------------------
190- - name : Delete Namespaces
191- run : |
192- kubectl delete namespace ${{ vars.APP_NAMESPACE }} --ignore-not-found
193- kubectl delete namespace ${{ vars.MONITORING_NAMESPACE }} --ignore-not-found
194- kubectl delete namespace ${{ vars.ARGOCD_NAMESPACE }} --ignore-not-found
195- kubectl delete namespace ingress-nginx --ignore-not-found
196-
197- # Delete Karpenter namespace last and force if needed
198- kubectl delete namespace ${{ vars.KARPENTER_NAMESPACE }} --ignore-not-found --timeout=60s || true
199- kubectl delete namespace ${{ vars.KARPENTER_NAMESPACE }} --force --grace-period=0 --ignore-not-found || true
200- continue-on-error : true
201-
202- # ---------------------------
203- # Delete CRDs (Prometheus & Grafana)
166+ # Delete CRDs (Prometheus, Grafana & ArgoCD) before namespace deletion
204167 # ---------------------------
205168 - name : Delete Monitoring CRDs
206169 run : |
207- kubectl get crd -o name | grep -E 'prometheus|grafana|alertmanager|servicemonitor|prometheusrule' | xargs -r kubectl delete || true
170+ echo "Deleting monitoring CRDs..."
171+ kubectl get crd -o name | grep -E 'prometheus|grafana|alertmanager|servicemonitor|prometheusrule' | xargs -r kubectl delete --timeout=60s || true
172+
173+ echo "Deleting ArgoCD CRDs..."
174+ kubectl get crd -o name | grep 'argoproj.io' | xargs -r kubectl delete --timeout=60s || true
208175 continue-on-error : true
209176
210177 # ---------------------------
211- # Cleanup PVCs & PVs
178+ # Cleanup PVCs & PVs before namespace deletion
212179 # ---------------------------
213180 - name : Cleanup Persistent Storage
214181 run : |
215- kubectl delete pvc --all -A || true
216- kubectl delete pv --all || true
182+ echo "Deleting PVCs..."
183+ kubectl delete pvc --all -A --timeout=120s || true
184+ echo "Deleting PVs..."
185+ kubectl delete pv --all --timeout=120s || true
217186 continue-on-error : true
218187
219188 # ---------------------------
220- # Final cleanup verification
189+ # Delete Namespaces with FORCE cleanup for stuck ones
221190 # ---------------------------
222- - name : Verify Karpenter cleanup
191+ - name : Delete Namespaces with Force Cleanup
223192 run : |
224- echo "Verifying Karpenter cleanup..."
225- kubectl get pods -n ${{ vars.KARPENTER_NAMESPACE }} || echo "Karpenter namespace not found (expected)"
226- kubectl get crd | grep karpenter || echo "No Karpenter CRDs found (expected)"
227- kubectl get validatingwebhookconfiguration | grep karpenter || echo "No Karpenter webhooks found (expected)"
228- kubectl get mutatingwebhookconfiguration | grep karpenter || echo "No Karpenter webhooks found (expected)"
229- helm list -n ${{ vars.KARPENTER_NAMESPACE }} || echo "No Helm releases in karpenter namespace (expected)"
193+ echo "Deleting namespaces with proper cleanup..."
194+
195+ # Function to force delete a namespace if it gets stuck
#######################################
# Report whether a namespace currently exists.
# `kubectl get --ignore-not-found` exits 0 even when nothing matches, so the
# exit status alone cannot answer the question; existence is decided by
# whether kubectl prints a resource name.
# Arguments: $1 - namespace name
# Returns:   0 if the namespace is present, non-zero if it is absent or the
#            lookup itself failed
#######################################
_namespace_exists() {
  local found
  found=$(kubectl get namespace "$1" --ignore-not-found -o name 2>/dev/null) || return 1
  [[ -n "$found" ]]
}

#######################################
# Delete a namespace, falling back to clearing its finalizers when the
# normal delete fails or exceeds its timeout.
# Arguments: $1 - namespace name
# Outputs:   progress messages on stdout
# Returns:   always 0 (best-effort cleanup; a leftover namespace is reported
#            with a WARNING line instead of failing the caller's loop)
#######################################
force_delete_namespace() {
  local ns=${1:-}

  # An empty name would turn the kubectl calls below into "all namespaces".
  if [[ -z "$ns" ]]; then
    echo "WARNING: no namespace name given, skipping"
    return 0
  fi

  echo "Processing namespace: $ns"

  if ! _namespace_exists "$ns"; then
    echo "Namespace $ns does not exist"
    return 0
  fi

  # Try normal deletion first with timeout
  if kubectl delete namespace "$ns" --ignore-not-found --timeout=120s; then
    return 0
  fi

  echo "Normal deletion failed for $ns, trying force deletion..."

  # Remove finalizers through the namespace's finalize subresource.
  # Best-effort: a failure here is reported by the verification below.
  kubectl get namespace "$ns" -o json \
    | jq '.spec.finalizers = []' \
    | kubectl replace --raw "/api/v1/namespaces/${ns}/finalize" -f - \
    || true

  # Wait a moment
  sleep 10

  # Verify deletion
  if _namespace_exists "$ns"; then
    echo "WARNING: Namespace $ns still exists after force deletion"
  else
    echo "Successfully force deleted namespace $ns"
  fi
  return 0
}
224+
225+ # Delete namespaces one by one with force cleanup
226+ for ns in ${{ vars.APP_NAMESPACE }} ${{ vars.MONITORING_NAMESPACE }} ${{ vars.ARGOCD_NAMESPACE }} ingress-nginx ${{ vars.KARPENTER_NAMESPACE }}; do
227+ force_delete_namespace $ns
228+ done
229+
230+ echo "Final namespace check:"
231+ kubectl get namespaces || true
230232 continue-on-error : true
231233
232234 # ---------------------------
233235 # Wait for cleanup to complete
234236 # ---------------------------
235- - name : Wait for cleanup
236- run : sleep 30
237+ - name : Wait for cleanup and verify
238+ run : |
239+ echo "Waiting for cleanup to complete..."
240+ sleep 60
241+
242+ echo "Verifying cleanup..."
243+ kubectl get pods -n ${{ vars.KARPENTER_NAMESPACE }} 2>/dev/null || echo "Karpenter namespace not found (expected)"
244+ kubectl get crd | grep karpenter || echo "No Karpenter CRDs found (expected)"
245+ kubectl get namespaces | grep -E "${{ vars.APP_NAMESPACE }}|${{ vars.MONITORING_NAMESPACE }}|${{ vars.ARGOCD_NAMESPACE }}" || echo "Target namespaces deleted (expected)"
237246
238247 # ---------------------------
239248 # Terraform Destroy
@@ -267,4 +276,4 @@ jobs:
267276 gh variable delete KARPENTER_NAMESPACE --repo $GITHUB_REPOSITORY || true
268277 env :
269278 GITHUB_TOKEN : ${{ secrets.PAT_GITHUB }}
270- continue-on-error : true
279+ continue-on-error : true
0 commit comments