Skip to content

Commit 35e1811

Browse files
authored
feat: improve TensorFusion workload finalizer handling (#107)
1 parent 22ae477 commit 35e1811

File tree

2 files changed

+22
-5
lines changed

2 files changed

+22
-5
lines changed

internal/controller/tensorfusionworkload_controller.go

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -73,19 +73,29 @@ func (r *TensorFusionWorkloadReconciler) Reconcile(ctx context.Context, req ctrl
7373
return ctrl.Result{}, err
7474
}
7575

76-
// First, handle pods with finalizers that need GPU resource cleanup
7776
podList := &corev1.PodList{}
7877
if err := r.List(ctx, podList,
7978
client.InNamespace(req.Namespace),
8079
client.MatchingLabels{constants.WorkloadKey: workload.Name}); err != nil {
8180
return ctrl.Result{}, fmt.Errorf("list pods: %w", err)
8281
}
8382

84-
hasdeletion := false
83+
deleted, err := utils.HandleFinalizer(ctx, workload, r.Client, func(ctx context.Context, _ *tfv1.TensorFusionWorkload) (bool, error) {
84+
// check if all pods are deleted
85+
return len(podList.Items) == 0, nil
86+
})
87+
if err != nil {
88+
return ctrl.Result{}, fmt.Errorf("handle finalizer: %w", err)
89+
}
90+
if deleted {
91+
return ctrl.Result{}, nil
92+
}
93+
94+
// Handle pods with finalizers that need GPU resource cleanup
95+
hasDeletion := false
8596
// Process pods with our finalizer
8697
for i := range podList.Items {
8798
pod := &podList.Items[i]
88-
8999
// Handle our GPU resource cleanup finalizer
90100
deleted, err := utils.HandleFinalizer(ctx, pod, r.Client, func(ctx context.Context, obj *corev1.Pod) (bool, error) {
91101
return r.handlePodGPUCleanup(ctx, pod, workload)
@@ -94,10 +104,10 @@ func (r *TensorFusionWorkloadReconciler) Reconcile(ctx context.Context, req ctrl
94104
if err != nil {
95105
return ctrl.Result{}, err
96106
}
97-
hasdeletion = hasdeletion || deleted
107+
hasDeletion = hasDeletion || deleted
98108
}
99109

100-
if hasdeletion {
110+
if hasDeletion {
101111
return ctrl.Result{Requeue: true, RequeueAfter: constants.PendingRequeueDuration}, nil
102112
}
103113

internal/controller/tensorfusionworkload_controller_test.go

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -112,9 +112,16 @@ var _ = Describe("TensorFusionWorkload Controller", func() {
112112

113113
AfterEach(func() {
114114
// Clean up workload resources
115+
115116
resource := &tensorfusionaiv1.TensorFusionWorkload{}
116117
err := k8sClient.Get(ctx, typeNamespacedName, resource)
117118
if err == nil {
119+
By("remove finalizers from workload")
120+
if len(resource.Finalizers) > 0 {
121+
resource.Finalizers = []string{}
122+
Expect(k8sClient.Update(ctx, resource)).To(Succeed())
123+
}
124+
118125
By("Cleaning up the test workload")
119126
Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
120127
}

0 commit comments

Comments
 (0)