fix(webhook): enabledReplicas hash key should be returned even if annotations is nil and add test (#139)

0x5457 · web-flow · commit 4b57ac29a73a · 2025-04-22T13:38:46.000+08:00
diff --git a/internal/webhook/v1/pod_counter.go b/internal/webhook/v1/pod_counter.go
@@ -26,11 +26,11 @@ func getOrGenerateKey(pod *corev1.Pod) string {
 	}
 	// Try to use pod-template-hash if present
 	if hash, ok := pod.Labels["pod-template-hash"]; ok && hash != "" {
-		return hash
+		return fmt.Sprintf("%s/tf-counter-%s", constants.Domain, hash)
 	}
 
 	// Fallback to object hash
-	return utils.GetObjectHash(pod)
+	return fmt.Sprintf("%s/tf-counter-%s", constants.Domain, utils.GetObjectHash(pod))
 }
 
 // Get gets the counter value from the owner annotation by key
@@ -49,11 +49,11 @@ func (c *TensorFusionPodCounter) Get(ctx context.Context, pod *corev1.Pod) (int3
 	}
 	annotations := ownerObj.GetAnnotations()
 	if annotations == nil {
-		return 0, "", nil
+		return 0, key, nil
 	}
 	val, ok := annotations[key]
 	if !ok || val == "" {
-		return 0, "", nil
+		return 0, key, nil
 	}
 	count, err := strconv.ParseInt(val, 10, 32)
 	if err != nil {
diff --git a/internal/webhook/v1/pod_webhook_test.go b/internal/webhook/v1/pod_webhook_test.go
@@ -19,6 +19,7 @@ package v1
 import (
 	"context"
 	"encoding/json"
+	"fmt"
 	"net/http"
 
 	tfv1 "github.com/NexusGPU/tensor-fusion/api/v1"
@@ -27,6 +28,7 @@ import (
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	admissionv1 "k8s.io/api/admission/v1"
+	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -283,6 +285,141 @@ var _ = Describe("TensorFusionPodMutator", func() {
 		})
 	})
 
+	Context("Handle with EnabledReplicas", func() {
+		It("should only patch enabledReplicas pods", func() {
+			// Create a ReplicaSet as the owner for the pod
+			replicaSet := &appsv1.ReplicaSet{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "test-rs",
+					Namespace: "default",
+				},
+				Spec: appsv1.ReplicaSetSpec{
+					Selector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{
+							"app": "test-app",
+						},
+					},
+					Template: corev1.PodTemplateSpec{
+						ObjectMeta: metav1.ObjectMeta{
+							Labels: map[string]string{
+								"app": "test-app",
+							},
+						},
+						Spec: corev1.PodSpec{
+							Containers: []corev1.Container{
+								{
+									Name:  "test-container",
+									Image: "test-image",
+								},
+							},
+						},
+					},
+				},
+			}
+
+			Expect(k8sclient.Create(ctx, replicaSet)).To(Succeed())
+
+			// Get the ReplicaSet to obtain its UID
+			createdReplicaSet := &appsv1.ReplicaSet{}
+			Expect(k8sclient.Get(ctx, client.ObjectKey{Namespace: "default", Name: "test-rs"}, createdReplicaSet)).To(Succeed())
+			replicaSetUID := createdReplicaSet.GetUID()
+
+			// Create a workload profile
+			workloadProfile := &tfv1.WorkloadProfile{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "test-profile-enabled-replicas",
+					Namespace: "default",
+				},
+				Spec: tfv1.WorkloadProfileSpec{
+					PoolName: "mock",
+					Resources: tfv1.Resources{
+						Requests: tfv1.Resource{
+							Tflops: resource.MustParse("10"),
+							Vram:   resource.MustParse("1Gi"),
+						},
+						Limits: tfv1.Resource{
+							Tflops: resource.MustParse("100"),
+							Vram:   resource.MustParse("16Gi"),
+						},
+					},
+				},
+			}
+			Expect(k8sclient.Create(ctx, workloadProfile)).To(Succeed())
+
+			// Create a pod with TF resources and owner reference
+			trueVal := true
+			enabledReplicas := int32(1)
+
+			pod := &corev1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Namespace:    "default",
+					GenerateName: "test-pod-enabled-replicas-",
+					Labels: map[string]string{
+						constants.TensorFusionEnabledLabelKey: "true",
+						"pod-template-hash":                   "test-hash",
+					},
+					Annotations: map[string]string{
+						constants.GpuPoolKey:                            "mock",
+						constants.WorkloadProfileAnnotation:             "test-profile-enabled-replicas",
+						constants.InjectContainerAnnotation:             "main",
+						constants.WorkloadKey:                           "test-workload",
+						constants.TensorFusionEnabledReplicasAnnotation: fmt.Sprintf("%d", enabledReplicas), // Using the correct constant
+					},
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion: "apps/v1",
+							Kind:       "ReplicaSet",
+							Name:       "test-rs",
+							UID:        replicaSetUID,
+							Controller: &trueVal,
+						},
+					},
+				},
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{
+						{
+							Name:  "main",
+							Image: "test-image",
+						},
+					},
+				},
+			}
+
+			podBytes, err := json.Marshal(pod)
+			Expect(err).NotTo(HaveOccurred())
+
+			req := admission.Request{
+				AdmissionRequest: admissionv1.AdmissionRequest{
+					Object: runtime.RawExtension{
+						Raw: podBytes,
+					},
+					Operation: admissionv1.Create,
+				},
+			}
+
+			resp := mutator.Handle(ctx, req)
+			// First call: Pod mutation should occur since enabledReplicas is 1,
+			// so the response should be allowed and contain patches
+			Expect(resp.Allowed).To(BeTrue())
+			Expect(resp.Patches).NotTo(BeEmpty())
+
+			counter := &TensorFusionPodCounter{Client: k8sclient}
+			count, _, err := counter.Get(ctx, pod)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(count).To(Equal(int32(1)))
+
+			resp = mutator.Handle(ctx, req)
+			// Second call: Pod should be ignored since it's been processed already,
+			// so the response should be allowed but patches should be empty
+			Expect(resp.Allowed).To(BeTrue())
+			Expect(resp.Patches).To(BeEmpty())
+
+			// Clean up
+			Expect(k8sclient.Delete(ctx, replicaSet)).To(Succeed())
+			Expect(k8sclient.Delete(ctx, workloadProfile)).To(Succeed())
+		})
+	})
+
 	Context("ParseTensorFusionInfo", func() {
 		It("should correctly parse TF requirements from pod annotations", func() {
 			// Set up a workload profile for testing
@@ -316,8 +453,9 @@ var _ = Describe("TensorFusionPodMutator", func() {
 						constants.WorkloadProfileAnnotation: "test-profile-parse-tf-resources",
 						constants.WorkloadKey:               "test-workload",
 						// override tflops request
-						constants.TFLOPSRequestAnnotation:   "20",
-						constants.InjectContainerAnnotation: "test-container",
+						constants.TFLOPSRequestAnnotation:               "20",
+						constants.InjectContainerAnnotation:             "test-container",
+						constants.TensorFusionEnabledReplicasAnnotation: "3",
 					},
 				},
 				Spec: corev1.PodSpec{
@@ -337,6 +475,7 @@ var _ = Describe("TensorFusionPodMutator", func() {
 			Expect(tfInfo.Profile.Resources.Requests.Vram.String()).To(Equal("1Gi"))
 			Expect(tfInfo.Profile.Resources.Limits.Tflops.String()).To(Equal("100"))
 			Expect(tfInfo.Profile.Resources.Limits.Vram.String()).To(Equal("16Gi"))
+			Expect(*tfInfo.EnabledReplicas).To(Equal(int32(3)))
 		})
 	})
 
diff --git a/internal/webhook/v1/webhook_suite_test.go b/internal/webhook/v1/webhook_suite_test.go
@@ -153,7 +153,6 @@ var _ = BeforeSuite(func() {
 		if err != nil {
 			return err
 		}
-
 		return conn.Close()
 	}).Should(Succeed())
 })

Original file line number	Diff line number	Diff line change
`@@ -26,11 +26,11 @@ func getOrGenerateKey(pod *corev1.Pod) string {`
`26`	`26`	`}`
`27`	`27`	`// Try to use pod-template-hash if present`
`28`	`28`	`if hash, ok := pod.Labels["pod-template-hash"]; ok && hash != "" {`
`29`		`- return hash`
	`29`	`+ return fmt.Sprintf("%s/tf-counter-%s", constants.Domain, hash)`
`30`	`30`	`}`
`31`	`31`
`32`	`32`	`// Fallback to object hash`
`33`		`- return utils.GetObjectHash(pod)`
	`33`	`+ return fmt.Sprintf("%s/tf-counter-%s", constants.Domain, utils.GetObjectHash(pod))`
`34`	`34`	`}`
`35`	`35`
`36`	`36`	`// Get gets the counter value from the owner annotation by key`
`@@ -49,11 +49,11 @@ func (c *TensorFusionPodCounter) Get(ctx context.Context, pod *corev1.Pod) (int3`
`49`	`49`	`}`
`50`	`50`	`annotations := ownerObj.GetAnnotations()`
`51`	`51`	`if annotations == nil {`
`52`		`- return 0, "", nil`
	`52`	`+ return 0, key, nil`
`53`	`53`	`}`
`54`	`54`	`val, ok := annotations[key]`
`55`	`55`	`if !ok || val == "" {`
`56`		`- return 0, "", nil`
	`56`	`+ return 0, key, nil`
`57`	`57`	`}`
`58`	`58`	`count, err := strconv.ParseInt(val, 10, 32)`
`59`	`59`	`if err != nil {`
Original file line number	Diff line number	Diff line change
`@@ -153,7 +153,6 @@ var _ = BeforeSuite(func() {`
`153`	`153`	`if err != nil {`
`154`	`154`	`return err`
`155`	`155`	`}`
`156`		`-`
`157`	`156`	`return conn.Close()`
`158`	`157`	`}).Should(Succeed())`
`159`	`158`	`})`